// plurfmt.h revision 50294ead5e5d23f5bbfed76e00e6b510bd41eee1
1/* 2******************************************************************************* 3* Copyright (C) 2007-2010, International Business Machines Corporation and 4* others. All Rights Reserved. 5******************************************************************************* 6* 7 8* File PLURFMT.H 9* 10* Modification History:* 11* Date Name Description 12* 13******************************************************************************** 14*/ 15 16#ifndef PLURFMT 17#define PLURFMT 18 19#include "unicode/utypes.h" 20 21/** 22 * \file 23 * \brief C++ API: PluralFormat object 24 */ 25 26#if !UCONFIG_NO_FORMATTING 27 28#include "unicode/numfmt.h" 29#include "unicode/plurrule.h" 30 31U_NAMESPACE_BEGIN 32 33class Hashtable; 34 35/** 36 * <p> 37 * <code>PluralFormat</code> supports the creation of internationalized 38 * messages with plural inflection. It is based on <i>plural 39 * selection</i>, i.e. the caller specifies messages for each 40 * plural case that can appear in the users language and the 41 * <code>PluralFormat</code> selects the appropriate message based on 42 * the number. 43 * </p> 44 * <h4>The Problem of Plural Forms in Internationalized Messages</h4> 45 * <p> 46 * Different languages have different ways to inflect 47 * plurals. Creating internationalized messages that include plural 48 * forms is only feasible when the framework is able to handle plural 49 * forms of <i>all</i> languages correctly. <code>ChoiceFormat</code> 50 * doesn't handle this well, because it attaches a number interval to 51 * each message and selects the message whose interval contains a 52 * given number. This can only handle a finite number of 53 * intervals. But in some languages, like Polish, one plural case 54 * applies to infinitely many intervals (e.g., paucal applies to 55 * numbers ending with 2, 3, or 4 except those ending with 12, 13, or 56 * 14). Thus <code>ChoiceFormat</code> is not adequate. 
57 * </p><p> 58 * <code>PluralFormat</code> deals with this by breaking the problem 59 * into two parts: 60 * <ul> 61 * <li>It uses <code>PluralRules</code> that can define more complex 62 * conditions for a plural case than just a single interval. These plural 63 * rules define both what plural cases exist in a language, and to 64 * which numbers these cases apply. 65 * <li>It provides predefined plural rules for many locales. Thus, the programmer 66 * need not worry about the plural cases of a language. On the flip side, 67 * the localizer does not have to specify the plural cases; he can simply 68 * use the predefined keywords. The whole plural formatting of messages can 69 * be done using localized patterns from resource bundles. 70 * </ul> 71 * </p> 72 * <h4>Usage of <code>PluralFormat</code></h4> 73 * <p> 74 * This discussion assumes that you use <code>PluralFormat</code> with 75 * a predefined set of plural rules. You can create one using one of 76 * the constructors that takes a <code>locale</code> object. To 77 * specify the message pattern, you can either pass it to the 78 * constructor or set it explicitly using the 79 * <code>applyPattern()</code> method. The <code>format()</code> 80 * method takes a number object and selects the message of the 81 * matching plural case. This message will be returned. 82 * </p> 83 * <h5>Patterns and Their Interpretation</h5> 84 * <p> 85 * The pattern text defines the message output for each plural case of the 86 * used locale. The pattern is a sequence of 87 * <code><i>caseKeyword</i>{<i>message</i>}</code> clauses, separated by white 88 * space characters. Each clause assigns the message <code><i>message</i></code> 89 * to the plural case identified by <code><i>caseKeyword</i></code>. 90 * </p><p> 91 * You always have to define a message text for the default plural case 92 * "<code>other</code>" which is contained in every rule set. 
If the plural 93 * rules of the <code>PluralFormat</code> object do not contain a plural case 94 * identified by <code><i>caseKeyword</i></code>, U_DEFAULT_KEYWORD_MISSING 95 * will be set to status. 96 * If you do not specify a message text for a particular plural case, the 97 * message text of the plural case "<code>other</code>" gets assigned to this 98 * plural case. If you specify more than one message for the same plural case, 99 * U_DUPLICATE_KEYWORD will be set to status. 100 * <br> 101 * Spaces between <code><i>caseKeyword</i></code> and 102 * <code><i>message</i></code> will be ignored; spaces within 103 * <code><i>message</i></code> will be preserved. 104 * </p><p> 105 * The message text for a particular plural case may contain other message 106 * format patterns. <code>PluralFormat</code> preserves these so that you 107 * can use the strings produced by <code>PluralFormat</code> with other 108 * formatters. If you are using <code>PluralFormat</code> inside a 109 * <code>MessageFormat</code> pattern, <code>MessageFormat</code> will 110 * automatically evaluate the resulting format pattern.<br> 111 * Thus, curly braces (<code>{</code>, <code>}</code>) are <i>only</i> allowed 112 * in message texts to define a nested format pattern.<br> 113 * The pound sign (<code>#</code>) will be interpreted as the number placeholder 114 * in the message text, if it is not contained in curly braces (to preserve 115 * <code>NumberFormat</code> patterns). <code>PluralFormat</code> will 116 * replace each of those pound signs by the number passed to the 117 * <code>format()</code> method. It will be formatted using a 118 * <code>NumberFormat</code> for the <code>PluralFormat</code>'s locale. If you 119 * need special number formatting, you have to explicitly specify a 120 * <code>NumberFormat</code> for the <code>PluralFormat</code> to use. 
121 * </p> 122 * Example 123 * <pre> 124 * UErrorCode status = U_ZERO_ERROR; 125 * MessageFormat* msgFmt = new MessageFormat(UnicodeString("{0, plural, 126 * one{{0, number, C''est #,##0.0# fichier}} other {Ce sont # fichiers}} dans la liste."), 127 * Locale("fr"), status); 128 * if (U_FAILURE(status)) { 129 * return; 130 * } 131 * Formattable args1[] = {(int32_t)0}; 132 * Formattable args2[] = {(int32_t)3}; 133 * FieldPosition ignore(FieldPosition::DONT_CARE); 134 * UnicodeString result; 135 * msgFmt->format(args1, 1, result, ignore, status); 136 * cout << result << endl; 137 * result.remove(); 138 * msgFmt->format(args2, 1, result, ignore, status); 139 * cout << result << endl; 140 * </pre> 141 * Produces the output:<br> 142 * <code>C'est 0,0 fichier dans la liste.</code><br> 143 * <code>Ce sont 3 fichiers dans la liste."</code> 144 * <p> 145 * <strong>Note:</strong><br> 146 * Currently <code>PluralFormat</code> 147 * does not make use of quotes like <code>MessageFormat</code>. 148 * If you use plural format strings with <code>MessageFormat</code> and want 149 * to use a quote sign "<code>'</code>", you have to write "<code>''</code>". 150 * <code>MessageFormat</code> unquotes this pattern and passes the unquoted 151 * pattern to <code>PluralFormat</code>. It's a bit trickier if you use 152 * nested formats that do quoting. In the example above, we wanted to insert 153 * "<code>'</code>" in the number format pattern. Since 154 * <code>NumberFormat</code> supports quotes, we had to insert 155 * "<code>''</code>". But since <code>MessageFormat</code> unquotes the 156 * pattern before it gets passed to <code>PluralFormat</code>, we have to 157 * double these quotes, i.e. write "<code>''''</code>". 158 * </p> 159 * <h4>Defining Custom Plural Rules</h4> 160 * <p>If you need to use <code>PluralFormat</code> with custom rules, you can 161 * create a <code>PluralRules</code> object and pass it to 162 * <code>PluralFormat</code>'s constructor. 
If you also specify a locale in this 163 * constructor, this locale will be used to format the number in the message 164 * texts. 165 * </p><p> 166 * For more information about <code>PluralRules</code>, see 167 * {@link PluralRules}. 168 * </p> 169 * 170 * ported from Java 171 * @stable ICU 4.0 172 */ 173 174class U_I18N_API PluralFormat : public Format { 175public: 176 177 /** 178 * Creates a new <code>PluralFormat</code> for the default locale. 179 * This locale will be used to get the set of plural rules and for standard 180 * number formatting. 181 * @param status output param set to success/failure code on exit, which 182 * must not indicate a failure before the function call. 183 * @stable ICU 4.0 184 */ 185 PluralFormat(UErrorCode& status); 186 187 /** 188 * Creates a new <code>PluralFormat</code> for a given locale. 189 * @param locale the <code>PluralFormat</code> will be configured with 190 * rules for this locale. This locale will also be used for 191 * standard number formatting. 192 * @param status output param set to success/failure code on exit, which 193 * must not indicate a failure before the function call. 194 * @stable ICU 4.0 195 */ 196 PluralFormat(const Locale& locale, UErrorCode& status); 197 198 /** 199 * Creates a new <code>PluralFormat</code> for a given set of rules. 200 * The standard number formatting will be done using the default locale. 201 * @param rules defines the behavior of the <code>PluralFormat</code> 202 * object. 203 * @param status output param set to success/failure code on exit, which 204 * must not indicate a failure before the function call. 205 * @stable ICU 4.0 206 */ 207 PluralFormat(const PluralRules& rules, UErrorCode& status); 208 209 /** 210 * Creates a new <code>PluralFormat</code> for a given set of rules. 211 * The standard number formatting will be done using the given locale. 212 * @param locale the default number formatting will be done using this 213 * locale. 
214 * @param rules defines the behavior of the <code>PluralFormat</code> 215 * object. 216 * @param status output param set to success/failure code on exit, which 217 * must not indicate a failure before the function call. 218 * @stable ICU 4.0 219 */ 220 PluralFormat(const Locale& locale, const PluralRules& rules, UErrorCode& status); 221 222 /** 223 * Creates a new <code>PluralFormat</code> for a given pattern string. 224 * The default locale will be used to get the set of plural rules and for 225 * standard number formatting. 226 * @param pattern the pattern for this <code>PluralFormat</code>. 227 * errors are returned to status if the pattern is invalid. 228 * @param status output param set to success/failure code on exit, which 229 * must not indicate a failure before the function call. 230 * @stable ICU 4.0 231 */ 232 PluralFormat(const UnicodeString& pattern, UErrorCode& status); 233 234 /** 235 * Creates a new <code>PluralFormat</code> for a given pattern string and 236 * locale. 237 * The locale will be used to get the set of plural rules and for 238 * standard number formatting. 239 * @param locale the <code>PluralFormat</code> will be configured with 240 * rules for this locale. This locale will also be used for 241 * standard number formatting. 242 * @param pattern the pattern for this <code>PluralFormat</code>. 243 * errors are returned to status if the pattern is invalid. 244 * @param status output param set to success/failure code on exit, which 245 * must not indicate a failure before the function call. 246 * @stable ICU 4.0 247 */ 248 PluralFormat(const Locale& locale, const UnicodeString& pattern, UErrorCode& status); 249 250 /** 251 * Creates a new <code>PluralFormat</code> for a given set of rules, a 252 * pattern and a locale. 253 * @param rules defines the behavior of the <code>PluralFormat</code> 254 * object. 255 * @param pattern the pattern for this <code>PluralFormat</code>. 256 * errors are returned to status if the pattern is invalid. 
257 * @param status output param set to success/failure code on exit, which 258 * must not indicate a failure before the function call. 259 * @stable ICU 4.0 260 */ 261 PluralFormat(const PluralRules& rules, 262 const UnicodeString& pattern, 263 UErrorCode& status); 264 265 /** 266 * Creates a new <code>PluralFormat</code> for a given set of rules, a 267 * pattern and a locale. 268 * @param locale the <code>PluralFormat</code> will be configured with 269 * rules for this locale. This locale will also be used for 270 * standard number formatting. 271 * @param rules defines the behavior of the <code>PluralFormat</code> 272 * object. 273 * @param pattern the pattern for this <code>PluralFormat</code>. 274 * errors are returned to status if the pattern is invalid. 275 * @param status output param set to success/failure code on exit, which 276 * must not indicate a failure before the function call. 277 * @stable ICU 4.0 278 */ 279 PluralFormat(const Locale& locale, 280 const PluralRules& rules, 281 const UnicodeString& pattern, 282 UErrorCode& status); 283 284 /** 285 * copy constructor. 286 * @stable ICU 4.0 287 */ 288 PluralFormat(const PluralFormat& other); 289 290 /** 291 * Destructor. 292 * @stable ICU 4.0 293 */ 294 virtual ~PluralFormat(); 295 296 /** 297 * Sets the pattern used by this plural format. 298 * The method parses the pattern and creates a map of format strings 299 * for the plural rules. 300 * Patterns and their interpretation are specified in the class description. 301 * 302 * @param pattern the pattern for this plural format 303 * errors are returned to status if the pattern is invalid. 304 * @param status output param set to success/failure code on exit, which 305 * must not indicate a failure before the function call. 306 * @stable ICU 4.0 307 */ 308 void applyPattern(const UnicodeString& pattern, UErrorCode& status); 309 310 311 using Format::format; 312 313 /** 314 * Formats a plural message for a given number. 
315 * 316 * @param number a number for which the plural message should be formatted 317 * for. If no pattern has been applied to this 318 * <code>PluralFormat</code> object yet, the formatted number 319 * will be returned. 320 * @param status output param set to success/failure code on exit, which 321 * must not indicate a failure before the function call. 322 * @return the string containing the formatted plural message. 323 * @stable ICU 4.0 324 */ 325 UnicodeString format(int32_t number, UErrorCode& status) const; 326 327 /** 328 * Formats a plural message for a given number. 329 * 330 * @param number a number for which the plural message should be formatted 331 * for. If no pattern has been applied to this 332 * PluralFormat object yet, the formatted number 333 * will be returned. 334 * @param status output param set to success or failure code on exit, which 335 * must not indicate a failure before the function call. 336 * @return the string containing the formatted plural message. 337 * @stable ICU 4.0 338 */ 339 UnicodeString format(double number, UErrorCode& status) const; 340 341 /** 342 * Formats a plural message for a given number. 343 * 344 * @param number a number for which the plural message should be formatted 345 * for. If no pattern has been applied to this 346 * <code>PluralFormat</code> object yet, the formatted number 347 * will be returned. 348 * @param appendTo output parameter to receive result. 349 * result is appended to existing contents. 350 * @param pos On input: an alignment field, if desired. 351 * On output: the offsets of the alignment field. 352 * @param status output param set to success/failure code on exit, which 353 * must not indicate a failure before the function call. 354 * @return the string containing the formatted plural message. 
355 * @stable ICU 4.0 356 */ 357 UnicodeString& format(int32_t number, 358 UnicodeString& appendTo, 359 FieldPosition& pos, 360 UErrorCode& status) const; 361 362 /** 363 * Formats a plural message for a given number. 364 * 365 * @param number a number for which the plural message should be formatted 366 * for. If no pattern has been applied to this 367 * PluralFormat object yet, the formatted number 368 * will be returned. 369 * @param appendTo output parameter to receive result. 370 * result is appended to existing contents. 371 * @param pos On input: an alignment field, if desired. 372 * On output: the offsets of the alignment field. 373 * @param status output param set to success/failure code on exit, which 374 * must not indicate a failure before the function call. 375 * @return the string containing the formatted plural message. 376 * @stable ICU 4.0 377 */ 378 UnicodeString& format(double number, 379 UnicodeString& appendTo, 380 FieldPosition& pos, 381 UErrorCode& status) const; 382 383 /** 384 * Sets the locale used by this <code>PluraFormat</code> object. 385 * Note: Calling this method resets this <code>PluraFormat</code> object, 386 * i.e., a pattern that was applied previously will be removed, 387 * and the NumberFormat is set to the default number format for 388 * the locale. The resulting format behaves the same as one 389 * constructed from {@link #PluralFormat(const Locale& locale, UErrorCode& status)}. 390 * @param locale the <code>locale</code> to use to configure the formatter. 391 * @param status output param set to success/failure code on exit, which 392 * must not indicate a failure before the function call. 393 * @stable ICU 4.0 394 */ 395 void setLocale(const Locale& locale, UErrorCode& status); 396 397 /** 398 * Sets the number format used by this formatter. You only need to 399 * call this if you want a different number format than the default 400 * formatter for the locale. 401 * @param format the number format to use. 
402 * @param status output param set to success/failure code on exit, which 403 * must not indicate a failure before the function call. 404 * @stable ICU 4.0 405 */ 406 void setNumberFormat(const NumberFormat* format, UErrorCode& status); 407 408 /** 409 * Assignment operator 410 * 411 * @param other the PluralFormat object to copy from. 412 * @stable ICU 4.0 413 */ 414 PluralFormat& operator=(const PluralFormat& other); 415 416 /** 417 * Return true if another object is semantically equal to this one. 418 * 419 * @param other the PluralFormat object to be compared with. 420 * @return true if other is semantically equal to this. 421 * @stable ICU 4.0 422 */ 423 virtual UBool operator==(const Format& other) const; 424 425 /** 426 * Return true if another object is semantically unequal to this one. 427 * 428 * @param other the PluralFormat object to be compared with. 429 * @return true if other is semantically unequal to this. 430 * @stable ICU 4.0 431 */ 432 virtual UBool operator!=(const Format& other) const; 433 434 /** 435 * Clones this Format object polymorphically. The caller owns the 436 * result and should delete it when done. 437 * @stable ICU 4.0 438 */ 439 virtual Format* clone(void) const; 440 441 /** 442 * Redeclared Format method. 443 * 444 * @param obj The object to be formatted into a string. 445 * @param appendTo output parameter to receive result. 446 * Result is appended to existing contents. 447 * @param pos On input: an alignment field, if desired. 448 * On output: the offsets of the alignment field. 449 * @param status output param filled with success/failure status. 450 * @return Reference to 'appendTo' parameter. 451 * @stable ICU 4.0 452 */ 453 UnicodeString& format(const Formattable& obj, 454 UnicodeString& appendTo, 455 FieldPosition& pos, 456 UErrorCode& status) const; 457 458 /** 459 * Returns the pattern from applyPattern() or constructor(). 460 * 461 * @param appendTo output parameter to receive result. 
462 * Result is appended to existing contents. 463 * @return the UnicodeString with inserted pattern. 464 * @stable ICU 4.0 465 */ 466 UnicodeString& toPattern(UnicodeString& appendTo); 467 468 /** 469 * This method is not yet supported by <code>PluralFormat</code>. 470 * <P> 471 * Before calling, set parse_pos.index to the offset you want to start 472 * parsing at in the source. After calling, parse_pos.index is the end of 473 * the text you parsed. If error occurs, index is unchanged. 474 * <P> 475 * When parsing, leading whitespace is discarded (with a successful parse), 476 * while trailing whitespace is left as is. 477 * <P> 478 * See Format::parseObject() for more. 479 * 480 * @param source The string to be parsed into an object. 481 * @param result Formattable to be set to the parse result. 482 * If parse fails, return contents are undefined. 483 * @param parse_pos The position to start parsing at. Upon return 484 * this param is set to the position after the 485 * last character successfully parsed. If the 486 * source is not parsed successfully, this param 487 * will remain unchanged. 488 * @stable ICU 4.0 489 */ 490 virtual void parseObject(const UnicodeString& source, 491 Formattable& result, 492 ParsePosition& parse_pos) const; 493 494 /** 495 * ICU "poor man's RTTI", returns a UClassID for this class. 496 * 497 * @stable ICU 4.0 498 * 499 */ 500 static UClassID U_EXPORT2 getStaticClassID(void); 501 502 /** 503 * ICU "poor man's RTTI", returns a UClassID for the actual class. 
504 * 505 * @stable ICU 4.0 506 */ 507 virtual UClassID getDynamicClassID() const; 508 509private: 510 typedef enum fmtToken { 511 none, 512 tLetter, 513 tNumber, 514 tSpace, 515 tNumberSign, 516 tLeftBrace, 517 tRightBrace 518 }fmtToken; 519 520 Locale locale; 521 PluralRules* pluralRules; 522 UnicodeString pattern; 523 Hashtable *fParsedValuesHash; 524 NumberFormat* numberFormat; 525 NumberFormat* replacedNumberFormat; 526 527 PluralFormat(); // default constructor not implemented 528 void init(const PluralRules* rules, const Locale& curlocale, UErrorCode& status); 529 UBool inRange(UChar ch, fmtToken& type); 530 UBool checkSufficientDefinition(); 531 void parsingFailure(); 532 UnicodeString insertFormattedNumber(double number, 533 UnicodeString& message, 534 UnicodeString& appendTo, 535 FieldPosition& pos) const; 536 void copyHashtable(Hashtable *other, UErrorCode& status); 537}; 538 539U_NAMESPACE_END 540 541#endif /* #if !UCONFIG_NO_FORMATTING */ 542 543#endif // _PLURFMT 544//eof 545