1/******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2011, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ******************************************************************** 6 * 7 * File MSGFMT.CPP 8 * 9 * Modification History: 10 * 11 * Date Name Description 12 * 02/19/97 aliu Converted from java. 13 * 03/20/97 helena Finished first cut of implementation. 14 * 04/10/97 aliu Made to work on AIX. Added stoi to replace wtoi. 15 * 06/11/97 helena Fixed addPattern to take the pattern correctly. 16 * 06/17/97 helena Fixed the getPattern to return the correct pattern. 17 * 07/09/97 helena Made ParsePosition into a class. 18 * 02/22/99 stephen Removed character literals for EBCDIC safety 19 * 11/01/09 kirtig Added SelectFormat 20 ********************************************************************/ 21 22#include "unicode/utypes.h" 23 24#if !UCONFIG_NO_FORMATTING 25 26#include "unicode/appendable.h" 27#include "unicode/choicfmt.h" 28#include "unicode/datefmt.h" 29#include "unicode/decimfmt.h" 30#include "unicode/localpointer.h" 31#include "unicode/msgfmt.h" 32#include "unicode/plurfmt.h" 33#include "unicode/rbnf.h" 34#include "unicode/selfmt.h" 35#include "unicode/smpdtfmt.h" 36#include "unicode/umsg.h" 37#include "unicode/ustring.h" 38#include "cmemory.h" 39#include "patternprops.h" 40#include "messageimpl.h" 41#include "msgfmt_impl.h" 42#include "uassert.h" 43#include "ustrfmt.h" 44#include "util.h" 45#include "uvector.h" 46 47// ***************************************************************************** 48// class MessageFormat 49// ***************************************************************************** 50 51#define SINGLE_QUOTE ((UChar)0x0027) 52#define COMMA ((UChar)0x002C) 53#define LEFT_CURLY_BRACE ((UChar)0x007B) 54#define RIGHT_CURLY_BRACE ((UChar)0x007D) 55 56//--------------------------------------- 57// static data 58 59static const UChar ID_NUMBER[] = { 60 0x6E, 0x75, 0x6D, 0x62, 0x65, 0x72, 0 /* "number" */ 61}; 62static const UChar ID_DATE[] = { 63 0x64, 0x61, 0x74, 0x65, 0 /* "date" */ 64}; 65static const UChar ID_TIME[] = { 66 0x74, 0x69, 0x6D, 0x65, 0 /* "time" */ 67}; 68static const UChar ID_SPELLOUT[] = { 69 0x73, 0x70, 0x65, 0x6c, 0x6c, 0x6f, 0x75, 0x74, 0 /* "spellout" */ 70}; 71static const UChar ID_ORDINAL[] = { 72 0x6f, 0x72, 0x64, 0x69, 0x6e, 0x61, 0x6c, 0 /* "ordinal" */ 73}; 74static const UChar ID_DURATION[] = { 75 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0 /* "duration" */ 76}; 77 78// MessageFormat Type List Number, Date, Time or Choice 79static const UChar * const TYPE_IDS[] = { 80 ID_NUMBER, 81 ID_DATE, 82 ID_TIME, 83 ID_SPELLOUT, 84 ID_ORDINAL, 85 ID_DURATION, 86 NULL, 87}; 88 89static const UChar ID_EMPTY[] = { 90 0 /* empty string, used for default so that null can mark end of list */ 91}; 92static const UChar ID_CURRENCY[] = { 93 0x63, 0x75, 0x72, 0x72, 0x65, 0x6E, 0x63, 0x79, 0 /* "currency" */ 94}; 95static const UChar ID_PERCENT[] = { 96 0x70, 0x65, 0x72, 0x63, 0x65, 0x6E, 0x74, 0 /* "percent" */ 97}; 98static const UChar ID_INTEGER[] = { 99 0x69, 0x6E, 0x74, 0x65, 0x67, 0x65, 0x72, 0 /* "integer" */ 100}; 101 102// NumberFormat modifier list, default, currency, percent or integer 103static const UChar * const NUMBER_STYLE_IDS[] = { 104 ID_EMPTY, 105 ID_CURRENCY, 106 ID_PERCENT, 107 ID_INTEGER, 108 NULL, 109}; 110 111static const UChar ID_SHORT[] = { 112 0x73, 0x68, 0x6F, 0x72, 0x74, 0 /* "short" */ 113}; 114static const UChar ID_MEDIUM[] = { 115 0x6D, 0x65, 0x64, 0x69, 0x75, 0x6D, 0 /* "medium" */ 116}; 117static const UChar ID_LONG[] = { 118 0x6C, 0x6F, 0x6E, 0x67, 0 /* "long" */ 119}; 120static const UChar ID_FULL[] = { 121 0x66, 0x75, 0x6C, 0x6C, 0 /* "full" */ 122}; 123 124// DateFormat modifier list, default, short, medium, long or full 125static const UChar * const DATE_STYLE_IDS[] = { 126 ID_EMPTY, 127 ID_SHORT, 128 ID_MEDIUM, 129 ID_LONG, 130 ID_FULL, 131 NULL, 132}; 133 134static const U_NAMESPACE_QUALIFIER DateFormat::EStyle DATE_STYLES[] = { 135 U_NAMESPACE_QUALIFIER DateFormat::kDefault, 136 U_NAMESPACE_QUALIFIER DateFormat::kShort, 137 U_NAMESPACE_QUALIFIER DateFormat::kMedium, 138 U_NAMESPACE_QUALIFIER DateFormat::kLong, 139 U_NAMESPACE_QUALIFIER DateFormat::kFull, 140}; 141 142static const int32_t DEFAULT_INITIAL_CAPACITY = 10; 143 144static const UChar NULL_STRING[] = { 145 0x6E, 0x75, 0x6C, 0x6C, 0 // "null" 146}; 147 148static const UChar OTHER_STRING[] = { 149 0x6F, 0x74, 0x68, 0x65, 0x72, 0 // "other" 150}; 151 152U_CDECL_BEGIN 153static UBool U_CALLCONV equalFormatsForHash(const UHashTok key1, 154 const UHashTok key2) { 155 return U_NAMESPACE_QUALIFIER MessageFormat::equalFormats(key1.pointer, key2.pointer); 156} 157 158U_CDECL_END 159 160U_NAMESPACE_BEGIN 161 162// ------------------------------------- 163UOBJECT_DEFINE_RTTI_IMPLEMENTATION(MessageFormat) 164UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(MessageFormat::DummyFormat) 165UOBJECT_DEFINE_RTTI_IMPLEMENTATION(FormatNameEnumeration) 166 167//-------------------------------------------------------------------- 168 169/** 170 * Convert an integer value to a string and append the result to 171 * the given UnicodeString. 172 */ 173static UnicodeString& itos(int32_t i, UnicodeString& appendTo) { 174 UChar temp[16]; 175 uprv_itou(temp,16,i,10,0); // 10 == radix 176 appendTo.append(temp); 177 return appendTo; 178} 179 180 181// AppendableWrapper: encapsulates the result of formatting, keeping track 182// of the string and its length. 183class AppendableWrapper : public UMemory { 184public: 185 AppendableWrapper(Appendable& appendable) : app(appendable), len(0) { 186 } 187 void append(const UnicodeString& s) { 188 app.appendString(s.getBuffer(), s.length()); 189 len += s.length(); 190 } 191 void append(const UChar* s, const int32_t sLength) { 192 app.appendString(s, sLength); 193 len += sLength; 194 } 195 void append(const UnicodeString& s, int32_t start, int32_t length) { 196 append(s.tempSubString(start, length)); 197 } 198 void formatAndAppend(const Format* formatter, const Formattable& arg, UErrorCode& ec) { 199 UnicodeString s; 200 formatter->format(arg, s, ec); 201 if (U_SUCCESS(ec)) { 202 append(s); 203 } 204 } 205 int32_t length() { 206 return len; 207 } 208private: 209 Appendable& app; 210 int32_t len; 211}; 212 213 214// ------------------------------------- 215// Creates a MessageFormat instance based on the pattern. 216 217MessageFormat::MessageFormat(const UnicodeString& pattern, 218 UErrorCode& success) 219: fLocale(Locale::getDefault()), // Uses the default locale 220 msgPattern(success), 221 formatAliases(NULL), 222 formatAliasesCapacity(0), 223 argTypes(NULL), 224 argTypeCount(0), 225 argTypeCapacity(0), 226 hasArgTypeConflicts(FALSE), 227 defaultNumberFormat(NULL), 228 defaultDateFormat(NULL), 229 cachedFormatters(NULL), 230 customFormatArgStarts(NULL), 231 pluralProvider(&fLocale) 232{ 233 setLocaleIDs(fLocale.getName(), fLocale.getName()); 234 applyPattern(pattern, success); 235} 236 237MessageFormat::MessageFormat(const UnicodeString& pattern, 238 const Locale& newLocale, 239 UErrorCode& success) 240: fLocale(newLocale), 241 msgPattern(success), 242 formatAliases(NULL), 243 formatAliasesCapacity(0), 244 argTypes(NULL), 245 argTypeCount(0), 246 argTypeCapacity(0), 247 hasArgTypeConflicts(FALSE), 248 defaultNumberFormat(NULL), 249 defaultDateFormat(NULL), 250 cachedFormatters(NULL), 251 customFormatArgStarts(NULL), 252 pluralProvider(&fLocale) 253{ 254 setLocaleIDs(fLocale.getName(), fLocale.getName()); 255 applyPattern(pattern, success); 256} 257 258MessageFormat::MessageFormat(const UnicodeString& pattern, 259 const Locale& newLocale, 260 UParseError& parseError, 261 UErrorCode& success) 262: fLocale(newLocale), 263 msgPattern(success), 264 formatAliases(NULL), 265 formatAliasesCapacity(0), 266 argTypes(NULL), 267 argTypeCount(0), 268 argTypeCapacity(0), 269 hasArgTypeConflicts(FALSE), 270 defaultNumberFormat(NULL), 271 defaultDateFormat(NULL), 272 cachedFormatters(NULL), 273 customFormatArgStarts(NULL), 274 pluralProvider(&fLocale) 275{ 276 setLocaleIDs(fLocale.getName(), fLocale.getName()); 277 applyPattern(pattern, parseError, success); 278} 279 280MessageFormat::MessageFormat(const MessageFormat& that) 281: 282 Format(that), 283 fLocale(that.fLocale), 284 msgPattern(that.msgPattern), 285 formatAliases(NULL), 286 formatAliasesCapacity(0), 287 argTypes(NULL), 288 argTypeCount(0), 289 argTypeCapacity(0), 290 hasArgTypeConflicts(that.hasArgTypeConflicts), 291 defaultNumberFormat(NULL), 292 defaultDateFormat(NULL), 293 cachedFormatters(NULL), 294 customFormatArgStarts(NULL), 295 pluralProvider(&fLocale) 296{ 297 // This will take care of creating the hash tables (since they are NULL). 298 UErrorCode ec = U_ZERO_ERROR; 299 copyObjects(that, ec); 300 if (U_FAILURE(ec)) { 301 resetPattern(); 302 } 303} 304 305MessageFormat::~MessageFormat() 306{ 307 uhash_close(cachedFormatters); 308 uhash_close(customFormatArgStarts); 309 310 uprv_free(argTypes); 311 uprv_free(formatAliases); 312 delete defaultNumberFormat; 313 delete defaultDateFormat; 314} 315 316//-------------------------------------------------------------------- 317// Variable-size array management 318 319/** 320 * Allocate argTypes[] to at least the given capacity and return 321 * TRUE if successful. If not, leave argTypes[] unchanged. 322 * 323 * If argTypes is NULL, allocate it. If it is not NULL, enlarge it 324 * if necessary to be at least as large as specified. 325 */ 326UBool MessageFormat::allocateArgTypes(int32_t capacity, UErrorCode& status) { 327 if (U_FAILURE(status)) { 328 return FALSE; 329 } 330 if (argTypeCapacity >= capacity) { 331 return TRUE; 332 } 333 if (capacity < DEFAULT_INITIAL_CAPACITY) { 334 capacity = DEFAULT_INITIAL_CAPACITY; 335 } else if (capacity < 2*argTypeCapacity) { 336 capacity = 2*argTypeCapacity; 337 } 338 Formattable::Type* a = (Formattable::Type*) 339 uprv_realloc(argTypes, sizeof(*argTypes) * capacity); 340 if (a == NULL) { 341 status = U_MEMORY_ALLOCATION_ERROR; 342 return FALSE; 343 } 344 argTypes = a; 345 argTypeCapacity = capacity; 346 return TRUE; 347} 348 349// ------------------------------------- 350// assignment operator 351 352const MessageFormat& 353MessageFormat::operator=(const MessageFormat& that) 354{ 355 if (this != &that) { 356 // Calls the super class for assignment first. 357 Format::operator=(that); 358 359 setLocale(that.fLocale); 360 msgPattern = that.msgPattern; 361 hasArgTypeConflicts = that.hasArgTypeConflicts; 362 363 UErrorCode ec = U_ZERO_ERROR; 364 copyObjects(that, ec); 365 if (U_FAILURE(ec)) { 366 resetPattern(); 367 } 368 } 369 return *this; 370} 371 372UBool 373MessageFormat::operator==(const Format& rhs) const 374{ 375 if (this == &rhs) return TRUE; 376 377 MessageFormat& that = (MessageFormat&)rhs; 378 379 // Check class ID before checking MessageFormat members 380 if (!Format::operator==(rhs) || 381 msgPattern != that.msgPattern || 382 fLocale != that.fLocale) { 383 return FALSE; 384 } 385 386 // Compare hashtables. 387 if ((customFormatArgStarts == NULL) != (that.customFormatArgStarts == NULL)) { 388 return FALSE; 389 } 390 if (customFormatArgStarts == NULL) { 391 return TRUE; 392 } 393 394 UErrorCode ec = U_ZERO_ERROR; 395 const int32_t count = uhash_count(customFormatArgStarts); 396 const int32_t rhs_count = uhash_count(that.customFormatArgStarts); 397 if (count != rhs_count) { 398 return FALSE; 399 } 400 int32_t idx = 0, rhs_idx = 0, pos = -1, rhs_pos = -1; 401 for (; idx < count && rhs_idx < rhs_count && U_SUCCESS(ec); ++idx, ++rhs_idx) { 402 const UHashElement* cur = uhash_nextElement(customFormatArgStarts, &pos); 403 const UHashElement* rhs_cur = uhash_nextElement(that.customFormatArgStarts, &rhs_pos); 404 if (cur->key.integer != rhs_cur->key.integer) { 405 return FALSE; 406 } 407 const Format* format = (const Format*)uhash_iget(cachedFormatters, cur->key.integer); 408 const Format* rhs_format = (const Format*)uhash_iget(that.cachedFormatters, rhs_cur->key.integer); 409 if (*format != *rhs_format) { 410 return FALSE; 411 } 412 } 413 return TRUE; 414} 415 416// ------------------------------------- 417// Creates a copy of this MessageFormat, the caller owns the copy. 418 419Format* 420MessageFormat::clone() const 421{ 422 return new MessageFormat(*this); 423} 424 425// ------------------------------------- 426// Sets the locale of this MessageFormat object to theLocale. 427 428void 429MessageFormat::setLocale(const Locale& theLocale) 430{ 431 if (fLocale != theLocale) { 432 delete defaultNumberFormat; 433 defaultNumberFormat = NULL; 434 delete defaultDateFormat; 435 defaultDateFormat = NULL; 436 fLocale = theLocale; 437 setLocaleIDs(fLocale.getName(), fLocale.getName()); 438 pluralProvider.reset(&fLocale); 439 } 440} 441 442// ------------------------------------- 443// Gets the locale of this MessageFormat object. 444 445const Locale& 446MessageFormat::getLocale() const 447{ 448 return fLocale; 449} 450 451void 452MessageFormat::applyPattern(const UnicodeString& newPattern, 453 UErrorCode& status) 454{ 455 UParseError parseError; 456 applyPattern(newPattern,parseError,status); 457} 458 459 460// ------------------------------------- 461// Applies the new pattern and returns an error if the pattern 462// is not correct. 463void 464MessageFormat::applyPattern(const UnicodeString& pattern, 465 UParseError& parseError, 466 UErrorCode& ec) 467{ 468 if(U_FAILURE(ec)) { 469 return; 470 } 471 msgPattern.parse(pattern, &parseError, ec); 472 cacheExplicitFormats(ec); 473 474 if (U_FAILURE(ec)) { 475 resetPattern(); 476 } 477} 478 479void MessageFormat::resetPattern() { 480 msgPattern.clear(); 481 uhash_close(cachedFormatters); 482 cachedFormatters = NULL; 483 uhash_close(customFormatArgStarts); 484 customFormatArgStarts = NULL; 485 argTypeCount = 0; 486 hasArgTypeConflicts = FALSE; 487} 488 489void 490MessageFormat::applyPattern(const UnicodeString& pattern, 491 UMessagePatternApostropheMode aposMode, 492 UParseError* parseError, 493 UErrorCode& status) { 494 if (aposMode != msgPattern.getApostropheMode()) { 495 msgPattern.clearPatternAndSetApostropheMode(aposMode); 496 } 497 applyPattern(pattern, *parseError, status); 498} 499 500// ------------------------------------- 501// Converts this MessageFormat instance to a pattern. 502 503UnicodeString& 504MessageFormat::toPattern(UnicodeString& appendTo) const { 505 if ((customFormatArgStarts != NULL && 0 != uhash_count(customFormatArgStarts)) || 506 0 == msgPattern.countParts() 507 ) { 508 appendTo.setToBogus(); 509 return appendTo; 510 } 511 return appendTo.append(msgPattern.getPatternString()); 512} 513 514int32_t MessageFormat::nextTopLevelArgStart(int32_t partIndex) const { 515 if (partIndex != 0) { 516 partIndex = msgPattern.getLimitPartIndex(partIndex); 517 } 518 for (;;) { 519 UMessagePatternPartType type = msgPattern.getPartType(++partIndex); 520 if (type == UMSGPAT_PART_TYPE_ARG_START) { 521 return partIndex; 522 } 523 if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) { 524 return -1; 525 } 526 } 527} 528 529void MessageFormat::setArgStartFormat(int32_t argStart, 530 Format* formatter, 531 UErrorCode& status) { 532 if (U_FAILURE(status)) { 533 delete formatter; 534 } 535 if (cachedFormatters == NULL) { 536 cachedFormatters=uhash_open(uhash_hashLong, uhash_compareLong, 537 equalFormatsForHash, &status); 538 if (U_FAILURE(status)) { 539 delete formatter; 540 return; 541 } 542 uhash_setValueDeleter(cachedFormatters, uhash_deleteUObject); 543 } 544 if (formatter == NULL) { 545 formatter = new DummyFormat(); 546 } 547 uhash_iput(cachedFormatters, argStart, formatter, &status); 548} 549 550 551UBool MessageFormat::argNameMatches(int32_t partIndex, const UnicodeString& argName, int32_t argNumber) { 552 const MessagePattern::Part& part = msgPattern.getPart(partIndex); 553 return part.getType() == UMSGPAT_PART_TYPE_ARG_NAME ? 554 msgPattern.partSubstringMatches(part, argName) : 555 part.getValue() == argNumber; // ARG_NUMBER 556} 557 558// Sets a custom formatter for a MessagePattern ARG_START part index. 559// "Custom" formatters are provided by the user via setFormat() or similar APIs. 560void MessageFormat::setCustomArgStartFormat(int32_t argStart, 561 Format* formatter, 562 UErrorCode& status) { 563 setArgStartFormat(argStart, formatter, status); 564 if (customFormatArgStarts == NULL) { 565 customFormatArgStarts=uhash_open(uhash_hashLong, uhash_compareLong, 566 NULL, &status); 567 } 568 uhash_iputi(customFormatArgStarts, argStart, 1, &status); 569} 570 571Format* MessageFormat::getCachedFormatter(int32_t argumentNumber) const { 572 if (cachedFormatters == NULL) { 573 return NULL; 574 } 575 void* ptr = uhash_iget(cachedFormatters, argumentNumber); 576 if (ptr != NULL && dynamic_cast<DummyFormat*>((Format*)ptr) == NULL) { 577 return (Format*) ptr; 578 } else { 579 // Not cached, or a DummyFormat representing setFormat(NULL). 580 return NULL; 581 } 582} 583 584// ------------------------------------- 585// Adopts the new formats array and updates the array count. 586// This MessageFormat instance owns the new formats. 587void 588MessageFormat::adoptFormats(Format** newFormats, 589 int32_t count) { 590 if (newFormats == NULL || count < 0) { 591 return; 592 } 593 // Throw away any cached formatters. 594 if (cachedFormatters != NULL) { 595 uhash_removeAll(cachedFormatters); 596 } 597 if (customFormatArgStarts != NULL) { 598 uhash_removeAll(customFormatArgStarts); 599 } 600 601 int32_t formatNumber = 0; 602 UErrorCode status = U_ZERO_ERROR; 603 for (int32_t partIndex = 0; 604 formatNumber < count && U_SUCCESS(status) && 605 (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) { 606 setCustomArgStartFormat(partIndex, newFormats[formatNumber], status); 607 ++formatNumber; 608 } 609 // Delete those that didn't get used (if any). 610 for (; formatNumber < count; ++formatNumber) { 611 delete newFormats[formatNumber]; 612 } 613 614} 615 616// ------------------------------------- 617// Sets the new formats array and updates the array count. 618// This MessageFormat instance maks a copy of the new formats. 619 620void 621MessageFormat::setFormats(const Format** newFormats, 622 int32_t count) { 623 if (newFormats == NULL || count < 0) { 624 return; 625 } 626 // Throw away any cached formatters. 627 if (cachedFormatters != NULL) { 628 uhash_removeAll(cachedFormatters); 629 } 630 if (customFormatArgStarts != NULL) { 631 uhash_removeAll(customFormatArgStarts); 632 } 633 634 UErrorCode status = U_ZERO_ERROR; 635 int32_t formatNumber = 0; 636 for (int32_t partIndex = 0; 637 formatNumber < count && U_SUCCESS(status) && (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) { 638 Format* newFormat = NULL; 639 if (newFormats[formatNumber] != NULL) { 640 newFormat = newFormats[formatNumber]->clone(); 641 if (newFormat == NULL) { 642 status = U_MEMORY_ALLOCATION_ERROR; 643 } 644 } 645 setCustomArgStartFormat(partIndex, newFormat, status); 646 ++formatNumber; 647 } 648 if (U_FAILURE(status)) { 649 resetPattern(); 650 } 651} 652 653// ------------------------------------- 654// Adopt a single format by format number. 655// Do nothing if the format number is not less than the array count. 656 657void 658MessageFormat::adoptFormat(int32_t n, Format *newFormat) { 659 LocalPointer<Format> p(newFormat); 660 if (n >= 0) { 661 int32_t formatNumber = 0; 662 for (int32_t partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) { 663 if (n == formatNumber) { 664 UErrorCode status = U_ZERO_ERROR; 665 setCustomArgStartFormat(partIndex, p.orphan(), status); 666 return; 667 } 668 ++formatNumber; 669 } 670 } 671} 672 673// ------------------------------------- 674// Adopt a single format by format name. 675// Do nothing if there is no match of formatName. 676void 677MessageFormat::adoptFormat(const UnicodeString& formatName, 678 Format* formatToAdopt, 679 UErrorCode& status) { 680 LocalPointer<Format> p(formatToAdopt); 681 if (U_FAILURE(status)) { 682 return; 683 } 684 int32_t argNumber = MessagePattern::validateArgumentName(formatName); 685 if (argNumber < UMSGPAT_ARG_NAME_NOT_NUMBER) { 686 status = U_ILLEGAL_ARGUMENT_ERROR; 687 return; 688 } 689 for (int32_t partIndex = 0; 690 (partIndex = nextTopLevelArgStart(partIndex)) >= 0 && U_SUCCESS(status); 691 ) { 692 if (argNameMatches(partIndex + 1, formatName, argNumber)) { 693 Format* f; 694 if (p.isValid()) { 695 f = p.orphan(); 696 } else if (formatToAdopt == NULL) { 697 f = NULL; 698 } else { 699 f = formatToAdopt->clone(); 700 if (f == NULL) { 701 status = U_MEMORY_ALLOCATION_ERROR; 702 return; 703 } 704 } 705 setCustomArgStartFormat(partIndex, f, status); 706 } 707 } 708} 709 710// ------------------------------------- 711// Set a single format. 712// Do nothing if the variable is not less than the array count. 713void 714MessageFormat::setFormat(int32_t n, const Format& newFormat) { 715 716 if (n >= 0) { 717 int32_t formatNumber = 0; 718 for (int32_t partIndex = 0; 719 (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) { 720 if (n == formatNumber) { 721 Format* new_format = newFormat.clone(); 722 if (new_format) { 723 UErrorCode status = U_ZERO_ERROR; 724 setCustomArgStartFormat(partIndex, new_format, status); 725 } 726 return; 727 } 728 ++formatNumber; 729 } 730 } 731} 732 733// ------------------------------------- 734// Get a single format by format name. 735// Do nothing if the variable is not less than the array count. 736Format * 737MessageFormat::getFormat(const UnicodeString& formatName, UErrorCode& status) { 738 if (U_FAILURE(status) || cachedFormatters == NULL) return NULL; 739 740 int32_t argNumber = MessagePattern::validateArgumentName(formatName); 741 if (argNumber < UMSGPAT_ARG_NAME_NOT_NUMBER) { 742 status = U_ILLEGAL_ARGUMENT_ERROR; 743 return NULL; 744 } 745 for (int32_t partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) { 746 if (argNameMatches(partIndex + 1, formatName, argNumber)) { 747 return getCachedFormatter(partIndex); 748 } 749 } 750 return NULL; 751} 752 753// ------------------------------------- 754// Set a single format by format name 755// Do nothing if the variable is not less than the array count. 756void 757MessageFormat::setFormat(const UnicodeString& formatName, 758 const Format& newFormat, 759 UErrorCode& status) { 760 if (U_FAILURE(status)) return; 761 762 int32_t argNumber = MessagePattern::validateArgumentName(formatName); 763 if (argNumber < UMSGPAT_ARG_NAME_NOT_NUMBER) { 764 status = U_ILLEGAL_ARGUMENT_ERROR; 765 return; 766 } 767 for (int32_t partIndex = 0; 768 (partIndex = nextTopLevelArgStart(partIndex)) >= 0 && U_SUCCESS(status); 769 ) { 770 if (argNameMatches(partIndex + 1, formatName, argNumber)) { 771 if (&newFormat == NULL) { 772 setCustomArgStartFormat(partIndex, NULL, status); 773 } else { 774 Format* new_format = newFormat.clone(); 775 if (new_format == NULL) { 776 status = U_MEMORY_ALLOCATION_ERROR; 777 return; 778 } 779 setCustomArgStartFormat(partIndex, new_format, status); 780 } 781 } 782 } 783} 784 785// ------------------------------------- 786// Gets the format array. 787const Format** 788MessageFormat::getFormats(int32_t& cnt) const 789{ 790 // This old API returns an array (which we hold) of Format* 791 // pointers. The array is valid up to the next call to any 792 // method on this object. We construct and resize an array 793 // on demand that contains aliases to the subformats[i].format 794 // pointers. 795 MessageFormat* t = const_cast<MessageFormat*> (this); 796 cnt = 0; 797 if (formatAliases == NULL) { 798 t->formatAliasesCapacity = (argTypeCount<10) ? 10 : argTypeCount; 799 Format** a = (Format**) 800 uprv_malloc(sizeof(Format*) * formatAliasesCapacity); 801 if (a == NULL) { 802 t->formatAliasesCapacity = 0; 803 return NULL; 804 } 805 t->formatAliases = a; 806 } else if (argTypeCount > formatAliasesCapacity) { 807 Format** a = (Format**) 808 uprv_realloc(formatAliases, sizeof(Format*) * argTypeCount); 809 if (a == NULL) { 810 t->formatAliasesCapacity = 0; 811 return NULL; 812 } 813 t->formatAliases = a; 814 t->formatAliasesCapacity = argTypeCount; 815 } 816 817 for (int32_t partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) { 818 t->formatAliases[cnt++] = getCachedFormatter(partIndex); 819 } 820 821 return (const Format**)formatAliases; 822} 823 824 825UnicodeString MessageFormat::getArgName(int32_t partIndex) { 826 const MessagePattern::Part& part = msgPattern.getPart(partIndex); 827 if (part.getType() == UMSGPAT_PART_TYPE_ARG_NAME) { 828 return msgPattern.getSubstring(part); 829 } else { 830 UnicodeString temp; 831 return itos(part.getValue(), temp); 832 } 833} 834 835StringEnumeration* 836MessageFormat::getFormatNames(UErrorCode& status) { 837 if (U_FAILURE(status)) return NULL; 838 839 UVector *fFormatNames = new UVector(status); 840 if (U_FAILURE(status)) { 841 status = U_MEMORY_ALLOCATION_ERROR; 842 return NULL; 843 } 844 fFormatNames->setDeleter(uhash_deleteUObject); 845 846 for (int32_t partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) { 847 fFormatNames->addElement(new UnicodeString(getArgName(partIndex)), status); 848 } 849 850 StringEnumeration* nameEnumerator = new FormatNameEnumeration(fFormatNames, status); 851 return nameEnumerator; 852} 853 854// ------------------------------------- 855// Formats the source Formattable array and copy into the result buffer. 856// Ignore the FieldPosition result for error checking. 857 858UnicodeString& 859MessageFormat::format(const Formattable* source, 860 int32_t cnt, 861 UnicodeString& appendTo, 862 FieldPosition& ignore, 863 UErrorCode& success) const 864{ 865 return format(source, NULL, cnt, appendTo, &ignore, success); 866} 867 868// ------------------------------------- 869// Internally creates a MessageFormat instance based on the 870// pattern and formats the arguments Formattable array and 871// copy into the appendTo buffer. 872 873UnicodeString& 874MessageFormat::format( const UnicodeString& pattern, 875 const Formattable* arguments, 876 int32_t cnt, 877 UnicodeString& appendTo, 878 UErrorCode& success) 879{ 880 MessageFormat temp(pattern, success); 881 return temp.format(arguments, NULL, cnt, appendTo, NULL, success); 882} 883 884// ------------------------------------- 885// Formats the source Formattable object and copy into the 886// appendTo buffer. The Formattable object must be an array 887// of Formattable instances, returns error otherwise. 888 889UnicodeString& 890MessageFormat::format(const Formattable& source, 891 UnicodeString& appendTo, 892 FieldPosition& ignore, 893 UErrorCode& success) const 894{ 895 if (U_FAILURE(success)) 896 return appendTo; 897 if (source.getType() != Formattable::kArray) { 898 success = U_ILLEGAL_ARGUMENT_ERROR; 899 return appendTo; 900 } 901 int32_t cnt; 902 const Formattable* tmpPtr = source.getArray(cnt); 903 return format(tmpPtr, NULL, cnt, appendTo, &ignore, success); 904} 905 906UnicodeString& 907MessageFormat::format(const UnicodeString* argumentNames, 908 const Formattable* arguments, 909 int32_t count, 910 UnicodeString& appendTo, 911 UErrorCode& success) const { 912 return format(arguments, argumentNames, count, appendTo, NULL, success); 913} 914 915// Does linear search to find the match for an ArgName. 916const Formattable* MessageFormat::getArgFromListByName(const Formattable* arguments, 917 const UnicodeString *argumentNames, 918 int32_t cnt, UnicodeString& name) const { 919 for (int32_t i = 0; i < cnt; ++i) { 920 if (0 == argumentNames[i].compare(name)) { 921 return arguments + i; 922 } 923 } 924 return NULL; 925} 926 927 928UnicodeString& 929MessageFormat::format(const Formattable* arguments, 930 const UnicodeString *argumentNames, 931 int32_t cnt, 932 UnicodeString& appendTo, 933 FieldPosition* pos, 934 UErrorCode& status) const { 935 if (U_FAILURE(status)) { 936 return appendTo; 937 } 938 939 UnicodeStringAppendable usapp(appendTo); 940 AppendableWrapper app(usapp); 941 format(0, 0.0, arguments, argumentNames, cnt, app, pos, status); 942 return appendTo; 943} 944 945// if argumentNames is NULL, this means arguments is a numeric array. 946// arguments can not be NULL. 947void MessageFormat::format(int32_t msgStart, double pluralNumber, 948 const Formattable* arguments, 949 const UnicodeString *argumentNames, 950 int32_t cnt, 951 AppendableWrapper& appendTo, 952 FieldPosition* ignore, 953 UErrorCode& success) const { 954 if (U_FAILURE(success)) { 955 return; 956 } 957 958 const UnicodeString& msgString = msgPattern.getPatternString(); 959 int32_t prevIndex = msgPattern.getPart(msgStart).getLimit(); 960 for (int32_t i = msgStart + 1; U_SUCCESS(success) ; ++i) { 961 const MessagePattern::Part* part = &msgPattern.getPart(i); 962 const UMessagePatternPartType type = part->getType(); 963 int32_t index = part->getIndex(); 964 appendTo.append(msgString, prevIndex, index - prevIndex); 965 if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) { 966 return; 967 } 968 prevIndex = part->getLimit(); 969 if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) { 970 const NumberFormat* nf = getDefaultNumberFormat(success); 971 appendTo.formatAndAppend(nf, Formattable(pluralNumber), success); 972 continue; 973 } 974 if (type != UMSGPAT_PART_TYPE_ARG_START) { 975 continue; 976 } 977 int32_t argLimit = msgPattern.getLimitPartIndex(i); 978 UMessagePatternArgType argType = part->getArgType(); 979 part = &msgPattern.getPart(++i); 980 const Formattable* arg; 981 UnicodeString noArg; 982 if (argumentNames == NULL) { 983 int32_t argNumber = part->getValue(); // ARG_NUMBER 984 if (0 <= argNumber && argNumber < cnt) { 985 arg = arguments + argNumber; 986 } else { 987 arg = NULL; 988 noArg.append(LEFT_CURLY_BRACE); 989 itos(argNumber, noArg); 990 noArg.append(RIGHT_CURLY_BRACE); 991 } 992 } else { 993 UnicodeString key; 994 if (part->getType() == UMSGPAT_PART_TYPE_ARG_NAME) { 995 key = msgPattern.getSubstring(*part); 996 } else /* UMSGPAT_PART_TYPE_ARG_NUMBER */ { 997 itos(part->getValue(), key); 998 } 999 arg = getArgFromListByName(arguments, argumentNames, cnt, key); 1000 if (arg == NULL) { 1001 noArg.append(LEFT_CURLY_BRACE); 1002 noArg.append(key); 1003 noArg.append(RIGHT_CURLY_BRACE); 1004 } 1005 } 1006 ++i; 1007 int32_t prevDestLength = appendTo.length(); 1008 const Format* formatter = NULL; 1009 if (!noArg.isEmpty()) { 1010 appendTo.append(noArg); 1011 } else if (arg == NULL) { 1012 appendTo.append(NULL_STRING, 4); 1013 } else if ((formatter = getCachedFormatter(i -2))) { 1014 // Handles all ArgType.SIMPLE, and formatters from setFormat() and its siblings. 1015 if (dynamic_cast<const ChoiceFormat*>(formatter) || 1016 dynamic_cast<const PluralFormat*>(formatter) || 1017 dynamic_cast<const SelectFormat*>(formatter)) { 1018 // We only handle nested formats here if they were provided via 1019 // setFormat() or its siblings. Otherwise they are not cached and instead 1020 // handled below according to argType. 1021 UnicodeString subMsgString; 1022 formatter->format(*arg, subMsgString, success); 1023 if (subMsgString.indexOf(LEFT_CURLY_BRACE) >= 0 || 1024 (subMsgString.indexOf(SINGLE_QUOTE) >= 0 && !MessageImpl::jdkAposMode(msgPattern)) 1025 ) { 1026 MessageFormat subMsgFormat(subMsgString, fLocale, success); 1027 subMsgFormat.format(0, 0, arguments, argumentNames, cnt, appendTo, ignore, success); 1028 } else { 1029 appendTo.append(subMsgString); 1030 } 1031 } else { 1032 appendTo.formatAndAppend(formatter, *arg, success); 1033 } 1034 } else if (argType == UMSGPAT_ARG_TYPE_NONE || (cachedFormatters && uhash_iget(cachedFormatters, i - 2))) { 1035 // We arrive here if getCachedFormatter returned NULL, but there was actually an element in the hash table. 1036 // This can only happen if the hash table contained a DummyFormat, so the if statement above is a check 1037 // for the hash table containind DummyFormat. 1038 if (arg->isNumeric()) { 1039 const NumberFormat* nf = getDefaultNumberFormat(success); 1040 appendTo.formatAndAppend(nf, *arg, success); 1041 } else if (arg->getType() == Formattable::kDate) { 1042 const DateFormat* df = getDefaultDateFormat(success); 1043 appendTo.formatAndAppend(df, *arg, success); 1044 } else { 1045 appendTo.append(arg->getString(success)); 1046 } 1047 } else if (argType == UMSGPAT_ARG_TYPE_CHOICE) { 1048 if (!arg->isNumeric()) { 1049 success = U_ILLEGAL_ARGUMENT_ERROR; 1050 return; 1051 } 1052 // We must use the Formattable::getDouble() variant with the UErrorCode parameter 1053 // because only this one converts non-double numeric types to double. 1054 const double number = arg->getDouble(success); 1055 int32_t subMsgStart = ChoiceFormat::findSubMessage(msgPattern, i, number); 1056 formatComplexSubMessage(subMsgStart, 0, arguments, argumentNames, 1057 cnt, appendTo, success); 1058 } else if (argType == UMSGPAT_ARG_TYPE_PLURAL) { 1059 if (!arg->isNumeric()) { 1060 success = U_ILLEGAL_ARGUMENT_ERROR; 1061 return; 1062 } 1063 // We must use the Formattable::getDouble() variant with the UErrorCode parameter 1064 // because only this one converts non-double numeric types to double. 1065 double number = arg->getDouble(success); 1066 int32_t subMsgStart = PluralFormat::findSubMessage(msgPattern, i, pluralProvider, number, 1067 success); 1068 double offset = msgPattern.getPluralOffset(i); 1069 formatComplexSubMessage(subMsgStart, number-offset, arguments, argumentNames, 1070 cnt, appendTo, success); 1071 } else if (argType == UMSGPAT_ARG_TYPE_SELECT) { 1072 int32_t subMsgStart = SelectFormat::findSubMessage(msgPattern, i, arg->getString(success), success); 1073 formatComplexSubMessage(subMsgStart, 0, arguments, argumentNames, 1074 cnt, appendTo, success); 1075 } else { 1076 // This should never happen. 1077 success = U_INTERNAL_PROGRAM_ERROR; 1078 return; 1079 } 1080 ignore = updateMetaData(appendTo, prevDestLength, ignore, arg); 1081 prevIndex = msgPattern.getPart(argLimit).getLimit(); 1082 i = argLimit; 1083 } 1084} 1085 1086 1087void MessageFormat::formatComplexSubMessage(int32_t msgStart, 1088 double pluralNumber, 1089 const Formattable* arguments, 1090 const UnicodeString *argumentNames, 1091 int32_t cnt, 1092 AppendableWrapper& appendTo, 1093 UErrorCode& success) const { 1094 if (U_FAILURE(success)) { 1095 return; 1096 } 1097 1098 if (!MessageImpl::jdkAposMode(msgPattern)) { 1099 format(msgStart, pluralNumber, arguments, argumentNames, cnt, appendTo, NULL, success); 1100 return; 1101 } 1102 1103 // JDK compatibility mode: (see JDK MessageFormat.format() API docs) 1104 // - remove SKIP_SYNTAX; that is, remove half of the apostrophes 1105 // - if the result string contains an open curly brace '{' then 1106 // instantiate a temporary MessageFormat object and format again; 1107 // otherwise just append the result string 1108 const UnicodeString& msgString = msgPattern.getPatternString(); 1109 UnicodeString sb; 1110 int32_t prevIndex = msgPattern.getPart(msgStart).getLimit(); 1111 for (int32_t i = msgStart;;) { 1112 const MessagePattern::Part& part = msgPattern.getPart(++i); 1113 const UMessagePatternPartType type = part.getType(); 1114 int32_t index = part.getIndex(); 1115 if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) { 1116 sb.append(msgString, prevIndex, index - prevIndex); 1117 break; 1118 } else if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER || type == UMSGPAT_PART_TYPE_SKIP_SYNTAX) { 1119 sb.append(msgString, prevIndex, index - prevIndex); 1120 if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) { 1121 const NumberFormat* nf = getDefaultNumberFormat(success); 1122 sb.append(nf->format(pluralNumber, sb, success)); 1123 } 1124 prevIndex = part.getLimit(); 1125 } else if (type == UMSGPAT_PART_TYPE_ARG_START) { 1126 sb.append(msgString, prevIndex, index - prevIndex); 1127 prevIndex = index; 1128 i = msgPattern.getLimitPartIndex(i); 1129 index = msgPattern.getPart(i).getLimit(); 1130 MessageImpl::appendReducedApostrophes(msgString, prevIndex, index, sb); 1131 prevIndex = index; 1132 } 1133 } 1134 if (sb.indexOf(LEFT_CURLY_BRACE) >= 0) { 1135 UnicodeString emptyPattern; // gcc 3.3.3 fails with "UnicodeString()" as the first parameter. 1136 MessageFormat subMsgFormat(emptyPattern, fLocale, success); 1137 subMsgFormat.applyPattern(sb, UMSGPAT_APOS_DOUBLE_REQUIRED, NULL, success); 1138 subMsgFormat.format(0, 0, arguments, argumentNames, cnt, appendTo, NULL, success); 1139 } else { 1140 appendTo.append(sb); 1141 } 1142} 1143 1144 1145UnicodeString MessageFormat::getLiteralStringUntilNextArgument(int32_t from) const { 1146 const UnicodeString& msgString=msgPattern.getPatternString(); 1147 int32_t prevIndex=msgPattern.getPart(from).getLimit(); 1148 UnicodeString b; 1149 for (int32_t i = from + 1; ; ++i) { 1150 const MessagePattern::Part& part = msgPattern.getPart(i); 1151 const UMessagePatternPartType type=part.getType(); 1152 int32_t index=part.getIndex(); 1153 b.append(msgString, prevIndex, index - prevIndex); 1154 if(type==UMSGPAT_PART_TYPE_ARG_START || type==UMSGPAT_PART_TYPE_MSG_LIMIT) { 1155 return b; 1156 } 1157 // Unexpected Part "part" in parsed message. 1158 U_ASSERT(type==UMSGPAT_PART_TYPE_SKIP_SYNTAX || type==UMSGPAT_PART_TYPE_INSERT_CHAR); 1159 prevIndex=part.getLimit(); 1160 } 1161} 1162 1163 1164FieldPosition* MessageFormat::updateMetaData(AppendableWrapper& /*dest*/, int32_t /*prevLength*/, 1165 FieldPosition* /*fp*/, const Formattable* /*argId*/) const { 1166 // Unlike in Java, there are no field attributes defined for MessageFormat. Do nothing. 1167 return NULL; 1168 /* 1169 if (fp != NULL && Field.ARGUMENT.equals(fp.getFieldAttribute())) { 1170 fp->setBeginIndex(prevLength); 1171 fp->setEndIndex(dest.get_length()); 1172 return NULL; 1173 } 1174 return fp; 1175 */ 1176} 1177 1178void MessageFormat::copyObjects(const MessageFormat& that, UErrorCode& ec) { 1179 // Deep copy pointer fields. 1180 // We need not copy the formatAliases because they are re-filled 1181 // in each getFormats() call. 1182 // The defaultNumberFormat, defaultDateFormat and pluralProvider.rules 1183 // also get created on demand. 1184 argTypeCount = that.argTypeCount; 1185 if (argTypeCount > 0) { 1186 if (!allocateArgTypes(argTypeCount, ec)) { 1187 return; 1188 } 1189 uprv_memcpy(argTypes, that.argTypes, argTypeCount * sizeof(argTypes[0])); 1190 } 1191 if (cachedFormatters != NULL) { 1192 uhash_removeAll(cachedFormatters); 1193 } 1194 if (customFormatArgStarts != NULL) { 1195 uhash_removeAll(customFormatArgStarts); 1196 } 1197 if (that.cachedFormatters) { 1198 if (cachedFormatters == NULL) { 1199 cachedFormatters=uhash_open(uhash_hashLong, uhash_compareLong, 1200 equalFormatsForHash, &ec); 1201 if (U_FAILURE(ec)) { 1202 return; 1203 } 1204 uhash_setValueDeleter(cachedFormatters, uhash_deleteUObject); 1205 } 1206 1207 const int32_t count = uhash_count(that.cachedFormatters); 1208 int32_t pos, idx; 1209 for (idx = 0, pos = -1; idx < count && U_SUCCESS(ec); ++idx) { 1210 const UHashElement* cur = uhash_nextElement(that.cachedFormatters, &pos); 1211 Format* newFormat = ((Format*)(cur->value.pointer))->clone(); 1212 if (newFormat) { 1213 uhash_iput(cachedFormatters, cur->key.integer, newFormat, &ec); 1214 } else { 1215 ec = U_MEMORY_ALLOCATION_ERROR; 1216 return; 1217 } 1218 } 1219 } 1220 if (that.customFormatArgStarts) { 1221 if (customFormatArgStarts == NULL) { 1222 customFormatArgStarts=uhash_open(uhash_hashLong, uhash_compareLong, 1223 NULL, &ec); 1224 } 1225 const int32_t count = uhash_count(that.customFormatArgStarts); 1226 int32_t pos, idx; 1227 for (idx = 0, pos = -1; idx < count && U_SUCCESS(ec); ++idx) { 1228 const UHashElement* cur = uhash_nextElement(that.customFormatArgStarts, &pos); 1229 uhash_iputi(customFormatArgStarts, cur->key.integer, cur->value.integer, &ec); 1230 } 1231 } 1232} 1233 1234 1235Formattable* 1236MessageFormat::parse(int32_t msgStart, 1237 const UnicodeString& source, 1238 ParsePosition& pos, 1239 int32_t& count, 1240 UErrorCode& ec) const { 1241 count = 0; 1242 if (U_FAILURE(ec)) { 1243 pos.setErrorIndex(pos.getIndex()); 1244 return NULL; 1245 } 1246 // parse() does not work with named arguments. 1247 if (msgPattern.hasNamedArguments()) { 1248 ec = U_ARGUMENT_TYPE_MISMATCH; 1249 pos.setErrorIndex(pos.getIndex()); 1250 return NULL; 1251 } 1252 LocalArray<Formattable> resultArray(new Formattable[argTypeCount ? argTypeCount : 1]); 1253 const UnicodeString& msgString=msgPattern.getPatternString(); 1254 int32_t prevIndex=msgPattern.getPart(msgStart).getLimit(); 1255 int32_t sourceOffset = pos.getIndex(); 1256 ParsePosition tempStatus(0); 1257 1258 for(int32_t i=msgStart+1; ; ++i) { 1259 UBool haveArgResult = FALSE; 1260 const MessagePattern::Part* part=&msgPattern.getPart(i); 1261 const UMessagePatternPartType type=part->getType(); 1262 int32_t index=part->getIndex(); 1263 // Make sure the literal string matches. 1264 int32_t len = index - prevIndex; 1265 if (len == 0 || (0 == msgString.compare(prevIndex, len, source, sourceOffset, len))) { 1266 sourceOffset += len; 1267 prevIndex += len; 1268 } else { 1269 pos.setErrorIndex(sourceOffset); 1270 return NULL; // leave index as is to signal error 1271 } 1272 if(type==UMSGPAT_PART_TYPE_MSG_LIMIT) { 1273 // Things went well! Done. 1274 pos.setIndex(sourceOffset); 1275 return resultArray.orphan(); 1276 } 1277 if(type==UMSGPAT_PART_TYPE_SKIP_SYNTAX || type==UMSGPAT_PART_TYPE_INSERT_CHAR) { 1278 prevIndex=part->getLimit(); 1279 continue; 1280 } 1281 // We do not support parsing Plural formats. (No REPLACE_NUMBER here.) 1282 // Unexpected Part "part" in parsed message. 1283 U_ASSERT(type==UMSGPAT_PART_TYPE_ARG_START); 1284 int32_t argLimit=msgPattern.getLimitPartIndex(i); 1285 1286 UMessagePatternArgType argType=part->getArgType(); 1287 part=&msgPattern.getPart(++i); 1288 int32_t argNumber = part->getValue(); // ARG_NUMBER 1289 UnicodeString key; 1290 ++i; 1291 const Format* formatter = NULL; 1292 Formattable& argResult = resultArray[argNumber]; 1293 1294 if(cachedFormatters!=NULL && (formatter = getCachedFormatter(i - 2))!=NULL) { 1295 // Just parse using the formatter. 1296 tempStatus.setIndex(sourceOffset); 1297 formatter->parseObject(source, argResult, tempStatus); 1298 if (tempStatus.getIndex() == sourceOffset) { 1299 pos.setErrorIndex(sourceOffset); 1300 return NULL; // leave index as is to signal error 1301 } 1302 sourceOffset = tempStatus.getIndex(); 1303 haveArgResult = TRUE; 1304 } else if( 1305 argType==UMSGPAT_ARG_TYPE_NONE || (cachedFormatters && uhash_iget(cachedFormatters, i -2))) { 1306 // We arrive here if getCachedFormatter returned NULL, but there was actually an element in the hash table. 1307 // This can only happen if the hash table contained a DummyFormat, so the if statement above is a check 1308 // for the hash table containind DummyFormat. 1309 1310 // Match as a string. 1311 // if at end, use longest possible match 1312 // otherwise uses first match to intervening string 1313 // does NOT recursively try all possibilities 1314 UnicodeString stringAfterArgument = getLiteralStringUntilNextArgument(argLimit); 1315 int32_t next; 1316 if (!stringAfterArgument.isEmpty()) { 1317 next = source.indexOf(stringAfterArgument, sourceOffset); 1318 } else { 1319 next = source.length(); 1320 } 1321 if (next < 0) { 1322 pos.setErrorIndex(sourceOffset); 1323 return NULL; // leave index as is to signal error 1324 } else { 1325 UnicodeString strValue(source.tempSubString(sourceOffset, next - sourceOffset)); 1326 UnicodeString compValue; 1327 compValue.append(LEFT_CURLY_BRACE); 1328 itos(argNumber, compValue); 1329 compValue.append(RIGHT_CURLY_BRACE); 1330 if (0 != strValue.compare(compValue)) { 1331 argResult.setString(strValue); 1332 haveArgResult = TRUE; 1333 } 1334 sourceOffset = next; 1335 } 1336 } else if(argType==UMSGPAT_ARG_TYPE_CHOICE) { 1337 tempStatus.setIndex(sourceOffset); 1338 double choiceResult = ChoiceFormat::parseArgument(msgPattern, i, source, tempStatus); 1339 if (tempStatus.getIndex() == sourceOffset) { 1340 pos.setErrorIndex(sourceOffset); 1341 return NULL; // leave index as is to signal error 1342 } 1343 argResult.setDouble(choiceResult); 1344 haveArgResult = TRUE; 1345 sourceOffset = tempStatus.getIndex(); 1346 } else if(argType==UMSGPAT_ARG_TYPE_PLURAL || argType==UMSGPAT_ARG_TYPE_SELECT) { 1347 // Parsing not supported. 1348 ec = U_UNSUPPORTED_ERROR; 1349 return NULL; 1350 } else { 1351 // This should never happen. 1352 ec = U_INTERNAL_PROGRAM_ERROR; 1353 return NULL; 1354 } 1355 if (haveArgResult && count <= argNumber) { 1356 count = argNumber + 1; 1357 } 1358 prevIndex=msgPattern.getPart(argLimit).getLimit(); 1359 i=argLimit; 1360 } 1361} 1362// ------------------------------------- 1363// Parses the source pattern and returns the Formattable objects array, 1364// the array count and the ending parse position. The caller of this method 1365// owns the array. 1366 1367Formattable* 1368MessageFormat::parse(const UnicodeString& source, 1369 ParsePosition& pos, 1370 int32_t& count) const { 1371 UErrorCode ec = U_ZERO_ERROR; 1372 return parse(0, source, pos, count, ec); 1373} 1374 1375// ------------------------------------- 1376// Parses the source string and returns the array of 1377// Formattable objects and the array count. The caller 1378// owns the returned array. 1379 1380Formattable* 1381MessageFormat::parse(const UnicodeString& source, 1382 int32_t& cnt, 1383 UErrorCode& success) const 1384{ 1385 if (msgPattern.hasNamedArguments()) { 1386 success = U_ARGUMENT_TYPE_MISMATCH; 1387 return NULL; 1388 } 1389 ParsePosition status(0); 1390 // Calls the actual implementation method and starts 1391 // from zero offset of the source text. 1392 Formattable* result = parse(source, status, cnt); 1393 if (status.getIndex() == 0) { 1394 success = U_MESSAGE_PARSE_ERROR; 1395 delete[] result; 1396 return NULL; 1397 } 1398 return result; 1399} 1400 1401// ------------------------------------- 1402// Parses the source text and copy into the result buffer. 1403 1404void 1405MessageFormat::parseObject( const UnicodeString& source, 1406 Formattable& result, 1407 ParsePosition& status) const 1408{ 1409 int32_t cnt = 0; 1410 Formattable* tmpResult = parse(source, status, cnt); 1411 if (tmpResult != NULL) 1412 result.adoptArray(tmpResult, cnt); 1413} 1414 1415UnicodeString 1416MessageFormat::autoQuoteApostrophe(const UnicodeString& pattern, UErrorCode& status) { 1417 UnicodeString result; 1418 if (U_SUCCESS(status)) { 1419 int32_t plen = pattern.length(); 1420 const UChar* pat = pattern.getBuffer(); 1421 int32_t blen = plen * 2 + 1; // space for null termination, convenience 1422 UChar* buf = result.getBuffer(blen); 1423 if (buf == NULL) { 1424 status = U_MEMORY_ALLOCATION_ERROR; 1425 } else { 1426 int32_t len = umsg_autoQuoteApostrophe(pat, plen, buf, blen, &status); 1427 result.releaseBuffer(U_SUCCESS(status) ? len : 0); 1428 } 1429 } 1430 if (U_FAILURE(status)) { 1431 result.setToBogus(); 1432 } 1433 return result; 1434} 1435 1436// ------------------------------------- 1437 1438static Format* makeRBNF(URBNFRuleSetTag tag, const Locale& locale, const UnicodeString& defaultRuleSet, UErrorCode& ec) { 1439 RuleBasedNumberFormat* fmt = new RuleBasedNumberFormat(tag, locale, ec); 1440 if (fmt == NULL) { 1441 ec = U_MEMORY_ALLOCATION_ERROR; 1442 } else if (U_SUCCESS(ec) && defaultRuleSet.length() > 0) { 1443 UErrorCode localStatus = U_ZERO_ERROR; // ignore unrecognized default rule set 1444 fmt->setDefaultRuleSet(defaultRuleSet, localStatus); 1445 } 1446 return fmt; 1447} 1448 1449void MessageFormat::cacheExplicitFormats(UErrorCode& status) { 1450 if (U_FAILURE(status)) { 1451 return; 1452 } 1453 1454 if (cachedFormatters != NULL) { 1455 uhash_removeAll(cachedFormatters); 1456 } 1457 if (customFormatArgStarts != NULL) { 1458 uhash_removeAll(customFormatArgStarts); 1459 } 1460 1461 // The last two "parts" can at most be ARG_LIMIT and MSG_LIMIT 1462 // which we need not examine. 1463 int32_t limit = msgPattern.countParts() - 2; 1464 argTypeCount = 0; 1465 // We also need not look at the first two "parts" 1466 // (at most MSG_START and ARG_START) in this loop. 1467 // We determine the argTypeCount first so that we can allocateArgTypes 1468 // so that the next loop can set argTypes[argNumber]. 1469 // (This is for the C API which needs the argTypes to read its va_arg list.) 1470 for (int32_t i = 2; i < limit && U_SUCCESS(status); ++i) { 1471 const MessagePattern::Part& part = msgPattern.getPart(i); 1472 if (part.getType() == UMSGPAT_PART_TYPE_ARG_NUMBER) { 1473 const int argNumber = part.getValue(); 1474 if (argNumber >= argTypeCount) { 1475 argTypeCount = argNumber + 1; 1476 } 1477 } 1478 } 1479 if (!allocateArgTypes(argTypeCount, status)) { 1480 return; 1481 } 1482 // Set all argTypes to kObject, as a "none" value, for lack of any better value. 1483 // We never use kObject for real arguments. 1484 for (int32_t i = 0; i < argTypeCount; ++i) { 1485 argTypes[i] = Formattable::kObject; 1486 } 1487 hasArgTypeConflicts = FALSE; 1488 1489 // This loop starts at part index 1 because we do need to examine 1490 // ARG_START parts. (But we can ignore the MSG_START.) 1491 for (int32_t i = 1; i < limit && U_SUCCESS(status); ++i) { 1492 const MessagePattern::Part* part = &msgPattern.getPart(i); 1493 if (part->getType() != UMSGPAT_PART_TYPE_ARG_START) { 1494 continue; 1495 } 1496 UMessagePatternArgType argType = part->getArgType(); 1497 1498 int32_t argNumber = -1; 1499 part = &msgPattern.getPart(i + 1); 1500 if (part->getType() == UMSGPAT_PART_TYPE_ARG_NUMBER) { 1501 argNumber = part->getValue(); 1502 } 1503 Formattable::Type formattableType; 1504 1505 switch (argType) { 1506 case UMSGPAT_ARG_TYPE_NONE: 1507 formattableType = Formattable::kString; 1508 break; 1509 case UMSGPAT_ARG_TYPE_SIMPLE: { 1510 int32_t index = i; 1511 i += 2; 1512 UnicodeString explicitType = msgPattern.getSubstring(msgPattern.getPart(i++)); 1513 UnicodeString style; 1514 if ((part = &msgPattern.getPart(i))->getType() == UMSGPAT_PART_TYPE_ARG_STYLE) { 1515 style = msgPattern.getSubstring(*part); 1516 ++i; 1517 } 1518 UParseError parseError; 1519 Format* formatter = createAppropriateFormat(explicitType, style, formattableType, parseError, status); 1520 setArgStartFormat(index, formatter, status); 1521 break; 1522 } 1523 case UMSGPAT_ARG_TYPE_CHOICE: 1524 case UMSGPAT_ARG_TYPE_PLURAL: 1525 formattableType = Formattable::kDouble; 1526 break; 1527 case UMSGPAT_ARG_TYPE_SELECT: 1528 formattableType = Formattable::kString; 1529 break; 1530 default: 1531 status = U_INTERNAL_PROGRAM_ERROR; // Should be unreachable. 1532 formattableType = Formattable::kString; 1533 break; 1534 } 1535 if (argNumber != -1) { 1536 if (argTypes[argNumber] != Formattable::kObject && argTypes[argNumber] != formattableType) { 1537 hasArgTypeConflicts = TRUE; 1538 } 1539 argTypes[argNumber] = formattableType; 1540 } 1541 } 1542} 1543 1544 1545Format* MessageFormat::createAppropriateFormat(UnicodeString& type, UnicodeString& style, 1546 Formattable::Type& formattableType, UParseError& parseError, 1547 UErrorCode& ec) { 1548 if (U_FAILURE(ec)) { 1549 return NULL; 1550 } 1551 Format* fmt = NULL; 1552 int32_t typeID, styleID; 1553 DateFormat::EStyle date_style; 1554 1555 switch (typeID = findKeyword(type, TYPE_IDS)) { 1556 case 0: // number 1557 formattableType = Formattable::kDouble; 1558 switch (findKeyword(style, NUMBER_STYLE_IDS)) { 1559 case 0: // default 1560 fmt = NumberFormat::createInstance(fLocale, ec); 1561 break; 1562 case 1: // currency 1563 fmt = NumberFormat::createCurrencyInstance(fLocale, ec); 1564 break; 1565 case 2: // percent 1566 fmt = NumberFormat::createPercentInstance(fLocale, ec); 1567 break; 1568 case 3: // integer 1569 formattableType = Formattable::kLong; 1570 fmt = createIntegerFormat(fLocale, ec); 1571 break; 1572 default: // pattern 1573 fmt = NumberFormat::createInstance(fLocale, ec); 1574 if (fmt) { 1575 DecimalFormat* decfmt = dynamic_cast<DecimalFormat*>(fmt); 1576 if (decfmt != NULL) { 1577 decfmt->applyPattern(style,parseError,ec); 1578 } 1579 } 1580 break; 1581 } 1582 break; 1583 1584 case 1: // date 1585 case 2: // time 1586 formattableType = Formattable::kDate; 1587 styleID = findKeyword(style, DATE_STYLE_IDS); 1588 date_style = (styleID >= 0) ? DATE_STYLES[styleID] : DateFormat::kDefault; 1589 1590 if (typeID == 1) { 1591 fmt = DateFormat::createDateInstance(date_style, fLocale); 1592 } else { 1593 fmt = DateFormat::createTimeInstance(date_style, fLocale); 1594 } 1595 1596 if (styleID < 0 && fmt != NULL) { 1597 SimpleDateFormat* sdtfmt = dynamic_cast<SimpleDateFormat*>(fmt); 1598 if (sdtfmt != NULL) { 1599 sdtfmt->applyPattern(style); 1600 } 1601 } 1602 break; 1603 1604 case 3: // spellout 1605 formattableType = Formattable::kDouble; 1606 fmt = makeRBNF(URBNF_SPELLOUT, fLocale, style, ec); 1607 break; 1608 case 4: // ordinal 1609 formattableType = Formattable::kDouble; 1610 fmt = makeRBNF(URBNF_ORDINAL, fLocale, style, ec); 1611 break; 1612 case 5: // duration 1613 formattableType = Formattable::kDouble; 1614 fmt = makeRBNF(URBNF_DURATION, fLocale, style, ec); 1615 break; 1616 default: 1617 formattableType = Formattable::kString; 1618 ec = U_ILLEGAL_ARGUMENT_ERROR; 1619 break; 1620 } 1621 1622 return fmt; 1623} 1624 1625 1626//------------------------------------- 1627// Finds the string, s, in the string array, list. 1628int32_t MessageFormat::findKeyword(const UnicodeString& s, 1629 const UChar * const *list) 1630{ 1631 if (s.isEmpty()) { 1632 return 0; // default 1633 } 1634 1635 int32_t length = s.length(); 1636 const UChar *ps = PatternProps::trimWhiteSpace(s.getBuffer(), length); 1637 UnicodeString buffer(FALSE, ps, length); 1638 // Trims the space characters and turns all characters 1639 // in s to lower case. 1640 buffer.toLower(""); 1641 for (int32_t i = 0; list[i]; ++i) { 1642 if (!buffer.compare(list[i], u_strlen(list[i]))) { 1643 return i; 1644 } 1645 } 1646 return -1; 1647} 1648 1649/** 1650 * Convenience method that ought to be in NumberFormat 1651 */ 1652NumberFormat* 1653MessageFormat::createIntegerFormat(const Locale& locale, UErrorCode& status) const { 1654 NumberFormat *temp = NumberFormat::createInstance(locale, status); 1655 DecimalFormat *temp2; 1656 if (temp != NULL && (temp2 = dynamic_cast<DecimalFormat*>(temp)) != NULL) { 1657 temp2->setMaximumFractionDigits(0); 1658 temp2->setDecimalSeparatorAlwaysShown(FALSE); 1659 temp2->setParseIntegerOnly(TRUE); 1660 } 1661 1662 return temp; 1663} 1664 1665/** 1666 * Return the default number format. Used to format a numeric 1667 * argument when subformats[i].format is NULL. Returns NULL 1668 * on failure. 1669 * 1670 * Semantically const but may modify *this. 1671 */ 1672const NumberFormat* MessageFormat::getDefaultNumberFormat(UErrorCode& ec) const { 1673 if (defaultNumberFormat == NULL) { 1674 MessageFormat* t = (MessageFormat*) this; 1675 t->defaultNumberFormat = NumberFormat::createInstance(fLocale, ec); 1676 if (U_FAILURE(ec)) { 1677 delete t->defaultNumberFormat; 1678 t->defaultNumberFormat = NULL; 1679 } else if (t->defaultNumberFormat == NULL) { 1680 ec = U_MEMORY_ALLOCATION_ERROR; 1681 } 1682 } 1683 return defaultNumberFormat; 1684} 1685 1686/** 1687 * Return the default date format. Used to format a date 1688 * argument when subformats[i].format is NULL. Returns NULL 1689 * on failure. 1690 * 1691 * Semantically const but may modify *this. 1692 */ 1693const DateFormat* MessageFormat::getDefaultDateFormat(UErrorCode& ec) const { 1694 if (defaultDateFormat == NULL) { 1695 MessageFormat* t = (MessageFormat*) this; 1696 t->defaultDateFormat = DateFormat::createDateTimeInstance(DateFormat::kShort, DateFormat::kShort, fLocale); 1697 if (t->defaultDateFormat == NULL) { 1698 ec = U_MEMORY_ALLOCATION_ERROR; 1699 } 1700 } 1701 return defaultDateFormat; 1702} 1703 1704UBool 1705MessageFormat::usesNamedArguments() const { 1706 return msgPattern.hasNamedArguments(); 1707} 1708 1709int32_t 1710MessageFormat::getArgTypeCount() const { 1711 return argTypeCount; 1712} 1713 1714UBool MessageFormat::equalFormats(const void* left, const void* right) { 1715 return *(const Format*)left==*(const Format*)right; 1716} 1717 1718 1719UBool MessageFormat::DummyFormat::operator==(const Format&) const { 1720 return TRUE; 1721} 1722 1723Format* MessageFormat::DummyFormat::clone() const { 1724 return new DummyFormat(); 1725} 1726 1727UnicodeString& MessageFormat::DummyFormat::format(const Formattable&, 1728 UnicodeString& appendTo, 1729 FieldPosition&, 1730 UErrorCode& status) const { 1731 if (U_SUCCESS(status)) { 1732 status = U_UNSUPPORTED_ERROR; 1733 } 1734 return appendTo; 1735} 1736 1737void MessageFormat::DummyFormat::parseObject(const UnicodeString&, 1738 Formattable&, 1739 ParsePosition& ) const { 1740} 1741 1742 1743FormatNameEnumeration::FormatNameEnumeration(UVector *fNameList, UErrorCode& /*status*/) { 1744 pos=0; 1745 fFormatNames = fNameList; 1746} 1747 1748const UnicodeString* 1749FormatNameEnumeration::snext(UErrorCode& status) { 1750 if (U_SUCCESS(status) && pos < fFormatNames->size()) { 1751 return (const UnicodeString*)fFormatNames->elementAt(pos++); 1752 } 1753 return NULL; 1754} 1755 1756void 1757FormatNameEnumeration::reset(UErrorCode& /*status*/) { 1758 pos=0; 1759} 1760 1761int32_t 1762FormatNameEnumeration::count(UErrorCode& /*status*/) const { 1763 return (fFormatNames==NULL) ? 0 : fFormatNames->size(); 1764} 1765 1766FormatNameEnumeration::~FormatNameEnumeration() { 1767 delete fFormatNames; 1768} 1769 1770 1771MessageFormat::PluralSelectorProvider::PluralSelectorProvider(const Locale* loc) 1772 : locale(loc), rules(NULL) { 1773} 1774 1775MessageFormat::PluralSelectorProvider::~PluralSelectorProvider() { 1776 // We own the rules but not the locale. 1777 delete rules; 1778} 1779 1780UnicodeString MessageFormat::PluralSelectorProvider::select(double number, UErrorCode& ec) const { 1781 if (U_FAILURE(ec)) { 1782 return UnicodeString(FALSE, OTHER_STRING, 5); 1783 } 1784 MessageFormat::PluralSelectorProvider* t = const_cast<MessageFormat::PluralSelectorProvider*>(this); 1785 if(rules == NULL) { 1786 t->rules = PluralRules::forLocale(*locale, ec); 1787 if (U_FAILURE(ec)) { 1788 return UnicodeString(FALSE, OTHER_STRING, 5); 1789 } 1790 } 1791 return rules->select(number); 1792} 1793 1794void MessageFormat::PluralSelectorProvider::reset(const Locale* loc) { 1795 locale = loc; 1796 delete rules; 1797 rules = NULL; 1798} 1799 1800 1801U_NAMESPACE_END 1802 1803#endif /* #if !UCONFIG_NO_FORMATTING */ 1804 1805//eof 1806