1// Copyright (C) 2016 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html 3/* 4******************************************************************************* 5* Copyright (C) 1997-2013, International Business Machines Corporation and * 6* others. All Rights Reserved. * 7******************************************************************************* 8* 9* File CHOICFMT.CPP 10* 11* Modification History: 12* 13* Date Name Description 14* 02/19/97 aliu Converted from java. 15* 03/20/97 helena Finished first cut of implementation and got rid 16* of nextDouble/previousDouble and replaced with 17* boolean array. 18* 4/10/97 aliu Clean up. Modified to work on AIX. 19* 06/04/97 helena Fixed applyPattern(), toPattern() and not to include 20* wchar.h. 21* 07/09/97 helena Made ParsePosition into a class. 22* 08/06/97 nos removed overloaded constructor, fixed 'format(array)' 23* 07/22/98 stephen JDK 1.2 Sync - removed UBool array (doubleFlags) 24* 02/22/99 stephen Removed character literals for EBCDIC safety 25******************************************************************************** 26*/ 27 28#include "unicode/utypes.h" 29 30#if !UCONFIG_NO_FORMATTING 31 32#include "unicode/choicfmt.h" 33#include "unicode/numfmt.h" 34#include "unicode/locid.h" 35#include "cpputils.h" 36#include "cstring.h" 37#include "messageimpl.h" 38#include "putilimp.h" 39#include "uassert.h" 40#include <stdio.h> 41#include <float.h> 42 43// ***************************************************************************** 44// class ChoiceFormat 45// ***************************************************************************** 46 47U_NAMESPACE_BEGIN 48 49UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ChoiceFormat) 50 51// Special characters used by ChoiceFormat. There are two characters 52// used interchangeably to indicate <=. Either is parsed, but only 53// LESS_EQUAL is generated by toPattern(). 54#define SINGLE_QUOTE ((UChar)0x0027) /*'*/ 55#define LESS_THAN ((UChar)0x003C) /*<*/ 56#define LESS_EQUAL ((UChar)0x0023) /*#*/ 57#define LESS_EQUAL2 ((UChar)0x2264) 58#define VERTICAL_BAR ((UChar)0x007C) /*|*/ 59#define MINUS ((UChar)0x002D) /*-*/ 60 61static const UChar LEFT_CURLY_BRACE = 0x7B; /*{*/ 62static const UChar RIGHT_CURLY_BRACE = 0x7D; /*}*/ 63 64#ifdef INFINITY 65#undef INFINITY 66#endif 67#define INFINITY ((UChar)0x221E) 68 69//static const UChar gPositiveInfinity[] = {INFINITY, 0}; 70//static const UChar gNegativeInfinity[] = {MINUS, INFINITY, 0}; 71#define POSITIVE_INF_STRLEN 1 72#define NEGATIVE_INF_STRLEN 2 73 74// ------------------------------------- 75// Creates a ChoiceFormat instance based on the pattern. 76 77ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern, 78 UErrorCode& status) 79: constructorErrorCode(status), 80 msgPattern(status) 81{ 82 applyPattern(newPattern, status); 83} 84 85// ------------------------------------- 86// Creates a ChoiceFormat instance with the limit array and 87// format strings for each limit. 88 89ChoiceFormat::ChoiceFormat(const double* limits, 90 const UnicodeString* formats, 91 int32_t cnt ) 92: constructorErrorCode(U_ZERO_ERROR), 93 msgPattern(constructorErrorCode) 94{ 95 setChoices(limits, NULL, formats, cnt, constructorErrorCode); 96} 97 98// ------------------------------------- 99 100ChoiceFormat::ChoiceFormat(const double* limits, 101 const UBool* closures, 102 const UnicodeString* formats, 103 int32_t cnt ) 104: constructorErrorCode(U_ZERO_ERROR), 105 msgPattern(constructorErrorCode) 106{ 107 setChoices(limits, closures, formats, cnt, constructorErrorCode); 108} 109 110// ------------------------------------- 111// copy constructor 112 113ChoiceFormat::ChoiceFormat(const ChoiceFormat& that) 114: NumberFormat(that), 115 constructorErrorCode(that.constructorErrorCode), 116 msgPattern(that.msgPattern) 117{ 118} 119 120// ------------------------------------- 121// Private constructor that creates a 122// ChoiceFormat instance based on the 123// pattern and populates UParseError 124 125ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern, 126 UParseError& parseError, 127 UErrorCode& status) 128: constructorErrorCode(status), 129 msgPattern(status) 130{ 131 applyPattern(newPattern,parseError, status); 132} 133// ------------------------------------- 134 135UBool 136ChoiceFormat::operator==(const Format& that) const 137{ 138 if (this == &that) return TRUE; 139 if (!NumberFormat::operator==(that)) return FALSE; 140 ChoiceFormat& thatAlias = (ChoiceFormat&)that; 141 return msgPattern == thatAlias.msgPattern; 142} 143 144// ------------------------------------- 145// copy constructor 146 147const ChoiceFormat& 148ChoiceFormat::operator=(const ChoiceFormat& that) 149{ 150 if (this != &that) { 151 NumberFormat::operator=(that); 152 constructorErrorCode = that.constructorErrorCode; 153 msgPattern = that.msgPattern; 154 } 155 return *this; 156} 157 158// ------------------------------------- 159 160ChoiceFormat::~ChoiceFormat() 161{ 162} 163 164// ------------------------------------- 165 166/** 167 * Convert a double value to a string without the overhead of NumberFormat. 168 */ 169UnicodeString& 170ChoiceFormat::dtos(double value, 171 UnicodeString& string) 172{ 173 /* Buffer to contain the digits and any extra formatting stuff. */ 174 char temp[DBL_DIG + 16]; 175 char *itrPtr = temp; 176 char *expPtr; 177 178 sprintf(temp, "%.*g", DBL_DIG, value); 179 180 /* Find and convert the decimal point. 181 Using setlocale on some machines will cause sprintf to use a comma for certain locales. 182 */ 183 while (*itrPtr && (*itrPtr == '-' || isdigit(*itrPtr))) { 184 itrPtr++; 185 } 186 if (*itrPtr != 0 && *itrPtr != 'e') { 187 /* We reached something that looks like a decimal point. 188 In case someone used setlocale(), which changes the decimal point. */ 189 *itrPtr = '.'; 190 itrPtr++; 191 } 192 /* Search for the exponent */ 193 while (*itrPtr && *itrPtr != 'e') { 194 itrPtr++; 195 } 196 if (*itrPtr == 'e') { 197 itrPtr++; 198 /* Verify the exponent sign */ 199 if (*itrPtr == '+' || *itrPtr == '-') { 200 itrPtr++; 201 } 202 /* Remove leading zeros. You will see this on Windows machines. */ 203 expPtr = itrPtr; 204 while (*itrPtr == '0') { 205 itrPtr++; 206 } 207 if (*itrPtr && expPtr != itrPtr) { 208 /* Shift the exponent without zeros. */ 209 while (*itrPtr) { 210 *(expPtr++) = *(itrPtr++); 211 } 212 // NULL terminate 213 *expPtr = 0; 214 } 215 } 216 217 string = UnicodeString(temp, -1, US_INV); /* invariant codepage */ 218 return string; 219} 220 221// ------------------------------------- 222// calls the overloaded applyPattern method. 223 224void 225ChoiceFormat::applyPattern(const UnicodeString& pattern, 226 UErrorCode& status) 227{ 228 msgPattern.parseChoiceStyle(pattern, NULL, status); 229 constructorErrorCode = status; 230} 231 232// ------------------------------------- 233// Applies the pattern to this ChoiceFormat instance. 234 235void 236ChoiceFormat::applyPattern(const UnicodeString& pattern, 237 UParseError& parseError, 238 UErrorCode& status) 239{ 240 msgPattern.parseChoiceStyle(pattern, &parseError, status); 241 constructorErrorCode = status; 242} 243// ------------------------------------- 244// Returns the input pattern string. 245 246UnicodeString& 247ChoiceFormat::toPattern(UnicodeString& result) const 248{ 249 return result = msgPattern.getPatternString(); 250} 251 252// ------------------------------------- 253// Sets the limit and format arrays. 254void 255ChoiceFormat::setChoices( const double* limits, 256 const UnicodeString* formats, 257 int32_t cnt ) 258{ 259 UErrorCode errorCode = U_ZERO_ERROR; 260 setChoices(limits, NULL, formats, cnt, errorCode); 261} 262 263// ------------------------------------- 264// Sets the limit and format arrays. 265void 266ChoiceFormat::setChoices( const double* limits, 267 const UBool* closures, 268 const UnicodeString* formats, 269 int32_t cnt ) 270{ 271 UErrorCode errorCode = U_ZERO_ERROR; 272 setChoices(limits, closures, formats, cnt, errorCode); 273} 274 275void 276ChoiceFormat::setChoices(const double* limits, 277 const UBool* closures, 278 const UnicodeString* formats, 279 int32_t count, 280 UErrorCode &errorCode) { 281 if (U_FAILURE(errorCode)) { 282 return; 283 } 284 if (limits == NULL || formats == NULL) { 285 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 286 return; 287 } 288 // Reconstruct the original input pattern. 289 // Modified version of the pre-ICU 4.8 toPattern() implementation. 290 UnicodeString result; 291 for (int32_t i = 0; i < count; ++i) { 292 if (i != 0) { 293 result += VERTICAL_BAR; 294 } 295 UnicodeString buf; 296 if (uprv_isPositiveInfinity(limits[i])) { 297 result += INFINITY; 298 } else if (uprv_isNegativeInfinity(limits[i])) { 299 result += MINUS; 300 result += INFINITY; 301 } else { 302 result += dtos(limits[i], buf); 303 } 304 if (closures != NULL && closures[i]) { 305 result += LESS_THAN; 306 } else { 307 result += LESS_EQUAL; 308 } 309 // Append formats[i], using quotes if there are special 310 // characters. Single quotes themselves must be escaped in 311 // either case. 312 const UnicodeString& text = formats[i]; 313 int32_t textLength = text.length(); 314 int32_t nestingLevel = 0; 315 for (int32_t j = 0; j < textLength; ++j) { 316 UChar c = text[j]; 317 if (c == SINGLE_QUOTE && nestingLevel == 0) { 318 // Double each top-level apostrophe. 319 result.append(c); 320 } else if (c == VERTICAL_BAR && nestingLevel == 0) { 321 // Surround each pipe symbol with apostrophes for quoting. 322 // If the next character is an apostrophe, then that will be doubled, 323 // and although the parser will see the apostrophe pairs beginning 324 // and ending one character earlier than our doubling, the result 325 // is as desired. 326 // | -> '|' 327 // |' -> '|''' 328 // |'' -> '|''''' etc. 329 result.append(SINGLE_QUOTE).append(c).append(SINGLE_QUOTE); 330 continue; // Skip the append(c) at the end of the loop body. 331 } else if (c == LEFT_CURLY_BRACE) { 332 ++nestingLevel; 333 } else if (c == RIGHT_CURLY_BRACE && nestingLevel > 0) { 334 --nestingLevel; 335 } 336 result.append(c); 337 } 338 } 339 // Apply the reconstructed pattern. 340 applyPattern(result, errorCode); 341} 342 343// ------------------------------------- 344// Gets the limit array. 345 346const double* 347ChoiceFormat::getLimits(int32_t& cnt) const 348{ 349 cnt = 0; 350 return NULL; 351} 352 353// ------------------------------------- 354// Gets the closures array. 355 356const UBool* 357ChoiceFormat::getClosures(int32_t& cnt) const 358{ 359 cnt = 0; 360 return NULL; 361} 362 363// ------------------------------------- 364// Gets the format array. 365 366const UnicodeString* 367ChoiceFormat::getFormats(int32_t& cnt) const 368{ 369 cnt = 0; 370 return NULL; 371} 372 373// ------------------------------------- 374// Formats an int64 number, it's actually formatted as 375// a double. The returned format string may differ 376// from the input number because of this. 377 378UnicodeString& 379ChoiceFormat::format(int64_t number, 380 UnicodeString& appendTo, 381 FieldPosition& status) const 382{ 383 return format((double) number, appendTo, status); 384} 385 386// ------------------------------------- 387// Formats an int32_t number, it's actually formatted as 388// a double. 389 390UnicodeString& 391ChoiceFormat::format(int32_t number, 392 UnicodeString& appendTo, 393 FieldPosition& status) const 394{ 395 return format((double) number, appendTo, status); 396} 397 398// ------------------------------------- 399// Formats a double number. 400 401UnicodeString& 402ChoiceFormat::format(double number, 403 UnicodeString& appendTo, 404 FieldPosition& /*pos*/) const 405{ 406 if (msgPattern.countParts() == 0) { 407 // No pattern was applied, or it failed. 408 return appendTo; 409 } 410 // Get the appropriate sub-message. 411 int32_t msgStart = findSubMessage(msgPattern, 0, number); 412 if (!MessageImpl::jdkAposMode(msgPattern)) { 413 int32_t patternStart = msgPattern.getPart(msgStart).getLimit(); 414 int32_t msgLimit = msgPattern.getLimitPartIndex(msgStart); 415 appendTo.append(msgPattern.getPatternString(), 416 patternStart, 417 msgPattern.getPatternIndex(msgLimit) - patternStart); 418 return appendTo; 419 } 420 // JDK compatibility mode: Remove SKIP_SYNTAX. 421 return MessageImpl::appendSubMessageWithoutSkipSyntax(msgPattern, msgStart, appendTo); 422} 423 424int32_t 425ChoiceFormat::findSubMessage(const MessagePattern &pattern, int32_t partIndex, double number) { 426 int32_t count = pattern.countParts(); 427 int32_t msgStart; 428 // Iterate over (ARG_INT|DOUBLE, ARG_SELECTOR, message) tuples 429 // until ARG_LIMIT or end of choice-only pattern. 430 // Ignore the first number and selector and start the loop on the first message. 431 partIndex += 2; 432 for (;;) { 433 // Skip but remember the current sub-message. 434 msgStart = partIndex; 435 partIndex = pattern.getLimitPartIndex(partIndex); 436 if (++partIndex >= count) { 437 // Reached the end of the choice-only pattern. 438 // Return with the last sub-message. 439 break; 440 } 441 const MessagePattern::Part &part = pattern.getPart(partIndex++); 442 UMessagePatternPartType type = part.getType(); 443 if (type == UMSGPAT_PART_TYPE_ARG_LIMIT) { 444 // Reached the end of the ChoiceFormat style. 445 // Return with the last sub-message. 446 break; 447 } 448 // part is an ARG_INT or ARG_DOUBLE 449 U_ASSERT(MessagePattern::Part::hasNumericValue(type)); 450 double boundary = pattern.getNumericValue(part); 451 // Fetch the ARG_SELECTOR character. 452 int32_t selectorIndex = pattern.getPatternIndex(partIndex++); 453 UChar boundaryChar = pattern.getPatternString().charAt(selectorIndex); 454 if (boundaryChar == LESS_THAN ? !(number > boundary) : !(number >= boundary)) { 455 // The number is in the interval between the previous boundary and the current one. 456 // Return with the sub-message between them. 457 // The !(a>b) and !(a>=b) comparisons are equivalent to 458 // (a<=b) and (a<b) except they "catch" NaN. 459 break; 460 } 461 } 462 return msgStart; 463} 464 465// ------------------------------------- 466// Formats an array of objects. Checks if the data type of the objects 467// to get the right value for formatting. 468 469UnicodeString& 470ChoiceFormat::format(const Formattable* objs, 471 int32_t cnt, 472 UnicodeString& appendTo, 473 FieldPosition& pos, 474 UErrorCode& status) const 475{ 476 if(cnt < 0) { 477 status = U_ILLEGAL_ARGUMENT_ERROR; 478 return appendTo; 479 } 480 if (msgPattern.countParts() == 0) { 481 status = U_INVALID_STATE_ERROR; 482 return appendTo; 483 } 484 485 for (int32_t i = 0; i < cnt; i++) { 486 double objDouble = objs[i].getDouble(status); 487 if (U_SUCCESS(status)) { 488 format(objDouble, appendTo, pos); 489 } 490 } 491 492 return appendTo; 493} 494 495// ------------------------------------- 496 497void 498ChoiceFormat::parse(const UnicodeString& text, 499 Formattable& result, 500 ParsePosition& pos) const 501{ 502 result.setDouble(parseArgument(msgPattern, 0, text, pos)); 503} 504 505double 506ChoiceFormat::parseArgument( 507 const MessagePattern &pattern, int32_t partIndex, 508 const UnicodeString &source, ParsePosition &pos) { 509 // find the best number (defined as the one with the longest parse) 510 int32_t start = pos.getIndex(); 511 int32_t furthest = start; 512 double bestNumber = uprv_getNaN(); 513 double tempNumber = 0.0; 514 int32_t count = pattern.countParts(); 515 while (partIndex < count && pattern.getPartType(partIndex) != UMSGPAT_PART_TYPE_ARG_LIMIT) { 516 tempNumber = pattern.getNumericValue(pattern.getPart(partIndex)); 517 partIndex += 2; // skip the numeric part and ignore the ARG_SELECTOR 518 int32_t msgLimit = pattern.getLimitPartIndex(partIndex); 519 int32_t len = matchStringUntilLimitPart(pattern, partIndex, msgLimit, source, start); 520 if (len >= 0) { 521 int32_t newIndex = start + len; 522 if (newIndex > furthest) { 523 furthest = newIndex; 524 bestNumber = tempNumber; 525 if (furthest == source.length()) { 526 break; 527 } 528 } 529 } 530 partIndex = msgLimit + 1; 531 } 532 if (furthest == start) { 533 pos.setErrorIndex(start); 534 } else { 535 pos.setIndex(furthest); 536 } 537 return bestNumber; 538} 539 540int32_t 541ChoiceFormat::matchStringUntilLimitPart( 542 const MessagePattern &pattern, int32_t partIndex, int32_t limitPartIndex, 543 const UnicodeString &source, int32_t sourceOffset) { 544 int32_t matchingSourceLength = 0; 545 const UnicodeString &msgString = pattern.getPatternString(); 546 int32_t prevIndex = pattern.getPart(partIndex).getLimit(); 547 for (;;) { 548 const MessagePattern::Part &part = pattern.getPart(++partIndex); 549 if (partIndex == limitPartIndex || part.getType() == UMSGPAT_PART_TYPE_SKIP_SYNTAX) { 550 int32_t index = part.getIndex(); 551 int32_t length = index - prevIndex; 552 if (length != 0 && 0 != source.compare(sourceOffset, length, msgString, prevIndex, length)) { 553 return -1; // mismatch 554 } 555 matchingSourceLength += length; 556 if (partIndex == limitPartIndex) { 557 return matchingSourceLength; 558 } 559 prevIndex = part.getLimit(); // SKIP_SYNTAX 560 } 561 } 562} 563 564// ------------------------------------- 565 566Format* 567ChoiceFormat::clone() const 568{ 569 ChoiceFormat *aCopy = new ChoiceFormat(*this); 570 return aCopy; 571} 572 573U_NAMESPACE_END 574 575#endif /* #if !UCONFIG_NO_FORMATTING */ 576 577//eof 578