// selfmt.cpp revision 50294ead5e5d23f5bbfed76e00e6b510bd41eee1
1/******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2010, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 * Copyright (C) 2010 , Yahoo! Inc. 6 ******************************************************************** 7 * 8 * File SELFMT.CPP 9 * 10 * Modification History: 11 * 12 * Date Name Description 13 * 11/11/09 kirtig Finished first cut of implementation. 14 * 11/16/09 kirtig Improved version 15 ********************************************************************/ 16 17#include "unicode/utypes.h" 18#include "unicode/ustring.h" 19#include "unicode/ucnv_err.h" 20#include "unicode/uchar.h" 21#include "unicode/umsg.h" 22#include "unicode/rbnf.h" 23#include "cmemory.h" 24#include "util.h" 25#include "uassert.h" 26#include "ustrfmt.h" 27#include "uvector.h" 28 29#include "unicode/selfmt.h" 30#include "selfmtimpl.h" 31 32#if !UCONFIG_NO_FORMATTING 33 34U_NAMESPACE_BEGIN 35 36UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SelectFormat) 37 38#define MAX_KEYWORD_SIZE 30 39static const UChar SELECT_KEYWORD_OTHER[] = {LOW_O, LOW_T, LOW_H, LOW_E, LOW_R, 0}; 40 41SelectFormat::SelectFormat(const UnicodeString& pat, UErrorCode& status) { 42 if (U_FAILURE(status)) { 43 return; 44 } 45 init(status); 46 applyPattern(pat, status); 47} 48 49SelectFormat::SelectFormat(const SelectFormat& other) : Format(other) { 50 UErrorCode status = U_ZERO_ERROR; 51 pattern = other.pattern; 52 copyHashtable(other.parsedValuesHash, status); 53} 54 55SelectFormat::~SelectFormat() { 56 delete parsedValuesHash; 57} 58 59void 60SelectFormat::init(UErrorCode& status) { 61 if (U_FAILURE(status)) { 62 return; 63 } 64 parsedValuesHash = NULL; 65 pattern.remove(); 66} 67 68 69void 70SelectFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) { 71 if (U_FAILURE(status)) { 72 return; 73 } 74 75 this->pattern = newPattern; 76 enum State{ startState, keywordState, pastKeywordState, phraseState}; 77 78 
//Initialization 79 UnicodeString keyword ; 80 UnicodeString phrase ; 81 UnicodeString* ptrPhrase ; 82 int32_t braceCount = 0; 83 84 delete parsedValuesHash; 85 this->parsedValuesHash = NULL; 86 parsedValuesHash = new Hashtable(TRUE, status); 87 if (U_FAILURE(status)) { 88 return; 89 } 90 parsedValuesHash->setValueDeleter(uhash_deleteUnicodeString); 91 92 //Process the state machine 93 State state = startState; 94 for (int32_t i = 0; i < pattern.length(); ++i) { 95 //Get the character and check its type 96 UChar ch = pattern.charAt(i); 97 CharacterClass type = classifyCharacter(ch); 98 99 //Allow any character in phrase but nowhere else 100 if ( type == tOther ) { 101 if ( state == phraseState ){ 102 phrase += ch; 103 continue; 104 }else { 105 status = U_PATTERN_SYNTAX_ERROR; 106 return; 107 } 108 } 109 110 //Process the state machine 111 switch (state) { 112 //At the start of pattern 113 case startState: 114 switch (type) { 115 case tSpace: 116 break; 117 case tStartKeyword: 118 state = keywordState; 119 keyword += ch; 120 break; 121 //If anything else is encountered, it's a syntax error 122 default: 123 status = U_PATTERN_SYNTAX_ERROR; 124 return; 125 }//end of switch(type) 126 break; 127 128 //Handle the keyword state 129 case keywordState: 130 switch (type) { 131 case tSpace: 132 state = pastKeywordState; 133 break; 134 case tStartKeyword: 135 case tContinueKeyword: 136 keyword += ch; 137 break; 138 case tLeftBrace: 139 state = phraseState; 140 break; 141 //If anything else is encountered, it's a syntax error 142 default: 143 status = U_PATTERN_SYNTAX_ERROR; 144 return; 145 }//end of switch(type) 146 break; 147 148 //Handle the pastkeyword state 149 case pastKeywordState: 150 switch (type) { 151 case tSpace: 152 break; 153 case tLeftBrace: 154 state = phraseState; 155 break; 156 //If anything else is encountered, it's a syntax error 157 default: 158 status = U_PATTERN_SYNTAX_ERROR; 159 return; 160 }//end of switch(type) 161 break; 162 163 //Handle the phrase 
state 164 case phraseState: 165 switch (type) { 166 case tLeftBrace: 167 braceCount++; 168 phrase += ch; 169 break; 170 case tRightBrace: 171 //Matching keyword, phrase pair found 172 if (braceCount == 0){ 173 //Check validity of keyword 174 if (parsedValuesHash->get(keyword) != NULL) { 175 status = U_DUPLICATE_KEYWORD; 176 return; 177 } 178 if (keyword.length() == 0) { 179 status = U_PATTERN_SYNTAX_ERROR; 180 return; 181 } 182 183 //Store the keyword, phrase pair in hashTable 184 ptrPhrase = new UnicodeString(phrase); 185 parsedValuesHash->put( keyword, ptrPhrase, status); 186 187 //Reinitialize 188 keyword.remove(); 189 phrase.remove(); 190 ptrPhrase = NULL; 191 state = startState; 192 } 193 194 if (braceCount > 0){ 195 braceCount-- ; 196 phrase += ch; 197 } 198 break; 199 default: 200 phrase += ch; 201 }//end of switch(type) 202 break; 203 204 //Handle the default case of switch(state) 205 default: 206 status = U_PATTERN_SYNTAX_ERROR; 207 return; 208 209 }//end of switch(state) 210 } 211 212 //Check if the state machine is back to startState 213 if ( state != startState){ 214 status = U_PATTERN_SYNTAX_ERROR; 215 return; 216 } 217 218 //Check if "other" keyword is present 219 if ( !checkSufficientDefinition() ) { 220 status = U_DEFAULT_KEYWORD_MISSING; 221 } 222 return; 223} 224 225UnicodeString& 226SelectFormat::format(const Formattable& obj, 227 UnicodeString& appendTo, 228 FieldPosition& pos, 229 UErrorCode& status) const 230{ 231 switch (obj.getType()) 232 { 233 case Formattable::kString: 234 return format(obj.getString(), appendTo, pos, status); 235 default: 236 if( U_SUCCESS(status) ){ 237 status = U_ILLEGAL_ARGUMENT_ERROR; 238 } 239 return appendTo; 240 } 241} 242 243UnicodeString& 244SelectFormat::format(const UnicodeString& keyword, 245 UnicodeString& appendTo, 246 FieldPosition& pos, 247 UErrorCode& status) const { 248 249 if (U_FAILURE(status)) return appendTo; 250 251 //Check for the validity of the keyword 252 if ( !checkValidKeyword(keyword) ){ 253 
status = U_ILLEGAL_ARGUMENT_ERROR; 254 return appendTo; 255 } 256 257 if (parsedValuesHash == NULL) { 258 status = U_INVALID_FORMAT_ERROR; 259 return appendTo; 260 } 261 262 UnicodeString *selectedPattern = (UnicodeString *)parsedValuesHash->get(keyword); 263 if (selectedPattern == NULL) { 264 selectedPattern = (UnicodeString *)parsedValuesHash->get(SELECT_KEYWORD_OTHER); 265 } 266 267 return appendTo += *selectedPattern; 268} 269 270UnicodeString& 271SelectFormat::toPattern(UnicodeString& appendTo) { 272 return appendTo += pattern; 273} 274 275SelectFormat::CharacterClass 276SelectFormat::classifyCharacter(UChar ch) const{ 277 if ((ch >= CAP_A) && (ch <= CAP_Z)) { 278 return tStartKeyword; 279 } 280 if ((ch >= LOW_A) && (ch <= LOW_Z)) { 281 return tStartKeyword; 282 } 283 if ((ch >= U_ZERO) && (ch <= U_NINE)) { 284 return tContinueKeyword; 285 } 286 if ( uprv_isRuleWhiteSpace(ch) ){ 287 return tSpace; 288 } 289 switch (ch) { 290 case LEFTBRACE: 291 return tLeftBrace; 292 case RIGHTBRACE: 293 return tRightBrace; 294 case HYPHEN: 295 case LOWLINE: 296 return tContinueKeyword; 297 default : 298 return tOther; 299 } 300} 301 302UBool 303SelectFormat::checkSufficientDefinition() { 304 // Check that at least the default rule is defined. 
305 return (parsedValuesHash != NULL && 306 parsedValuesHash->get(SELECT_KEYWORD_OTHER) != NULL) ; 307} 308 309UBool 310SelectFormat::checkValidKeyword(const UnicodeString& argKeyword ) const{ 311 int32_t len = argKeyword.length(); 312 if (len < 1){ 313 return FALSE; 314 } 315 CharacterClass type = classifyCharacter(argKeyword.charAt(0)); 316 if( type != tStartKeyword ){ 317 return FALSE; 318 } 319 320 for (int32_t i = 0; i < argKeyword.length(); ++i) { 321 type = classifyCharacter(argKeyword.charAt(i)); 322 if( type != tStartKeyword && type != tContinueKeyword ){ 323 return FALSE; 324 } 325 } 326 return TRUE; 327} 328 329Format* SelectFormat::clone() const 330{ 331 return new SelectFormat(*this); 332} 333 334SelectFormat& 335SelectFormat::operator=(const SelectFormat& other) { 336 if (this != &other) { 337 UErrorCode status = U_ZERO_ERROR; 338 delete parsedValuesHash; 339 pattern = other.pattern; 340 copyHashtable(other.parsedValuesHash, status); 341 } 342 return *this; 343} 344 345UBool 346SelectFormat::operator==(const Format& other) const { 347 if( this == &other){ 348 return TRUE; 349 } 350 if( other.getDynamicClassID() != SelectFormat::getStaticClassID() ){ 351 return FALSE; 352 } 353 SelectFormat* fmt = (SelectFormat*)&other; 354 Hashtable* hashOther = fmt->parsedValuesHash; 355 if ( parsedValuesHash == NULL && hashOther == NULL) 356 return TRUE; 357 if ( parsedValuesHash == NULL || hashOther == NULL) 358 return FALSE; 359 if ( hashOther->count() != parsedValuesHash->count() ){ 360 return FALSE; 361 } 362 363 const UHashElement* elem = NULL; 364 int32_t pos = -1; 365 while ((elem = hashOther->nextElement(pos)) != NULL) { 366 const UHashTok otherKeyTok = elem->key; 367 UnicodeString* otherKey = (UnicodeString*)otherKeyTok.pointer; 368 const UHashTok otherKeyToVal = elem->value; 369 UnicodeString* otherValue = (UnicodeString*)otherKeyToVal.pointer; 370 371 UnicodeString* thisElemValue = (UnicodeString*)parsedValuesHash->get(*otherKey); 372 if ( thisElemValue 
== NULL ){ 373 return FALSE; 374 } 375 if ( *thisElemValue != *otherValue){ 376 return FALSE; 377 } 378 379 } 380 pos = -1; 381 while ((elem = parsedValuesHash->nextElement(pos)) != NULL) { 382 const UHashTok thisKeyTok = elem->key; 383 UnicodeString* thisKey = (UnicodeString*)thisKeyTok.pointer; 384 const UHashTok thisKeyToVal = elem->value; 385 UnicodeString* thisValue = (UnicodeString*)thisKeyToVal.pointer; 386 387 UnicodeString* otherElemValue = (UnicodeString*)hashOther->get(*thisKey); 388 if ( otherElemValue == NULL ){ 389 return FALSE; 390 } 391 if ( *otherElemValue != *thisValue){ 392 return FALSE; 393 } 394 395 } 396 return TRUE; 397} 398 399UBool 400SelectFormat::operator!=(const Format& other) const { 401 return !operator==(other); 402} 403 404void 405SelectFormat::parseObject(const UnicodeString& /*source*/, 406 Formattable& /*result*/, 407 ParsePosition& pos) const 408{ 409 // TODO: not yet supported in icu4j and icu4c 410 pos.setErrorIndex(pos.getIndex()); 411} 412 413void 414SelectFormat::copyHashtable(Hashtable *other, UErrorCode& status) { 415 if (other == NULL) { 416 parsedValuesHash = NULL; 417 return; 418 } 419 parsedValuesHash = new Hashtable(TRUE, status); 420 if (U_FAILURE(status)){ 421 return; 422 } 423 parsedValuesHash->setValueDeleter(uhash_deleteUnicodeString); 424 425 int32_t pos = -1; 426 const UHashElement* elem = NULL; 427 428 // walk through the hash table and create a deep clone 429 while ((elem = other->nextElement(pos)) != NULL){ 430 const UHashTok otherKeyTok = elem->key; 431 UnicodeString* otherKey = (UnicodeString*)otherKeyTok.pointer; 432 const UHashTok otherKeyToVal = elem->value; 433 UnicodeString* otherValue = (UnicodeString*)otherKeyToVal.pointer; 434 parsedValuesHash->put(*otherKey, new UnicodeString(*otherValue), status); 435 if (U_FAILURE(status)){ 436 return; 437 } 438 } 439} 440 441U_NAMESPACE_END 442 443#endif /* #if !UCONFIG_NO_FORMATTING */ 444 445//eof 446