selfmt.cpp revision 50294ead5e5d23f5bbfed76e00e6b510bd41eee1
1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2010, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 * Copyright (C) 2010 , Yahoo! Inc.
6 ********************************************************************
7 *
8 * File SELFMT.CPP
9 *
10 * Modification History:
11 *
12 *   Date        Name        Description
13 *   11/11/09    kirtig      Finished first cut of implementation.
14 *   11/16/09    kirtig      Improved version
15 ********************************************************************/
16
17#include "unicode/utypes.h"
18#include "unicode/ustring.h"
19#include "unicode/ucnv_err.h"
20#include "unicode/uchar.h"
21#include "unicode/umsg.h"
22#include "unicode/rbnf.h"
23#include "cmemory.h"
24#include "util.h"
25#include "uassert.h"
26#include "ustrfmt.h"
27#include "uvector.h"
28
29#include "unicode/selfmt.h"
30#include "selfmtimpl.h"
31
32#if !UCONFIG_NO_FORMATTING
33
34U_NAMESPACE_BEGIN
35
36UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SelectFormat)
37
// NOTE(review): MAX_KEYWORD_SIZE is not referenced anywhere in the visible part
// of this file; presumably a leftover keyword-length limit -- confirm before removing.
38#define MAX_KEYWORD_SIZE 30
// Zero-terminated UChar string built from LOW_O, LOW_T, LOW_H, LOW_E, LOW_R.
// It is the key looked up by checkSufficientDefinition() (the "default rule")
// and the fallback key used by format() when the requested keyword has no entry.
39static const UChar SELECT_KEYWORD_OTHER[] = {LOW_O, LOW_T, LOW_H, LOW_E, LOW_R, 0};
40
41SelectFormat::SelectFormat(const UnicodeString& pat, UErrorCode& status) {
42   if (U_FAILURE(status)) {
43      return;
44   }
45   init(status);
46   applyPattern(pat, status);
47}
48
49SelectFormat::SelectFormat(const SelectFormat& other) : Format(other) {
50   UErrorCode status = U_ZERO_ERROR;
51   pattern = other.pattern;
52   copyHashtable(other.parsedValuesHash, status);
53}
54
55SelectFormat::~SelectFormat() {
56    delete parsedValuesHash;
57}
58
59void
60SelectFormat::init(UErrorCode& status) {
61    if (U_FAILURE(status)) {
62      return;
63    }
64    parsedValuesHash = NULL;
65    pattern.remove();
66}
67
68
69void
70SelectFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) {
71    if (U_FAILURE(status)) {
72      return;
73    }
74
75    this->pattern = newPattern;
76    enum State{ startState, keywordState, pastKeywordState, phraseState};
77
78    //Initialization
79    UnicodeString keyword ;
80    UnicodeString phrase ;
81    UnicodeString* ptrPhrase ;
82    int32_t braceCount = 0;
83
84    delete parsedValuesHash;
85    this->parsedValuesHash = NULL;
86    parsedValuesHash = new Hashtable(TRUE, status);
87    if (U_FAILURE(status)) {
88        return;
89    }
90    parsedValuesHash->setValueDeleter(uhash_deleteUnicodeString);
91
92    //Process the state machine
93    State state = startState;
94    for (int32_t i = 0; i < pattern.length(); ++i) {
95        //Get the character and check its type
96        UChar ch = pattern.charAt(i);
97        CharacterClass type = classifyCharacter(ch);
98
99        //Allow any character in phrase but nowhere else
100        if ( type == tOther ) {
101            if ( state == phraseState ){
102                phrase += ch;
103                continue;
104            }else {
105                status = U_PATTERN_SYNTAX_ERROR;
106                return;
107            }
108        }
109
110        //Process the state machine
111        switch (state) {
112            //At the start of pattern
113            case startState:
114                switch (type) {
115                    case tSpace:
116                        break;
117                    case tStartKeyword:
118                        state = keywordState;
119                        keyword += ch;
120                        break;
121                    //If anything else is encountered, it's a syntax error
122                    default:
123                        status = U_PATTERN_SYNTAX_ERROR;
124                        return;
125                }//end of switch(type)
126                break;
127
128            //Handle the keyword state
129            case keywordState:
130                switch (type) {
131                    case tSpace:
132                        state = pastKeywordState;
133                        break;
134                    case tStartKeyword:
135                    case tContinueKeyword:
136                        keyword += ch;
137                        break;
138                    case tLeftBrace:
139                        state = phraseState;
140                        break;
141                    //If anything else is encountered, it's a syntax error
142                    default:
143                        status = U_PATTERN_SYNTAX_ERROR;
144                        return;
145                }//end of switch(type)
146                break;
147
148            //Handle the pastkeyword state
149            case pastKeywordState:
150                switch (type) {
151                    case tSpace:
152                        break;
153                    case tLeftBrace:
154                        state = phraseState;
155                        break;
156                    //If anything else is encountered, it's a syntax error
157                    default:
158                        status = U_PATTERN_SYNTAX_ERROR;
159                        return;
160                }//end of switch(type)
161                break;
162
163            //Handle the phrase state
164            case phraseState:
165                switch (type) {
166                    case tLeftBrace:
167                        braceCount++;
168                        phrase += ch;
169                        break;
170                    case tRightBrace:
171                        //Matching keyword, phrase pair found
172                        if (braceCount == 0){
173                            //Check validity of keyword
174                            if (parsedValuesHash->get(keyword) != NULL) {
175                                status = U_DUPLICATE_KEYWORD;
176                                return;
177                            }
178                            if (keyword.length() == 0) {
179                                status = U_PATTERN_SYNTAX_ERROR;
180                                return;
181                            }
182
183                            //Store the keyword, phrase pair in hashTable
184                            ptrPhrase = new UnicodeString(phrase);
185                            parsedValuesHash->put( keyword, ptrPhrase, status);
186
187                            //Reinitialize
188                            keyword.remove();
189                            phrase.remove();
190                            ptrPhrase = NULL;
191                            state = startState;
192                        }
193
194                        if (braceCount > 0){
195                            braceCount-- ;
196                            phrase += ch;
197                        }
198                        break;
199                    default:
200                        phrase += ch;
201                }//end of switch(type)
202                break;
203
204            //Handle the  default case of switch(state)
205            default:
206                status = U_PATTERN_SYNTAX_ERROR;
207                return;
208
209        }//end of switch(state)
210    }
211
212    //Check if the state machine is back to startState
213    if ( state != startState){
214        status = U_PATTERN_SYNTAX_ERROR;
215        return;
216    }
217
218    //Check if "other" keyword is present
219    if ( !checkSufficientDefinition() ) {
220        status = U_DEFAULT_KEYWORD_MISSING;
221    }
222    return;
223}
224
225UnicodeString&
226SelectFormat::format(const Formattable& obj,
227                   UnicodeString& appendTo,
228                   FieldPosition& pos,
229                   UErrorCode& status) const
230{
231    switch (obj.getType())
232    {
233    case Formattable::kString:
234        return format(obj.getString(), appendTo, pos, status);
235    default:
236        if( U_SUCCESS(status) ){
237            status = U_ILLEGAL_ARGUMENT_ERROR;
238        }
239        return appendTo;
240    }
241}
242
243UnicodeString&
244SelectFormat::format(const UnicodeString& keyword,
245                     UnicodeString& appendTo,
246                     FieldPosition& pos,
247                     UErrorCode& status) const {
248
249    if (U_FAILURE(status)) return appendTo;
250
251    //Check for the validity of the keyword
252    if ( !checkValidKeyword(keyword) ){
253        status = U_ILLEGAL_ARGUMENT_ERROR;
254        return appendTo;
255    }
256
257    if (parsedValuesHash == NULL) {
258        status = U_INVALID_FORMAT_ERROR;
259        return appendTo;
260    }
261
262    UnicodeString *selectedPattern = (UnicodeString *)parsedValuesHash->get(keyword);
263    if (selectedPattern == NULL) {
264        selectedPattern = (UnicodeString *)parsedValuesHash->get(SELECT_KEYWORD_OTHER);
265    }
266
267    return appendTo += *selectedPattern;
268}
269
270UnicodeString&
271SelectFormat::toPattern(UnicodeString& appendTo) {
272    return appendTo += pattern;
273}
274
275SelectFormat::CharacterClass
276SelectFormat::classifyCharacter(UChar ch) const{
277    if ((ch >= CAP_A) && (ch <= CAP_Z)) {
278        return tStartKeyword;
279    }
280    if ((ch >= LOW_A) && (ch <= LOW_Z)) {
281        return tStartKeyword;
282    }
283    if ((ch >= U_ZERO) && (ch <= U_NINE)) {
284        return tContinueKeyword;
285    }
286    if ( uprv_isRuleWhiteSpace(ch) ){
287        return tSpace;
288    }
289    switch (ch) {
290        case LEFTBRACE:
291            return tLeftBrace;
292        case RIGHTBRACE:
293            return tRightBrace;
294        case HYPHEN:
295        case LOWLINE:
296            return tContinueKeyword;
297        default :
298            return tOther;
299    }
300}
301
302UBool
303SelectFormat::checkSufficientDefinition() {
304    // Check that at least the default rule is defined.
305    return (parsedValuesHash != NULL &&
306           parsedValuesHash->get(SELECT_KEYWORD_OTHER) != NULL) ;
307}
308
309UBool
310SelectFormat::checkValidKeyword(const UnicodeString& argKeyword ) const{
311    int32_t len = argKeyword.length();
312    if (len < 1){
313        return FALSE;
314    }
315    CharacterClass type = classifyCharacter(argKeyword.charAt(0));
316    if( type != tStartKeyword ){
317        return FALSE;
318    }
319
320    for (int32_t i = 0; i < argKeyword.length(); ++i) {
321        type = classifyCharacter(argKeyword.charAt(i));
322        if( type != tStartKeyword && type != tContinueKeyword ){
323            return FALSE;
324        }
325    }
326    return TRUE;
327}
328
329Format* SelectFormat::clone() const
330{
331    return new SelectFormat(*this);
332}
333
334SelectFormat&
335SelectFormat::operator=(const SelectFormat& other) {
336    if (this != &other) {
337        UErrorCode status = U_ZERO_ERROR;
338        delete parsedValuesHash;
339        pattern = other.pattern;
340        copyHashtable(other.parsedValuesHash, status);
341    }
342    return *this;
343}
344
345UBool
346SelectFormat::operator==(const Format& other) const {
347    if( this == &other){
348        return TRUE;
349    }
350    if( other.getDynamicClassID() != SelectFormat::getStaticClassID() ){
351        return  FALSE;
352    }
353    SelectFormat* fmt = (SelectFormat*)&other;
354    Hashtable* hashOther = fmt->parsedValuesHash;
355    if ( parsedValuesHash == NULL && hashOther == NULL)
356        return TRUE;
357    if ( parsedValuesHash == NULL || hashOther == NULL)
358        return FALSE;
359    if ( hashOther->count() != parsedValuesHash->count() ){
360        return FALSE;
361    }
362
363    const UHashElement* elem = NULL;
364    int32_t pos = -1;
365    while ((elem = hashOther->nextElement(pos)) != NULL) {
366        const UHashTok otherKeyTok = elem->key;
367        UnicodeString* otherKey = (UnicodeString*)otherKeyTok.pointer;
368        const UHashTok otherKeyToVal = elem->value;
369        UnicodeString* otherValue = (UnicodeString*)otherKeyToVal.pointer;
370
371        UnicodeString* thisElemValue = (UnicodeString*)parsedValuesHash->get(*otherKey);
372        if ( thisElemValue == NULL ){
373            return FALSE;
374        }
375        if ( *thisElemValue != *otherValue){
376            return FALSE;
377        }
378
379    }
380    pos = -1;
381    while ((elem = parsedValuesHash->nextElement(pos)) != NULL) {
382        const UHashTok thisKeyTok = elem->key;
383        UnicodeString* thisKey = (UnicodeString*)thisKeyTok.pointer;
384        const UHashTok thisKeyToVal = elem->value;
385        UnicodeString* thisValue = (UnicodeString*)thisKeyToVal.pointer;
386
387        UnicodeString* otherElemValue = (UnicodeString*)hashOther->get(*thisKey);
388        if ( otherElemValue == NULL ){
389            return FALSE;
390        }
391        if ( *otherElemValue != *thisValue){
392            return FALSE;
393        }
394
395    }
396    return TRUE;
397}
398
399UBool
400SelectFormat::operator!=(const Format& other) const {
401    return  !operator==(other);
402}
403
404void
405SelectFormat::parseObject(const UnicodeString& /*source*/,
406                        Formattable& /*result*/,
407                        ParsePosition& pos) const
408{
409    // TODO: not yet supported in icu4j and icu4c
410    pos.setErrorIndex(pos.getIndex());
411}
412
413void
414SelectFormat::copyHashtable(Hashtable *other, UErrorCode& status) {
415    if (other == NULL) {
416        parsedValuesHash = NULL;
417        return;
418    }
419    parsedValuesHash = new Hashtable(TRUE, status);
420    if (U_FAILURE(status)){
421        return;
422    }
423    parsedValuesHash->setValueDeleter(uhash_deleteUnicodeString);
424
425    int32_t pos = -1;
426    const UHashElement* elem = NULL;
427
428    // walk through the hash table and create a deep clone
429    while ((elem = other->nextElement(pos)) != NULL){
430        const UHashTok otherKeyTok = elem->key;
431        UnicodeString* otherKey = (UnicodeString*)otherKeyTok.pointer;
432        const UHashTok otherKeyToVal = elem->value;
433        UnicodeString* otherValue = (UnicodeString*)otherKeyToVal.pointer;
434        parsedValuesHash->put(*otherKey, new UnicodeString(*otherValue), status);
435        if (U_FAILURE(status)){
436            return;
437        }
438    }
439}
440
441U_NAMESPACE_END
442
443#endif /* #if !UCONFIG_NO_FORMATTING */
444
445//eof
446