1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2010, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 * Copyright (C) 2010 , Yahoo! Inc.
6 ********************************************************************
7 *
8 * File SELFMT.CPP
9 *
10 * Modification History:
11 *
12 *   Date        Name        Description
13 *   11/11/09    kirtig      Finished first cut of implementation.
14 *   11/16/09    kirtig      Improved version
15 ********************************************************************/
16
17#include "unicode/utypeinfo.h"  // for 'typeid' to work
18
19#include "unicode/utypes.h"
20#include "unicode/ustring.h"
21#include "unicode/ucnv_err.h"
22#include "unicode/uchar.h"
23#include "unicode/umsg.h"
24#include "unicode/rbnf.h"
25#include "cmemory.h"
26#include "util.h"
27#include "uassert.h"
28#include "ustrfmt.h"
29#include "uvector.h"
30
31#include "unicode/selfmt.h"
32#include "selfmtimpl.h"
33
34#if !UCONFIG_NO_FORMATTING
35
36U_NAMESPACE_BEGIN
37
// Expands the ICU run-time type identification boilerplate for SelectFormat.
// NOTE(review): the macro is defined outside this file -- confirm exactly
// which members it emits.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SelectFormat)

// NOTE(review): MAX_KEYWORD_SIZE is not referenced anywhere else in this file;
// presumably an intended upper bound on keyword length -- confirm before use.
#define MAX_KEYWORD_SIZE 30
// Zero-terminated keyword built from the LOW_O, LOW_T, LOW_H, LOW_E, LOW_R
// character constants (presumably spelling "other"). A pattern must define
// this keyword, and format() falls back to it when the requested keyword has
// no entry of its own.
static const UChar SELECT_KEYWORD_OTHER[] = {LOW_O, LOW_T, LOW_H, LOW_E, LOW_R, 0};
42
43SelectFormat::SelectFormat(const UnicodeString& pat, UErrorCode& status) : parsedValuesHash(NULL) {
44   if (U_FAILURE(status)) {
45      return;
46   }
47   initHashTable(status);
48   applyPattern(pat, status);
49}
50
51SelectFormat::SelectFormat(const SelectFormat& other) : Format(other), parsedValuesHash(NULL) {
52   UErrorCode status = U_ZERO_ERROR;
53   pattern = other.pattern;
54   copyHashtable(other.parsedValuesHash, status);
55}
56
57SelectFormat::~SelectFormat() {
58  cleanHashTable();
59}
60
61void SelectFormat::initHashTable(UErrorCode &status) {
62  if (U_FAILURE(status)) {
63    return;
64  }
65  // has inited
66  if (parsedValuesHash != NULL) {
67    return;
68  }
69
70  parsedValuesHash = new Hashtable(TRUE, status);
71  if (U_FAILURE(status)) {
72    cleanHashTable();
73    return;
74  } else {
75    if (parsedValuesHash == NULL) {
76      status = U_MEMORY_ALLOCATION_ERROR;
77      return;
78    }
79  }
80  // to use hashtable->equals(), must set Value Compartor.
81  parsedValuesHash->setValueComparator(uhash_compareCaselessUnicodeString);
82}
83
84void SelectFormat::cleanHashTable() {
85  if (parsedValuesHash != NULL) {
86    delete parsedValuesHash;
87    parsedValuesHash = NULL;
88  }
89}
90
91void
92SelectFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) {
93    if (U_FAILURE(status)) {
94      return;
95    }
96
97    pattern = newPattern;
98    enum State{ startState, keywordState, pastKeywordState, phraseState};
99
100    //Initialization
101    UnicodeString keyword ;
102    UnicodeString phrase ;
103    UnicodeString* ptrPhrase ;
104    int32_t braceCount = 0;
105
106    if (parsedValuesHash == NULL) {
107      initHashTable(status);
108      if (U_FAILURE(status)) {
109        return;
110      }
111    }
112    parsedValuesHash->removeAll();
113    parsedValuesHash->setValueDeleter(uhash_deleteUnicodeString);
114
115    //Process the state machine
116    State state = startState;
117    for (int32_t i = 0; i < pattern.length(); ++i) {
118        //Get the character and check its type
119        UChar ch = pattern.charAt(i);
120        CharacterClass type = classifyCharacter(ch);
121
122        //Allow any character in phrase but nowhere else
123        if ( type == tOther ) {
124            if ( state == phraseState ){
125                phrase += ch;
126                continue;
127            }else {
128                status = U_PATTERN_SYNTAX_ERROR;
129                cleanHashTable();
130                return;
131            }
132        }
133
134        //Process the state machine
135        switch (state) {
136            //At the start of pattern
137            case startState:
138                switch (type) {
139                    case tSpace:
140                        break;
141                    case tStartKeyword:
142                        state = keywordState;
143                        keyword += ch;
144                        break;
145                    //If anything else is encountered, it's a syntax error
146                    default:
147                        status = U_PATTERN_SYNTAX_ERROR;
148                        cleanHashTable();
149                        return;
150                }//end of switch(type)
151                break;
152
153            //Handle the keyword state
154            case keywordState:
155                switch (type) {
156                    case tSpace:
157                        state = pastKeywordState;
158                        break;
159                    case tStartKeyword:
160                    case tContinueKeyword:
161                        keyword += ch;
162                        break;
163                    case tLeftBrace:
164                        state = phraseState;
165                        break;
166                    //If anything else is encountered, it's a syntax error
167                    default:
168                        status = U_PATTERN_SYNTAX_ERROR;
169                        cleanHashTable();
170                        return;
171                }//end of switch(type)
172                break;
173
174            //Handle the pastkeyword state
175            case pastKeywordState:
176                switch (type) {
177                    case tSpace:
178                        break;
179                    case tLeftBrace:
180                        state = phraseState;
181                        break;
182                    //If anything else is encountered, it's a syntax error
183                    default:
184                        status = U_PATTERN_SYNTAX_ERROR;
185                        cleanHashTable();
186                        return;
187                }//end of switch(type)
188                break;
189
190            //Handle the phrase state
191            case phraseState:
192                switch (type) {
193                    case tLeftBrace:
194                        braceCount++;
195                        phrase += ch;
196                        break;
197                    case tRightBrace:
198                        //Matching keyword, phrase pair found
199                        if (braceCount == 0){
200                            //Check validity of keyword
201                            if (parsedValuesHash->get(keyword) != NULL) {
202                                status = U_DUPLICATE_KEYWORD;
203                                cleanHashTable();
204                                return;
205                            }
206                            if (keyword.length() == 0) {
207                                status = U_PATTERN_SYNTAX_ERROR;
208                                cleanHashTable();
209                                return;
210                            }
211
212                            //Store the keyword, phrase pair in hashTable
213                            ptrPhrase = new UnicodeString(phrase);
214                            parsedValuesHash->put( keyword, ptrPhrase, status);
215
216                            //Reinitialize
217                            keyword.remove();
218                            phrase.remove();
219                            ptrPhrase = NULL;
220                            state = startState;
221                        }
222
223                        if (braceCount > 0){
224                            braceCount-- ;
225                            phrase += ch;
226                        }
227                        break;
228                    default:
229                        phrase += ch;
230                }//end of switch(type)
231                break;
232
233            //Handle the  default case of switch(state)
234            default:
235                status = U_PATTERN_SYNTAX_ERROR;
236                cleanHashTable();
237                return;
238
239        }//end of switch(state)
240    }
241
242    //Check if the state machine is back to startState
243    if ( state != startState){
244        status = U_PATTERN_SYNTAX_ERROR;
245        cleanHashTable();
246        return;
247    }
248
249    //Check if "other" keyword is present
250    if ( !checkSufficientDefinition() ) {
251        status = U_DEFAULT_KEYWORD_MISSING;
252        cleanHashTable();
253    }
254    return;
255}
256
257UnicodeString&
258SelectFormat::format(const Formattable& obj,
259                   UnicodeString& appendTo,
260                   FieldPosition& pos,
261                   UErrorCode& status) const
262{
263    switch (obj.getType())
264    {
265    case Formattable::kString:
266        return format(obj.getString(), appendTo, pos, status);
267    default:
268        if( U_SUCCESS(status) ){
269            status = U_ILLEGAL_ARGUMENT_ERROR;
270        }
271        return appendTo;
272    }
273}
274
275UnicodeString&
276SelectFormat::format(const UnicodeString& keyword,
277                     UnicodeString& appendTo,
278                     FieldPosition& /*pos */,
279                     UErrorCode& status) const {
280
281    if (U_FAILURE(status)) return appendTo;
282
283    if (parsedValuesHash == NULL) {
284        status = U_INVALID_FORMAT_ERROR;
285        return appendTo;
286    }
287
288    //Check for the validity of the keyword
289    if ( !checkValidKeyword(keyword) ){
290        status = U_ILLEGAL_ARGUMENT_ERROR;
291        return appendTo;
292    }
293
294    UnicodeString *selectedPattern = (UnicodeString *)parsedValuesHash->get(keyword);
295    if (selectedPattern == NULL) {
296        selectedPattern = (UnicodeString *)parsedValuesHash->get(SELECT_KEYWORD_OTHER);
297    }
298
299    return appendTo += *selectedPattern;
300}
301
302UnicodeString&
303SelectFormat::toPattern(UnicodeString& appendTo) {
304    return appendTo += pattern;
305}
306
307SelectFormat::CharacterClass
308SelectFormat::classifyCharacter(UChar ch) const{
309    if ((ch >= CAP_A) && (ch <= CAP_Z)) {
310        return tStartKeyword;
311    }
312    if ((ch >= LOW_A) && (ch <= LOW_Z)) {
313        return tStartKeyword;
314    }
315    if ((ch >= U_ZERO) && (ch <= U_NINE)) {
316        return tContinueKeyword;
317    }
318    if ( uprv_isRuleWhiteSpace(ch) ){
319        return tSpace;
320    }
321    switch (ch) {
322        case LEFTBRACE:
323            return tLeftBrace;
324        case RIGHTBRACE:
325            return tRightBrace;
326        case HYPHEN:
327        case LOWLINE:
328            return tContinueKeyword;
329        default :
330            return tOther;
331    }
332}
333
334UBool
335SelectFormat::checkSufficientDefinition() {
336    // Check that at least the default rule is defined.
337    return (parsedValuesHash != NULL &&
338           parsedValuesHash->get(SELECT_KEYWORD_OTHER) != NULL) ;
339}
340
341UBool
342SelectFormat::checkValidKeyword(const UnicodeString& argKeyword ) const{
343    int32_t len = argKeyword.length();
344    if (len < 1){
345        return FALSE;
346    }
347    CharacterClass type = classifyCharacter(argKeyword.charAt(0));
348    if( type != tStartKeyword ){
349        return FALSE;
350    }
351
352    for (int32_t i = 0; i < argKeyword.length(); ++i) {
353        type = classifyCharacter(argKeyword.charAt(i));
354        if( type != tStartKeyword && type != tContinueKeyword ){
355            return FALSE;
356        }
357    }
358    return TRUE;
359}
360
361Format* SelectFormat::clone() const
362{
363    return new SelectFormat(*this);
364}
365
366SelectFormat&
367SelectFormat::operator=(const SelectFormat& other) {
368    if (this != &other) {
369        UErrorCode status = U_ZERO_ERROR;
370        pattern = other.pattern;
371        copyHashtable(other.parsedValuesHash, status);
372    }
373    return *this;
374}
375
376UBool
377SelectFormat::operator==(const Format& other) const {
378    if( this == &other){
379        return TRUE;
380    }
381    if (typeid(*this) != typeid(other)) {
382        return  FALSE;
383    }
384    SelectFormat* fmt = (SelectFormat*)&other;
385    Hashtable* hashOther = fmt->parsedValuesHash;
386    if ( parsedValuesHash == NULL && hashOther == NULL)
387        return TRUE;
388    if ( parsedValuesHash == NULL || hashOther == NULL)
389        return FALSE;
390    return parsedValuesHash->equals(*hashOther);
391}
392
393UBool
394SelectFormat::operator!=(const Format& other) const {
395    return  !operator==(other);
396}
397
398void
399SelectFormat::parseObject(const UnicodeString& /*source*/,
400                        Formattable& /*result*/,
401                        ParsePosition& pos) const
402{
403    // TODO: not yet supported in icu4j and icu4c
404    pos.setErrorIndex(pos.getIndex());
405}
406
407void
408SelectFormat::copyHashtable(Hashtable *other, UErrorCode& status) {
409    if (U_FAILURE(status)) {
410      return;
411    }
412    if (other == NULL) {
413      cleanHashTable();
414      return;
415    }
416    if (parsedValuesHash == NULL) {
417      initHashTable(status);
418      if (U_FAILURE(status)) {
419        return;
420      }
421    }
422
423    parsedValuesHash->removeAll();
424    parsedValuesHash->setValueDeleter(uhash_deleteUnicodeString);
425
426    int32_t pos = -1;
427    const UHashElement* elem = NULL;
428
429    // walk through the hash table and create a deep clone
430    while ((elem = other->nextElement(pos)) != NULL){
431        const UHashTok otherKeyTok = elem->key;
432        UnicodeString* otherKey = (UnicodeString*)otherKeyTok.pointer;
433        const UHashTok otherKeyToVal = elem->value;
434        UnicodeString* otherValue = (UnicodeString*)otherKeyToVal.pointer;
435        parsedValuesHash->put(*otherKey, new UnicodeString(*otherValue), status);
436        if (U_FAILURE(status)){
437            cleanHashTable();
438            return;
439        }
440    }
441}
442
443U_NAMESPACE_END
444
445#endif /* #if !UCONFIG_NO_FORMATTING */
446
447//eof
448