1/*
2**********************************************************************
3* Copyright (c) 2002-2011, International Business Machines
4* Corporation and others.  All Rights Reserved.
5**********************************************************************
6* Author: Alan Liu
7* Created: October 30 2002
8* Since: ICU 2.4
9* 2010nov19 Markus Scherer  Rewrite for formatVersion 2.
10**********************************************************************
11*/
12#include "propname.h"
13#include "unicode/uchar.h"
14#include "unicode/udata.h"
15#include "unicode/uscript.h"
16#include "umutex.h"
17#include "cmemory.h"
18#include "cstring.h"
19#include "ucln_cmn.h"
20#include "uarrsort.h"
21#include "uinvchar.h"
22
23#define INCLUDED_FROM_PROPNAME_CPP
24#include "propname_data.h"
25
26U_CDECL_BEGIN
27
28/**
29 * Get the next non-ignorable ASCII character from a property name
30 * and lowercases it.
31 * @return ((advance count for the name)<<8)|character
32 */
33static inline int32_t
34getASCIIPropertyNameChar(const char *name) {
35    int32_t i;
36    char c;
37
38    /* Ignore delimiters '-', '_', and ASCII White_Space */
39    for(i=0;
40        (c=name[i++])==0x2d || c==0x5f ||
41        c==0x20 || (0x09<=c && c<=0x0d);
42    ) {}
43
44    if(c!=0) {
45        return (i<<8)|(uint8_t)uprv_asciitolower((char)c);
46    } else {
47        return i<<8;
48    }
49}
50
51/**
52 * Get the next non-ignorable EBCDIC character from a property name
53 * and lowercases it.
54 * @return ((advance count for the name)<<8)|character
55 */
56static inline int32_t
57getEBCDICPropertyNameChar(const char *name) {
58    int32_t i;
59    char c;
60
61    /* Ignore delimiters '-', '_', and EBCDIC White_Space */
62    for(i=0;
63        (c=name[i++])==0x60 || c==0x6d ||
64        c==0x40 || c==0x05 || c==0x15 || c==0x25 || c==0x0b || c==0x0c || c==0x0d;
65    ) {}
66
67    if(c!=0) {
68        return (i<<8)|(uint8_t)uprv_ebcdictolower((char)c);
69    } else {
70        return i<<8;
71    }
72}
73
74/**
75 * Unicode property names and property value names are compared "loosely".
76 *
77 * UCD.html 4.0.1 says:
78 *   For all property names, property value names, and for property values for
79 *   Enumerated, Binary, or Catalog properties, use the following
80 *   loose matching rule:
81 *
82 *   LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
83 *
84 * This function does just that, for (char *) name strings.
85 * It is almost identical to ucnv_compareNames() but also ignores
86 * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
87 *
88 * @internal
89 */
90
91U_CAPI int32_t U_EXPORT2
92uprv_compareASCIIPropertyNames(const char *name1, const char *name2) {
93    int32_t rc, r1, r2;
94
95    for(;;) {
96        r1=getASCIIPropertyNameChar(name1);
97        r2=getASCIIPropertyNameChar(name2);
98
99        /* If we reach the ends of both strings then they match */
100        if(((r1|r2)&0xff)==0) {
101            return 0;
102        }
103
104        /* Compare the lowercased characters */
105        if(r1!=r2) {
106            rc=(r1&0xff)-(r2&0xff);
107            if(rc!=0) {
108                return rc;
109            }
110        }
111
112        name1+=r1>>8;
113        name2+=r2>>8;
114    }
115}
116
117U_CAPI int32_t U_EXPORT2
118uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) {
119    int32_t rc, r1, r2;
120
121    for(;;) {
122        r1=getEBCDICPropertyNameChar(name1);
123        r2=getEBCDICPropertyNameChar(name2);
124
125        /* If we reach the ends of both strings then they match */
126        if(((r1|r2)&0xff)==0) {
127            return 0;
128        }
129
130        /* Compare the lowercased characters */
131        if(r1!=r2) {
132            rc=(r1&0xff)-(r2&0xff);
133            if(rc!=0) {
134                return rc;
135            }
136        }
137
138        name1+=r1>>8;
139        name2+=r2>>8;
140    }
141}
142
143U_CDECL_END
144
145U_NAMESPACE_BEGIN
146
147int32_t PropNameData::findProperty(int32_t property) {
148    int32_t i=1;  // valueMaps index, initially after numRanges
149    for(int32_t numRanges=valueMaps[0]; numRanges>0; --numRanges) {
150        // Read and skip the start and limit of this range.
151        int32_t start=valueMaps[i];
152        int32_t limit=valueMaps[i+1];
153        i+=2;
154        if(property<start) {
155            break;
156        }
157        if(property<limit) {
158            return i+(property-start)*2;
159        }
160        i+=(limit-start)*2;  // Skip all entries for this range.
161    }
162    return 0;
163}
164
165int32_t PropNameData::findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value) {
166    if(valueMapIndex==0) {
167        return 0;  // The property does not have named values.
168    }
169    ++valueMapIndex;  // Skip the BytesTrie offset.
170    int32_t numRanges=valueMaps[valueMapIndex++];
171    if(numRanges<0x10) {
172        // Ranges of values.
173        for(; numRanges>0; --numRanges) {
174            // Read and skip the start and limit of this range.
175            int32_t start=valueMaps[valueMapIndex];
176            int32_t limit=valueMaps[valueMapIndex+1];
177            valueMapIndex+=2;
178            if(value<start) {
179                break;
180            }
181            if(value<limit) {
182                return valueMaps[valueMapIndex+value-start];
183            }
184            valueMapIndex+=limit-start;  // Skip all entries for this range.
185        }
186    } else {
187        // List of values.
188        int32_t valuesStart=valueMapIndex;
189        int32_t nameGroupOffsetsStart=valueMapIndex+numRanges-0x10;
190        do {
191            int32_t v=valueMaps[valueMapIndex];
192            if(value<v) {
193                break;
194            }
195            if(value==v) {
196                return valueMaps[nameGroupOffsetsStart+valueMapIndex-valuesStart];
197            }
198        } while(++valueMapIndex<nameGroupOffsetsStart);
199    }
200    return 0;
201}
202
203const char *PropNameData::getName(const char *nameGroup, int32_t nameIndex) {
204    int32_t numNames=*nameGroup++;
205    if(nameIndex<0 || numNames<=nameIndex) {
206        return NULL;
207    }
208    // Skip nameIndex names.
209    for(; nameIndex>0; --nameIndex) {
210        nameGroup=uprv_strchr(nameGroup, 0)+1;
211    }
212    if(*nameGroup==0) {
213        return NULL;  // no name (Property[Value]Aliases.txt has "n/a")
214    }
215    return nameGroup;
216}
217
218UBool PropNameData::containsName(BytesTrie &trie, const char *name) {
219    if(name==NULL) {
220        return FALSE;
221    }
222    UStringTrieResult result=USTRINGTRIE_NO_VALUE;
223    char c;
224    while((c=*name++)!=0) {
225        c=uprv_invCharToLowercaseAscii(c);
226        // Ignore delimiters '-', '_', and ASCII White_Space.
227        if(c==0x2d || c==0x5f || c==0x20 || (0x09<=c && c<=0x0d)) {
228            continue;
229        }
230        if(!USTRINGTRIE_HAS_NEXT(result)) {
231            return FALSE;
232        }
233        result=trie.next((uint8_t)c);
234    }
235    return USTRINGTRIE_HAS_VALUE(result);
236}
237
238const char *PropNameData::getPropertyName(int32_t property, int32_t nameChoice) {
239    int32_t valueMapIndex=findProperty(property);
240    if(valueMapIndex==0) {
241        return NULL;  // Not a known property.
242    }
243    return getName(nameGroups+valueMaps[valueMapIndex], nameChoice);
244}
245
246const char *PropNameData::getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice) {
247    int32_t valueMapIndex=findProperty(property);
248    if(valueMapIndex==0) {
249        return NULL;  // Not a known property.
250    }
251    int32_t nameGroupOffset=findPropertyValueNameGroup(valueMaps[valueMapIndex+1], value);
252    if(nameGroupOffset==0) {
253        return NULL;
254    }
255    return getName(nameGroups+nameGroupOffset, nameChoice);
256}
257
258int32_t PropNameData::getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) {
259    BytesTrie trie(bytesTries+bytesTrieOffset);
260    if(containsName(trie, alias)) {
261        return trie.getValue();
262    } else {
263        return UCHAR_INVALID_CODE;
264    }
265}
266
267int32_t PropNameData::getPropertyEnum(const char *alias) {
268    return getPropertyOrValueEnum(0, alias);
269}
270
271int32_t PropNameData::getPropertyValueEnum(int32_t property, const char *alias) {
272    int32_t valueMapIndex=findProperty(property);
273    if(valueMapIndex==0) {
274        return UCHAR_INVALID_CODE;  // Not a known property.
275    }
276    valueMapIndex=valueMaps[valueMapIndex+1];
277    if(valueMapIndex==0) {
278        return UCHAR_INVALID_CODE;  // The property does not have named values.
279    }
280    // valueMapIndex is the start of the property's valueMap,
281    // where the first word is the BytesTrie offset.
282    return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
283}
284U_NAMESPACE_END
285
286//----------------------------------------------------------------------
287// Public API implementation
288
289U_CAPI const char* U_EXPORT2
290u_getPropertyName(UProperty property,
291                  UPropertyNameChoice nameChoice) {
292    U_NAMESPACE_USE
293    return PropNameData::getPropertyName(property, nameChoice);
294}
295
296U_CAPI UProperty U_EXPORT2
297u_getPropertyEnum(const char* alias) {
298    U_NAMESPACE_USE
299    return (UProperty)PropNameData::getPropertyEnum(alias);
300}
301
302U_CAPI const char* U_EXPORT2
303u_getPropertyValueName(UProperty property,
304                       int32_t value,
305                       UPropertyNameChoice nameChoice) {
306    U_NAMESPACE_USE
307    return PropNameData::getPropertyValueName(property, value, nameChoice);
308}
309
310U_CAPI int32_t U_EXPORT2
311u_getPropertyValueEnum(UProperty property,
312                       const char* alias) {
313    U_NAMESPACE_USE
314    return PropNameData::getPropertyValueEnum(property, alias);
315}
316
317U_CAPI const char*  U_EXPORT2
318uscript_getName(UScriptCode scriptCode){
319    return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
320                                  U_LONG_PROPERTY_NAME);
321}
322
323U_CAPI const char*  U_EXPORT2
324uscript_getShortName(UScriptCode scriptCode){
325    return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
326                                  U_SHORT_PROPERTY_NAME);
327}
328