1/*
2**********************************************************************
3* Copyright (c) 2002-2011, International Business Machines
4* Corporation and others.  All Rights Reserved.
5**********************************************************************
6* Author: Alan Liu
7* Created: October 30 2002
8* Since: ICU 2.4
9* 2010nov19 Markus Scherer  Rewrite for formatVersion 2.
10**********************************************************************
11*/
12#include "propname.h"
13#include "unicode/uchar.h"
14#include "unicode/udata.h"
15#include "umutex.h"
16#include "cmemory.h"
17#include "cstring.h"
18#include "ucln_cmn.h"
19#include "uarrsort.h"
20#include "uinvchar.h"
21
22#define INCLUDED_FROM_PROPNAME_CPP
23#include "propname_data.h"
24
25U_CDECL_BEGIN
26
27/**
28 * Get the next non-ignorable ASCII character from a property name
29 * and lowercases it.
30 * @return ((advance count for the name)<<8)|character
31 */
32static inline int32_t
33getASCIIPropertyNameChar(const char *name) {
34    int32_t i;
35    char c;
36
37    /* Ignore delimiters '-', '_', and ASCII White_Space */
38    for(i=0;
39        (c=name[i++])==0x2d || c==0x5f ||
40        c==0x20 || (0x09<=c && c<=0x0d);
41    ) {}
42
43    if(c!=0) {
44        return (i<<8)|(uint8_t)uprv_asciitolower((char)c);
45    } else {
46        return i<<8;
47    }
48}
49
50/**
51 * Get the next non-ignorable EBCDIC character from a property name
52 * and lowercases it.
53 * @return ((advance count for the name)<<8)|character
54 */
55static inline int32_t
56getEBCDICPropertyNameChar(const char *name) {
57    int32_t i;
58    char c;
59
60    /* Ignore delimiters '-', '_', and EBCDIC White_Space */
61    for(i=0;
62        (c=name[i++])==0x60 || c==0x6d ||
63        c==0x40 || c==0x05 || c==0x15 || c==0x25 || c==0x0b || c==0x0c || c==0x0d;
64    ) {}
65
66    if(c!=0) {
67        return (i<<8)|(uint8_t)uprv_ebcdictolower((char)c);
68    } else {
69        return i<<8;
70    }
71}
72
73/**
74 * Unicode property names and property value names are compared "loosely".
75 *
76 * UCD.html 4.0.1 says:
77 *   For all property names, property value names, and for property values for
78 *   Enumerated, Binary, or Catalog properties, use the following
79 *   loose matching rule:
80 *
81 *   LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
82 *
83 * This function does just that, for (char *) name strings.
84 * It is almost identical to ucnv_compareNames() but also ignores
85 * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
86 *
87 * @internal
88 */
89
90U_CAPI int32_t U_EXPORT2
91uprv_compareASCIIPropertyNames(const char *name1, const char *name2) {
92    int32_t rc, r1, r2;
93
94    for(;;) {
95        r1=getASCIIPropertyNameChar(name1);
96        r2=getASCIIPropertyNameChar(name2);
97
98        /* If we reach the ends of both strings then they match */
99        if(((r1|r2)&0xff)==0) {
100            return 0;
101        }
102
103        /* Compare the lowercased characters */
104        if(r1!=r2) {
105            rc=(r1&0xff)-(r2&0xff);
106            if(rc!=0) {
107                return rc;
108            }
109        }
110
111        name1+=r1>>8;
112        name2+=r2>>8;
113    }
114}
115
116U_CAPI int32_t U_EXPORT2
117uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) {
118    int32_t rc, r1, r2;
119
120    for(;;) {
121        r1=getEBCDICPropertyNameChar(name1);
122        r2=getEBCDICPropertyNameChar(name2);
123
124        /* If we reach the ends of both strings then they match */
125        if(((r1|r2)&0xff)==0) {
126            return 0;
127        }
128
129        /* Compare the lowercased characters */
130        if(r1!=r2) {
131            rc=(r1&0xff)-(r2&0xff);
132            if(rc!=0) {
133                return rc;
134            }
135        }
136
137        name1+=r1>>8;
138        name2+=r2>>8;
139    }
140}
141
142U_CDECL_END
143
144U_NAMESPACE_BEGIN
145
146int32_t PropNameData::findProperty(int32_t property) {
147    int32_t i=1;  // valueMaps index, initially after numRanges
148    for(int32_t numRanges=valueMaps[0]; numRanges>0; --numRanges) {
149        // Read and skip the start and limit of this range.
150        int32_t start=valueMaps[i];
151        int32_t limit=valueMaps[i+1];
152        i+=2;
153        if(property<start) {
154            break;
155        }
156        if(property<limit) {
157            return i+(property-start)*2;
158        }
159        i+=(limit-start)*2;  // Skip all entries for this range.
160    }
161    return 0;
162}
163
164int32_t PropNameData::findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value) {
165    if(valueMapIndex==0) {
166        return 0;  // The property does not have named values.
167    }
168    ++valueMapIndex;  // Skip the BytesTrie offset.
169    int32_t numRanges=valueMaps[valueMapIndex++];
170    if(numRanges<0x10) {
171        // Ranges of values.
172        for(; numRanges>0; --numRanges) {
173            // Read and skip the start and limit of this range.
174            int32_t start=valueMaps[valueMapIndex];
175            int32_t limit=valueMaps[valueMapIndex+1];
176            valueMapIndex+=2;
177            if(value<start) {
178                break;
179            }
180            if(value<limit) {
181                return valueMaps[valueMapIndex+value-start];
182            }
183            valueMapIndex+=limit-start;  // Skip all entries for this range.
184        }
185    } else {
186        // List of values.
187        int32_t valuesStart=valueMapIndex;
188        int32_t nameGroupOffsetsStart=valueMapIndex+numRanges-0x10;
189        do {
190            int32_t v=valueMaps[valueMapIndex];
191            if(value<v) {
192                break;
193            }
194            if(value==v) {
195                return valueMaps[nameGroupOffsetsStart+valueMapIndex-valuesStart];
196            }
197        } while(++valueMapIndex<nameGroupOffsetsStart);
198    }
199    return 0;
200}
201
202const char *PropNameData::getName(const char *nameGroup, int32_t nameIndex) {
203    int32_t numNames=*nameGroup++;
204    if(nameIndex<0 || numNames<=nameIndex) {
205        return NULL;
206    }
207    // Skip nameIndex names.
208    for(; nameIndex>0; --nameIndex) {
209        nameGroup=uprv_strchr(nameGroup, 0)+1;
210    }
211    if(*nameGroup==0) {
212        return NULL;  // no name (Property[Value]Aliases.txt has "n/a")
213    }
214    return nameGroup;
215}
216
217UBool PropNameData::containsName(BytesTrie &trie, const char *name) {
218    if(name==NULL) {
219        return FALSE;
220    }
221    UStringTrieResult result=USTRINGTRIE_NO_VALUE;
222    char c;
223    while((c=*name++)!=0) {
224        c=uprv_invCharToLowercaseAscii(c);
225        // Ignore delimiters '-', '_', and ASCII White_Space.
226        if(c==0x2d || c==0x5f || c==0x20 || (0x09<=c && c<=0x0d)) {
227            continue;
228        }
229        if(!USTRINGTRIE_HAS_NEXT(result)) {
230            return FALSE;
231        }
232        result=trie.next((uint8_t)c);
233    }
234    return USTRINGTRIE_HAS_VALUE(result);
235}
236
237const char *PropNameData::getPropertyName(int32_t property, int32_t nameChoice) {
238    int32_t valueMapIndex=findProperty(property);
239    if(valueMapIndex==0) {
240        return NULL;  // Not a known property.
241    }
242    return getName(nameGroups+valueMaps[valueMapIndex], nameChoice);
243}
244
245const char *PropNameData::getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice) {
246    int32_t valueMapIndex=findProperty(property);
247    if(valueMapIndex==0) {
248        return NULL;  // Not a known property.
249    }
250    int32_t nameGroupOffset=findPropertyValueNameGroup(valueMaps[valueMapIndex+1], value);
251    if(nameGroupOffset==0) {
252        return NULL;
253    }
254    return getName(nameGroups+nameGroupOffset, nameChoice);
255}
256
257int32_t PropNameData::getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) {
258    BytesTrie trie(bytesTries+bytesTrieOffset);
259    if(containsName(trie, alias)) {
260        return trie.getValue();
261    } else {
262        return UCHAR_INVALID_CODE;
263    }
264}
265
266int32_t PropNameData::getPropertyEnum(const char *alias) {
267    return getPropertyOrValueEnum(0, alias);
268}
269
270int32_t PropNameData::getPropertyValueEnum(int32_t property, const char *alias) {
271    int32_t valueMapIndex=findProperty(property);
272    if(valueMapIndex==0) {
273        return UCHAR_INVALID_CODE;  // Not a known property.
274    }
275    valueMapIndex=valueMaps[valueMapIndex+1];
276    if(valueMapIndex==0) {
277        return UCHAR_INVALID_CODE;  // The property does not have named values.
278    }
279    // valueMapIndex is the start of the property's valueMap,
280    // where the first word is the BytesTrie offset.
281    return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
282}
283U_NAMESPACE_END
284
285//----------------------------------------------------------------------
286// Public API implementation
287
288U_CAPI const char* U_EXPORT2
289u_getPropertyName(UProperty property,
290                  UPropertyNameChoice nameChoice) {
291    U_NAMESPACE_USE
292    return PropNameData::getPropertyName(property, nameChoice);
293}
294
295U_CAPI UProperty U_EXPORT2
296u_getPropertyEnum(const char* alias) {
297    U_NAMESPACE_USE
298    return (UProperty)PropNameData::getPropertyEnum(alias);
299}
300
301U_CAPI const char* U_EXPORT2
302u_getPropertyValueName(UProperty property,
303                       int32_t value,
304                       UPropertyNameChoice nameChoice) {
305    U_NAMESPACE_USE
306    return PropNameData::getPropertyValueName(property, value, nameChoice);
307}
308
309U_CAPI int32_t U_EXPORT2
310u_getPropertyValueEnum(UProperty property,
311                       const char* alias) {
312    U_NAMESPACE_USE
313    return PropNameData::getPropertyValueEnum(property, alias);
314}
315