1/*
2*******************************************************************************
3*   Copyright (C) 2013, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5*******************************************************************************
6*   file name:  uscript_props.cpp
7*   encoding:   US-ASCII
8*   tab size:   8 (not used)
9*   indentation:4
10*
11*   created on: 2013feb16
12*   created by: Markus W. Scherer
13*/
14
15#include "unicode/utypes.h"
16#include "unicode/unistr.h"
17#include "unicode/uscript.h"
18#include "unicode/utf16.h"
19#include "ustr_imp.h"
20
// Element count of a true array (not a pointer), converted to int32_t.
// The whole expansion is wrapped in parentheses so that the macro acts as a
// single primary expression wherever it is used (for example as the operand
// of sizeof or of a postfix operator).
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
22
23namespace {
24
25// Script metadata (script properties).
26// See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt
27
28// 0 = NOT_ENCODED, no sample character, default false script properties.
29// Bits 20.. 0: sample character
30
31// Bits 23..21: usage
32const int32_t UNKNOWN = 1 << 21;
33const int32_t EXCLUSION = 2 << 21;
34const int32_t LIMITED_USE = 3 << 21;
35const int32_t ASPIRATIONAL = 4 << 21;
36const int32_t RECOMMENDED = 5 << 21;
37
38// Bits 31..24: Single-bit flags
39const int32_t RTL = 1 << 24;
40const int32_t LB_LETTERS = 1 << 25;
41const int32_t CASED = 1 << 26;
42
43const int32_t SCRIPT_PROPS[] = {
44    // Begin copy-paste output from
45    // tools/trunk/unicode/py/parsescriptmetadata.py
46    0x0040 | UNKNOWN,  // Zyyy
47    0x0308 | UNKNOWN,  // Zinh
48    0x0628 | RECOMMENDED | RTL,  // Arab
49    0x0531 | RECOMMENDED | CASED,  // Armn
50    0x0995 | RECOMMENDED,  // Beng
51    0x3105 | RECOMMENDED | LB_LETTERS,  // Bopo
52    0x13C4 | LIMITED_USE,  // Cher
53    0x03E2 | EXCLUSION | CASED,  // Copt
54    0x042F | RECOMMENDED | CASED,  // Cyrl
55    0x10414 | EXCLUSION | CASED,  // Dsrt
56    0x0905 | RECOMMENDED,  // Deva
57    0x12A0 | RECOMMENDED,  // Ethi
58    0x10D3 | RECOMMENDED,  // Geor
59    0x10330 | EXCLUSION,  // Goth
60    0x03A9 | RECOMMENDED | CASED,  // Grek
61    0x0A95 | RECOMMENDED,  // Gujr
62    0x0A15 | RECOMMENDED,  // Guru
63    0x5B57 | RECOMMENDED | LB_LETTERS,  // Hani
64    0xAC00 | RECOMMENDED,  // Hang
65    0x05D0 | RECOMMENDED | RTL,  // Hebr
66    0x304B | RECOMMENDED | LB_LETTERS,  // Hira
67    0x0C95 | RECOMMENDED,  // Knda
68    0x30AB | RECOMMENDED | LB_LETTERS,  // Kana
69    0x1780 | RECOMMENDED | LB_LETTERS,  // Khmr
70    0x0EA5 | RECOMMENDED | LB_LETTERS,  // Laoo
71    0x004C | RECOMMENDED | CASED,  // Latn
72    0x0D15 | RECOMMENDED,  // Mlym
73    0x1826 | ASPIRATIONAL,  // Mong
74    0x1000 | RECOMMENDED | LB_LETTERS,  // Mymr
75    0x168F | EXCLUSION,  // Ogam
76    0x10300 | EXCLUSION,  // Ital
77    0x0B15 | RECOMMENDED,  // Orya
78    0x16A0 | EXCLUSION,  // Runr
79    0x0D85 | RECOMMENDED,  // Sinh
80    0x0710 | LIMITED_USE | RTL,  // Syrc
81    0x0B95 | RECOMMENDED,  // Taml
82    0x0C15 | RECOMMENDED,  // Telu
83    0x078C | RECOMMENDED | RTL,  // Thaa
84    0x0E17 | RECOMMENDED | LB_LETTERS,  // Thai
85    0x0F40 | RECOMMENDED,  // Tibt
86    0x14C0 | ASPIRATIONAL,  // Cans
87    0xA288 | ASPIRATIONAL | LB_LETTERS,  // Yiii
88    0x1703 | EXCLUSION,  // Tglg
89    0x1723 | EXCLUSION,  // Hano
90    0x1743 | EXCLUSION,  // Buhd
91    0x1763 | EXCLUSION,  // Tagb
92    0x2800 | UNKNOWN,  // Brai
93    0x10800 | EXCLUSION | RTL,  // Cprt
94    0x1900 | LIMITED_USE,  // Limb
95    0x10000 | EXCLUSION,  // Linb
96    0x10480 | EXCLUSION,  // Osma
97    0x10450 | EXCLUSION,  // Shaw
98    0x1950 | LIMITED_USE | LB_LETTERS,  // Tale
99    0x10380 | EXCLUSION,  // Ugar
100    0,
101    0x1A00 | EXCLUSION,  // Bugi
102    0x2C00 | EXCLUSION | CASED,  // Glag
103    0x10A00 | EXCLUSION | RTL,  // Khar
104    0xA800 | LIMITED_USE,  // Sylo
105    0x1980 | LIMITED_USE | LB_LETTERS,  // Talu
106    0x2D30 | ASPIRATIONAL,  // Tfng
107    0x103A0 | EXCLUSION,  // Xpeo
108    0x1B05 | LIMITED_USE | LB_LETTERS,  // Bali
109    0x1BC0 | LIMITED_USE,  // Batk
110    0,
111    0x11005 | EXCLUSION,  // Brah
112    0xAA00 | LIMITED_USE,  // Cham
113    0,
114    0,
115    0,
116    0,
117    0x13153 | EXCLUSION,  // Egyp
118    0,
119    0x5B57 | RECOMMENDED | LB_LETTERS,  // Hans
120    0x5B57 | RECOMMENDED | LB_LETTERS,  // Hant
121    0,
122    0,
123    0,
124    0xA984 | LIMITED_USE | LB_LETTERS,  // Java
125    0xA90A | LIMITED_USE,  // Kali
126    0,
127    0,
128    0x1C00 | LIMITED_USE,  // Lepc
129    0,
130    0x0840 | LIMITED_USE | RTL,  // Mand
131    0,
132    0x10980 | EXCLUSION | RTL,  // Mero
133    0x07CA | LIMITED_USE | RTL,  // Nkoo
134    0x10C00 | EXCLUSION | RTL,  // Orkh
135    0,
136    0xA840 | EXCLUSION,  // Phag
137    0x10900 | EXCLUSION | RTL,  // Phnx
138    0x16F00 | ASPIRATIONAL,  // Plrd
139    0,
140    0,
141    0,
142    0,
143    0,
144    0,
145    0xA549 | LIMITED_USE,  // Vaii
146    0,
147    0x12000 | EXCLUSION,  // Xsux
148    0,
149    0xFDD0 | UNKNOWN,  // Zzzz
150    0x102A0 | EXCLUSION,  // Cari
151    0x304B | RECOMMENDED | LB_LETTERS,  // Jpan
152    0x1A20 | LIMITED_USE | LB_LETTERS,  // Lana
153    0x10280 | EXCLUSION,  // Lyci
154    0x10920 | EXCLUSION | RTL,  // Lydi
155    0x1C5A | LIMITED_USE,  // Olck
156    0xA930 | EXCLUSION,  // Rjng
157    0xA882 | LIMITED_USE,  // Saur
158    0,
159    0x1B83 | LIMITED_USE,  // Sund
160    0,
161    0xABC0 | LIMITED_USE,  // Mtei
162    0x10840 | EXCLUSION | RTL,  // Armi
163    0x10B00 | EXCLUSION | RTL,  // Avst
164    0x11103 | LIMITED_USE,  // Cakm
165    0xAC00 | RECOMMENDED,  // Kore
166    0x11083 | EXCLUSION,  // Kthi
167    0,
168    0x10B60 | EXCLUSION | RTL,  // Phli
169    0,
170    0,
171    0x10B40 | EXCLUSION | RTL,  // Prti
172    0x0800 | EXCLUSION | RTL,  // Samr
173    0xAA80 | LIMITED_USE | LB_LETTERS,  // Tavt
174    0,
175    0,
176    0xA6A0 | LIMITED_USE,  // Bamu
177    0xA4D0 | LIMITED_USE,  // Lisu
178    0,
179    0x10A60 | EXCLUSION | RTL,  // Sarb
180    0,
181    0,
182    0,
183    0,
184    0,
185    0,
186    0,
187    0x109A0 | EXCLUSION | RTL,  // Merc
188    0,
189    0,
190    0,
191    0,
192    0,
193    0,
194    0,
195    0,
196    0,
197    0x11183 | EXCLUSION,  // Shrd
198    0x110D0 | EXCLUSION,  // Sora
199    0x11680 | EXCLUSION,  // Takr
200    0,
201    0,
202    0,
203    0,
204    0,
205    0,
206    0,
207    // End copy-paste from parsescriptmetadata.py
208};
209
210int32_t getScriptProps(UScriptCode script) {
211    if (0 <= script && script < LENGTHOF(SCRIPT_PROPS)) {
212        return SCRIPT_PROPS[script];
213    } else {
214        return 0;
215    }
216}
217
218}  // namespace
219
220U_CAPI int32_t U_EXPORT2
221uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode) {
222    if(U_FAILURE(*pErrorCode)) { return 0; }
223    if(capacity < 0 || (capacity > 0 && dest == NULL)) {
224        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
225        return 0;
226    }
227    int32_t sampleChar = getScriptProps(script) & 0x1fffff;
228    int32_t length;
229    if(sampleChar == 0) {
230        length = 0;
231    } else {
232        length = U16_LENGTH(sampleChar);
233        if(length <= capacity) {
234            int32_t i = 0;
235            U16_APPEND_UNSAFE(dest, i, sampleChar);
236        }
237    }
238    return u_terminateUChars(dest, capacity, length, pErrorCode);
239}
240
241U_COMMON_API icu::UnicodeString U_EXPORT2
242uscript_getSampleUnicodeString(UScriptCode script) {
243    icu::UnicodeString sample;
244    int32_t sampleChar = getScriptProps(script) & 0x1fffff;
245    if(sampleChar != 0) {
246        sample.append(sampleChar);
247    }
248    return sample;
249}
250
251U_CAPI UScriptUsage U_EXPORT2
252uscript_getUsage(UScriptCode script) {
253    return (UScriptUsage)((getScriptProps(script) >> 21) & 7);
254}
255
256U_CAPI UBool U_EXPORT2
257uscript_isRightToLeft(UScriptCode script) {
258    return (getScriptProps(script) & RTL) != 0;
259}
260
261U_CAPI UBool U_EXPORT2
262uscript_breaksBetweenLetters(UScriptCode script) {
263    return (getScriptProps(script) & LB_LETTERS) != 0;
264}
265
266U_CAPI UBool U_EXPORT2
267uscript_isCased(UScriptCode script) {
268    return (getScriptProps(script) & CASED) != 0;
269}
270