1// Copyright (C) 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5*   Copyright (C) 2013-2016, International Business Machines
6*   Corporation and others.  All Rights Reserved.
7*******************************************************************************
8*   file name:  uscript_props.cpp
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 2013feb16
14*   created by: Markus W. Scherer
15*/
16
17#include "unicode/utypes.h"
18#include "unicode/unistr.h"
19#include "unicode/uscript.h"
20#include "unicode/utf16.h"
21#include "ustr_imp.h"
22#include "cmemory.h"
23
24namespace {
25
26// Script metadata (script properties).
27// See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt
28
// Layout of one packed script-properties value (int32_t):
//   bits 20.. 0  sample character
//   bits 23..21  usage
//   bits 31..24  single-bit flags
// A value of 0 means NOT_ENCODED, no sample character, and all flags false.

// Usage values, already shifted into bits 23..21.
const int32_t UNKNOWN      = 0x00200000;  // 1 << 21
const int32_t EXCLUSION    = 0x00400000;  // 2 << 21
const int32_t LIMITED_USE  = 0x00600000;  // 3 << 21
const int32_t ASPIRATIONAL = 0x00800000;  // 4 << 21
const int32_t RECOMMENDED  = 0x00A00000;  // 5 << 21

// Boolean flags, one bit each, starting at bit 24.
const int32_t RTL        = 0x01000000;  // 1 << 24
const int32_t LB_LETTERS = 0x02000000;  // 1 << 25
const int32_t CASED      = 0x04000000;  // 1 << 26
43
44const int32_t SCRIPT_PROPS[] = {
45    // Begin copy-paste output from
46    // tools/trunk/unicode/py/parsescriptmetadata.py
47    0x0040 | RECOMMENDED,  // Zyyy
48    0x0308 | RECOMMENDED,  // Zinh
49    0x0628 | RECOMMENDED | RTL,  // Arab
50    0x0531 | RECOMMENDED | CASED,  // Armn
51    0x0995 | RECOMMENDED,  // Beng
52    0x3105 | RECOMMENDED | LB_LETTERS,  // Bopo
53    0x13C4 | LIMITED_USE | CASED,  // Cher
54    0x03E2 | EXCLUSION | CASED,  // Copt
55    0x042F | RECOMMENDED | CASED,  // Cyrl
56    0x10414 | EXCLUSION | CASED,  // Dsrt
57    0x0905 | RECOMMENDED,  // Deva
58    0x12A0 | RECOMMENDED,  // Ethi
59    0x10D3 | RECOMMENDED,  // Geor
60    0x10330 | EXCLUSION,  // Goth
61    0x03A9 | RECOMMENDED | CASED,  // Grek
62    0x0A95 | RECOMMENDED,  // Gujr
63    0x0A15 | RECOMMENDED,  // Guru
64    0x5B57 | RECOMMENDED | LB_LETTERS,  // Hani
65    0xAC00 | RECOMMENDED,  // Hang
66    0x05D0 | RECOMMENDED | RTL,  // Hebr
67    0x304B | RECOMMENDED | LB_LETTERS,  // Hira
68    0x0C95 | RECOMMENDED,  // Knda
69    0x30AB | RECOMMENDED | LB_LETTERS,  // Kana
70    0x1780 | RECOMMENDED | LB_LETTERS,  // Khmr
71    0x0EA5 | RECOMMENDED | LB_LETTERS,  // Laoo
72    0x004C | RECOMMENDED | CASED,  // Latn
73    0x0D15 | RECOMMENDED,  // Mlym
74    0x1826 | ASPIRATIONAL,  // Mong
75    0x1000 | RECOMMENDED | LB_LETTERS,  // Mymr
76    0x168F | EXCLUSION,  // Ogam
77    0x10300 | EXCLUSION,  // Ital
78    0x0B15 | RECOMMENDED,  // Orya
79    0x16A0 | EXCLUSION,  // Runr
80    0x0D85 | RECOMMENDED,  // Sinh
81    0x0710 | LIMITED_USE | RTL,  // Syrc
82    0x0B95 | RECOMMENDED,  // Taml
83    0x0C15 | RECOMMENDED,  // Telu
84    0x078C | RECOMMENDED | RTL,  // Thaa
85    0x0E17 | RECOMMENDED | LB_LETTERS,  // Thai
86    0x0F40 | RECOMMENDED,  // Tibt
87    0x14C0 | ASPIRATIONAL,  // Cans
88    0xA288 | ASPIRATIONAL | LB_LETTERS,  // Yiii
89    0x1703 | EXCLUSION,  // Tglg
90    0x1723 | EXCLUSION,  // Hano
91    0x1743 | EXCLUSION,  // Buhd
92    0x1763 | EXCLUSION,  // Tagb
93    0x280E | UNKNOWN,  // Brai
94    0x10800 | EXCLUSION | RTL,  // Cprt
95    0x1900 | LIMITED_USE,  // Limb
96    0x10000 | EXCLUSION,  // Linb
97    0x10480 | EXCLUSION,  // Osma
98    0x10450 | EXCLUSION,  // Shaw
99    0x1950 | LIMITED_USE | LB_LETTERS,  // Tale
100    0x10380 | EXCLUSION,  // Ugar
101    0,
102    0x1A00 | EXCLUSION,  // Bugi
103    0x2C00 | EXCLUSION | CASED,  // Glag
104    0x10A00 | EXCLUSION | RTL,  // Khar
105    0xA800 | LIMITED_USE,  // Sylo
106    0x1980 | LIMITED_USE | LB_LETTERS,  // Talu
107    0x2D30 | ASPIRATIONAL,  // Tfng
108    0x103A0 | EXCLUSION,  // Xpeo
109    0x1B05 | LIMITED_USE,  // Bali
110    0x1BC0 | LIMITED_USE,  // Batk
111    0,
112    0x11005 | EXCLUSION,  // Brah
113    0xAA00 | LIMITED_USE,  // Cham
114    0,
115    0,
116    0,
117    0,
118    0x13153 | EXCLUSION,  // Egyp
119    0,
120    0x5B57 | RECOMMENDED | LB_LETTERS,  // Hans
121    0x5B57 | RECOMMENDED | LB_LETTERS,  // Hant
122    0x16B1C | EXCLUSION,  // Hmng
123    0x10CA1 | EXCLUSION | RTL | CASED,  // Hung
124    0,
125    0xA984 | LIMITED_USE,  // Java
126    0xA90A | LIMITED_USE,  // Kali
127    0,
128    0,
129    0x1C00 | LIMITED_USE,  // Lepc
130    0x10647 | EXCLUSION,  // Lina
131    0x0840 | LIMITED_USE | RTL,  // Mand
132    0,
133    0x10980 | EXCLUSION | RTL,  // Mero
134    0x07CA | LIMITED_USE | RTL,  // Nkoo
135    0x10C00 | EXCLUSION | RTL,  // Orkh
136    0x1036B | EXCLUSION,  // Perm
137    0xA840 | EXCLUSION,  // Phag
138    0x10900 | EXCLUSION | RTL,  // Phnx
139    0x16F00 | ASPIRATIONAL,  // Plrd
140    0,
141    0,
142    0,
143    0,
144    0,
145    0,
146    0xA549 | LIMITED_USE,  // Vaii
147    0,
148    0x12000 | EXCLUSION,  // Xsux
149    0,
150    0xFDD0 | UNKNOWN,  // Zzzz
151    0x102A0 | EXCLUSION,  // Cari
152    0x304B | RECOMMENDED | LB_LETTERS,  // Jpan
153    0x1A20 | LIMITED_USE | LB_LETTERS,  // Lana
154    0x10280 | EXCLUSION,  // Lyci
155    0x10920 | EXCLUSION | RTL,  // Lydi
156    0x1C5A | LIMITED_USE,  // Olck
157    0xA930 | EXCLUSION,  // Rjng
158    0xA882 | LIMITED_USE,  // Saur
159    0x1D850 | EXCLUSION,  // Sgnw
160    0x1B83 | LIMITED_USE,  // Sund
161    0,
162    0xABC0 | LIMITED_USE,  // Mtei
163    0x10840 | EXCLUSION | RTL,  // Armi
164    0x10B00 | EXCLUSION | RTL,  // Avst
165    0x11103 | LIMITED_USE,  // Cakm
166    0xAC00 | RECOMMENDED,  // Kore
167    0x11083 | EXCLUSION,  // Kthi
168    0x10AD8 | EXCLUSION | RTL,  // Mani
169    0x10B60 | EXCLUSION | RTL,  // Phli
170    0x10B8F | EXCLUSION | RTL,  // Phlp
171    0,
172    0x10B40 | EXCLUSION | RTL,  // Prti
173    0x0800 | EXCLUSION | RTL,  // Samr
174    0xAA80 | LIMITED_USE | LB_LETTERS,  // Tavt
175    0,
176    0,
177    0xA6A0 | LIMITED_USE,  // Bamu
178    0xA4D0 | LIMITED_USE,  // Lisu
179    0,
180    0x10A60 | EXCLUSION | RTL,  // Sarb
181    0x16AE6 | EXCLUSION,  // Bass
182    0x1BC20 | EXCLUSION,  // Dupl
183    0x10500 | EXCLUSION,  // Elba
184    0x11315 | EXCLUSION,  // Gran
185    0,
186    0,
187    0x1E802 | EXCLUSION | RTL,  // Mend
188    0x109A0 | EXCLUSION | RTL,  // Merc
189    0x10A95 | EXCLUSION | RTL,  // Narb
190    0x10896 | EXCLUSION | RTL,  // Nbat
191    0x10873 | EXCLUSION | RTL,  // Palm
192    0x112BE | EXCLUSION,  // Sind
193    0x118B4 | EXCLUSION | CASED,  // Wara
194    0,
195    0,
196    0x16A4F | EXCLUSION,  // Mroo
197    0,
198    0x11183 | EXCLUSION,  // Shrd
199    0x110D0 | EXCLUSION,  // Sora
200    0x11680 | EXCLUSION,  // Takr
201    0x18229 | EXCLUSION | LB_LETTERS,  // Tang
202    0,
203    0x14400 | EXCLUSION,  // Hluw
204    0x11208 | EXCLUSION,  // Khoj
205    0x11484 | EXCLUSION,  // Tirh
206    0x10537 | EXCLUSION,  // Aghb
207    0x11152 | EXCLUSION,  // Mahj
208    0x11717 | EXCLUSION | LB_LETTERS,  // Ahom
209    0x108F4 | EXCLUSION | RTL,  // Hatr
210    0x1160E | EXCLUSION,  // Modi
211    0x1128F | EXCLUSION,  // Mult
212    0x11AC0 | EXCLUSION,  // Pauc
213    0x1158E | EXCLUSION,  // Sidd
214    0x1E909 | LIMITED_USE | RTL | CASED,  // Adlm
215    0x11C0E | EXCLUSION,  // Bhks
216    0x11C72 | EXCLUSION,  // Marc
217    0x11412 | LIMITED_USE,  // Newa
218    0x104B5 | LIMITED_USE | CASED,  // Osge
219    0x5B57 | RECOMMENDED | LB_LETTERS,  // Hanb
220    0x1112 | RECOMMENDED,  // Jamo
221    0,
222    // End copy-paste from parsescriptmetadata.py
223};
224
225int32_t getScriptProps(UScriptCode script) {
226    if (0 <= script && script < UPRV_LENGTHOF(SCRIPT_PROPS)) {
227        return SCRIPT_PROPS[script];
228    } else {
229        return 0;
230    }
231}
232
233}  // namespace
234
235U_CAPI int32_t U_EXPORT2
236uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode) {
237    if(U_FAILURE(*pErrorCode)) { return 0; }
238    if(capacity < 0 || (capacity > 0 && dest == NULL)) {
239        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
240        return 0;
241    }
242    int32_t sampleChar = getScriptProps(script) & 0x1fffff;
243    int32_t length;
244    if(sampleChar == 0) {
245        length = 0;
246    } else {
247        length = U16_LENGTH(sampleChar);
248        if(length <= capacity) {
249            int32_t i = 0;
250            U16_APPEND_UNSAFE(dest, i, sampleChar);
251        }
252    }
253    return u_terminateUChars(dest, capacity, length, pErrorCode);
254}
255
256U_COMMON_API icu::UnicodeString U_EXPORT2
257uscript_getSampleUnicodeString(UScriptCode script) {
258    icu::UnicodeString sample;
259    int32_t sampleChar = getScriptProps(script) & 0x1fffff;
260    if(sampleChar != 0) {
261        sample.append(sampleChar);
262    }
263    return sample;
264}
265
266U_CAPI UScriptUsage U_EXPORT2
267uscript_getUsage(UScriptCode script) {
268    return (UScriptUsage)((getScriptProps(script) >> 21) & 7);
269}
270
271U_CAPI UBool U_EXPORT2
272uscript_isRightToLeft(UScriptCode script) {
273    return (getScriptProps(script) & RTL) != 0;
274}
275
276U_CAPI UBool U_EXPORT2
277uscript_breaksBetweenLetters(UScriptCode script) {
278    return (getScriptProps(script) & LB_LETTERS) != 0;
279}
280
281U_CAPI UBool U_EXPORT2
282uscript_isCased(UScriptCode script) {
283    return (getScriptProps(script) & CASED) != 0;
284}
285