1/*
2   Unicode character type helpers.
3
4   Written by Marc-Andre Lemburg (mal@lemburg.com).
5   Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)
6
7   Copyright (c) Corporation for National Research Initiatives.
8
9*/
10
11#include "Python.h"
12#include "unicodeobject.h"
13
/* Bit flags tested against the `flags` member of a _PyUnicode_TypeRecord.
   NOTE(review): the values presumably have to match the table supplied by
   unicodetype_db.h -- confirm against whatever generates that header
   before renumbering anything. */
#define ALPHA_MASK      0x001   /* tested by _PyUnicode_IsAlpha */
#define DECIMAL_MASK    0x002   /* `decimal` member holds a valid value */
#define DIGIT_MASK      0x004   /* `digit` member holds a valid value */
#define LOWER_MASK      0x008   /* tested by _PyUnicode_IsLowercase */
#define LINEBREAK_MASK  0x010   /* not tested in this part of the file */
#define SPACE_MASK      0x020   /* not tested in this part of the file */
#define TITLE_MASK      0x040   /* tested by _PyUnicode_IsTitlecase */
#define UPPER_MASK      0x080   /* tested by _PyUnicode_IsUppercase */
#define NODELTA_MASK    0x100   /* upper/lower/title hold the mapped
                                   character itself, not an offset */
#define NUMERIC_MASK    0x200   /* tested by _PyUnicode_IsNumeric */
24
25typedef struct {
26    const Py_UNICODE upper;
27    const Py_UNICODE lower;
28    const Py_UNICODE title;
29    const unsigned char decimal;
30    const unsigned char digit;
31    const unsigned short flags;
32} _PyUnicode_TypeRecord;
33
34#include "unicodetype_db.h"
35
36static const _PyUnicode_TypeRecord *
37gettyperecord(Py_UNICODE code)
38{
39    int index;
40
41#ifdef Py_UNICODE_WIDE
42    if (code >= 0x110000)
43        index = 0;
44    else
45#endif
46    {
47        index = index1[(code>>SHIFT)];
48        index = index2[(index<<SHIFT)+(code&((1<<SHIFT)-1))];
49    }
50
51    return &_PyUnicode_TypeRecords[index];
52}
53
54/* Returns the titlecase Unicode characters corresponding to ch or just
55   ch if no titlecase mapping is known. */
56
57Py_UNICODE _PyUnicode_ToTitlecase(register Py_UNICODE ch)
58{
59    const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
60    int delta = ctype->title;
61
62    if (ctype->flags & NODELTA_MASK)
63	return delta;
64
65    if (delta >= 32768)
66	    delta -= 65536;
67
68    return ch + delta;
69}
70
71/* Returns 1 for Unicode characters having the category 'Lt', 0
72   otherwise. */
73
74int _PyUnicode_IsTitlecase(Py_UNICODE ch)
75{
76    const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
77
78    return (ctype->flags & TITLE_MASK) != 0;
79}
80
81/* Returns the integer decimal (0-9) for Unicode characters having
82   this property, -1 otherwise. */
83
84int _PyUnicode_ToDecimalDigit(Py_UNICODE ch)
85{
86    const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
87
88    return (ctype->flags & DECIMAL_MASK) ? ctype->decimal : -1;
89}
90
91int _PyUnicode_IsDecimalDigit(Py_UNICODE ch)
92{
93    if (_PyUnicode_ToDecimalDigit(ch) < 0)
94	return 0;
95    return 1;
96}
97
98/* Returns the integer digit (0-9) for Unicode characters having
99   this property, -1 otherwise. */
100
101int _PyUnicode_ToDigit(Py_UNICODE ch)
102{
103    const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
104
105    return (ctype->flags & DIGIT_MASK) ? ctype->digit : -1;
106}
107
108int _PyUnicode_IsDigit(Py_UNICODE ch)
109{
110    if (_PyUnicode_ToDigit(ch) < 0)
111	return 0;
112    return 1;
113}
114
115/* Returns the numeric value as double for Unicode characters having
116   this property, -1.0 otherwise. */
117
118int _PyUnicode_IsNumeric(Py_UNICODE ch)
119{
120    const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
121
122    return (ctype->flags & NUMERIC_MASK) != 0;
123}
124
125#ifndef WANT_WCTYPE_FUNCTIONS
126
127/* Returns 1 for Unicode characters having the category 'Ll', 0
128   otherwise. */
129
130int _PyUnicode_IsLowercase(Py_UNICODE ch)
131{
132    const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
133
134    return (ctype->flags & LOWER_MASK) != 0;
135}
136
137/* Returns 1 for Unicode characters having the category 'Lu', 0
138   otherwise. */
139
140int _PyUnicode_IsUppercase(Py_UNICODE ch)
141{
142    const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
143
144    return (ctype->flags & UPPER_MASK) != 0;
145}
146
147/* Returns the uppercase Unicode characters corresponding to ch or just
148   ch if no uppercase mapping is known. */
149
150Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)
151{
152    const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
153    int delta = ctype->upper;
154    if (ctype->flags & NODELTA_MASK)
155	return delta;
156    if (delta >= 32768)
157	    delta -= 65536;
158    return ch + delta;
159}
160
161/* Returns the lowercase Unicode characters corresponding to ch or just
162   ch if no lowercase mapping is known. */
163
164Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch)
165{
166    const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
167    int delta = ctype->lower;
168    if (ctype->flags & NODELTA_MASK)
169	return delta;
170    if (delta >= 32768)
171	    delta -= 65536;
172    return ch + delta;
173}
174
175/* Returns 1 for Unicode characters having the category 'Ll', 'Lu', 'Lt',
176   'Lo' or 'Lm',  0 otherwise. */
177
178int _PyUnicode_IsAlpha(Py_UNICODE ch)
179{
180    const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
181
182    return (ctype->flags & ALPHA_MASK) != 0;
183}
184
185#else
186
187/* Export the interfaces using the wchar_t type for portability
188   reasons:  */
189
190int _PyUnicode_IsLowercase(Py_UNICODE ch)
191{
192    return iswlower(ch);
193}
194
195int _PyUnicode_IsUppercase(Py_UNICODE ch)
196{
197    return iswupper(ch);
198}
199
200Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch)
201{
202    return towlower(ch);
203}
204
205Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)
206{
207    return towupper(ch);
208}
209
210int _PyUnicode_IsAlpha(Py_UNICODE ch)
211{
212    return iswalpha(ch);
213}
214
215#endif
216