/* hb-unicode.h revision 19c0eab8cf96d00e168c4b11ec435019c1ed44f7 */
/*
 * Copyright (C) 2009  Red Hat, Inc.
 *
 *  This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Red Hat Author(s): Behdad Esfahbod
 */

#ifndef HB_UNICODE_H
#define HB_UNICODE_H

#include "hb-common.h"

HB_BEGIN_DECLS


/*
 * hb_category_t: Unicode General Category property.
 *
 * No enumerator has an explicit initializer, so the values run
 * consecutively from 0 (HB_CATEGORY_CONTROL) to 29
 * (HB_CATEGORY_SPACE_SEPARATOR) in declaration order.  Inserting or
 * reordering entries changes the numeric value of every enumerator
 * that follows.
 */
typedef enum
{
  /* 0..4 */
  HB_CATEGORY_CONTROL,
  HB_CATEGORY_FORMAT,
  HB_CATEGORY_UNASSIGNED,
  HB_CATEGORY_PRIVATE_USE,
  HB_CATEGORY_SURROGATE,

  /* 5..9: letters */
  HB_CATEGORY_LOWERCASE_LETTER,
  HB_CATEGORY_MODIFIER_LETTER,
  HB_CATEGORY_OTHER_LETTER,
  HB_CATEGORY_TITLECASE_LETTER,
  HB_CATEGORY_UPPERCASE_LETTER,

  /* 10..12: marks */
  HB_CATEGORY_COMBINING_MARK,
  HB_CATEGORY_ENCLOSING_MARK,
  HB_CATEGORY_NON_SPACING_MARK,

  /* 13..15: numbers */
  HB_CATEGORY_DECIMAL_NUMBER,
  HB_CATEGORY_LETTER_NUMBER,
  HB_CATEGORY_OTHER_NUMBER,

  /* 16..22: punctuation */
  HB_CATEGORY_CONNECT_PUNCTUATION,
  HB_CATEGORY_DASH_PUNCTUATION,
  HB_CATEGORY_CLOSE_PUNCTUATION,
  HB_CATEGORY_FINAL_PUNCTUATION,
  HB_CATEGORY_INITIAL_PUNCTUATION,
  HB_CATEGORY_OTHER_PUNCTUATION,
  HB_CATEGORY_OPEN_PUNCTUATION,

  /* 23..26: symbols */
  HB_CATEGORY_CURRENCY_SYMBOL,
  HB_CATEGORY_MODIFIER_SYMBOL,
  HB_CATEGORY_MATH_SYMBOL,
  HB_CATEGORY_OTHER_SYMBOL,

  /* 27..29: separators */
  HB_CATEGORY_LINE_SEPARATOR,
  HB_CATEGORY_PARAGRAPH_SEPARATOR,
  HB_CATEGORY_SPACE_SEPARATOR
} hb_category_t;

/*
 * hb_script_t: Unicode Script property.
 *
 * HB_SCRIPT_INVALID_CODE is pinned to -1 and HB_SCRIPT_COMMON to 0; every
 * later enumerator is implicitly numbered in declaration order, ending at
 * HB_SCRIPT_TAI_VIET == 92.  Inserting or reordering entries changes the
 * numeric value of every enumerator that follows.
 *
 * The trailing comment on each line is the ISO 15924 four-letter code;
 * codes in parentheses are aliases or variants.
 */
typedef enum
{                               /* ISO 15924 code */
  HB_SCRIPT_INVALID_CODE = -1,
  HB_SCRIPT_COMMON       = 0,   /* Zyyy */
  HB_SCRIPT_INHERITED,          /* Zinh (Qaai) */
  HB_SCRIPT_ARABIC,             /* Arab */
  HB_SCRIPT_ARMENIAN,           /* Armn */
  HB_SCRIPT_BENGALI,            /* Beng */
  HB_SCRIPT_BOPOMOFO,           /* Bopo */
  HB_SCRIPT_CHEROKEE,           /* Cher */
  HB_SCRIPT_COPTIC,             /* Copt (Qaac) */
  HB_SCRIPT_CYRILLIC,           /* Cyrl (Cyrs) */
  HB_SCRIPT_DESERET,            /* Dsrt */
  HB_SCRIPT_DEVANAGARI,         /* Deva */
  HB_SCRIPT_ETHIOPIC,           /* Ethi */
  HB_SCRIPT_GEORGIAN,           /* Geor (Geon, Geoa) */
  HB_SCRIPT_GOTHIC,             /* Goth */
  HB_SCRIPT_GREEK,              /* Grek */
  HB_SCRIPT_GUJARATI,           /* Gujr */
  HB_SCRIPT_GURMUKHI,           /* Guru */
  HB_SCRIPT_HAN,                /* Hani */
  HB_SCRIPT_HANGUL,             /* Hang */
  HB_SCRIPT_HEBREW,             /* Hebr */
  HB_SCRIPT_HIRAGANA,           /* Hira */
  HB_SCRIPT_KANNADA,            /* Knda */
  HB_SCRIPT_KATAKANA,           /* Kana */
  HB_SCRIPT_KHMER,              /* Khmr */
  HB_SCRIPT_LAO,                /* Laoo */
  HB_SCRIPT_LATIN,              /* Latn (Latf, Latg) */
  HB_SCRIPT_MALAYALAM,          /* Mlym */
  HB_SCRIPT_MONGOLIAN,          /* Mong */
  HB_SCRIPT_MYANMAR,            /* Mymr */
  HB_SCRIPT_OGHAM,              /* Ogam */
  HB_SCRIPT_OLD_ITALIC,         /* Ital */
  HB_SCRIPT_ORIYA,              /* Orya */
  HB_SCRIPT_RUNIC,              /* Runr */
  HB_SCRIPT_SINHALA,            /* Sinh */
  HB_SCRIPT_SYRIAC,             /* Syrc (Syrj, Syrn, Syre) */
  HB_SCRIPT_TAMIL,              /* Taml */
  HB_SCRIPT_TELUGU,             /* Telu */
  HB_SCRIPT_THAANA,             /* Thaa */
  HB_SCRIPT_THAI,               /* Thai */
  HB_SCRIPT_TIBETAN,            /* Tibt */
  HB_SCRIPT_CANADIAN_ABORIGINAL, /* Cans */
  HB_SCRIPT_YI,                 /* Yiii */
  HB_SCRIPT_TAGALOG,            /* Tglg */
  HB_SCRIPT_HANUNOO,            /* Hano */
  HB_SCRIPT_BUHID,              /* Buhd */
  HB_SCRIPT_TAGBANWA,           /* Tagb */

  /* Unicode-4.0 additions */
  HB_SCRIPT_BRAILLE,            /* Brai */
  HB_SCRIPT_CYPRIOT,            /* Cprt */
  HB_SCRIPT_LIMBU,              /* Limb */
  HB_SCRIPT_OSMANYA,            /* Osma */
  HB_SCRIPT_SHAVIAN,            /* Shaw */
  HB_SCRIPT_LINEAR_B,           /* Linb */
  HB_SCRIPT_TAI_LE,             /* Tale */
  HB_SCRIPT_UGARITIC,           /* Ugar */

  /* Unicode-4.1 additions */
  HB_SCRIPT_NEW_TAI_LUE,        /* Talu */
  HB_SCRIPT_BUGINESE,           /* Bugi */
  HB_SCRIPT_GLAGOLITIC,         /* Glag */
  HB_SCRIPT_TIFINAGH,           /* Tfng */
  HB_SCRIPT_SYLOTI_NAGRI,       /* Sylo */
  HB_SCRIPT_OLD_PERSIAN,        /* Xpeo */
  HB_SCRIPT_KHAROSHTHI,         /* Khar */

  /* Unicode-5.0 additions */
  HB_SCRIPT_UNKNOWN,            /* Zzzz */
  HB_SCRIPT_BALINESE,           /* Bali */
  HB_SCRIPT_CUNEIFORM,          /* Xsux */
  HB_SCRIPT_PHOENICIAN,         /* Phnx */
  HB_SCRIPT_PHAGS_PA,           /* Phag */
  HB_SCRIPT_NKO,                /* Nkoo */

  /* Unicode-5.1 additions */
  HB_SCRIPT_KAYAH_LI,           /* Kali */
  HB_SCRIPT_LEPCHA,             /* Lepc */
  HB_SCRIPT_REJANG,             /* Rjng */
  HB_SCRIPT_SUNDANESE,          /* Sund */
  HB_SCRIPT_SAURASHTRA,         /* Saur */
  HB_SCRIPT_CHAM,               /* Cham */
  HB_SCRIPT_OL_CHIKI,           /* Olck */
  HB_SCRIPT_VAI,                /* Vaii */
  HB_SCRIPT_CARIAN,             /* Cari */
  HB_SCRIPT_LYCIAN,             /* Lyci */
  HB_SCRIPT_LYDIAN,             /* Lydi */

  /* Unicode-5.2 additions */
  HB_SCRIPT_AVESTAN,                /* Avst */
  HB_SCRIPT_BAMUM,                  /* Bamu */
  HB_SCRIPT_EGYPTIAN_HIEROGLYPHS,   /* Egyp */
  HB_SCRIPT_IMPERIAL_ARAMAIC,       /* Armi */
  HB_SCRIPT_INSCRIPTIONAL_PAHLAVI,  /* Phli */
  HB_SCRIPT_INSCRIPTIONAL_PARTHIAN, /* Prti */
  HB_SCRIPT_JAVANESE,               /* Java */
  HB_SCRIPT_KAITHI,                 /* Kthi */
  HB_SCRIPT_LISU,                   /* Lisu */
  HB_SCRIPT_MEITEI_MAYEK,           /* Mtei */
  HB_SCRIPT_OLD_SOUTH_ARABIAN,      /* Sarb */
  HB_SCRIPT_OLD_TURKIC,             /* Orkh */
  HB_SCRIPT_SAMARITAN,              /* Samr */
  HB_SCRIPT_TAI_THAM,               /* Lana */
  HB_SCRIPT_TAI_VIET                /* Tavt */
} hb_script_t;


/*
 * hb_unicode_funcs_t
 *
 * Opaque handle: only the struct tag is declared in this header, so callers
 * can hold and pass pointers to it but cannot access its members.
 *
 * NOTE(review): the functions below are defined outside this header.  The
 * per-function remarks describe the signatures only; behaviour suggested by
 * the names (create / reference / destroy / copy / make_immutable) should be
 * confirmed against the implementation.
 */

typedef struct _hb_unicode_funcs_t hb_unicode_funcs_t;

/* Takes no arguments; returns a pointer to an hb_unicode_funcs_t. */
hb_unicode_funcs_t *
hb_unicode_funcs_create (void);

/* Takes an hb_unicode_funcs_t pointer and returns an hb_unicode_funcs_t
 * pointer. */
hb_unicode_funcs_t *
hb_unicode_funcs_reference (hb_unicode_funcs_t *ufuncs);

/* Returns an unsigned count for ufuncs. */
unsigned int
hb_unicode_funcs_get_reference_count (hb_unicode_funcs_t *ufuncs);

/* Returns nothing. */
void
hb_unicode_funcs_destroy (hb_unicode_funcs_t *ufuncs);

/* Takes an hb_unicode_funcs_t pointer and returns an hb_unicode_funcs_t
 * pointer. */
hb_unicode_funcs_t *
hb_unicode_funcs_copy (hb_unicode_funcs_t *ufuncs);

/* Returns nothing. */
void
hb_unicode_funcs_make_immutable (hb_unicode_funcs_t *ufuncs);


205/*
206 * funcs
207 */
208
209
210/* typedefs */
211
212typedef hb_codepoint_t (*hb_unicode_get_mirroring_func_t) (hb_codepoint_t unicode);
213typedef hb_category_t (*hb_unicode_get_general_category_func_t) (hb_codepoint_t unicode);
214typedef hb_script_t (*hb_unicode_get_script_func_t) (hb_codepoint_t unicode);
215typedef unsigned int (*hb_unicode_get_combining_class_func_t) (hb_codepoint_t unicode);
216typedef unsigned int (*hb_unicode_get_eastasian_width_func_t) (hb_codepoint_t unicode);
217
218
219/* setters */
220
221void
222hb_unicode_funcs_set_mirroring_func (hb_unicode_funcs_t *ufuncs,
223				     hb_unicode_get_mirroring_func_t mirroring_func);
224
225void
226hb_unicode_funcs_set_general_category_func (hb_unicode_funcs_t *ufuncs,
227					    hb_unicode_get_general_category_func_t general_category_func);
228
229void
230hb_unicode_funcs_set_script_func (hb_unicode_funcs_t *ufuncs,
231				  hb_unicode_get_script_func_t script_func);
232
233void
234hb_unicode_funcs_set_combining_class_func (hb_unicode_funcs_t *ufuncs,
235					   hb_unicode_get_combining_class_func_t combining_class_func);
236
237void
238hb_unicode_funcs_set_eastasian_width_func (hb_unicode_funcs_t *ufuncs,
239					   hb_unicode_get_eastasian_width_func_t eastasian_width_func);
240
241
242/* getters */
243
244/* These never return NULL.  Return fallback defaults instead. */
245
246hb_unicode_get_mirroring_func_t
247hb_unicode_funcs_get_mirroring_func (hb_unicode_funcs_t *ufuncs);
248
249hb_unicode_get_general_category_func_t
250hb_unicode_funcs_get_general_category_func (hb_unicode_funcs_t *ufuncs);
251
252hb_unicode_get_script_func_t
253hb_unicode_funcs_get_script_func (hb_unicode_funcs_t *ufuncs);
254
255hb_unicode_get_combining_class_func_t
256hb_unicode_funcs_get_combining_class_func (hb_unicode_funcs_t *ufuncs);
257
258hb_unicode_get_eastasian_width_func_t
259hb_unicode_funcs_get_eastasian_width_func (hb_unicode_funcs_t *ufuncs);
260
261
262/* accessors */
263
264hb_codepoint_t
265hb_unicode_get_mirroring (hb_unicode_funcs_t *ufuncs,
266			  hb_codepoint_t unicode);
267
268hb_category_t
269hb_unicode_get_general_category (hb_unicode_funcs_t *ufuncs,
270				 hb_codepoint_t unicode);
271
272hb_script_t
273hb_unicode_get_script (hb_unicode_funcs_t *ufuncs,
274		       hb_codepoint_t unicode);
275
276unsigned int
277hb_unicode_get_combining_class (hb_unicode_funcs_t *ufuncs,
278				hb_codepoint_t unicode);
279
280unsigned int
281hb_unicode_get_eastasian_width (hb_unicode_funcs_t *ufuncs,
282				hb_codepoint_t unicode);
283
284
285HB_END_DECLS
286
287#endif /* HB_UNICODE_H */
288