/*
 * Copyright © 2009  Red Hat, Inc.
 * Copyright © 2011  Codethink Limited
 * Copyright © 2011,2012  Google, Inc.
 *
 *  This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Red Hat Author(s): Behdad Esfahbod
 * Codethink Author(s): Ryan Lortie
 * Google Author(s): Behdad Esfahbod
 */

31#ifndef HB_H_IN
32#error "Include <hb.h> instead."
33#endif
34
35#ifndef HB_UNICODE_H
36#define HB_UNICODE_H
37
38#include "hb-common.h"
39
40HB_BEGIN_DECLS
41
42
/* hb_unicode_general_category_t */

/**
 * hb_unicode_general_category_t:
 *
 * Unicode Character Database property: General_Category (gc).
 *
 * One enumerator per gc value; the two-letter Unicode abbreviation is given
 * in the trailing comment.  The numeric values are written out explicitly and
 * are exactly the ones the enumerators previously received from their
 * declaration order (0 for Cc through 29 for Zs), so inserting or reordering
 * entries can no longer silently renumber them.
 */
typedef enum
{
  HB_UNICODE_GENERAL_CATEGORY_CONTROL             =  0, /* Cc */
  HB_UNICODE_GENERAL_CATEGORY_FORMAT              =  1, /* Cf */
  HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED          =  2, /* Cn */
  HB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE         =  3, /* Co */
  HB_UNICODE_GENERAL_CATEGORY_SURROGATE           =  4, /* Cs */
  HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER    =  5, /* Ll */
  HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER     =  6, /* Lm */
  HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER        =  7, /* Lo */
  HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER    =  8, /* Lt */
  HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER    =  9, /* Lu */
  HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK        = 10, /* Mc */
  HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK      = 11, /* Me */
  HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK    = 12, /* Mn */
  HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER      = 13, /* Nd */
  HB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER       = 14, /* Nl */
  HB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER        = 15, /* No */
  HB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION = 16, /* Pc */
  HB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION    = 17, /* Pd */
  HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION   = 18, /* Pe */
  HB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION   = 19, /* Pf */
  HB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION = 20, /* Pi */
  HB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION   = 21, /* Po */
  HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION    = 22, /* Ps */
  HB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL     = 23, /* Sc */
  HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL     = 24, /* Sk */
  HB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL         = 25, /* Sm */
  HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL        = 26, /* So */
  HB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR      = 27, /* Zl */
  HB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR = 28, /* Zp */
  HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR     = 29  /* Zs */
} hb_unicode_general_category_t;

/* hb_unicode_combining_class_t */

/* Note: newer versions of Unicode may add new values.  Clients should be ready to handle
 * any value in the 0..254 range being returned from hb_unicode_combining_class().
 */

/**
 * hb_unicode_combining_class_t:
 *
 * Unicode Character Database property: Canonical_Combining_Class (ccc).
 *
 * Each enumerator's value is the numeric ccc value it stands for.  The
 * CCCnnn enumerators are named after their value, grouped below by the
 * script they are listed under.  The single exception is
 * HB_UNICODE_COMBINING_CLASS_CCC133, whose value is 132: it is kept as an
 * alias so code written against that name keeps compiling, and
 * HB_UNICODE_COMBINING_CLASS_CCC132 is the correctly named spelling.
 *
 * HB_UNICODE_COMBINING_CLASS_INVALID (255) lies outside the 0..254 range
 * mentioned in the note above.
 */
typedef enum
{
  HB_UNICODE_COMBINING_CLASS_NOT_REORDERED	= 0,
  HB_UNICODE_COMBINING_CLASS_OVERLAY		= 1,
  HB_UNICODE_COMBINING_CLASS_NUKTA		= 7,
  HB_UNICODE_COMBINING_CLASS_KANA_VOICING	= 8,
  HB_UNICODE_COMBINING_CLASS_VIRAMA		= 9,

  /* Hebrew */
  HB_UNICODE_COMBINING_CLASS_CCC10	=  10,
  HB_UNICODE_COMBINING_CLASS_CCC11	=  11,
  HB_UNICODE_COMBINING_CLASS_CCC12	=  12,
  HB_UNICODE_COMBINING_CLASS_CCC13	=  13,
  HB_UNICODE_COMBINING_CLASS_CCC14	=  14,
  HB_UNICODE_COMBINING_CLASS_CCC15	=  15,
  HB_UNICODE_COMBINING_CLASS_CCC16	=  16,
  HB_UNICODE_COMBINING_CLASS_CCC17	=  17,
  HB_UNICODE_COMBINING_CLASS_CCC18	=  18,
  HB_UNICODE_COMBINING_CLASS_CCC19	=  19,
  HB_UNICODE_COMBINING_CLASS_CCC20	=  20,
  HB_UNICODE_COMBINING_CLASS_CCC21	=  21,
  HB_UNICODE_COMBINING_CLASS_CCC22	=  22,
  HB_UNICODE_COMBINING_CLASS_CCC23	=  23,
  HB_UNICODE_COMBINING_CLASS_CCC24	=  24,
  HB_UNICODE_COMBINING_CLASS_CCC25	=  25,
  HB_UNICODE_COMBINING_CLASS_CCC26	=  26,

  /* Arabic */
  HB_UNICODE_COMBINING_CLASS_CCC27	=  27,
  HB_UNICODE_COMBINING_CLASS_CCC28	=  28,
  HB_UNICODE_COMBINING_CLASS_CCC29	=  29,
  HB_UNICODE_COMBINING_CLASS_CCC30	=  30,
  HB_UNICODE_COMBINING_CLASS_CCC31	=  31,
  HB_UNICODE_COMBINING_CLASS_CCC32	=  32,
  HB_UNICODE_COMBINING_CLASS_CCC33	=  33,
  HB_UNICODE_COMBINING_CLASS_CCC34	=  34,
  HB_UNICODE_COMBINING_CLASS_CCC35	=  35,

  /* Syriac */
  HB_UNICODE_COMBINING_CLASS_CCC36	=  36,

  /* Telugu */
  HB_UNICODE_COMBINING_CLASS_CCC84	=  84,
  HB_UNICODE_COMBINING_CLASS_CCC91	=  91,

  /* Thai */
  HB_UNICODE_COMBINING_CLASS_CCC103	= 103,
  HB_UNICODE_COMBINING_CLASS_CCC107	= 107,

  /* Lao */
  HB_UNICODE_COMBINING_CLASS_CCC118	= 118,
  HB_UNICODE_COMBINING_CLASS_CCC122	= 122,

  /* Tibetan */
  HB_UNICODE_COMBINING_CLASS_CCC129	= 129,
  HB_UNICODE_COMBINING_CLASS_CCC130	= 130,
  HB_UNICODE_COMBINING_CLASS_CCC132	= 132,
  /* Misnamed: the value is 132, not 133.  Retained only so that existing
   * users of this identifier keep compiling; prefer ..._CCC132. */
  HB_UNICODE_COMBINING_CLASS_CCC133	= 132,


  HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW_LEFT	= 200,
  HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW		= 202,
  HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE		= 214,
  HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE_RIGHT	= 216,
  HB_UNICODE_COMBINING_CLASS_BELOW_LEFT			= 218,
  HB_UNICODE_COMBINING_CLASS_BELOW			= 220,
  HB_UNICODE_COMBINING_CLASS_BELOW_RIGHT		= 222,
  HB_UNICODE_COMBINING_CLASS_LEFT			= 224,
  HB_UNICODE_COMBINING_CLASS_RIGHT			= 226,
  HB_UNICODE_COMBINING_CLASS_ABOVE_LEFT			= 228,
  HB_UNICODE_COMBINING_CLASS_ABOVE			= 230,
  HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT		= 232,
  HB_UNICODE_COMBINING_CLASS_DOUBLE_BELOW		= 233,
  HB_UNICODE_COMBINING_CLASS_DOUBLE_ABOVE		= 234,

  HB_UNICODE_COMBINING_CLASS_IOTA_SUBSCRIPT		= 240,

  HB_UNICODE_COMBINING_CLASS_INVALID	= 255
} hb_unicode_combining_class_t;


/*
 * hb_unicode_funcs_t
 */

/**
 * hb_unicode_funcs_t:
 *
 * Opaque object type.  This header only forward-declares the struct, so
 * clients hold it by pointer and operate on it through the
 * hb_unicode_funcs_*() and hb_unicode_*() functions declared below.
 */
typedef struct hb_unicode_funcs_t hb_unicode_funcs_t;


174/*
175 * just give me the best implementation you've got there.
176 */
177hb_unicode_funcs_t *
178hb_unicode_funcs_get_default (void);
179
180
181hb_unicode_funcs_t *
182hb_unicode_funcs_create (hb_unicode_funcs_t *parent);
183
184hb_unicode_funcs_t *
185hb_unicode_funcs_get_empty (void);
186
187hb_unicode_funcs_t *
188hb_unicode_funcs_reference (hb_unicode_funcs_t *ufuncs);
189
190void
191hb_unicode_funcs_destroy (hb_unicode_funcs_t *ufuncs);
192
193hb_bool_t
194hb_unicode_funcs_set_user_data (hb_unicode_funcs_t *ufuncs,
195			        hb_user_data_key_t *key,
196			        void *              data,
197			        hb_destroy_func_t   destroy,
198				hb_bool_t           replace);
199
200
201void *
202hb_unicode_funcs_get_user_data (hb_unicode_funcs_t *ufuncs,
203			        hb_user_data_key_t *key);
204
205
206void
207hb_unicode_funcs_make_immutable (hb_unicode_funcs_t *ufuncs);
208
209hb_bool_t
210hb_unicode_funcs_is_immutable (hb_unicode_funcs_t *ufuncs);
211
212hb_unicode_funcs_t *
213hb_unicode_funcs_get_parent (hb_unicode_funcs_t *ufuncs);
214
215
216/*
217 * funcs
218 */
219
220/* typedefs */
221
222typedef hb_unicode_combining_class_t	(*hb_unicode_combining_class_func_t)	(hb_unicode_funcs_t *ufuncs,
223										 hb_codepoint_t      unicode,
224										 void               *user_data);
225typedef unsigned int			(*hb_unicode_eastasian_width_func_t)	(hb_unicode_funcs_t *ufuncs,
226										 hb_codepoint_t      unicode,
227										 void               *user_data);
228typedef hb_unicode_general_category_t	(*hb_unicode_general_category_func_t)	(hb_unicode_funcs_t *ufuncs,
229										 hb_codepoint_t      unicode,
230										 void               *user_data);
231typedef hb_codepoint_t			(*hb_unicode_mirroring_func_t)		(hb_unicode_funcs_t *ufuncs,
232										 hb_codepoint_t      unicode,
233										 void               *user_data);
234typedef hb_script_t			(*hb_unicode_script_func_t)		(hb_unicode_funcs_t *ufuncs,
235										 hb_codepoint_t      unicode,
236										 void               *user_data);
237
238typedef hb_bool_t			(*hb_unicode_compose_func_t)		(hb_unicode_funcs_t *ufuncs,
239										 hb_codepoint_t      a,
240										 hb_codepoint_t      b,
241										 hb_codepoint_t     *ab,
242										 void               *user_data);
243typedef hb_bool_t			(*hb_unicode_decompose_func_t)		(hb_unicode_funcs_t *ufuncs,
244										 hb_codepoint_t      ab,
245										 hb_codepoint_t     *a,
246										 hb_codepoint_t     *b,
247										 void               *user_data);
248
249/**
250 * hb_unicode_decompose_compatibility_func_t:
251 * @ufuncs: Unicode function structure
252 * @u: codepoint to decompose
253 * @decomposed: address of codepoint array (of length %HB_UNICODE_MAX_DECOMPOSITION_LEN) to write decomposition into
254 * @user_data: user data pointer as passed to hb_unicode_funcs_set_decompose_compatibility_func()
255 *
256 * Fully decompose @u to its Unicode compatibility decomposition. The codepoints of the decomposition will be written to @decomposed.
257 * The complete length of the decomposition will be returned.
258 *
259 * If @u has no compatibility decomposition, zero should be returned.
260 *
261 * The Unicode standard guarantees that a buffer of length %HB_UNICODE_MAX_DECOMPOSITION_LEN codepoints will always be sufficient for any
262 * compatibility decomposition plus an terminating value of 0.  Consequently, @decompose must be allocated by the caller to be at least this length.  Implementations
263 * of this function type must ensure that they do not write past the provided array.
264 *
265 * Return value: number of codepoints in the full compatibility decomposition of @u, or 0 if no decomposition available.
266 */
267typedef unsigned int			(*hb_unicode_decompose_compatibility_func_t)	(hb_unicode_funcs_t *ufuncs,
268											 hb_codepoint_t      u,
269											 hb_codepoint_t     *decomposed,
270											 void               *user_data);
271
/* Length, in codepoints, of the buffer handed to compatibility-decomposition
 * callbacks: 18 plus one extra slot (the callback documentation in this
 * header speaks of a terminating value of 0).
 * See Unicode 6.1 for details on the maximum decomposition length. */
#define HB_UNICODE_MAX_DECOMPOSITION_LEN (18+1) /* codepoints */

275/* setters */
276
277void
278hb_unicode_funcs_set_combining_class_func (hb_unicode_funcs_t *ufuncs,
279					   hb_unicode_combining_class_func_t combining_class_func,
280					   void *user_data, hb_destroy_func_t destroy);
281
282void
283hb_unicode_funcs_set_eastasian_width_func (hb_unicode_funcs_t *ufuncs,
284					   hb_unicode_eastasian_width_func_t eastasian_width_func,
285					   void *user_data, hb_destroy_func_t destroy);
286
287void
288hb_unicode_funcs_set_general_category_func (hb_unicode_funcs_t *ufuncs,
289					    hb_unicode_general_category_func_t general_category_func,
290					    void *user_data, hb_destroy_func_t destroy);
291
292void
293hb_unicode_funcs_set_mirroring_func (hb_unicode_funcs_t *ufuncs,
294				     hb_unicode_mirroring_func_t mirroring_func,
295				     void *user_data, hb_destroy_func_t destroy);
296
297void
298hb_unicode_funcs_set_script_func (hb_unicode_funcs_t *ufuncs,
299				  hb_unicode_script_func_t script_func,
300				  void *user_data, hb_destroy_func_t destroy);
301
302void
303hb_unicode_funcs_set_compose_func (hb_unicode_funcs_t *ufuncs,
304				   hb_unicode_compose_func_t compose_func,
305				   void *user_data, hb_destroy_func_t destroy);
306
307void
308hb_unicode_funcs_set_decompose_func (hb_unicode_funcs_t *ufuncs,
309				     hb_unicode_decompose_func_t decompose_func,
310				     void *user_data, hb_destroy_func_t destroy);
311
312void
313hb_unicode_funcs_set_decompose_compatibility_func (hb_unicode_funcs_t *ufuncs,
314						   hb_unicode_decompose_compatibility_func_t decompose_compatibility_func,
315						   void *user_data, hb_destroy_func_t destroy);
316
317/* accessors */
318
319hb_unicode_combining_class_t
320hb_unicode_combining_class (hb_unicode_funcs_t *ufuncs,
321			    hb_codepoint_t unicode);
322
323unsigned int
324hb_unicode_eastasian_width (hb_unicode_funcs_t *ufuncs,
325			    hb_codepoint_t unicode);
326
327hb_unicode_general_category_t
328hb_unicode_general_category (hb_unicode_funcs_t *ufuncs,
329			     hb_codepoint_t unicode);
330
331hb_codepoint_t
332hb_unicode_mirroring (hb_unicode_funcs_t *ufuncs,
333		      hb_codepoint_t unicode);
334
335hb_script_t
336hb_unicode_script (hb_unicode_funcs_t *ufuncs,
337		   hb_codepoint_t unicode);
338
339hb_bool_t
340hb_unicode_compose (hb_unicode_funcs_t *ufuncs,
341		    hb_codepoint_t      a,
342		    hb_codepoint_t      b,
343		    hb_codepoint_t     *ab);
344hb_bool_t
345hb_unicode_decompose (hb_unicode_funcs_t *ufuncs,
346		      hb_codepoint_t      ab,
347		      hb_codepoint_t     *a,
348		      hb_codepoint_t     *b);
349
350unsigned int
351hb_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs,
352				    hb_codepoint_t      u,
353				    hb_codepoint_t     *decomposed);
354
355HB_END_DECLS
356
357#endif /* HB_UNICODE_H */
358