1/* 2 * Copyright © 2009 Red Hat, Inc. 3 * Copyright © 2011 Codethink Limited 4 * Copyright © 2011,2012 Google, Inc. 5 * 6 * This is part of HarfBuzz, a text shaping library. 7 * 8 * Permission is hereby granted, without written agreement and without 9 * license or royalty fees, to use, copy, modify, and distribute this 10 * software and its documentation for any purpose, provided that the 11 * above copyright notice and the following two paragraphs appear in 12 * all copies of this software. 13 * 14 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 15 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 16 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 17 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 18 * DAMAGE. 19 * 20 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 21 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 22 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 23 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 24 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 25 * 26 * Red Hat Author(s): Behdad Esfahbod 27 * Codethink Author(s): Ryan Lortie 28 * Google Author(s): Behdad Esfahbod 29 */ 30 31#ifndef HB_H_IN 32#error "Include <hb.h> instead." 33#endif 34 35#ifndef HB_UNICODE_H 36#define HB_UNICODE_H 37 38#include "hb-common.h" 39 40HB_BEGIN_DECLS 41 42 43/* hb_unicode_general_category_t */ 44 45/* Unicode Character Database property: General_Category (gc) */ 46typedef enum 47{ 48 HB_UNICODE_GENERAL_CATEGORY_CONTROL, /* Cc */ 49 HB_UNICODE_GENERAL_CATEGORY_FORMAT, /* Cf */ 50 HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED, /* Cn */ 51 HB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE, /* Co */ 52 HB_UNICODE_GENERAL_CATEGORY_SURROGATE, /* Cs */ 53 HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER, /* Ll */ 54 HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER, /* Lm */ 55 HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER, /* Lo */ 56 HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER, /* Lt */ 57 HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER, /* Lu */ 58 HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK, /* Mc */ 59 HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK, /* Me */ 60 HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK, /* Mn */ 61 HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER, /* Nd */ 62 HB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER, /* Nl */ 63 HB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER, /* No */ 64 HB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION, /* Pc */ 65 HB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION, /* Pd */ 66 HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION, /* Pe */ 67 HB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION, /* Pf */ 68 HB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION, /* Pi */ 69 HB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION, /* Po */ 70 HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION, /* Ps */ 71 HB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL, /* Sc */ 72 HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL, /* Sk */ 73 HB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL, /* Sm */ 74 HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL, /* So */ 75 HB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR, /* Zl */ 76 HB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR, /* Zp */ 77 HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR /* Zs */ 78} hb_unicode_general_category_t; 79 80/* hb_unicode_combining_class_t */ 81 82/* Note: newer versions of Unicode may add new values. Clients should be ready to handle 83 * any value in the 0..254 range being returned from hb_unicode_combining_class(). 84 */ 85 86/* Unicode Character Database property: Canonical_Combining_Class (ccc) */ 87typedef enum 88{ 89 HB_UNICODE_COMBINING_CLASS_NOT_REORDERED = 0, 90 HB_UNICODE_COMBINING_CLASS_OVERLAY = 1, 91 HB_UNICODE_COMBINING_CLASS_NUKTA = 7, 92 HB_UNICODE_COMBINING_CLASS_KANA_VOICING = 8, 93 HB_UNICODE_COMBINING_CLASS_VIRAMA = 9, 94 95 /* Hebrew */ 96 HB_UNICODE_COMBINING_CLASS_CCC10 = 10, 97 HB_UNICODE_COMBINING_CLASS_CCC11 = 11, 98 HB_UNICODE_COMBINING_CLASS_CCC12 = 12, 99 HB_UNICODE_COMBINING_CLASS_CCC13 = 13, 100 HB_UNICODE_COMBINING_CLASS_CCC14 = 14, 101 HB_UNICODE_COMBINING_CLASS_CCC15 = 15, 102 HB_UNICODE_COMBINING_CLASS_CCC16 = 16, 103 HB_UNICODE_COMBINING_CLASS_CCC17 = 17, 104 HB_UNICODE_COMBINING_CLASS_CCC18 = 18, 105 HB_UNICODE_COMBINING_CLASS_CCC19 = 19, 106 HB_UNICODE_COMBINING_CLASS_CCC20 = 20, 107 HB_UNICODE_COMBINING_CLASS_CCC21 = 21, 108 HB_UNICODE_COMBINING_CLASS_CCC22 = 22, 109 HB_UNICODE_COMBINING_CLASS_CCC23 = 23, 110 HB_UNICODE_COMBINING_CLASS_CCC24 = 24, 111 HB_UNICODE_COMBINING_CLASS_CCC25 = 25, 112 HB_UNICODE_COMBINING_CLASS_CCC26 = 26, 113 114 /* Arabic */ 115 HB_UNICODE_COMBINING_CLASS_CCC27 = 27, 116 HB_UNICODE_COMBINING_CLASS_CCC28 = 28, 117 HB_UNICODE_COMBINING_CLASS_CCC29 = 29, 118 HB_UNICODE_COMBINING_CLASS_CCC30 = 30, 119 HB_UNICODE_COMBINING_CLASS_CCC31 = 31, 120 HB_UNICODE_COMBINING_CLASS_CCC32 = 32, 121 HB_UNICODE_COMBINING_CLASS_CCC33 = 33, 122 HB_UNICODE_COMBINING_CLASS_CCC34 = 34, 123 HB_UNICODE_COMBINING_CLASS_CCC35 = 35, 124 125 /* Syriac */ 126 HB_UNICODE_COMBINING_CLASS_CCC36 = 36, 127 128 /* Telugu */ 129 HB_UNICODE_COMBINING_CLASS_CCC84 = 84, 130 HB_UNICODE_COMBINING_CLASS_CCC91 = 91, 131 132 /* Thai */ 133 HB_UNICODE_COMBINING_CLASS_CCC103 = 103, 134 HB_UNICODE_COMBINING_CLASS_CCC107 = 107, 135 136 /* Lao */ 137 HB_UNICODE_COMBINING_CLASS_CCC118 = 118, 138 HB_UNICODE_COMBINING_CLASS_CCC122 = 122, 139 140 /* Tibetan */ 141 HB_UNICODE_COMBINING_CLASS_CCC129 = 129, 142 HB_UNICODE_COMBINING_CLASS_CCC130 = 130, 143 HB_UNICODE_COMBINING_CLASS_CCC133 = 132, 144 145 146 HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW_LEFT = 200, 147 HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW = 202, 148 HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE = 214, 149 HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE_RIGHT = 216, 150 HB_UNICODE_COMBINING_CLASS_BELOW_LEFT = 218, 151 HB_UNICODE_COMBINING_CLASS_BELOW = 220, 152 HB_UNICODE_COMBINING_CLASS_BELOW_RIGHT = 222, 153 HB_UNICODE_COMBINING_CLASS_LEFT = 224, 154 HB_UNICODE_COMBINING_CLASS_RIGHT = 226, 155 HB_UNICODE_COMBINING_CLASS_ABOVE_LEFT = 228, 156 HB_UNICODE_COMBINING_CLASS_ABOVE = 230, 157 HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT = 232, 158 HB_UNICODE_COMBINING_CLASS_DOUBLE_BELOW = 233, 159 HB_UNICODE_COMBINING_CLASS_DOUBLE_ABOVE = 234, 160 161 HB_UNICODE_COMBINING_CLASS_IOTA_SUBSCRIPT = 240, 162 163 HB_UNICODE_COMBINING_CLASS_INVALID = 255 164} hb_unicode_combining_class_t; 165 166 167/* 168 * hb_unicode_funcs_t 169 */ 170 171typedef struct hb_unicode_funcs_t hb_unicode_funcs_t; 172 173 174/* 175 * just give me the best implementation you've got there. 176 */ 177hb_unicode_funcs_t * 178hb_unicode_funcs_get_default (void); 179 180 181hb_unicode_funcs_t * 182hb_unicode_funcs_create (hb_unicode_funcs_t *parent); 183 184hb_unicode_funcs_t * 185hb_unicode_funcs_get_empty (void); 186 187hb_unicode_funcs_t * 188hb_unicode_funcs_reference (hb_unicode_funcs_t *ufuncs); 189 190void 191hb_unicode_funcs_destroy (hb_unicode_funcs_t *ufuncs); 192 193hb_bool_t 194hb_unicode_funcs_set_user_data (hb_unicode_funcs_t *ufuncs, 195 hb_user_data_key_t *key, 196 void * data, 197 hb_destroy_func_t destroy, 198 hb_bool_t replace); 199 200 201void * 202hb_unicode_funcs_get_user_data (hb_unicode_funcs_t *ufuncs, 203 hb_user_data_key_t *key); 204 205 206void 207hb_unicode_funcs_make_immutable (hb_unicode_funcs_t *ufuncs); 208 209hb_bool_t 210hb_unicode_funcs_is_immutable (hb_unicode_funcs_t *ufuncs); 211 212hb_unicode_funcs_t * 213hb_unicode_funcs_get_parent (hb_unicode_funcs_t *ufuncs); 214 215 216/* 217 * funcs 218 */ 219 220/* typedefs */ 221 222typedef hb_unicode_combining_class_t (*hb_unicode_combining_class_func_t) (hb_unicode_funcs_t *ufuncs, 223 hb_codepoint_t unicode, 224 void *user_data); 225typedef unsigned int (*hb_unicode_eastasian_width_func_t) (hb_unicode_funcs_t *ufuncs, 226 hb_codepoint_t unicode, 227 void *user_data); 228typedef hb_unicode_general_category_t (*hb_unicode_general_category_func_t) (hb_unicode_funcs_t *ufuncs, 229 hb_codepoint_t unicode, 230 void *user_data); 231typedef hb_codepoint_t (*hb_unicode_mirroring_func_t) (hb_unicode_funcs_t *ufuncs, 232 hb_codepoint_t unicode, 233 void *user_data); 234typedef hb_script_t (*hb_unicode_script_func_t) (hb_unicode_funcs_t *ufuncs, 235 hb_codepoint_t unicode, 236 void *user_data); 237 238typedef hb_bool_t (*hb_unicode_compose_func_t) (hb_unicode_funcs_t *ufuncs, 239 hb_codepoint_t a, 240 hb_codepoint_t b, 241 hb_codepoint_t *ab, 242 void *user_data); 243typedef hb_bool_t (*hb_unicode_decompose_func_t) (hb_unicode_funcs_t *ufuncs, 244 hb_codepoint_t ab, 245 hb_codepoint_t *a, 246 hb_codepoint_t *b, 247 void *user_data); 248 249/** 250 * hb_unicode_decompose_compatibility_func_t: 251 * @ufuncs: Unicode function structure 252 * @u: codepoint to decompose 253 * @decomposed: address of codepoint array (of length %HB_UNICODE_MAX_DECOMPOSITION_LEN) to write decomposition into 254 * @user_data: user data pointer as passed to hb_unicode_funcs_set_decompose_compatibility_func() 255 * 256 * Fully decompose @u to its Unicode compatibility decomposition. The codepoints of the decomposition will be written to @decomposed. 257 * The complete length of the decomposition will be returned. 258 * 259 * If @u has no compatibility decomposition, zero should be returned. 260 * 261 * The Unicode standard guarantees that a buffer of length %HB_UNICODE_MAX_DECOMPOSITION_LEN codepoints will always be sufficient for any 262 * compatibility decomposition plus an terminating value of 0. Consequently, @decompose must be allocated by the caller to be at least this length. Implementations 263 * of this function type must ensure that they do not write past the provided array. 264 * 265 * Return value: number of codepoints in the full compatibility decomposition of @u, or 0 if no decomposition available. 266 */ 267typedef unsigned int (*hb_unicode_decompose_compatibility_func_t) (hb_unicode_funcs_t *ufuncs, 268 hb_codepoint_t u, 269 hb_codepoint_t *decomposed, 270 void *user_data); 271 272/* See Unicode 6.1 for details on the maximum decomposition length. */ 273#define HB_UNICODE_MAX_DECOMPOSITION_LEN (18+1) /* codepoints */ 274 275/* setters */ 276 277void 278hb_unicode_funcs_set_combining_class_func (hb_unicode_funcs_t *ufuncs, 279 hb_unicode_combining_class_func_t combining_class_func, 280 void *user_data, hb_destroy_func_t destroy); 281 282void 283hb_unicode_funcs_set_eastasian_width_func (hb_unicode_funcs_t *ufuncs, 284 hb_unicode_eastasian_width_func_t eastasian_width_func, 285 void *user_data, hb_destroy_func_t destroy); 286 287void 288hb_unicode_funcs_set_general_category_func (hb_unicode_funcs_t *ufuncs, 289 hb_unicode_general_category_func_t general_category_func, 290 void *user_data, hb_destroy_func_t destroy); 291 292void 293hb_unicode_funcs_set_mirroring_func (hb_unicode_funcs_t *ufuncs, 294 hb_unicode_mirroring_func_t mirroring_func, 295 void *user_data, hb_destroy_func_t destroy); 296 297void 298hb_unicode_funcs_set_script_func (hb_unicode_funcs_t *ufuncs, 299 hb_unicode_script_func_t script_func, 300 void *user_data, hb_destroy_func_t destroy); 301 302void 303hb_unicode_funcs_set_compose_func (hb_unicode_funcs_t *ufuncs, 304 hb_unicode_compose_func_t compose_func, 305 void *user_data, hb_destroy_func_t destroy); 306 307void 308hb_unicode_funcs_set_decompose_func (hb_unicode_funcs_t *ufuncs, 309 hb_unicode_decompose_func_t decompose_func, 310 void *user_data, hb_destroy_func_t destroy); 311 312void 313hb_unicode_funcs_set_decompose_compatibility_func (hb_unicode_funcs_t *ufuncs, 314 hb_unicode_decompose_compatibility_func_t decompose_compatibility_func, 315 void *user_data, hb_destroy_func_t destroy); 316 317/* accessors */ 318 319hb_unicode_combining_class_t 320hb_unicode_combining_class (hb_unicode_funcs_t *ufuncs, 321 hb_codepoint_t unicode); 322 323unsigned int 324hb_unicode_eastasian_width (hb_unicode_funcs_t *ufuncs, 325 hb_codepoint_t unicode); 326 327hb_unicode_general_category_t 328hb_unicode_general_category (hb_unicode_funcs_t *ufuncs, 329 hb_codepoint_t unicode); 330 331hb_codepoint_t 332hb_unicode_mirroring (hb_unicode_funcs_t *ufuncs, 333 hb_codepoint_t unicode); 334 335hb_script_t 336hb_unicode_script (hb_unicode_funcs_t *ufuncs, 337 hb_codepoint_t unicode); 338 339hb_bool_t 340hb_unicode_compose (hb_unicode_funcs_t *ufuncs, 341 hb_codepoint_t a, 342 hb_codepoint_t b, 343 hb_codepoint_t *ab); 344hb_bool_t 345hb_unicode_decompose (hb_unicode_funcs_t *ufuncs, 346 hb_codepoint_t ab, 347 hb_codepoint_t *a, 348 hb_codepoint_t *b); 349 350unsigned int 351hb_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs, 352 hb_codepoint_t u, 353 hb_codepoint_t *decomposed); 354 355HB_END_DECLS 356 357#endif /* HB_UNICODE_H */ 358