1/* 2 * Copyright © 2013 Google, Inc. 3 * 4 * This is part of HarfBuzz, a text shaping library. 5 * 6 * Permission is hereby granted, without written agreement and without 7 * license or royalty fees, to use, copy, modify, and distribute this 8 * software and its documentation for any purpose, provided that the 9 * above copyright notice and the following two paragraphs appear in 10 * all copies of this software. 11 * 12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 16 * DAMAGE. 17 * 18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 23 * 24 * Google Author(s): Behdad Esfahbod 25 */ 26 27#include "hb-ot-shape-complex-private.hh" 28 29 30/* Hangul shaper */ 31 32 33/* Same order as the feature array below */ 34enum { 35 NONE, 36 37 LJMO, 38 VJMO, 39 TJMO, 40 41 FIRST_HANGUL_FEATURE = LJMO, 42 HANGUL_FEATURE_COUNT = TJMO + 1 43}; 44 45static const hb_tag_t hangul_features[HANGUL_FEATURE_COUNT] = 46{ 47 HB_TAG_NONE, 48 HB_TAG('l','j','m','o'), 49 HB_TAG('v','j','m','o'), 50 HB_TAG('t','j','m','o') 51}; 52 53static void 54collect_features_hangul (hb_ot_shape_planner_t *plan) 55{ 56 hb_ot_map_builder_t *map = &plan->map; 57 58 for (unsigned int i = FIRST_HANGUL_FEATURE; i < HANGUL_FEATURE_COUNT; i++) 59 map->add_feature (hangul_features[i], 1, F_NONE); 60} 61 62struct hangul_shape_plan_t 63{ 64 ASSERT_POD (); 65 66 hb_mask_t mask_array[HANGUL_FEATURE_COUNT]; 67}; 68 69static void * 70data_create_hangul (const hb_ot_shape_plan_t *plan) 71{ 72 hangul_shape_plan_t *hangul_plan = (hangul_shape_plan_t *) calloc (1, sizeof (hangul_shape_plan_t)); 73 if (unlikely (!hangul_plan)) 74 return NULL; 75 76 for (unsigned int i = 0; i < HANGUL_FEATURE_COUNT; i++) 77 hangul_plan->mask_array[i] = plan->map.get_1_mask (hangul_features[i]); 78 79 return hangul_plan; 80} 81 82static void 83data_destroy_hangul (void *data) 84{ 85 free (data); 86} 87 88/* Constants for algorithmic hangul syllable [de]composition. */ 89#define LBase 0x1100u 90#define VBase 0x1161u 91#define TBase 0x11A7u 92#define LCount 19u 93#define VCount 21u 94#define TCount 28u 95#define SBase 0xAC00u 96#define NCount (VCount * TCount) 97#define SCount (LCount * NCount) 98 99#define isCombiningL(u) (hb_in_range ((u), LBase, LBase+LCount-1)) 100#define isCombiningV(u) (hb_in_range ((u), VBase, VBase+VCount-1)) 101#define isCombiningT(u) (hb_in_range ((u), TBase+1, TBase+TCount-1)) 102#define isCombinedS(u) (hb_in_range ((u), SBase, SBase+SCount-1)) 103 104#define isL(u) (hb_in_ranges ((u), 0x1100u, 0x115Fu, 0xA960u, 0xA97Cu)) 105#define isV(u) (hb_in_ranges ((u), 0x1160u, 0x11A7u, 0xD7B0u, 0xD7C6u)) 106#define isT(u) (hb_in_ranges ((u), 0x11A8u, 0x11FFu, 0xD7CBu, 0xD7FBu)) 107 108#define isHangulTone(u) (hb_in_range ((u), 0x302Eu, 0x302Fu)) 109 110/* buffer var allocations */ 111#define hangul_shaping_feature() complex_var_u8_0() /* hangul jamo shaping feature */ 112 113static bool 114is_zero_width_char (hb_font_t *font, 115 hb_codepoint_t unicode) 116{ 117 hb_codepoint_t glyph; 118 return hb_font_get_glyph (font, unicode, 0, &glyph) && hb_font_get_glyph_h_advance (font, glyph) == 0; 119} 120 121static void 122preprocess_text_hangul (const hb_ot_shape_plan_t *plan, 123 hb_buffer_t *buffer, 124 hb_font_t *font) 125{ 126 HB_BUFFER_ALLOCATE_VAR (buffer, hangul_shaping_feature); 127 128 /* Hangul syllables come in two shapes: LV, and LVT. Of those: 129 * 130 * - LV can be precomposed, or decomposed. Lets call those 131 * <LV> and <L,V>, 132 * - LVT can be fully precomposed, partically precomposed, or 133 * fully decomposed. Ie. <LVT>, <LV,T>, or <L,V,T>. 134 * 135 * The composition / decomposition is mechanical. However, not 136 * all <L,V> sequences compose, and not all <LV,T> sequences 137 * compose. 138 * 139 * Here are the specifics: 140 * 141 * - <L>: U+1100..115F, U+A960..A97F 142 * - <V>: U+1160..11A7, U+D7B0..D7C7 143 * - <T>: U+11A8..11FF, U+D7CB..D7FB 144 * 145 * - Only the <L,V> sequences for the 11xx ranges combine. 146 * - Only <LV,T> sequences for T in U+11A8..11C3 combine. 147 * 148 * Here is what we want to accomplish in this shaper: 149 * 150 * - If the whole syllable can be precomposed, do that, 151 * - Otherwise, fully decompose and apply ljmo/vjmo/tjmo features. 152 * - If a valid syllable is followed by a Hangul tone mark, reorder the tone 153 * mark to precede the whole syllable - unless it is a zero-width glyph, in 154 * which case we leave it untouched, assuming it's designed to overstrike. 155 * 156 * That is, of the different possible syllables: 157 * 158 * <L> 159 * <L,V> 160 * <L,V,T> 161 * <LV> 162 * <LVT> 163 * <LV, T> 164 * 165 * - <L> needs no work. 166 * 167 * - <LV> and <LVT> can stay the way they are if the font supports them, otherwise we 168 * should fully decompose them if font supports. 169 * 170 * - <L,V> and <L,V,T> we should compose if the whole thing can be composed. 171 * 172 * - <LV,T> we should compose if the whole thing can be composed, otherwise we should 173 * decompose. 174 */ 175 176 buffer->clear_output (); 177 unsigned int start = 0, end = 0; /* Extent of most recently seen syllable; 178 * valid only if start < end 179 */ 180 unsigned int count = buffer->len; 181 182 for (buffer->idx = 0; buffer->idx < count;) 183 { 184 hb_codepoint_t u = buffer->cur().codepoint; 185 186 if (isHangulTone (u)) 187 { 188 /* 189 * We could cache the width of the tone marks and the existence of dotted-circle, 190 * but the use of the Hangul tone mark characters seems to be rare enough that 191 * I didn't bother for now. 192 */ 193 if (start < end && end == buffer->out_len) 194 { 195 /* Tone mark follows a valid syllable; move it in front, unless it's zero width. */ 196 buffer->next_glyph (); 197 if (!is_zero_width_char (font, u)) 198 { 199 hb_glyph_info_t *info = buffer->out_info; 200 hb_glyph_info_t tone = info[end]; 201 memmove (&info[start + 1], &info[start], (end - start) * sizeof (hb_glyph_info_t)); 202 info[start] = tone; 203 } 204 /* Merge clusters across the (possibly reordered) syllable+tone. 205 * We want to merge even in the zero-width tone mark case here, 206 * so that clustering behavior isn't dependent on how the tone mark 207 * is handled by the font. 208 */ 209 buffer->merge_out_clusters (start, end + 1); 210 } 211 else 212 { 213 /* No valid syllable as base for tone mark; try to insert dotted circle. */ 214 if (font->has_glyph (0x25CCu)) 215 { 216 hb_codepoint_t chars[2]; 217 if (!is_zero_width_char (font, u)) { 218 chars[0] = u; 219 chars[1] = 0x25CCu; 220 } else { 221 chars[0] = 0x25CCu; 222 chars[1] = u; 223 } 224 buffer->replace_glyphs (1, 2, chars); 225 } 226 else 227 { 228 /* No dotted circle available in the font; just leave tone mark untouched. */ 229 buffer->next_glyph (); 230 } 231 } 232 start = end = buffer->out_len; 233 continue; 234 } 235 236 start = buffer->out_len; /* Remember current position as a potential syllable start; 237 * will only be used if we set end to a later position. 238 */ 239 240 if (isL (u) && buffer->idx + 1 < count) 241 { 242 hb_codepoint_t l = u; 243 hb_codepoint_t v = buffer->cur(+1).codepoint; 244 if (isV (v)) 245 { 246 /* Have <L,V> or <L,V,T>. */ 247 hb_codepoint_t t = 0; 248 unsigned int tindex = 0; 249 if (buffer->idx + 2 < count) 250 { 251 t = buffer->cur(+2).codepoint; 252 if (isT (t)) 253 tindex = t - TBase; /* Only used if isCombiningT (t); otherwise invalid. */ 254 else 255 t = 0; /* The next character was not a trailing jamo. */ 256 } 257 258 /* We've got a syllable <L,V,T?>; see if it can potentially be composed. */ 259 if (isCombiningL (l) && isCombiningV (v) && (t == 0 || isCombiningT (t))) 260 { 261 /* Try to compose; if this succeeds, end is set to start+1. */ 262 hb_codepoint_t s = SBase + (l - LBase) * NCount + (v - VBase) * TCount + tindex; 263 if (font->has_glyph (s)) 264 { 265 buffer->replace_glyphs (t ? 3 : 2, 1, &s); 266 if (unlikely (buffer->in_error)) 267 return; 268 end = start + 1; 269 continue; 270 } 271 } 272 273 /* We didn't compose, either because it's an Old Hangul syllable without a 274 * precomposed character in Unicode, or because the font didn't support the 275 * necessary precomposed glyph. 276 * Set jamo features on the individual glyphs, and advance past them. 277 */ 278 buffer->cur().hangul_shaping_feature() = LJMO; 279 buffer->next_glyph (); 280 buffer->cur().hangul_shaping_feature() = VJMO; 281 buffer->next_glyph (); 282 if (t) 283 { 284 buffer->cur().hangul_shaping_feature() = TJMO; 285 buffer->next_glyph (); 286 end = start + 3; 287 } 288 else 289 end = start + 2; 290 buffer->merge_out_clusters (start, end); 291 continue; 292 } 293 } 294 295 else if (isCombinedS (u)) 296 { 297 /* Have <LV>, <LVT>, or <LV,T> */ 298 hb_codepoint_t s = u; 299 bool has_glyph = font->has_glyph (s); 300 unsigned int lindex = (s - SBase) / NCount; 301 unsigned int nindex = (s - SBase) % NCount; 302 unsigned int vindex = nindex / TCount; 303 unsigned int tindex = nindex % TCount; 304 305 if (!tindex && 306 buffer->idx + 1 < count && 307 isCombiningT (buffer->cur(+1).codepoint)) 308 { 309 /* <LV,T>, try to combine. */ 310 unsigned int new_tindex = buffer->cur(+1).codepoint - TBase; 311 hb_codepoint_t new_s = s + new_tindex; 312 if (font->has_glyph (new_s)) 313 { 314 buffer->replace_glyphs (2, 1, &new_s); 315 if (unlikely (buffer->in_error)) 316 return; 317 end = start + 1; 318 continue; 319 } 320 } 321 322 /* Otherwise, decompose if font doesn't support <LV> or <LVT>, 323 * or if having non-combining <LV,T>. Note that we already handled 324 * combining <LV,T> above. */ 325 if (!has_glyph || 326 (!tindex && 327 buffer->idx + 1 < count && 328 isT (buffer->cur(+1).codepoint))) 329 { 330 hb_codepoint_t decomposed[3] = {LBase + lindex, 331 VBase + vindex, 332 TBase + tindex}; 333 if (font->has_glyph (decomposed[0]) && 334 font->has_glyph (decomposed[1]) && 335 (!tindex || font->has_glyph (decomposed[2]))) 336 { 337 unsigned int s_len = tindex ? 3 : 2; 338 buffer->replace_glyphs (1, s_len, decomposed); 339 if (unlikely (buffer->in_error)) 340 return; 341 342 /* We decomposed S: apply jamo features to the individual glyphs 343 * that are now in buffer->out_info. 344 */ 345 hb_glyph_info_t *info = buffer->out_info; 346 347 /* If we decomposed an LV because of a non-combining T following, 348 * we want to include this T in the syllable. 349 */ 350 if (has_glyph && !tindex) 351 { 352 buffer->next_glyph (); 353 s_len++; 354 } 355 end = start + s_len; 356 357 unsigned int i = start; 358 info[i++].hangul_shaping_feature() = LJMO; 359 info[i++].hangul_shaping_feature() = VJMO; 360 if (i < end) 361 info[i++].hangul_shaping_feature() = TJMO; 362 buffer->merge_out_clusters (start, end); 363 continue; 364 } 365 } 366 367 if (has_glyph) 368 { 369 /* We didn't decompose the S, so just advance past it. */ 370 end = start + 1; 371 buffer->next_glyph (); 372 continue; 373 } 374 } 375 376 /* Didn't find a recognizable syllable, so we leave end <= start; 377 * this will prevent tone-mark reordering happening. 378 */ 379 buffer->next_glyph (); 380 } 381 buffer->swap_buffers (); 382} 383 384static void 385setup_masks_hangul (const hb_ot_shape_plan_t *plan, 386 hb_buffer_t *buffer, 387 hb_font_t *font HB_UNUSED) 388{ 389 const hangul_shape_plan_t *hangul_plan = (const hangul_shape_plan_t *) plan->data; 390 391 if (likely (hangul_plan)) 392 { 393 unsigned int count = buffer->len; 394 hb_glyph_info_t *info = buffer->info; 395 for (unsigned int i = 0; i < count; i++, info++) 396 info->mask |= hangul_plan->mask_array[info->hangul_shaping_feature()]; 397 } 398 399 HB_BUFFER_DEALLOCATE_VAR (buffer, hangul_shaping_feature); 400} 401 402 403const hb_ot_complex_shaper_t _hb_ot_complex_shaper_hangul = 404{ 405 "hangul", 406 collect_features_hangul, 407 NULL, /* override_features */ 408 data_create_hangul, /* data_create */ 409 data_destroy_hangul, /* data_destroy */ 410 preprocess_text_hangul, 411 HB_OT_SHAPE_NORMALIZATION_MODE_NONE, 412 NULL, /* decompose */ 413 NULL, /* compose */ 414 setup_masks_hangul, /* setup_masks */ 415 HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE, 416 false, /* fallback_position */ 417}; 418