1c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod/* 2c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * Copyright © 2013 Google, Inc. 3c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * 4c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * This is part of HarfBuzz, a text shaping library. 5c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * 6c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * Permission is hereby granted, without written agreement and without 7c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * license or royalty fees, to use, copy, modify, and distribute this 8c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * software and its documentation for any purpose, provided that the 9c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * above copyright notice and the following two paragraphs appear in 10c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * all copies of this software. 11c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * 12c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 13c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 14c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 15c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 16c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * DAMAGE. 17c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * 18c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 19c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 20c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 21c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 22c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 23c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * 24c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * Google Author(s): Behdad Esfahbod 25c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod */ 26c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod 27c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod#include "hb-ot-shape-complex-private.hh" 28c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod 29c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod 30c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod/* Hangul shaper */ 31c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod 32c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod 33103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew/* Same order as the feature array below */ 34103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kewenum { 35103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew NONE, 36103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew 37103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew LJMO, 38103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew VJMO, 39103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew TJMO, 40103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew 41103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew FIRST_HANGUL_FEATURE = LJMO, 42103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew HANGUL_FEATURE_COUNT = TJMO + 1 43103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew}; 44103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew 45103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kewstatic const hb_tag_t hangul_features[HANGUL_FEATURE_COUNT] = 46c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod{ 47103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew HB_TAG_NONE, 48c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod HB_TAG('l','j','m','o'), 49c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod HB_TAG('v','j','m','o'), 50103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew HB_TAG('t','j','m','o') 51c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod}; 52c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod 53c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbodstatic void 54c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbodcollect_features_hangul (hb_ot_shape_planner_t *plan) 55c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod{ 56103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew hb_ot_map_builder_t *map = &plan->map; 57103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew 58103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew for (unsigned int i = FIRST_HANGUL_FEATURE; i < HANGUL_FEATURE_COUNT; i++) 59103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew map->add_feature (hangul_features[i], 1, F_NONE); 60103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew} 61103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew 62103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kewstruct hangul_shape_plan_t 63103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew{ 64103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew ASSERT_POD (); 65103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew 66103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew hb_mask_t mask_array[HANGUL_FEATURE_COUNT]; 67103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew}; 68103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew 69103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kewstatic void * 70103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kewdata_create_hangul (const hb_ot_shape_plan_t *plan) 71103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew{ 72103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew hangul_shape_plan_t *hangul_plan = (hangul_shape_plan_t *) calloc (1, sizeof (hangul_shape_plan_t)); 73103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew if (unlikely (!hangul_plan)) 74103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew return NULL; 75103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew 76103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew for (unsigned int i = 0; i < HANGUL_FEATURE_COUNT; i++) 77103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew hangul_plan->mask_array[i] = plan->map.get_1_mask (hangul_features[i]); 78103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew 79103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew return hangul_plan; 80103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew} 81103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew 82103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kewstatic void 83103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kewdata_destroy_hangul (void *data) 84103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew{ 85103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew free (data); 86c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod} 87c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod 88103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew/* Constants for algorithmic hangul syllable [de]composition. */ 897627100f428ac0ec8509d961d368d2d25d8f0b6eBehdad Esfahbod#define LBase 0x1100u 907627100f428ac0ec8509d961d368d2d25d8f0b6eBehdad Esfahbod#define VBase 0x1161u 917627100f428ac0ec8509d961d368d2d25d8f0b6eBehdad Esfahbod#define TBase 0x11A7u 927627100f428ac0ec8509d961d368d2d25d8f0b6eBehdad Esfahbod#define LCount 19u 937627100f428ac0ec8509d961d368d2d25d8f0b6eBehdad Esfahbod#define VCount 21u 947627100f428ac0ec8509d961d368d2d25d8f0b6eBehdad Esfahbod#define TCount 28u 957627100f428ac0ec8509d961d368d2d25d8f0b6eBehdad Esfahbod#define SBase 0xAC00u 96c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod#define NCount (VCount * TCount) 97c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod#define SCount (LCount * NCount) 98c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod 997627100f428ac0ec8509d961d368d2d25d8f0b6eBehdad Esfahbod#define isCombiningL(u) (hb_in_range ((u), LBase, LBase+LCount-1)) 1007627100f428ac0ec8509d961d368d2d25d8f0b6eBehdad Esfahbod#define isCombiningV(u) (hb_in_range ((u), VBase, VBase+VCount-1)) 1017627100f428ac0ec8509d961d368d2d25d8f0b6eBehdad Esfahbod#define isCombiningT(u) (hb_in_range ((u), TBase+1, TBase+TCount-1)) 1027627100f428ac0ec8509d961d368d2d25d8f0b6eBehdad Esfahbod#define isCombinedS(u) (hb_in_range ((u), SBase, SBase+SCount-1)) 103c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod 1047627100f428ac0ec8509d961d368d2d25d8f0b6eBehdad Esfahbod#define isL(u) (hb_in_ranges ((u), 0x1100u, 0x115Fu, 0xA960u, 0xA97Cu)) 1057627100f428ac0ec8509d961d368d2d25d8f0b6eBehdad Esfahbod#define isV(u) (hb_in_ranges ((u), 0x1160u, 0x11A7u, 0xD7B0u, 0xD7C6u)) 1067627100f428ac0ec8509d961d368d2d25d8f0b6eBehdad Esfahbod#define isT(u) (hb_in_ranges ((u), 0x11A8u, 0x11FFu, 0xD7CBu, 0xD7FBu)) 107103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew 1087627100f428ac0ec8509d961d368d2d25d8f0b6eBehdad Esfahbod#define isHangulTone(u) (hb_in_range ((u), 0x302Eu, 0x302Fu)) 1097244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew 110103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew/* buffer var allocations */ 111103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew#define hangul_shaping_feature() complex_var_u8_0() /* hangul jamo shaping feature */ 112103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew 113103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kewstatic bool 114103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kewis_zero_width_char (hb_font_t *font, 115103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew hb_codepoint_t unicode) 116103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew{ 117103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew hb_codepoint_t glyph; 118103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew return hb_font_get_glyph (font, unicode, 0, &glyph) && hb_font_get_glyph_h_advance (font, glyph) == 0; 119103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew} 120c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod 121c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbodstatic void 122c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbodpreprocess_text_hangul (const hb_ot_shape_plan_t *plan, 123c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod hb_buffer_t *buffer, 124c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod hb_font_t *font) 125c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod{ 126103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew HB_BUFFER_ALLOCATE_VAR (buffer, hangul_shaping_feature); 127103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew 128c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod /* Hangul syllables come in two shapes: LV, and LVT. Of those: 129c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * 130c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * - LV can be precomposed, or decomposed. Lets call those 131c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * <LV> and <L,V>, 132c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * - LVT can be fully precomposed, partically precomposed, or 133c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * fully decomposed. Ie. <LVT>, <LV,T>, or <L,V,T>. 134c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * 135c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * The composition / decomposition is mechanical. However, not 136c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * all <L,V> sequences compose, and not all <LV,T> sequences 137c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * compose. 138c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * 139c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * Here are the specifics: 140c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * 141c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * - <L>: U+1100..115F, U+A960..A97F 142c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * - <V>: U+1160..11A7, U+D7B0..D7C7 14332478656ce6e7926c3ad481511f02187ca743af6Behdad Esfahbod * - <T>: U+11A8..11FF, U+D7CB..D7FB 144c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * 145c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * - Only the <L,V> sequences for the 11xx ranges combine. 146c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * - Only <LV,T> sequences for T in U+11A8..11C3 combine. 147c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * 148c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * Here is what we want to accomplish in this shaper: 149c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * 150c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * - If the whole syllable can be precomposed, do that, 151103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew * - Otherwise, fully decompose and apply ljmo/vjmo/tjmo features. 1527244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew * - If a valid syllable is followed by a Hangul tone mark, reorder the tone 1537244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew * mark to precede the whole syllable - unless it is a zero-width glyph, in 1547244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew * which case we leave it untouched, assuming it's designed to overstrike. 155c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * 156c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * That is, of the different possible syllables: 157c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * 158c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * <L> 159c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * <L,V> 160c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * <L,V,T> 161c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * <LV> 162c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * <LVT> 163c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * <LV, T> 164c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * 165c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * - <L> needs no work. 166c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * 167c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * - <LV> and <LVT> can stay the way they are if the font supports them, otherwise we 168c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * should fully decompose them if font supports. 169c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * 170c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * - <L,V> and <L,V,T> we should compose if the whole thing can be composed. 171c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * 172c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * - <LV,T> we should compose if the whole thing can be composed, otherwise we should 173c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod * decompose. 174c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod */ 175c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod 176c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod buffer->clear_output (); 177103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew unsigned int start = 0, end = 0; /* Extent of most recently seen syllable; 178103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew * valid only if start < end 179103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew */ 180c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod unsigned int count = buffer->len; 181103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew 182c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod for (buffer->idx = 0; buffer->idx < count;) 183c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod { 184c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod hb_codepoint_t u = buffer->cur().codepoint; 185c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod 1867244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew if (isHangulTone (u)) 1877244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew { 1887244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew /* 1897244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew * We could cache the width of the tone marks and the existence of dotted-circle, 1907244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew * but the use of the Hangul tone mark characters seems to be rare enough that 1917244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew * I didn't bother for now. 1927244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew */ 1937244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew if (start < end && end == buffer->out_len) 1947244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew { 1957244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew /* Tone mark follows a valid syllable; move it in front, unless it's zero width. */ 1967244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew buffer->next_glyph (); 1977244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew if (!is_zero_width_char (font, u)) 1987244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew { 1997244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew hb_glyph_info_t *info = buffer->out_info; 2007244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew hb_glyph_info_t tone = info[end]; 2017244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew memmove (&info[start + 1], &info[start], (end - start) * sizeof (hb_glyph_info_t)); 2027244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew info[start] = tone; 2037244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew } 2047244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew /* Merge clusters across the (possibly reordered) syllable+tone. 2057244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew * We want to merge even in the zero-width tone mark case here, 2067244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew * so that clustering behavior isn't dependent on how the tone mark 2077244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew * is handled by the font. 2087244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew */ 2097244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew buffer->merge_out_clusters (start, end + 1); 2107244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew } 2117244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew else 2127244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew { 2137244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew /* No valid syllable as base for tone mark; try to insert dotted circle. */ 2147627100f428ac0ec8509d961d368d2d25d8f0b6eBehdad Esfahbod if (font->has_glyph (0x25CCu)) 2157244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew { 2167244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew hb_codepoint_t chars[2]; 21783d7e7915a5eaa8ff4c7014c319844e7dffd8225Jonathan Kew if (!is_zero_width_char (font, u)) { 2187244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew chars[0] = u; 2197627100f428ac0ec8509d961d368d2d25d8f0b6eBehdad Esfahbod chars[1] = 0x25CCu; 2207244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew } else { 2217627100f428ac0ec8509d961d368d2d25d8f0b6eBehdad Esfahbod chars[0] = 0x25CCu; 2227244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew chars[1] = u; 2237244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew } 2247244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew buffer->replace_glyphs (1, 2, chars); 2257244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew } 2267244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew else 2277244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew { 2287244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew /* No dotted circle available in the font; just leave tone mark untouched. */ 2297244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew buffer->next_glyph (); 2307244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew } 2317244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew } 2327244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew start = end = buffer->out_len; 2337244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew continue; 2347244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew } 2357244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew 236103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew start = buffer->out_len; /* Remember current position as a potential syllable start; 237103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew * will only be used if we set end to a later position. 238103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew */ 239103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew 240103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew if (isL (u) && buffer->idx + 1 < count) 241c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod { 242c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod hb_codepoint_t l = u; 243c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod hb_codepoint_t v = buffer->cur(+1).codepoint; 244103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew if (isV (v)) 245c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod { 246103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew /* Have <L,V> or <L,V,T>. */ 247103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew hb_codepoint_t t = 0; 248c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod unsigned int tindex = 0; 249c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod if (buffer->idx + 2 < count) 250c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod { 251103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew t = buffer->cur(+2).codepoint; 252103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew if (isT (t)) 253103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew tindex = t - TBase; /* Only used if isCombiningT (t); otherwise invalid. */ 254103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew else 255103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew t = 0; /* The next character was not a trailing jamo. */ 256c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod } 257c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod 258103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew /* We've got a syllable <L,V,T?>; see if it can potentially be composed. */ 259103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew if (isCombiningL (l) && isCombiningV (v) && (t == 0 || isCombiningT (t))) 260c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod { 261103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew /* Try to compose; if this succeeds, end is set to start+1. */ 262c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod hb_codepoint_t s = SBase + (l - LBase) * NCount + (v - VBase) * TCount + tindex; 2638de20b1e8a1c4d2081f64e695045e6e4da7ce144Behdad Esfahbod if (font->has_glyph (s)) 264c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod { 265103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew buffer->replace_glyphs (t ? 3 : 2, 1, &s); 266c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod if (unlikely (buffer->in_error)) 267c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod return; 268103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew end = start + 1; 269c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod continue; 270c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod } 271c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod } 272103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew 273103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew /* We didn't compose, either because it's an Old Hangul syllable without a 274103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew * precomposed character in Unicode, or because the font didn't support the 275103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew * necessary precomposed glyph. 276103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew * Set jamo features on the individual glyphs, and advance past them. 277103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew */ 278103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew buffer->cur().hangul_shaping_feature() = LJMO; 279103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew buffer->next_glyph (); 280103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew buffer->cur().hangul_shaping_feature() = VJMO; 281103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew buffer->next_glyph (); 282103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew if (t) 283103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew { 284103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew buffer->cur().hangul_shaping_feature() = TJMO; 285103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew buffer->next_glyph (); 286103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew end = start + 3; 287103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew } 288103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew else 289103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew end = start + 2; 290103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew buffer->merge_out_clusters (start, end); 291103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew continue; 292c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod } 293c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod } 294c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod 295103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew else if (isCombinedS (u)) 296c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod { 297103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew /* Have <LV>, <LVT>, or <LV,T> */ 298c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod hb_codepoint_t s = u; 2998de20b1e8a1c4d2081f64e695045e6e4da7ce144Behdad Esfahbod bool has_glyph = font->has_glyph (s); 300c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod unsigned int lindex = (s - SBase) / NCount; 301c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod unsigned int nindex = (s - SBase) % NCount; 302bdb20dafc3f737923da3dca0c832fdf4ab8daabcBehdad Esfahbod unsigned int vindex = nindex / TCount; 303bdb20dafc3f737923da3dca0c832fdf4ab8daabcBehdad Esfahbod unsigned int tindex = nindex % TCount; 304c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod 305c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod if (!tindex && 306c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod buffer->idx + 1 < count && 307c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod isCombiningT (buffer->cur(+1).codepoint)) 308c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod { 309c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod /* <LV,T>, try to combine. */ 31029ea403d67e29c2d531c1f613ce3d69e60f078f6Behdad Esfahbod unsigned int new_tindex = buffer->cur(+1).codepoint - TBase; 31129ea403d67e29c2d531c1f613ce3d69e60f078f6Behdad Esfahbod hb_codepoint_t new_s = s + new_tindex; 312103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew if (font->has_glyph (new_s)) 313c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod { 314c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod buffer->replace_glyphs (2, 1, &new_s); 315c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod if (unlikely (buffer->in_error)) 316c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod return; 317103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew end = start + 1; 318c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod continue; 319c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod } 320c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod } 321c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod 32229ea403d67e29c2d531c1f613ce3d69e60f078f6Behdad Esfahbod /* Otherwise, decompose if font doesn't support <LV> or <LVT>, 32329ea403d67e29c2d531c1f613ce3d69e60f078f6Behdad Esfahbod * or if having non-combining <LV,T>. Note that we already handled 32429ea403d67e29c2d531c1f613ce3d69e60f078f6Behdad Esfahbod * combining <LV,T> above. */ 325c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod if (!has_glyph || 32629ea403d67e29c2d531c1f613ce3d69e60f078f6Behdad Esfahbod (!tindex && 32729ea403d67e29c2d531c1f613ce3d69e60f078f6Behdad Esfahbod buffer->idx + 1 < count && 328c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod isT (buffer->cur(+1).codepoint))) 329c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod { 330c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod hb_codepoint_t decomposed[3] = {LBase + lindex, 331c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod VBase + vindex, 332c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod TBase + tindex}; 333103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew if (font->has_glyph (decomposed[0]) && 3348de20b1e8a1c4d2081f64e695045e6e4da7ce144Behdad Esfahbod font->has_glyph (decomposed[1]) && 3358de20b1e8a1c4d2081f64e695045e6e4da7ce144Behdad Esfahbod (!tindex || font->has_glyph (decomposed[2]))) 336c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod { 337103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew unsigned int s_len = tindex ? 3 : 2; 338103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew buffer->replace_glyphs (1, s_len, decomposed); 339c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod if (unlikely (buffer->in_error)) 340c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod return; 341103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew 342103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew /* We decomposed S: apply jamo features to the individual glyphs 343103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew * that are now in buffer->out_info. 344103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew */ 345103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew hb_glyph_info_t *info = buffer->out_info; 346103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew 347103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew /* If we decomposed an LV because of a non-combining T following, 348103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew * we want to include this T in the syllable. 349103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew */ 350103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew if (has_glyph && !tindex) 351103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew { 352103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew buffer->next_glyph (); 353103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew s_len++; 354103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew } 355103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew end = start + s_len; 356103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew 357103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew unsigned int i = start; 358103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew info[i++].hangul_shaping_feature() = LJMO; 359103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew info[i++].hangul_shaping_feature() = VJMO; 360103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew if (i < end) 361103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew info[i++].hangul_shaping_feature() = TJMO; 362103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew buffer->merge_out_clusters (start, end); 363c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod continue; 364c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod } 365c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod } 366103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew 367103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew if (has_glyph) 368103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew { 369103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew /* We didn't decompose the S, so just advance past it. */ 370103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew end = start + 1; 371103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew buffer->next_glyph (); 372103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew continue; 373103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew } 374c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod } 375c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod 3767244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew /* Didn't find a recognizable syllable, so we leave end <= start; 3777244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew * this will prevent tone-mark reordering happening. 3787244b3fc3bf9757dd094709d36bea68682264e20Jonathan Kew */ 379c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod buffer->next_glyph (); 380c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod } 381c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod buffer->swap_buffers (); 382c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod} 383c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod 384103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kewstatic void 385103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kewsetup_masks_hangul (const hb_ot_shape_plan_t *plan, 386103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew hb_buffer_t *buffer, 387103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew hb_font_t *font HB_UNUSED) 388103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew{ 389103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew const hangul_shape_plan_t *hangul_plan = (const hangul_shape_plan_t *) plan->data; 390103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew 391103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew if (likely (hangul_plan)) 392103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew { 393103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew unsigned int count = buffer->len; 394103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew hb_glyph_info_t *info = buffer->info; 395103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew for (unsigned int i = 0; i < count; i++, info++) 396103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew info->mask |= hangul_plan->mask_array[info->hangul_shaping_feature()]; 397103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew } 398103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew 399103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew HB_BUFFER_DEALLOCATE_VAR (buffer, hangul_shaping_feature); 400103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew} 401103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew 402103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew 403c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbodconst hb_ot_complex_shaper_t _hb_ot_complex_shaper_hangul = 404c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod{ 405c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod "hangul", 406c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod collect_features_hangul, 407c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod NULL, /* override_features */ 408103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew data_create_hangul, /* data_create */ 409103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew data_destroy_hangul, /* data_destroy */ 410c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod preprocess_text_hangul, 4118fc1f7fe74a25bf8549f5edd79c7da6b720eb064Behdad Esfahbod HB_OT_SHAPE_NORMALIZATION_MODE_NONE, 412c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod NULL, /* decompose */ 413c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod NULL, /* compose */ 414103436838df3a77552d3d33fc4bd80f09d9bf079Jonathan Kew setup_masks_hangul, /* setup_masks */ 415deef1862657d55b7ae8d45f4eecbe45c80785c4eJonathan Kew HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE, 416c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod false, /* fallback_position */ 417c98b7183f7dc453d5bac1f2503017cded317a495Behdad Esfahbod}; 418