hb-ot-shape-complex-hangul.cc revision 29ea403d67e29c2d531c1f613ce3d69e60f078f6
1/* 2 * Copyright © 2013 Google, Inc. 3 * 4 * This is part of HarfBuzz, a text shaping library. 5 * 6 * Permission is hereby granted, without written agreement and without 7 * license or royalty fees, to use, copy, modify, and distribute this 8 * software and its documentation for any purpose, provided that the 9 * above copyright notice and the following two paragraphs appear in 10 * all copies of this software. 11 * 12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 16 * DAMAGE. 17 * 18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 23 * 24 * Google Author(s): Behdad Esfahbod 25 */ 26 27#include "hb-ot-shape-complex-private.hh" 28 29 30/* Hangul shaper */ 31 32 33static const hb_tag_t hangul_features[] = 34{ 35 HB_TAG('l','j','m','o'), 36 HB_TAG('v','j','m','o'), 37 HB_TAG('t','j','m','o'), 38 HB_TAG_NONE 39}; 40 41static void 42collect_features_hangul (hb_ot_shape_planner_t *plan) 43{ 44 for (const hb_tag_t *script_features = hangul_features; script_features && *script_features; script_features++) 45 plan->map.add_global_bool_feature (*script_features); 46} 47 48#define LBase 0x1100 49#define VBase 0x1161 50#define TBase 0x11A7 51#define LCount 19 52#define VCount 21 53#define TCount 28 54#define SBase 0xAC00 55#define NCount (VCount * TCount) 56#define SCount (LCount * NCount) 57 58#define isCombiningL(u) (hb_in_range<hb_codepoint_t> ((u), LBase, LBase+LCount-1)) 59#define isCombiningV(u) (hb_in_range<hb_codepoint_t> ((u), VBase, VBase+VCount-1)) 60#define isCombiningT(u) (hb_in_range<hb_codepoint_t> ((u), TBase+1, TBase+TCount-1)) 61#define isCombinedS(u) (hb_in_range<hb_codepoint_t> ((u), SBase, SBase+SCount-1)) 62 63#define isT(u) (hb_in_ranges<hb_codepoint_t> ((u), 0x11A8, 0x11FF, 0xD7CB, 0xD7FB)) 64 65static void 66preprocess_text_hangul (const hb_ot_shape_plan_t *plan, 67 hb_buffer_t *buffer, 68 hb_font_t *font) 69{ 70 /* Hangul syllables come in two shapes: LV, and LVT. Of those: 71 * 72 * - LV can be precomposed, or decomposed. Lets call those 73 * <LV> and <L,V>, 74 * - LVT can be fully precomposed, partically precomposed, or 75 * fully decomposed. Ie. <LVT>, <LV,T>, or <L,V,T>. 76 * 77 * The composition / decomposition is mechanical. However, not 78 * all <L,V> sequences compose, and not all <LV,T> sequences 79 * compose. 80 * 81 * Here are the specifics: 82 * 83 * - <L>: U+1100..115F, U+A960..A97F 84 * - <V>: U+1160..11A7, U+D7B0..D7C7 85 * - <T>: U+11A8..11FF, U+D7CB..D7FB 86 * 87 * - Only the <L,V> sequences for the 11xx ranges combine. 88 * - Only <LV,T> sequences for T in U+11A8..11C3 combine. 89 * 90 * Here is what we want to accomplish in this shaper: 91 * 92 * - If the whole syllable can be precomposed, do that, 93 * - Otherwise, fully decompose. 94 * 95 * That is, of the different possible syllables: 96 * 97 * <L> 98 * <L,V> 99 * <L,V,T> 100 * <LV> 101 * <LVT> 102 * <LV, T> 103 * 104 * - <L> needs no work. 105 * 106 * - <LV> and <LVT> can stay the way they are if the font supports them, otherwise we 107 * should fully decompose them if font supports. 108 * 109 * - <L,V> and <L,V,T> we should compose if the whole thing can be composed. 110 * 111 * - <LV,T> we should compose if the whole thing can be composed, otherwise we should 112 * decompose. 113 */ 114 115 buffer->clear_output (); 116 unsigned int count = buffer->len; 117 for (buffer->idx = 0; buffer->idx < count;) 118 { 119 hb_codepoint_t u = buffer->cur().codepoint; 120 121 if (isCombiningL(u) && buffer->idx + 1 < count) 122 { 123 hb_codepoint_t l = u; 124 hb_codepoint_t v = buffer->cur(+1).codepoint; 125 if (isCombiningV(v)) 126 { 127 /* Have <L,V> or <L,V,T>. */ 128 unsigned int len = 2; 129 unsigned int tindex = 0; 130 if (buffer->idx + 2 < count) 131 { 132 hb_codepoint_t t = buffer->cur(+2).codepoint; 133 if (isCombiningT(t)) 134 { 135 len = 3; 136 tindex = t - TBase; 137 } 138 else if (isT (t)) 139 { 140 /* Old T jamo. Doesn't combine. Don't combine *anything*. */ 141 len = 0; 142 } 143 } 144 145 if (len) 146 { 147 hb_codepoint_t s = SBase + (l - LBase) * NCount + (v - VBase) * TCount + tindex; 148 hb_codepoint_t glyph; 149 if (font->get_glyph (s, 0, &glyph)) 150 { 151 buffer->replace_glyphs (len, 1, &s); 152 if (unlikely (buffer->in_error)) 153 return; 154 continue; 155 } 156 } 157 } 158 } 159 160 else if (isCombinedS(u)) 161 { 162 /* Have <LV>, <LVT>, or <LV,T> */ 163 hb_codepoint_t s = u; 164 hb_codepoint_t glyph; 165 bool has_glyph = font->get_glyph (s, 0, &glyph); 166 unsigned int lindex = (s - SBase) / NCount; 167 unsigned int nindex = (s - SBase) % NCount; 168 unsigned int vindex = nindex / TCount; 169 unsigned int tindex = nindex % TCount; 170 171 if (!tindex && 172 buffer->idx + 1 < count && 173 isCombiningT (buffer->cur(+1).codepoint)) 174 { 175 /* <LV,T>, try to combine. */ 176 unsigned int new_tindex = buffer->cur(+1).codepoint - TBase; 177 hb_codepoint_t new_s = s + new_tindex; 178 if (font->get_glyph (new_s, 0, &glyph)) 179 { 180 buffer->replace_glyphs (2, 1, &new_s); 181 if (unlikely (buffer->in_error)) 182 return; 183 continue; 184 } 185 } 186 187 /* Otherwise, decompose if font doesn't support <LV> or <LVT>, 188 * or if having non-combining <LV,T>. Note that we already handled 189 * combining <LV,T> above. */ 190 if (!has_glyph || 191 (!tindex && 192 buffer->idx + 1 < count && 193 isT (buffer->cur(+1).codepoint))) 194 { 195 hb_codepoint_t decomposed[3] = {LBase + lindex, 196 VBase + vindex, 197 TBase + tindex}; 198 if (font->get_glyph (decomposed[0], 0, &glyph) && 199 font->get_glyph (decomposed[1], 0, &glyph) && 200 (!tindex || font->get_glyph (decomposed[2], 0, &glyph))) 201 { 202 buffer->replace_glyphs (1, tindex ? 3 : 2, decomposed); 203 if (unlikely (buffer->in_error)) 204 return; 205 continue; 206 } 207 } 208 } 209 210 buffer->next_glyph (); 211 } 212 buffer->swap_buffers (); 213} 214 215const hb_ot_complex_shaper_t _hb_ot_complex_shaper_hangul = 216{ 217 "hangul", 218 collect_features_hangul, 219 NULL, /* override_features */ 220 NULL, /* data_create */ 221 NULL, /* data_destroy */ 222 preprocess_text_hangul, 223 HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT, 224 NULL, /* decompose */ 225 NULL, /* compose */ 226 NULL, /* setup_masks */ 227 HB_OT_SHAPE_ZERO_WIDTH_MARKS_DEFAULT, 228 false, /* fallback_position */ 229}; 230