hb-ot-shape-complex-hangul.cc revision 29ea403d67e29c2d531c1f613ce3d69e60f078f6
1/*
2 * Copyright © 2013  Google, Inc.
3 *
4 *  This is part of HarfBuzz, a text shaping library.
5 *
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
11 *
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16 * DAMAGE.
17 *
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23 *
24 * Google Author(s): Behdad Esfahbod
25 */
26
27#include "hb-ot-shape-complex-private.hh"
28
29
30/* Hangul shaper */
31
32
33static const hb_tag_t hangul_features[] =
34{
35  HB_TAG('l','j','m','o'),
36  HB_TAG('v','j','m','o'),
37  HB_TAG('t','j','m','o'),
38  HB_TAG_NONE
39};
40
41static void
42collect_features_hangul (hb_ot_shape_planner_t *plan)
43{
44  for (const hb_tag_t *script_features = hangul_features; script_features && *script_features; script_features++)
45    plan->map.add_global_bool_feature (*script_features);
46}
47
/* Constants for arithmetic Hangul syllable (de)composition.
 *
 * A precomposed syllable S maps to jamo indices as
 *   S = SBase + (lindex * VCount + vindex) * TCount + tindex
 * where tindex 0 means "no trailing consonant". */

/* First code point of each composable range. */
#define SBase 0xAC00
#define LBase 0x1100
#define VBase 0x1161
#define TBase 0x11A7	/* One below the first trailing jamo; see tindex 0 above. */

/* Number of entries in each range. */
#define LCount 19
#define VCount 21
#define TCount 28	/* Includes the "no trailing consonant" slot. */
#define NCount (VCount * TCount)	/* Syllables per leading consonant. */
#define SCount (LCount * NCount)	/* Total precomposed syllables. */

/* Jamo that take part in arithmetic composition. */
#define isCombiningL(u) (hb_in_range<hb_codepoint_t> ((u), LBase, LBase + LCount - 1))
#define isCombiningV(u) (hb_in_range<hb_codepoint_t> ((u), VBase, VBase + VCount - 1))
/* Starts at TBase + 1 because TBase itself stands for the empty T slot. */
#define isCombiningT(u) (hb_in_range<hb_codepoint_t> ((u), TBase + 1, TBase + TCount - 1))

/* Precomposed <LV> / <LVT> syllables. */
#define isCombinedS(u) (hb_in_range<hb_codepoint_t> ((u), SBase, SBase + SCount - 1))

/* Any trailing jamo, including the ones that never compose. */
#define isT(u) (hb_in_ranges<hb_codepoint_t> ((u), 0x11A8, 0x11FF, 0xD7CB, 0xD7FB))
64
65static void
66preprocess_text_hangul (const hb_ot_shape_plan_t *plan,
67			hb_buffer_t              *buffer,
68			hb_font_t                *font)
69{
70  /* Hangul syllables come in two shapes: LV, and LVT.  Of those:
71   *
72   *   - LV can be precomposed, or decomposed.  Lets call those
73   *     <LV> and <L,V>,
74   *   - LVT can be fully precomposed, partically precomposed, or
75   *     fully decomposed.  Ie. <LVT>, <LV,T>, or <L,V,T>.
76   *
77   * The composition / decomposition is mechanical.  However, not
78   * all <L,V> sequences compose, and not all <LV,T> sequences
79   * compose.
80   *
81   * Here are the specifics:
82   *
83   *   - <L>: U+1100..115F, U+A960..A97F
84   *   - <V>: U+1160..11A7, U+D7B0..D7C7
85   *   - <T>: U+11A8..11FF, U+D7CB..D7FB
86   *
87   *   - Only the <L,V> sequences for the 11xx ranges combine.
88   *   - Only <LV,T> sequences for T in U+11A8..11C3 combine.
89   *
90   * Here is what we want to accomplish in this shaper:
91   *
92   *   - If the whole syllable can be precomposed, do that,
93   *   - Otherwise, fully decompose.
94   *
95   * That is, of the different possible syllables:
96   *
97   *   <L>
98   *   <L,V>
99   *   <L,V,T>
100   *   <LV>
101   *   <LVT>
102   *   <LV, T>
103   *
104   * - <L> needs no work.
105   *
106   * - <LV> and <LVT> can stay the way they are if the font supports them, otherwise we
107   *   should fully decompose them if font supports.
108   *
109   * - <L,V> and <L,V,T> we should compose if the whole thing can be composed.
110   *
111   * - <LV,T> we should compose if the whole thing can be composed, otherwise we should
112   *   decompose.
113   */
114
115  buffer->clear_output ();
116  unsigned int count = buffer->len;
117  for (buffer->idx = 0; buffer->idx < count;)
118  {
119    hb_codepoint_t u = buffer->cur().codepoint;
120
121    if (isCombiningL(u) && buffer->idx + 1 < count)
122    {
123      hb_codepoint_t l = u;
124      hb_codepoint_t v = buffer->cur(+1).codepoint;
125      if (isCombiningV(v))
126      {
127        /* Have <L,V> or <L,V,T>. */
128        unsigned int len = 2;
129	unsigned int tindex = 0;
130	if (buffer->idx + 2 < count)
131	{
132	  hb_codepoint_t t = buffer->cur(+2).codepoint;
133	  if (isCombiningT(t))
134	  {
135	    len = 3;
136	    tindex = t - TBase;
137	  }
138	  else if (isT (t))
139	  {
140	    /* Old T jamo.  Doesn't combine.  Don't combine *anything*. */
141	   len = 0;
142	  }
143	}
144
145	if (len)
146	{
147	  hb_codepoint_t s = SBase + (l - LBase) * NCount + (v - VBase) * TCount + tindex;
148	  hb_codepoint_t glyph;
149	  if (font->get_glyph (s, 0, &glyph))
150	  {
151	    buffer->replace_glyphs (len, 1, &s);
152	    if (unlikely (buffer->in_error))
153	      return;
154	    continue;
155	  }
156	}
157      }
158    }
159
160    else if (isCombinedS(u))
161    {
162       /* Have <LV>, <LVT>, or <LV,T> */
163      hb_codepoint_t s = u;
164      hb_codepoint_t glyph;
165      bool has_glyph = font->get_glyph (s, 0, &glyph);
166      unsigned int lindex = (s - SBase) / NCount;
167      unsigned int nindex = (s - SBase) % NCount;
168      unsigned int vindex = nindex / TCount;
169      unsigned int tindex = nindex % TCount;
170
171      if (!tindex &&
172	  buffer->idx + 1 < count &&
173	  isCombiningT (buffer->cur(+1).codepoint))
174      {
175	/* <LV,T>, try to combine. */
176	unsigned int new_tindex = buffer->cur(+1).codepoint - TBase;
177	hb_codepoint_t new_s = s + new_tindex;
178        if (font->get_glyph (new_s, 0, &glyph))
179	{
180	  buffer->replace_glyphs (2, 1, &new_s);
181	  if (unlikely (buffer->in_error))
182	    return;
183	  continue;
184	}
185      }
186
187      /* Otherwise, decompose if font doesn't support <LV> or <LVT>,
188       * or if having non-combining <LV,T>.  Note that we already handled
189       * combining <LV,T> above. */
190      if (!has_glyph ||
191	  (!tindex &&
192	   buffer->idx + 1 < count &&
193	   isT (buffer->cur(+1).codepoint)))
194      {
195	hb_codepoint_t decomposed[3] = {LBase + lindex,
196					VBase + vindex,
197					TBase + tindex};
198        if (font->get_glyph (decomposed[0], 0, &glyph) &&
199	    font->get_glyph (decomposed[1], 0, &glyph) &&
200	    (!tindex || font->get_glyph (decomposed[2], 0, &glyph)))
201	{
202	  buffer->replace_glyphs (1, tindex ? 3 : 2, decomposed);
203	  if (unlikely (buffer->in_error))
204	    return;
205	  continue;
206	}
207      }
208    }
209
210    buffer->next_glyph ();
211  }
212  buffer->swap_buffers ();
213}
214
215const hb_ot_complex_shaper_t _hb_ot_complex_shaper_hangul =
216{
217  "hangul",
218  collect_features_hangul,
219  NULL, /* override_features */
220  NULL, /* data_create */
221  NULL, /* data_destroy */
222  preprocess_text_hangul,
223  HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT,
224  NULL, /* decompose */
225  NULL, /* compose */
226  NULL, /* setup_masks */
227  HB_OT_SHAPE_ZERO_WIDTH_MARKS_DEFAULT,
228  false, /* fallback_position */
229};
230