1/*
2 * Copyright © 2011,2012,2013  Google, Inc.
3 *
4 *  This is part of HarfBuzz, a text shaping library.
5 *
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
11 *
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16 * DAMAGE.
17 *
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23 *
24 * Google Author(s): Behdad Esfahbod
25 */
26
27#include "hb-ot-shape-complex-indic-private.hh"
28
/* Buffer var allocations: the two scratch slots this shaper uses on each
 * glyph info. */
#define myanmar_category() complex_var_u8_0() /* holds a myanmar_category_t */
#define myanmar_position() complex_var_u8_1() /* holds a position; written from indic_position_t values in set_myanmar_properties() */
32
33
34/*
35 * Myanmar shaper.
36 */
37
38static const hb_tag_t
39basic_features[] =
40{
41  /*
42   * Basic features.
43   * These features are applied in order, one at a time, after initial_reordering.
44   */
45  HB_TAG('r','p','h','f'),
46  HB_TAG('p','r','e','f'),
47  HB_TAG('b','l','w','f'),
48  HB_TAG('p','s','t','f'),
49};
50static const hb_tag_t
51other_features[] =
52{
53  /*
54   * Other features.
55   * These features are applied all at once, after final_reordering.
56   */
57  HB_TAG('p','r','e','s'),
58  HB_TAG('a','b','v','s'),
59  HB_TAG('b','l','w','s'),
60  HB_TAG('p','s','t','s'),
61  /* Positioning features, though we don't care about the types. */
62  HB_TAG('d','i','s','t'),
63  /* Pre-release version of Windows 8 Myanmar font had abvm,blwm
64   * features.  The released Windows 8 version of the font (as well
65   * as the released spec) used 'mark' instead.  The Windows 8
66   * shaper however didn't apply 'mark' but did apply 'mkmk'.
67   * Perhaps it applied abvm/blwm.  This was fixed in a Windows 8
68   * update, so now it applies mark/mkmk.  We are guessing that
69   * it still applies abvm/blwm too.
70   */
71  HB_TAG('a','b','v','m'),
72  HB_TAG('b','l','w','m'),
73};
74
75static void
76setup_syllables (const hb_ot_shape_plan_t *plan,
77		 hb_font_t *font,
78		 hb_buffer_t *buffer);
79static void
80initial_reordering (const hb_ot_shape_plan_t *plan,
81		    hb_font_t *font,
82		    hb_buffer_t *buffer);
83static void
84final_reordering (const hb_ot_shape_plan_t *plan,
85		  hb_font_t *font,
86		  hb_buffer_t *buffer);
87
88static void
89collect_features_myanmar (hb_ot_shape_planner_t *plan)
90{
91  hb_ot_map_builder_t *map = &plan->map;
92
93  /* Do this before any lookups have been applied. */
94  map->add_gsub_pause (setup_syllables);
95
96  map->add_global_bool_feature (HB_TAG('l','o','c','l'));
97  /* The Indic specs do not require ccmp, but we apply it here since if
98   * there is a use of it, it's typically at the beginning. */
99  map->add_global_bool_feature (HB_TAG('c','c','m','p'));
100
101
102  map->add_gsub_pause (initial_reordering);
103  for (unsigned int i = 0; i < ARRAY_LENGTH (basic_features); i++)
104  {
105    map->add_feature (basic_features[i], 1, F_GLOBAL | F_MANUAL_ZWJ);
106    map->add_gsub_pause (NULL);
107  }
108  map->add_gsub_pause (final_reordering);
109  for (unsigned int i = 0; i < ARRAY_LENGTH (other_features); i++)
110    map->add_feature (other_features[i], 1, F_GLOBAL | F_MANUAL_ZWJ);
111}
112
113static void
114override_features_myanmar (hb_ot_shape_planner_t *plan)
115{
116  plan->map.add_feature (HB_TAG('l','i','g','a'), 0, F_GLOBAL);
117}
118
119
/* Kind of cluster a glyph belongs to.  Code in this file reads it back
 * from the low four bits of a glyph's syllable() value. */
enum syllable_type_t {
  consonant_syllable  = 0,
  punctuation_cluster = 1,
  broken_cluster      = 2,
  non_myanmar_cluster = 3
};
126
127#include "hb-ot-shape-complex-myanmar-machine.hh"
128
129
130/* Note: This enum is duplicated in the -machine.rl source file.
131 * Not sure how to avoid duplication. */
132enum myanmar_category_t {
133  OT_As  = 18, /* Asat */
134  OT_D   = 19, /* Digits except zero */
135  OT_D0  = 20, /* Digit zero */
136  OT_DB  = OT_N, /* Dot below */
137  OT_GB  = OT_PLACEHOLDER,
138  OT_MH  = 21, /* Various consonant medial types */
139  OT_MR  = 22, /* Various consonant medial types */
140  OT_MW  = 23, /* Various consonant medial types */
141  OT_MY  = 24, /* Various consonant medial types */
142  OT_PT  = 25, /* Pwo and other tones */
143  OT_VAbv = 26,
144  OT_VBlw = 27,
145  OT_VPre = 28,
146  OT_VPst = 29,
147  OT_VS   = 30, /* Variation selectors */
148  OT_P    = 31  /* Punctuation */
149};
150
151
152static inline bool
153is_one_of (const hb_glyph_info_t &info, unsigned int flags)
154{
155  /* If it ligated, all bets are off. */
156  if (_hb_glyph_info_ligated (&info)) return false;
157  return !!(FLAG (info.myanmar_category()) & flags);
158}
159
160static inline bool
161is_consonant (const hb_glyph_info_t &info)
162{
163  return is_one_of (info, CONSONANT_FLAGS);
164}
165
166
167static inline void
168set_myanmar_properties (hb_glyph_info_t &info)
169{
170  hb_codepoint_t u = info.codepoint;
171  unsigned int type = hb_indic_get_categories (u);
172  indic_category_t cat = (indic_category_t) (type & 0x7Fu);
173  indic_position_t pos = (indic_position_t) (type >> 8);
174
175  /* Myanmar
176   * http://www.microsoft.com/typography/OpenTypeDev/myanmar/intro.htm#analyze
177   */
178  if (unlikely (hb_in_range (u, 0xFE00u, 0xFE0Fu)))
179    cat = (indic_category_t) OT_VS;
180
181  switch (u)
182  {
183    case 0x104Eu:
184      cat = (indic_category_t) OT_C; /* The spec says C, IndicSyllableCategory doesn't have. */
185      break;
186
187    case 0x002Du: case 0x00A0u: case 0x00D7u: case 0x2012u:
188    case 0x2013u: case 0x2014u: case 0x2015u: case 0x2022u:
189    case 0x25CCu: case 0x25FBu: case 0x25FCu: case 0x25FDu:
190    case 0x25FEu:
191      cat = (indic_category_t) OT_GB;
192      break;
193
194    case 0x1004u: case 0x101Bu: case 0x105Au:
195      cat = (indic_category_t) OT_Ra;
196      break;
197
198    case 0x1032u: case 0x1036u:
199      cat = (indic_category_t) OT_A;
200      break;
201
202    case 0x103Au:
203      cat = (indic_category_t) OT_As;
204      break;
205
206    case 0x1041u: case 0x1042u: case 0x1043u: case 0x1044u:
207    case 0x1045u: case 0x1046u: case 0x1047u: case 0x1048u:
208    case 0x1049u: case 0x1090u: case 0x1091u: case 0x1092u:
209    case 0x1093u: case 0x1094u: case 0x1095u: case 0x1096u:
210    case 0x1097u: case 0x1098u: case 0x1099u:
211      cat = (indic_category_t) OT_D;
212      break;
213
214    case 0x1040u:
215      cat = (indic_category_t) OT_D; /* XXX The spec says D0, but Uniscribe doesn't seem to do. */
216      break;
217
218    case 0x103Eu: case 0x1060u:
219      cat = (indic_category_t) OT_MH;
220      break;
221
222    case 0x103Cu:
223      cat = (indic_category_t) OT_MR;
224      break;
225
226    case 0x103Du: case 0x1082u:
227      cat = (indic_category_t) OT_MW;
228      break;
229
230    case 0x103Bu: case 0x105Eu: case 0x105Fu:
231      cat = (indic_category_t) OT_MY;
232      break;
233
234    case 0x1063u: case 0x1064u: case 0x1069u: case 0x106Au:
235    case 0x106Bu: case 0x106Cu: case 0x106Du: case 0xAA7Bu:
236      cat = (indic_category_t) OT_PT;
237      break;
238
239    case 0x1038u: case 0x1087u: case 0x1088u: case 0x1089u:
240    case 0x108Au: case 0x108Bu: case 0x108Cu: case 0x108Du:
241    case 0x108Fu: case 0x109Au: case 0x109Bu: case 0x109Cu:
242      cat = (indic_category_t) OT_SM;
243      break;
244
245    case 0x104Au: case 0x104Bu:
246      cat = (indic_category_t) OT_P;
247      break;
248  }
249
250  if (cat == OT_M)
251  {
252    switch ((int) pos)
253    {
254      case POS_PRE_C:	cat = (indic_category_t) OT_VPre;
255			pos = POS_PRE_M;                  break;
256      case POS_ABOVE_C:	cat = (indic_category_t) OT_VAbv; break;
257      case POS_BELOW_C:	cat = (indic_category_t) OT_VBlw; break;
258      case POS_POST_C:	cat = (indic_category_t) OT_VPst; break;
259    }
260  }
261
262  info.myanmar_category() = (myanmar_category_t) cat;
263  info.myanmar_position() = pos;
264}
265
266
267
268static void
269setup_masks_myanmar (const hb_ot_shape_plan_t *plan HB_UNUSED,
270		   hb_buffer_t              *buffer,
271		   hb_font_t                *font HB_UNUSED)
272{
273  HB_BUFFER_ALLOCATE_VAR (buffer, myanmar_category);
274  HB_BUFFER_ALLOCATE_VAR (buffer, myanmar_position);
275
276  /* We cannot setup masks here.  We save information about characters
277   * and setup masks later on in a pause-callback. */
278
279  unsigned int count = buffer->len;
280  hb_glyph_info_t *info = buffer->info;
281  for (unsigned int i = 0; i < count; i++)
282    set_myanmar_properties (info[i]);
283}
284
285static void
286setup_syllables (const hb_ot_shape_plan_t *plan HB_UNUSED,
287		 hb_font_t *font HB_UNUSED,
288		 hb_buffer_t *buffer)
289{
290  find_syllables (buffer);
291}
292
293static int
294compare_myanmar_order (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb)
295{
296  int a = pa->myanmar_position();
297  int b = pb->myanmar_position();
298
299  return a < b ? -1 : a == b ? 0 : +1;
300}
301
302
303/* Rules from:
304 * http://www.microsoft.com/typography/OpenTypeDev/myanmar/intro.htm */
305
306static void
307initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,
308				       hb_face_t *face,
309				       hb_buffer_t *buffer,
310				       unsigned int start, unsigned int end)
311{
312  hb_glyph_info_t *info = buffer->info;
313
314  unsigned int base = end;
315  bool has_reph = false;
316
317  {
318    unsigned int limit = start;
319    if (start + 3 <= end &&
320	info[start  ].myanmar_category() == OT_Ra &&
321	info[start+1].myanmar_category() == OT_As &&
322	info[start+2].myanmar_category() == OT_H)
323    {
324      limit += 3;
325      base = start;
326      has_reph = true;
327    }
328
329    {
330      if (!has_reph)
331	base = limit;
332
333      for (unsigned int i = limit; i < end; i++)
334	if (is_consonant (info[i]))
335	{
336	  base = i;
337	  break;
338	}
339    }
340  }
341
342  /* Reorder! */
343  {
344    unsigned int i = start;
345    for (; i < start + (has_reph ? 3 : 0); i++)
346      info[i].myanmar_position() = POS_AFTER_MAIN;
347    for (; i < base; i++)
348      info[i].myanmar_position() = POS_PRE_C;
349    if (i < end)
350    {
351      info[i].myanmar_position() = POS_BASE_C;
352      i++;
353    }
354    indic_position_t pos = POS_AFTER_MAIN;
355    /* The following loop may be ugly, but it implements all of
356     * Myanmar reordering! */
357    for (; i < end; i++)
358    {
359      if (info[i].myanmar_category() == OT_MR) /* Pre-base reordering */
360      {
361	info[i].myanmar_position() = POS_PRE_C;
362	continue;
363      }
364      if (info[i].myanmar_position() < POS_BASE_C) /* Left matra */
365      {
366	continue;
367      }
368
369      if (pos == POS_AFTER_MAIN && info[i].myanmar_category() == OT_VBlw)
370      {
371	pos = POS_BELOW_C;
372	info[i].myanmar_position() = pos;
373	continue;
374      }
375
376      if (pos == POS_BELOW_C && info[i].myanmar_category() == OT_A)
377      {
378	info[i].myanmar_position() = POS_BEFORE_SUB;
379	continue;
380      }
381      if (pos == POS_BELOW_C && info[i].myanmar_category() == OT_VBlw)
382      {
383	info[i].myanmar_position() = pos;
384	continue;
385      }
386      if (pos == POS_BELOW_C && info[i].myanmar_category() != OT_A)
387      {
388        pos = POS_AFTER_SUB;
389	info[i].myanmar_position() = pos;
390	continue;
391      }
392      info[i].myanmar_position() = pos;
393    }
394  }
395
396  buffer->merge_clusters (start, end);
397  /* Sit tight, rock 'n roll! */
398  hb_bubble_sort (info + start, end - start, compare_myanmar_order);
399}
400
401static void
402initial_reordering_broken_cluster (const hb_ot_shape_plan_t *plan,
403				   hb_face_t *face,
404				   hb_buffer_t *buffer,
405				   unsigned int start, unsigned int end)
406{
407  /* We already inserted dotted-circles, so just call the consonant_syllable. */
408  initial_reordering_consonant_syllable (plan, face, buffer, start, end);
409}
410
411static void
412initial_reordering_punctuation_cluster (const hb_ot_shape_plan_t *plan HB_UNUSED,
413					hb_face_t *face HB_UNUSED,
414					hb_buffer_t *buffer HB_UNUSED,
415					unsigned int start HB_UNUSED, unsigned int end HB_UNUSED)
416{
417  /* Nothing to do right now.  If we ever switch to using the output
418   * buffer in the reordering process, we'd need to next_glyph() here. */
419}
420
421static void
422initial_reordering_non_myanmar_cluster (const hb_ot_shape_plan_t *plan HB_UNUSED,
423					hb_face_t *face HB_UNUSED,
424					hb_buffer_t *buffer HB_UNUSED,
425					unsigned int start HB_UNUSED, unsigned int end HB_UNUSED)
426{
427  /* Nothing to do right now.  If we ever switch to using the output
428   * buffer in the reordering process, we'd need to next_glyph() here. */
429}
430
431
432static void
433initial_reordering_syllable (const hb_ot_shape_plan_t *plan,
434			     hb_face_t *face,
435			     hb_buffer_t *buffer,
436			     unsigned int start, unsigned int end)
437{
438  syllable_type_t syllable_type = (syllable_type_t) (buffer->info[start].syllable() & 0x0F);
439  switch (syllable_type) {
440  case consonant_syllable:	initial_reordering_consonant_syllable  (plan, face, buffer, start, end); return;
441  case punctuation_cluster:	initial_reordering_punctuation_cluster (plan, face, buffer, start, end); return;
442  case broken_cluster:		initial_reordering_broken_cluster      (plan, face, buffer, start, end); return;
443  case non_myanmar_cluster:	initial_reordering_non_myanmar_cluster (plan, face, buffer, start, end); return;
444  }
445}
446
447static inline void
448insert_dotted_circles (const hb_ot_shape_plan_t *plan HB_UNUSED,
449		       hb_font_t *font,
450		       hb_buffer_t *buffer)
451{
452  /* Note: This loop is extra overhead, but should not be measurable. */
453  bool has_broken_syllables = false;
454  unsigned int count = buffer->len;
455  hb_glyph_info_t *info = buffer->info;
456  for (unsigned int i = 0; i < count; i++)
457    if ((info[i].syllable() & 0x0F) == broken_cluster)
458    {
459      has_broken_syllables = true;
460      break;
461    }
462  if (likely (!has_broken_syllables))
463    return;
464
465
466  hb_codepoint_t dottedcircle_glyph;
467  if (!font->get_glyph (0x25CCu, 0, &dottedcircle_glyph))
468    return;
469
470  hb_glyph_info_t dottedcircle = {0};
471  dottedcircle.codepoint = 0x25CCu;
472  set_myanmar_properties (dottedcircle);
473  dottedcircle.codepoint = dottedcircle_glyph;
474
475  buffer->clear_output ();
476
477  buffer->idx = 0;
478  unsigned int last_syllable = 0;
479  while (buffer->idx < buffer->len)
480  {
481    unsigned int syllable = buffer->cur().syllable();
482    syllable_type_t syllable_type = (syllable_type_t) (syllable & 0x0F);
483    if (unlikely (last_syllable != syllable && syllable_type == broken_cluster))
484    {
485      last_syllable = syllable;
486
487      hb_glyph_info_t info = dottedcircle;
488      info.cluster = buffer->cur().cluster;
489      info.mask = buffer->cur().mask;
490      info.syllable() = buffer->cur().syllable();
491
492      buffer->output_info (info);
493    }
494    else
495      buffer->next_glyph ();
496  }
497
498  buffer->swap_buffers ();
499}
500
501static void
502initial_reordering (const hb_ot_shape_plan_t *plan,
503		    hb_font_t *font,
504		    hb_buffer_t *buffer)
505{
506  insert_dotted_circles (plan, font, buffer);
507
508  hb_glyph_info_t *info = buffer->info;
509  unsigned int count = buffer->len;
510  if (unlikely (!count)) return;
511  unsigned int last = 0;
512  unsigned int last_syllable = info[0].syllable();
513  for (unsigned int i = 1; i < count; i++)
514    if (last_syllable != info[i].syllable()) {
515      initial_reordering_syllable (plan, font->face, buffer, last, i);
516      last = i;
517      last_syllable = info[last].syllable();
518    }
519  initial_reordering_syllable (plan, font->face, buffer, last, count);
520}
521
522static void
523final_reordering (const hb_ot_shape_plan_t *plan,
524		  hb_font_t *font HB_UNUSED,
525		  hb_buffer_t *buffer)
526{
527  hb_glyph_info_t *info = buffer->info;
528  unsigned int count = buffer->len;
529
530  /* Zero syllables now... */
531  for (unsigned int i = 0; i < count; i++)
532    info[i].syllable() = 0;
533
534  HB_BUFFER_DEALLOCATE_VAR (buffer, myanmar_category);
535  HB_BUFFER_DEALLOCATE_VAR (buffer, myanmar_position);
536}
537
538
539/* Uniscribe seems to have a shaper for 'mymr' that is like the
540 * generic shaper, except that it zeros mark advances GDEF_LATE. */
541const hb_ot_complex_shaper_t _hb_ot_complex_shaper_myanmar_old =
542{
543  "default",
544  NULL, /* collect_features */
545  NULL, /* override_features */
546  NULL, /* data_create */
547  NULL, /* data_destroy */
548  NULL, /* preprocess_text */
549  HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT,
550  NULL, /* decompose */
551  NULL, /* compose */
552  NULL, /* setup_masks */
553  HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,
554  true, /* fallback_position */
555};
556
557const hb_ot_complex_shaper_t _hb_ot_complex_shaper_myanmar =
558{
559  "myanmar",
560  collect_features_myanmar,
561  override_features_myanmar,
562  NULL, /* data_create */
563  NULL, /* data_destroy */
564  NULL, /* preprocess_text */
565  HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
566  NULL, /* decompose */
567  NULL, /* compose */
568  setup_masks_myanmar,
569  HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY,
570  false, /* fallback_position */
571};
572