1/*
2 * Copyright © 2015  Mozilla Foundation.
3 * Copyright © 2015  Google, Inc.
4 *
5 *  This is part of HarfBuzz, a text shaping library.
6 *
7 * Permission is hereby granted, without written agreement and without
8 * license or royalty fees, to use, copy, modify, and distribute this
9 * software and its documentation for any purpose, provided that the
10 * above copyright notice and the following two paragraphs appear in
11 * all copies of this software.
12 *
13 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
14 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
15 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
16 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
17 * DAMAGE.
18 *
19 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
20 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
21 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
22 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
23 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
24 *
25 * Mozilla Author(s): Jonathan Kew
26 * Google Author(s): Behdad Esfahbod
27 */
28
29#include "hb-ot-shape-complex-use-private.hh"
30#include "hb-ot-shape-complex-arabic-private.hh"
31
/* buffer var allocations */
/* use_category() aliases the complex_var_u8_0() accessor.  In this file it is
 * allocated in setup_masks_use() and deallocated at the end of reorder(). */
#define use_category() complex_var_u8_0()
34
35
36/*
37 * Universal Shaping Engine.
38 * https://www.microsoft.com/typography/OpenTypeDev/USE/intro.htm
39 */
40
41static const hb_tag_t
42basic_features[] =
43{
44  /*
45   * Basic features.
46   * These features are applied all at once, before reordering.
47   */
48  HB_TAG('r','k','r','f'),
49  HB_TAG('a','b','v','f'),
50  HB_TAG('b','l','w','f'),
51  HB_TAG('h','a','l','f'),
52  HB_TAG('p','s','t','f'),
53  HB_TAG('v','a','t','u'),
54  HB_TAG('c','j','c','t'),
55};
56static const hb_tag_t
57arabic_features[] =
58{
59  HB_TAG('i','s','o','l'),
60  HB_TAG('i','n','i','t'),
61  HB_TAG('m','e','d','i'),
62  HB_TAG('f','i','n','a'),
63  /* The spec doesn't specify these but we apply anyway, since our Arabic shaper
64   * does.  These are only used in Syriac spec. */
65  HB_TAG('m','e','d','2'),
66  HB_TAG('f','i','n','2'),
67  HB_TAG('f','i','n','3'),
68};
/* Joining forms, in the same order as the leading entries of
 * arabic_features (the values are used as indices into it).  The Syriac
 * entries of that array have no counterpart here.  _NONE means "no form". */
enum joining_form_t {
  ISOL  = 0,
  INIT  = 1,
  MEDI  = 2,
  FINA  = 3,
  _NONE = 4
};
77static const hb_tag_t
78other_features[] =
79{
80  /*
81   * Other features.
82   * These features are applied all at once, after reordering.
83   */
84  HB_TAG('a','b','v','s'),
85  HB_TAG('b','l','w','s'),
86  HB_TAG('h','a','l','n'),
87  HB_TAG('p','r','e','s'),
88  HB_TAG('p','s','t','s'),
89  /* Positioning features, though we don't care about the types. */
90  HB_TAG('d','i','s','t'),
91  HB_TAG('a','b','v','m'),
92  HB_TAG('b','l','w','m'),
93};
94
95static void
96setup_syllables (const hb_ot_shape_plan_t *plan,
97		 hb_font_t *font,
98		 hb_buffer_t *buffer);
99static void
100clear_substitution_flags (const hb_ot_shape_plan_t *plan,
101			  hb_font_t *font,
102			  hb_buffer_t *buffer);
103static void
104record_rphf (const hb_ot_shape_plan_t *plan,
105	     hb_font_t *font,
106	     hb_buffer_t *buffer);
107static void
108record_pref (const hb_ot_shape_plan_t *plan,
109	     hb_font_t *font,
110	     hb_buffer_t *buffer);
111static void
112reorder (const hb_ot_shape_plan_t *plan,
113	 hb_font_t *font,
114	 hb_buffer_t *buffer);
115
116static void
117collect_features_use (hb_ot_shape_planner_t *plan)
118{
119  hb_ot_map_builder_t *map = &plan->map;
120
121  /* Do this before any lookups have been applied. */
122  map->add_gsub_pause (setup_syllables);
123
124  /* "Default glyph pre-processing group" */
125  map->add_global_bool_feature (HB_TAG('l','o','c','l'));
126  map->add_global_bool_feature (HB_TAG('c','c','m','p'));
127  map->add_global_bool_feature (HB_TAG('n','u','k','t'));
128  map->add_global_bool_feature (HB_TAG('a','k','h','n'));
129
130  /* "Reordering group" */
131  map->add_gsub_pause (clear_substitution_flags);
132  map->add_feature (HB_TAG('r','p','h','f'), 1, F_MANUAL_ZWJ);
133  map->add_gsub_pause (record_rphf);
134  map->add_gsub_pause (clear_substitution_flags);
135  map->add_feature (HB_TAG('p','r','e','f'), 1, F_GLOBAL | F_MANUAL_ZWJ);
136  map->add_gsub_pause (record_pref);
137
138  /* "Orthographic unit shaping group" */
139  for (unsigned int i = 0; i < ARRAY_LENGTH (basic_features); i++)
140    map->add_feature (basic_features[i], 1, F_GLOBAL | F_MANUAL_ZWJ);
141
142  map->add_gsub_pause (reorder);
143
144  /* "Topographical features" */
145  for (unsigned int i = 0; i < ARRAY_LENGTH (arabic_features); i++)
146    map->add_feature (arabic_features[i], 1, F_NONE);
147  map->add_gsub_pause (NULL);
148
149  /* "Standard typographic presentation" and "Positional feature application" */
150  for (unsigned int i = 0; i < ARRAY_LENGTH (other_features); i++)
151    map->add_feature (other_features[i], 1, F_GLOBAL | F_MANUAL_ZWJ);
152}
153
154struct use_shape_plan_t
155{
156  ASSERT_POD ();
157
158  hb_mask_t rphf_mask;
159
160  arabic_shape_plan_t *arabic_plan;
161};
162
163static bool
164has_arabic_joining (hb_script_t script)
165{
166  /* List of scripts that have data in arabic-table. */
167  switch ((int) script)
168  {
169    /* Unicode-1.1 additions */
170    case HB_SCRIPT_ARABIC:
171
172    /* Unicode-3.0 additions */
173    case HB_SCRIPT_MONGOLIAN:
174    case HB_SCRIPT_SYRIAC:
175
176    /* Unicode-5.0 additions */
177    case HB_SCRIPT_NKO:
178    case HB_SCRIPT_PHAGS_PA:
179
180    /* Unicode-6.0 additions */
181    case HB_SCRIPT_MANDAIC:
182
183    /* Unicode-7.0 additions */
184    case HB_SCRIPT_MANICHAEAN:
185    case HB_SCRIPT_PSALTER_PAHLAVI:
186
187      return true;
188
189    default:
190      return false;
191  }
192}
193
194static void *
195data_create_use (const hb_ot_shape_plan_t *plan)
196{
197  use_shape_plan_t *use_plan = (use_shape_plan_t *) calloc (1, sizeof (use_shape_plan_t));
198  if (unlikely (!use_plan))
199    return NULL;
200
201  use_plan->rphf_mask = plan->map.get_1_mask (HB_TAG('r','p','h','f'));
202
203  if (has_arabic_joining (plan->props.script))
204  {
205    use_plan->arabic_plan = (arabic_shape_plan_t *) data_create_arabic (plan);
206    if (unlikely (!use_plan->arabic_plan))
207    {
208      free (use_plan);
209      return NULL;
210    }
211  }
212
213  return use_plan;
214}
215
216static void
217data_destroy_use (void *data)
218{
219  use_shape_plan_t *use_plan = (use_shape_plan_t *) data;
220
221  if (use_plan->arabic_plan)
222    data_destroy_arabic (use_plan->arabic_plan);
223
224  free (data);
225}
226
/* Syllable kinds.  This file reads them back from the low four bits of
 * syllable() (see the `& 0x0F` masks below). */
enum syllable_type_t {
  independent_cluster              = 0,
  virama_terminated_cluster        = 1,
  consonant_cluster                = 2,
  vowel_cluster                    = 3,
  number_joiner_terminated_cluster = 4,
  numeral_cluster                  = 5,
  symbol_cluster                   = 6,
  broken_cluster                   = 7
};
237
238#include "hb-ot-shape-complex-use-machine.hh"
239
240
241static void
242setup_masks_use (const hb_ot_shape_plan_t *plan,
243		 hb_buffer_t              *buffer,
244		 hb_font_t                *font HB_UNUSED)
245{
246  const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data;
247
248  /* Do this before allocating use_category(). */
249  if (use_plan->arabic_plan)
250  {
251    setup_masks_arabic_plan (use_plan->arabic_plan, buffer, plan->props.script);
252  }
253
254  HB_BUFFER_ALLOCATE_VAR (buffer, use_category);
255
256  /* We cannot setup masks here.  We save information about characters
257   * and setup masks later on in a pause-callback. */
258
259  unsigned int count = buffer->len;
260  hb_glyph_info_t *info = buffer->info;
261  for (unsigned int i = 0; i < count; i++)
262    info[i].use_category() = hb_use_get_categories (info[i].codepoint);
263}
264
265static void
266setup_rphf_mask (const hb_ot_shape_plan_t *plan,
267		 hb_buffer_t *buffer)
268{
269  const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data;
270
271  hb_mask_t mask = use_plan->rphf_mask;
272  if (!mask) return;
273
274  hb_glyph_info_t *info = buffer->info;
275
276  foreach_syllable (buffer, start, end)
277  {
278    unsigned int limit = info[start].use_category() == USE_R ? 1 : MIN (3u, end - start);
279    for (unsigned int i = start; i < start + limit; i++)
280      info[i].mask |= mask;
281  }
282}
283
284static void
285setup_topographical_masks (const hb_ot_shape_plan_t *plan,
286			   hb_buffer_t *buffer)
287{
288  const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data;
289  if (use_plan->arabic_plan)
290    return;
291
292  ASSERT_STATIC (INIT < 4 && ISOL < 4 && MEDI < 4 && FINA < 4);
293  hb_mask_t masks[4], all_masks = 0;
294  for (unsigned int i = 0; i < 4; i++)
295  {
296    masks[i] = plan->map.get_1_mask (arabic_features[i]);
297    if (masks[i] == plan->map.get_global_mask ())
298      masks[i] = 0;
299    all_masks |= masks[i];
300  }
301  if (!all_masks)
302    return;
303  hb_mask_t other_masks = ~all_masks;
304
305  unsigned int last_start = 0;
306  joining_form_t last_form = _NONE;
307  hb_glyph_info_t *info = buffer->info;
308  foreach_syllable (buffer, start, end)
309  {
310    syllable_type_t syllable_type = (syllable_type_t) (info[start].syllable() & 0x0F);
311    switch (syllable_type)
312    {
313      case independent_cluster:
314      case symbol_cluster:
315	/* These don't join.  Nothing to do. */
316	last_form = _NONE;
317	break;
318
319      case virama_terminated_cluster:
320      case consonant_cluster:
321      case vowel_cluster:
322      case number_joiner_terminated_cluster:
323      case numeral_cluster:
324      case broken_cluster:
325
326	bool join = last_form == FINA || last_form == ISOL;
327
328	if (join)
329	{
330	  /* Fixup previous syllable's form. */
331	  last_form = last_form == FINA ? MEDI : INIT;
332	  for (unsigned int i = last_start; i < start; i++)
333	    info[i].mask = (info[i].mask & other_masks) | masks[last_form];
334	}
335
336	/* Form for this syllable. */
337	last_form = join ? FINA : ISOL;
338	for (unsigned int i = start; i < end; i++)
339	  info[i].mask = (info[i].mask & other_masks) | masks[last_form];
340
341	break;
342    }
343
344    last_start = start;
345  }
346}
347
348static void
349setup_syllables (const hb_ot_shape_plan_t *plan,
350		 hb_font_t *font HB_UNUSED,
351		 hb_buffer_t *buffer)
352{
353  find_syllables (buffer);
354  setup_rphf_mask (plan, buffer);
355  setup_topographical_masks (plan, buffer);
356}
357
358static void
359clear_substitution_flags (const hb_ot_shape_plan_t *plan,
360			  hb_font_t *font HB_UNUSED,
361			  hb_buffer_t *buffer)
362{
363  hb_glyph_info_t *info = buffer->info;
364  unsigned int count = buffer->len;
365  for (unsigned int i = 0; i < count; i++)
366    _hb_glyph_info_clear_substituted (&info[i]);
367}
368
369static void
370record_rphf (const hb_ot_shape_plan_t *plan,
371	     hb_font_t *font,
372	     hb_buffer_t *buffer)
373{
374  const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data;
375
376  hb_mask_t mask = use_plan->rphf_mask;
377  if (!mask) return;
378  hb_glyph_info_t *info = buffer->info;
379
380  foreach_syllable (buffer, start, end)
381  {
382    /* Mark a substituted repha as USE_R. */
383    for (unsigned int i = start; i < end && (info[i].mask & mask); i++)
384      if (_hb_glyph_info_substituted (&info[i]))
385      {
386	info[i].use_category() = USE_R;
387	break;
388      }
389  }
390}
391
392static void
393record_pref (const hb_ot_shape_plan_t *plan,
394	     hb_font_t *font,
395	     hb_buffer_t *buffer)
396{
397  hb_glyph_info_t *info = buffer->info;
398
399  foreach_syllable (buffer, start, end)
400  {
401    /* Mark a substituted pref as VPre, as they behave the same way. */
402    for (unsigned int i = start; i < end; i++)
403      if (_hb_glyph_info_substituted (&info[i]))
404      {
405	info[i].use_category() = USE_VPre;
406	break;
407      }
408  }
409}
410
411static inline bool
412is_halant (const hb_glyph_info_t &info)
413{
414  return info.use_category() == USE_H && !_hb_glyph_info_ligated (&info);
415}
416
417static void
418reorder_syllable (hb_buffer_t *buffer, unsigned int start, unsigned int end)
419{
420  syllable_type_t syllable_type = (syllable_type_t) (buffer->info[start].syllable() & 0x0F);
421  /* Only a few syllable types need reordering. */
422  if (unlikely (!(FLAG_SAFE (syllable_type) &
423		  (FLAG (virama_terminated_cluster) |
424		   FLAG (consonant_cluster) |
425		   FLAG (vowel_cluster) |
426		   FLAG (broken_cluster) |
427		   0))))
428    return;
429
430  hb_glyph_info_t *info = buffer->info;
431
432#define BASE_FLAGS (FLAG (USE_B) | FLAG (USE_GB) | FLAG (USE_IV))
433
434  /* Move things forward. */
435  if (info[start].use_category() == USE_R && end - start > 1)
436  {
437    /* Got a repha.  Reorder it to after first base, before first halant. */
438    for (unsigned int i = start + 1; i < end; i++)
439      if ((FLAG_UNSAFE (info[i].use_category()) & (BASE_FLAGS)) || is_halant (info[i]))
440      {
441	/* If we hit a halant, move before it; otherwise it's a base: move to it's
442	 * place, and shift things in between backward. */
443
444	if (is_halant (info[i]))
445	  i--;
446
447	buffer->merge_clusters (start, i + 1);
448	hb_glyph_info_t t = info[start];
449	memmove (&info[start], &info[start + 1], (i - start) * sizeof (info[0]));
450	info[i] = t;
451
452	break;
453      }
454  }
455
456  /* Move things back. */
457  unsigned int j = end;
458  for (unsigned int i = start; i < end; i++)
459  {
460    uint32_t flag = FLAG_UNSAFE (info[i].use_category());
461    if ((flag & (BASE_FLAGS)) || is_halant (info[i]))
462    {
463      /* If we hit a halant, move after it; otherwise it's a base: move to it's
464       * place, and shift things in between backward. */
465      if (is_halant (info[i]))
466	j = i + 1;
467      else
468	j = i;
469    }
470    else if (((flag) & (FLAG (USE_VPre) | FLAG (USE_VMPre))) &&
471	     /* Only move the first component of a MultipleSubst. */
472	     0 == _hb_glyph_info_get_lig_comp (&info[i]) &&
473	     j < i)
474    {
475      buffer->merge_clusters (j, i + 1);
476      hb_glyph_info_t t = info[i];
477      memmove (&info[j + 1], &info[j], (i - j) * sizeof (info[0]));
478      info[j] = t;
479    }
480  }
481}
482
483static inline void
484insert_dotted_circles (const hb_ot_shape_plan_t *plan HB_UNUSED,
485		       hb_font_t *font,
486		       hb_buffer_t *buffer)
487{
488  /* Note: This loop is extra overhead, but should not be measurable. */
489  bool has_broken_syllables = false;
490  unsigned int count = buffer->len;
491  hb_glyph_info_t *info = buffer->info;
492  for (unsigned int i = 0; i < count; i++)
493    if ((info[i].syllable() & 0x0F) == broken_cluster)
494    {
495      has_broken_syllables = true;
496      break;
497    }
498  if (likely (!has_broken_syllables))
499    return;
500
501  hb_glyph_info_t dottedcircle = {0};
502  if (!font->get_nominal_glyph (0x25CCu, &dottedcircle.codepoint))
503    return;
504  dottedcircle.use_category() = hb_use_get_categories (0x25CC);
505
506  buffer->clear_output ();
507
508  buffer->idx = 0;
509  unsigned int last_syllable = 0;
510  while (buffer->idx < buffer->len && !buffer->in_error)
511  {
512    unsigned int syllable = buffer->cur().syllable();
513    syllable_type_t syllable_type = (syllable_type_t) (syllable & 0x0F);
514    if (unlikely (last_syllable != syllable && syllable_type == broken_cluster))
515    {
516      last_syllable = syllable;
517
518      hb_glyph_info_t ginfo = dottedcircle;
519      ginfo.cluster = buffer->cur().cluster;
520      ginfo.mask = buffer->cur().mask;
521      ginfo.syllable() = buffer->cur().syllable();
522      /* TODO Set glyph_props? */
523
524      /* Insert dottedcircle after possible Repha. */
525      while (buffer->idx < buffer->len && !buffer->in_error &&
526	     last_syllable == buffer->cur().syllable() &&
527	     buffer->cur().use_category() == USE_R)
528        buffer->next_glyph ();
529
530      buffer->output_info (ginfo);
531    }
532    else
533      buffer->next_glyph ();
534  }
535
536  buffer->swap_buffers ();
537}
538
539static void
540reorder (const hb_ot_shape_plan_t *plan,
541	 hb_font_t *font,
542	 hb_buffer_t *buffer)
543{
544  insert_dotted_circles (plan, font, buffer);
545
546  hb_glyph_info_t *info = buffer->info;
547
548  foreach_syllable (buffer, start, end)
549    reorder_syllable (buffer, start, end);
550
551  /* Zero syllables now... */
552  unsigned int count = buffer->len;
553  for (unsigned int i = 0; i < count; i++)
554    info[i].syllable() = 0;
555
556  HB_BUFFER_DEALLOCATE_VAR (buffer, use_category);
557}
558
559static bool
560compose_use (const hb_ot_shape_normalize_context_t *c,
561	     hb_codepoint_t  a,
562	     hb_codepoint_t  b,
563	     hb_codepoint_t *ab)
564{
565  /* Avoid recomposing split matras. */
566  if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a)))
567    return false;
568
569  return (bool)c->unicode->compose (a, b, ab);
570}
571
572
573const hb_ot_complex_shaper_t _hb_ot_complex_shaper_use =
574{
575  "use",
576  collect_features_use,
577  NULL, /* override_features */
578  data_create_use,
579  data_destroy_use,
580  NULL, /* preprocess_text */
581  NULL, /* postprocess_glyphs */
582  HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
583  NULL, /* decompose */
584  compose_use,
585  setup_masks_use,
586  HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY,
587  false, /* fallback_position */
588};
589