hb-ot-shape-complex-indic.cc revision 70fe77bb9a25922bd34f206826d8731d901fb451
1/*
2 * Copyright © 2011,2012  Google, Inc.
3 *
4 *  This is part of HarfBuzz, a text shaping library.
5 *
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
11 *
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16 * DAMAGE.
17 *
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23 *
24 * Google Author(s): Behdad Esfahbod
25 */
26
27#include "hb-ot-shape-complex-indic-private.hh"
28#include "hb-ot-shape-private.hh"
29
/* Builds the old-style Indic tag for SCRIPT: the script value cast to
 * hb_tag_t with bit 0x20000000 set.  The argument is parenthesized so that
 * an arbitrary expression can be passed safely. */
#define OLD_INDIC_TAG(script) (((hb_tag_t) (script)) | 0x20000000)
/* True when TAG equals the old-style tag of one of the nine scripts listed
 * below.  TAG is evaluated several times, so pass a side-effect-free
 * expression. */
#define IS_OLD_INDIC_TAG(tag) ( \
                                (tag) == OLD_INDIC_TAG (HB_SCRIPT_BENGALI) || \
                                (tag) == OLD_INDIC_TAG (HB_SCRIPT_DEVANAGARI) || \
                                (tag) == OLD_INDIC_TAG (HB_SCRIPT_GUJARATI) || \
                                (tag) == OLD_INDIC_TAG (HB_SCRIPT_GURMUKHI) || \
                                (tag) == OLD_INDIC_TAG (HB_SCRIPT_KANNADA) || \
                                (tag) == OLD_INDIC_TAG (HB_SCRIPT_MALAYALAM) || \
                                (tag) == OLD_INDIC_TAG (HB_SCRIPT_ORIYA) || \
                                (tag) == OLD_INDIC_TAG (HB_SCRIPT_TAMIL) || \
                                (tag) == OLD_INDIC_TAG (HB_SCRIPT_TELUGU) \
                              )
/* Run-time options of the Indic shaper, packed into single-bit flags.
 *
 * The flags are unsigned: a signed one-bit bit-field can only represent
 * 0 and -1, so storing 1 into it is implementation-defined. */
struct indic_options_t
{
  /* Set once the options have been computed. */
  unsigned int initialized : 1;
  /* Set when "uniscribe-bug-compatible" was requested. */
  unsigned int uniscribe_bug_compatible : 1;
};
47
48union indic_options_union_t {
49  int i;
50  indic_options_t opts;
51};
52ASSERT_STATIC (sizeof (int) == sizeof (indic_options_union_t));
53
54static indic_options_union_t
55indic_options_init (void)
56{
57  indic_options_union_t u;
58  u.i = 0;
59  u.opts.initialized = 1;
60
61  char *c = getenv ("HB_OT_INDIC_OPTIONS");
62  u.opts.uniscribe_bug_compatible = c && strstr (c, "uniscribe-bug-compatible");
63
64  return u;
65}
66
67inline indic_options_t
68indic_options (void)
69{
70  static indic_options_union_t options;
71
72  if (unlikely (!options.i)) {
73    /* This is idempotent and threadsafe. */
74    options = indic_options_init ();
75  }
76
77  return options.opts;
78}
79
80
81static int
82compare_codepoint (const void *pa, const void *pb)
83{
84  hb_codepoint_t a = * (hb_codepoint_t *) pa;
85  hb_codepoint_t b = * (hb_codepoint_t *) pb;
86
87  return a < b ? -1 : a == b ? 0 : +1;
88}
89
90static indic_position_t
91consonant_position (hb_codepoint_t u)
92{
93  consonant_position_t *record;
94
95  /* Khmer does not have pre-base half forms. */
96  if (0x1780 <= u && u <= 0x17FF)
97    return POS_BELOW_C;
98
99  record = (consonant_position_t *) bsearch (&u, consonant_positions,
100					     ARRAY_LENGTH (consonant_positions),
101					     sizeof (consonant_positions[0]),
102					     compare_codepoint);
103
104  return record ? record->position : POS_BASE_C;
105}
106
107static bool
108is_ra (hb_codepoint_t u)
109{
110  return !!bsearch (&u, ra_chars,
111		    ARRAY_LENGTH (ra_chars),
112		    sizeof (ra_chars[0]),
113		    compare_codepoint);
114}
115
116static bool
117is_joiner (const hb_glyph_info_t &info)
118{
119  return !!(FLAG (info.indic_category()) & (FLAG (OT_ZWJ) | FLAG (OT_ZWNJ)));
120}
121
122static bool
123is_consonant (const hb_glyph_info_t &info)
124{
125  /* Note:
126   *
127   * We treat Vowels and placeholders as if they were consonants.  This is safe because Vowels
128   * cannot happen in a consonant syllable.  The plus side however is, we can call the
129   * consonant syllable logic from the vowel syllable function and get it all right! */
130  return !!(FLAG (info.indic_category()) & (FLAG (OT_C) | FLAG (OT_Ra) | FLAG (OT_V) | FLAG (OT_NBSP) | FLAG (OT_DOTTEDCIRCLE)));
131}
132
133struct feature_list_t {
134  hb_tag_t tag;
135  hb_bool_t is_global;
136};
137
138static const feature_list_t
139indic_basic_features[] =
140{
141  {HB_TAG('n','u','k','t'), true},
142  {HB_TAG('a','k','h','n'), false},
143  {HB_TAG('r','p','h','f'), false},
144  {HB_TAG('r','k','r','f'), true},
145  {HB_TAG('p','r','e','f'), false},
146  {HB_TAG('b','l','w','f'), false},
147  {HB_TAG('h','a','l','f'), false},
148  {HB_TAG('a','b','v','f'), false},
149  {HB_TAG('p','s','t','f'), false},
150  {HB_TAG('c','j','c','t'), false},
151  {HB_TAG('v','a','t','u'), true},
152};
153
/* Indices into indic_basic_features (and into the mask array built from
 * it); must list the entries in the same order as that array. */
enum {
  _NUKT, /* 'nukt' */
  AKHN,  /* 'akhn' */
  RPHF,  /* 'rphf' */
  _RKRF, /* 'rkrf' */
  PREF,  /* 'pref' */
  BLWF,  /* 'blwf' */
  HALF,  /* 'half' */
  ABVF,  /* 'abvf' */
  PSTF,  /* 'pstf' */
  CJCT,  /* 'cjct' */
  VATU   /* 'vatu' */
};
168
169static const feature_list_t
170indic_other_features[] =
171{
172  {HB_TAG('i','n','i','t'), false},
173  {HB_TAG('p','r','e','s'), true},
174  {HB_TAG('a','b','v','s'), true},
175  {HB_TAG('b','l','w','s'), true},
176  {HB_TAG('p','s','t','s'), true},
177  {HB_TAG('h','a','l','n'), true},
178
179  {HB_TAG('d','i','s','t'), true},
180  {HB_TAG('a','b','v','m'), true},
181  {HB_TAG('b','l','w','m'), true},
182};
183
/* Indices into indic_other_features (and into the mask array built from
 * it), in the same order as that array.  Only the first entry is named. */
enum {
  INIT /* 'init' */
};
188
189
190static void
191initial_reordering (const hb_ot_map_t *map,
192		    hb_face_t *face,
193		    hb_buffer_t *buffer,
194		    void *user_data HB_UNUSED);
195static void
196final_reordering (const hb_ot_map_t *map,
197		  hb_face_t *face,
198		  hb_buffer_t *buffer,
199		  void *user_data HB_UNUSED);
200
201void
202_hb_ot_shape_complex_collect_features_indic (hb_ot_map_builder_t *map,
203					     const hb_segment_properties_t *props HB_UNUSED)
204{
205  map->add_bool_feature (HB_TAG('l','o','c','l'));
206  /* The Indic specs do not require ccmp, but we apply it here since if
207   * there is a use of it, it's typically at the beginning. */
208  map->add_bool_feature (HB_TAG('c','c','m','p'));
209
210  map->add_gsub_pause (initial_reordering, NULL);
211
212  for (unsigned int i = 0; i < ARRAY_LENGTH (indic_basic_features); i++) {
213    map->add_bool_feature (indic_basic_features[i].tag, indic_basic_features[i].is_global);
214    map->add_gsub_pause (NULL, NULL);
215  }
216
217  map->add_gsub_pause (final_reordering, NULL);
218
219  for (unsigned int i = 0; i < ARRAY_LENGTH (indic_other_features); i++) {
220    map->add_bool_feature (indic_other_features[i].tag, indic_other_features[i].is_global);
221    map->add_gsub_pause (NULL, NULL);
222  }
223}
224
225
226hb_ot_shape_normalization_mode_t
227_hb_ot_shape_complex_normalization_preference_indic (void)
228{
229  /* We want split matras decomposed by the common shaping logic. */
230  return HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED;
231}
232
233
234void
235_hb_ot_shape_complex_setup_masks_indic (hb_ot_map_t *map HB_UNUSED,
236					hb_buffer_t *buffer,
237					hb_font_t *font HB_UNUSED)
238{
239  HB_BUFFER_ALLOCATE_VAR (buffer, indic_category);
240  HB_BUFFER_ALLOCATE_VAR (buffer, indic_position);
241
242  /* We cannot setup masks here.  We save information about characters
243   * and setup masks later on in a pause-callback. */
244
245  unsigned int count = buffer->len;
246  for (unsigned int i = 0; i < count; i++)
247  {
248    hb_glyph_info_t &info = buffer->info[i];
249    unsigned int type = get_indic_categories (info.codepoint);
250
251    info.indic_category() = type & 0x0F;
252    info.indic_position() = type >> 4;
253
254    /* The spec says U+0952 is OT_A.  However, testing shows that Uniscribe
255     * treats U+0951..U+0952 all as OT_VD.
256     * TESTS:
257     * U+092E,U+0947,U+0952
258     * U+092E,U+0952,U+0947
259     * U+092E,U+0947,U+0951
260     * U+092E,U+0951,U+0947
261     * */
262    if (unlikely (hb_in_range<hb_codepoint_t> (info.codepoint, 0x0951, 0x0954)))
263      info.indic_category() = OT_VD;
264
265    if (info.indic_category() == OT_C) {
266      info.indic_position() = consonant_position (info.codepoint);
267      if (is_ra (info.codepoint))
268	info.indic_category() = OT_Ra;
269    } else if (info.indic_category() == OT_SM ||
270	       info.indic_category() == OT_VD) {
271      info.indic_position() = POS_SMVD;
272    } else if (unlikely (info.codepoint == 0x200C))
273      info.indic_category() = OT_ZWNJ;
274    else if (unlikely (info.codepoint == 0x200D))
275      info.indic_category() = OT_ZWJ;
276    else if (unlikely (info.codepoint == 0x25CC))
277      info.indic_category() = OT_DOTTEDCIRCLE;
278  }
279}
280
281static int
282compare_indic_order (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb)
283{
284  int a = pa->indic_position();
285  int b = pb->indic_position();
286
287  return a < b ? -1 : a == b ? 0 : +1;
288}
289
290/* Rules from:
291 * https://www.microsoft.com/typography/otfntdev/devanot/shaping.aspx */
292
293static void
294initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buffer, hb_mask_t *basic_mask_array,
295				       unsigned int start, unsigned int end)
296{
297  hb_glyph_info_t *info = buffer->info;
298
299
300  /* 1. Find base consonant:
301   *
302   * The shaping engine finds the base consonant of the syllable, using the
303   * following algorithm: starting from the end of the syllable, move backwards
304   * until a consonant is found that does not have a below-base or post-base
305   * form (post-base forms have to follow below-base forms), or that is not a
306   * pre-base reordering Ra, or arrive at the first consonant. The consonant
307   * stopped at will be the base.
308   *
309   *   o If the syllable starts with Ra + Halant (in a script that has Reph)
310   *     and has more than one consonant, Ra is excluded from candidates for
311   *     base consonants.
312   */
313
314  unsigned int base = end;
315  bool has_reph = false;
316
317  {
318    /* -> If the syllable starts with Ra + Halant (in a script that has Reph)
319     *    and has more than one consonant, Ra is excluded from candidates for
320     *    base consonants. */
321    unsigned int limit = start;
322    if (basic_mask_array[RPHF] &&
323	start + 3 <= end &&
324	info[start].indic_category() == OT_Ra &&
325	info[start + 1].indic_category() == OT_H &&
326	!is_joiner (info[start + 2]))
327    {
328      limit += 2;
329      base = start;
330      has_reph = true;
331    };
332
333    /* -> starting from the end of the syllable, move backwards */
334    unsigned int i = end;
335    do {
336      i--;
337      /* -> until a consonant is found */
338      if (is_consonant (info[i]))
339      {
340	/* -> that does not have a below-base or post-base form
341	 * (post-base forms have to follow below-base forms), */
342	if (info[i].indic_position() != POS_BELOW_C &&
343	    info[i].indic_position() != POS_POST_C)
344	{
345	  base = i;
346	  break;
347	}
348
349	/* -> or that is not a pre-base reordering Ra,
350	 *
351	 * TODO
352	 */
353
354	/* -> or arrive at the first consonant. The consonant stopped at will
355	 * be the base. */
356	base = i;
357      }
358      else
359	if (is_joiner (info[i]))
360	  break;
361    } while (i > limit);
362    if (base < start)
363      base = start; /* Just in case... */
364
365
366    /* -> If the syllable starts with Ra + Halant (in a script that has Reph)
367     *    and has more than one consonant, Ra is excluded from candidates for
368     *    base consonants. */
369    if (has_reph && base == start) {
370      /* Have no other consonant, so Reph is not formed and Ra becomes base. */
371      has_reph = false;
372    }
373  }
374
375
376  /* 2. Decompose and reorder Matras:
377   *
378   * Each matra and any syllable modifier sign in the cluster are moved to the
379   * appropriate position relative to the consonant(s) in the cluster. The
380   * shaping engine decomposes two- or three-part matras into their constituent
381   * parts before any repositioning. Matra characters are classified by which
382   * consonant in a conjunct they have affinity for and are reordered to the
383   * following positions:
384   *
385   *   o Before first half form in the syllable
386   *   o After subjoined consonants
387   *   o After post-form consonant
388   *   o After main consonant (for above marks)
389   *
390   * IMPLEMENTATION NOTES:
391   *
392   * The normalize() routine has already decomposed matras for us, so we don't
393   * need to worry about that.
394   */
395
396
397  /* 3.  Reorder marks to canonical order:
398   *
399   * Adjacent nukta and halant or nukta and vedic sign are always repositioned
400   * if necessary, so that the nukta is first.
401   *
402   * IMPLEMENTATION NOTES:
403   *
404   * We don't need to do this: the normalize() routine already did this for us.
405   */
406
407
408  /* Reorder characters */
409
410  for (unsigned int i = start; i < base; i++)
411    info[i].indic_position() = POS_PRE_C;
412  info[base].indic_position() = POS_BASE_C;
413
414  /* Handle beginning Ra */
415  if (has_reph)
416    info[start].indic_position() = POS_RA_TO_BECOME_REPH;
417
418  /* For old-style Indic script tags, move the first post-base Halant after
419   * last consonant. */
420  if (IS_OLD_INDIC_TAG (map->get_chosen_script (0))) {
421    for (unsigned int i = base + 1; i < end; i++)
422      if (info[i].indic_category() == OT_H) {
423        unsigned int j;
424        for (j = end - 1; j > i; j--)
425	  if (is_consonant (info[j]))
426	    break;
427	if (j > i) {
428	  /* Move Halant to after last consonant. */
429	  hb_glyph_info_t t = info[i];
430	  memmove (&info[i], &info[i + 1], (j - i) * sizeof (info[0]));
431	  info[j] = t;
432	}
433        break;
434      }
435  }
436
437  /* Attach ZWJ, ZWNJ, nukta, and halant to previous char to move with them. */
438  if (!indic_options ().uniscribe_bug_compatible)
439  {
440    /* Please update the Uniscribe branch when touching this! */
441    for (unsigned int i = start + 1; i < end; i++)
442      if ((FLAG (info[i].indic_category()) & (FLAG (OT_ZWNJ) | FLAG (OT_ZWJ) | FLAG (OT_N) | FLAG (OT_H))))
443	info[i].indic_position() = info[i - 1].indic_position();
444  } else {
445    /*
446     * Uniscribe doesn't move the Halant with Left Matra.
447     * TEST: U+092B,U+093F,U+094DE
448     */
449    /* Please update the non-Uniscribe branch when touching this! */
450    for (unsigned int i = start + 1; i < end; i++)
451      if ((FLAG (info[i].indic_category()) & (FLAG (OT_ZWNJ) | FLAG (OT_ZWJ) | FLAG (OT_N) | FLAG (OT_H)))) {
452	info[i].indic_position() = info[i - 1].indic_position();
453	if (info[i].indic_category() == OT_H && info[i].indic_position() == POS_PRE_M)
454	  for (unsigned int j = i; j > start; j--)
455	    if (info[j - 1].indic_position() != POS_PRE_M) {
456	      info[i].indic_position() = info[j - 1].indic_position();
457	      break;
458	    }
459      }
460  }
461
462  /* We do bubble-sort, skip malicious clusters attempts */
463  if (end - start < 64)
464  {
465    /* Sit tight, rock 'n roll! */
466    hb_bubble_sort (info + start, end - start, compare_indic_order);
467    /* Find base again */
468    base = end;
469    for (unsigned int i = start; i < end; i++)
470      if (info[i].indic_position() == POS_BASE_C) {
471        base = i;
472	break;
473      }
474  }
475
476  /* Setup masks now */
477
478  {
479    hb_mask_t mask;
480
481    /* Reph */
482    for (unsigned int i = start; i < end && info[i].indic_position() == POS_RA_TO_BECOME_REPH; i++)
483      info[i].mask |= basic_mask_array[RPHF];
484
485    /* Pre-base */
486    mask = basic_mask_array[HALF] | basic_mask_array[AKHN] | basic_mask_array[CJCT];
487    for (unsigned int i = start; i < base; i++)
488      info[i].mask  |= mask;
489    /* Base */
490    mask = basic_mask_array[AKHN] | basic_mask_array[CJCT];
491    info[base].mask |= mask;
492    /* Post-base */
493    mask = basic_mask_array[BLWF] | basic_mask_array[ABVF] | basic_mask_array[PSTF] | basic_mask_array[CJCT];
494    for (unsigned int i = base + 1; i < end; i++)
495      info[i].mask  |= mask;
496  }
497
498  /* Apply ZWJ/ZWNJ effects */
499  for (unsigned int i = start + 1; i < end; i++)
500    if (is_joiner (info[i])) {
501      bool non_joiner = info[i].indic_category() == OT_ZWNJ;
502      unsigned int j = i;
503
504      do {
505	j--;
506
507	info[j].mask &= ~basic_mask_array[CJCT];
508	if (non_joiner)
509	  info[j].mask &= ~basic_mask_array[HALF];
510
511      } while (j > start && !is_consonant (info[j]));
512    }
513}
514
515
516static void
517initial_reordering_vowel_syllable (const hb_ot_map_t *map,
518				   hb_buffer_t *buffer,
519				   hb_mask_t *basic_mask_array,
520				   unsigned int start, unsigned int end)
521{
522  /* We made the vowels look like consonants.  So let's call the consonant logic! */
523  initial_reordering_consonant_syllable (map, buffer, basic_mask_array, start, end);
524}
525
526static void
527initial_reordering_standalone_cluster (const hb_ot_map_t *map,
528				       hb_buffer_t *buffer,
529				       hb_mask_t *basic_mask_array,
530				       unsigned int start, unsigned int end)
531{
532  /* We treat NBSP/dotted-circle as if they are consonants, so we should just chain.
533   * Only if not in compatibility mode that is... */
534
535  if (indic_options ().uniscribe_bug_compatible)
536  {
537    /* For dotted-circle, this is what Uniscribe does:
538     * If dotted-circle is the last glyph, it just does nothing.
539     * Ie. It doesn't form Reph. */
540    if (buffer->info[end - 1].indic_category() == OT_DOTTEDCIRCLE)
541      return;
542  }
543
544  initial_reordering_consonant_syllable (map, buffer, basic_mask_array, start, end);
545}
546
547static void
548initial_reordering_non_indic (const hb_ot_map_t *map HB_UNUSED,
549			      hb_buffer_t *buffer HB_UNUSED,
550			      hb_mask_t *basic_mask_array HB_UNUSED,
551			      unsigned int start HB_UNUSED, unsigned int end HB_UNUSED)
552{
553  /* Nothing to do right now.  If we ever switch to using the output
554   * buffer in the reordering process, we'd need to next_glyph() here. */
555}
556
557#include "hb-ot-shape-complex-indic-machine.hh"
558
559static void
560initial_reordering (const hb_ot_map_t *map,
561		    hb_face_t *face HB_UNUSED,
562		    hb_buffer_t *buffer,
563		    void *user_data HB_UNUSED)
564{
565  hb_mask_t basic_mask_array[ARRAY_LENGTH (indic_basic_features)] = {0};
566  unsigned int num_masks = ARRAY_LENGTH (indic_basic_features);
567  for (unsigned int i = 0; i < num_masks; i++)
568    basic_mask_array[i] = map->get_1_mask (indic_basic_features[i].tag);
569
570  find_syllables (map, buffer, basic_mask_array);
571}
572
573static void
574final_reordering_syllable (hb_buffer_t *buffer, hb_mask_t *other_mask_array,
575			   unsigned int start, unsigned int end)
576{
577  hb_glyph_info_t *info = buffer->info;
578
579  /* 4. Final reordering:
580   *
581   * After the localized forms and basic shaping forms GSUB features have been
582   * applied (see below), the shaping engine performs some final glyph
583   * reordering before applying all the remaining font features to the entire
584   * cluster.
585   */
586
587  /* Find base again */
588  unsigned int base = end;
589  for (unsigned int i = start; i < end; i++)
590    if (info[i].indic_position() == POS_BASE_C) {
591      base = i;
592      break;
593    }
594
595  if (base == start) {
596    /* There's no Reph, and no left Matra to reposition.  Just merge the cluster
597     * and go home. */
598    buffer->merge_clusters (start, end);
599    return;
600  }
601
602  unsigned int start_of_last_cluster = base;
603
604  /*   o Reorder matras:
605   *
606   *     If a pre-base matra character had been reordered before applying basic
607   *     features, the glyph can be moved closer to the main consonant based on
608   *     whether half-forms had been formed. Actual position for the matra is
609   *     defined as “after last standalone halant glyph, after initial matra
610   *     position and before the main consonant”. If ZWJ or ZWNJ follow this
611   *     halant, position is moved after it.
612   */
613
614  {
615    unsigned int new_matra_pos = base - 1;
616    while (new_matra_pos > start &&
617	   !(FLAG (info[new_matra_pos].indic_category()) & (FLAG (OT_M) | FLAG (OT_H))))
618      new_matra_pos--;
619    /* If we found no Halant we are done.  Otherwise only proceed if the Halant does
620     * not belong to the Matra itself! */
621    if (info[new_matra_pos].indic_category() == OT_H &&
622	info[new_matra_pos].indic_position() != POS_PRE_M) {
623      /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */
624      if (new_matra_pos + 1 < end && is_joiner (info[new_matra_pos + 1]))
625	new_matra_pos++;
626
627      /* Now go see if there's actually any matras... */
628      for (unsigned int i = new_matra_pos; i > start; i--)
629	if (info[i - 1].indic_position () == POS_PRE_M)
630	{
631	  unsigned int old_matra_pos = i - 1;
632	  hb_glyph_info_t matra = info[old_matra_pos];
633	  memmove (&info[old_matra_pos], &info[old_matra_pos + 1], (new_matra_pos - old_matra_pos) * sizeof (info[0]));
634	  info[new_matra_pos] = matra;
635	  start_of_last_cluster = MIN (new_matra_pos, start_of_last_cluster);
636	  new_matra_pos--;
637	}
638    }
639  }
640
641
642  /*   o Reorder reph:
643   *
644   *     Reph’s original position is always at the beginning of the syllable,
645   *     (i.e. it is not reordered at the character reordering stage). However,
646   *     it will be reordered according to the basic-forms shaping results.
647   *     Possible positions for reph, depending on the script, are; after main,
648   *     before post-base consonant forms, and after post-base consonant forms.
649   */
650
651  /* If there's anything after the Ra that has the REPH pos, it ought to be halant.
652   * Which means that the font has failed to ligate the Reph.  In which case, we
653   * shouldn't move. */
654  if (start + 1 < end &&
655      info[start].indic_position() == POS_RA_TO_BECOME_REPH &&
656      info[start + 1].indic_position() != POS_RA_TO_BECOME_REPH)
657  {
658      unsigned int new_reph_pos;
659
660     enum reph_position_t {
661       REPH_AFTER_MAIN,
662       REPH_BEFORE_SUBSCRIPT,
663       REPH_AFTER_SUBSCRIPT,
664       REPH_BEFORE_POSTSCRIPT,
665       REPH_AFTER_POSTSCRIPT
666     } reph_pos;
667
668     /* XXX Figure out old behavior too */
669     switch ((hb_tag_t) buffer->props.script)
670     {
671       case HB_SCRIPT_MALAYALAM:
672       case HB_SCRIPT_ORIYA:
673	 reph_pos = REPH_AFTER_MAIN;
674	 break;
675
676       case HB_SCRIPT_GURMUKHI:
677	 reph_pos = REPH_BEFORE_SUBSCRIPT;
678	 break;
679
680       case HB_SCRIPT_BENGALI:
681	 reph_pos = REPH_AFTER_SUBSCRIPT;
682	 break;
683
684       default:
685       case HB_SCRIPT_DEVANAGARI:
686       case HB_SCRIPT_GUJARATI:
687	 reph_pos = REPH_BEFORE_POSTSCRIPT;
688	 break;
689
690       case HB_SCRIPT_KANNADA:
691       case HB_SCRIPT_TAMIL:
692       case HB_SCRIPT_TELUGU:
693	 reph_pos = REPH_AFTER_POSTSCRIPT;
694	 break;
695     }
696
697    /*       1. If reph should be positioned after post-base consonant forms,
698     *          proceed to step 5.
699     */
700    if (reph_pos == REPH_AFTER_POSTSCRIPT)
701    {
702      goto reph_step_5;
703    }
704
705    /*       2. If the reph repositioning class is not after post-base: target
706     *          position is after the first explicit halant glyph between the
707     *          first post-reph consonant and last main consonant. If ZWJ or ZWNJ
708     *          are following this halant, position is moved after it. If such
709     *          position is found, this is the target position. Otherwise,
710     *          proceed to the next step.
711     *
712     *          Note: in old-implementation fonts, where classifications were
713     *          fixed in shaping engine, there was no case where reph position
714     *          will be found on this step.
715     */
716    {
717      new_reph_pos = start + 1;
718      while (new_reph_pos < base && info[new_reph_pos].indic_category() != OT_H)
719	new_reph_pos++;
720
721      if (new_reph_pos < base && info[new_reph_pos].indic_category() == OT_H) {
722	/* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */
723	if (new_reph_pos + 1 < base && is_joiner (info[new_reph_pos + 1]))
724	  new_reph_pos++;
725	goto reph_move;
726      }
727    }
728
729    /*       3. If reph should be repositioned after the main consonant: find the
730     *          first consonant not ligated with main, or find the first
731     *          consonant that is not a potential pre-base reordering Ra.
732     */
733    if (reph_pos == REPH_AFTER_MAIN)
734    {
735      /* XXX */
736    }
737
738    /*       4. If reph should be positioned before post-base consonant, find
739     *          first post-base classified consonant not ligated with main. If no
740     *          consonant is found, the target position should be before the
741     *          first matra, syllable modifier sign or vedic sign.
742     */
743    /* This is our take on what step 4 is trying to say (and failing, BADLY). */
744    if (reph_pos == REPH_AFTER_SUBSCRIPT)
745    {
746      new_reph_pos = base;
747      while (new_reph_pos < end &&
748	     !( FLAG (info[new_reph_pos + 1].indic_position()) & (FLAG (POS_POST_C) | FLAG (POS_POST_M) | FLAG (POS_SMVD))))
749	new_reph_pos++;
750      if (new_reph_pos < end)
751        goto reph_move;
752    }
753
754    /*       5. If no consonant is found in steps 3 or 4, move reph to a position
755     *          immediately before the first post-base matra, syllable modifier
756     *          sign or vedic sign that has a reordering class after the intended
757     *          reph position. For example, if the reordering position for reph
758     *          is post-main, it will skip above-base matras that also have a
759     *          post-main position.
760     */
761    reph_step_5:
762    {
763      /* XXX */
764    }
765
766    /*       6. Otherwise, reorder reph to the end of the syllable.
767     */
768    {
769      new_reph_pos = end - 1;
770      while (new_reph_pos > start && info[new_reph_pos].indic_position() == POS_SMVD)
771	new_reph_pos--;
772
773      /*
774       * If the Reph is to be ending up after a Matra,Halant sequence,
775       * position it before that Halant so it can interact with the Matra.
776       * However, if it's a plain Consonant,Halant we shouldn't do that.
777       * Uniscribe doesn't do this.
778       * TEST: U+0930,U+094D,U+0915,U+094B,U+094D
779       */
780      if (!indic_options ().uniscribe_bug_compatible &&
781	  unlikely (info[new_reph_pos].indic_category() == OT_H)) {
782	for (unsigned int i = base + 1; i < new_reph_pos; i++)
783	  if (info[i].indic_category() == OT_M) {
784	    /* Ok, got it. */
785	    new_reph_pos--;
786	  }
787      }
788      goto reph_move;
789    }
790
791    reph_move:
792    {
793      /* Move */
794      hb_glyph_info_t reph = info[start];
795      memmove (&info[start], &info[start + 1], (new_reph_pos - start) * sizeof (info[0]));
796      info[new_reph_pos] = reph;
797      start_of_last_cluster = start; /* Yay, one big cluster! */
798    }
799  }
800
801
802  /*   o Reorder pre-base reordering consonants:
803   *
804   *     If a pre-base reordering consonant is found, reorder it according to
805   *     the following rules:
806   *
807   *       1. Only reorder a glyph produced by substitution during application
808   *          of the <pref> feature. (Note that a font may shape a Ra consonant with
809   *          the feature generally but block it in certain contexts.)
810   *
811   *       2. Try to find a target position the same way as for pre-base matra.
812   *          If it is found, reorder pre-base consonant glyph.
813   *
814   *       3. If position is not found, reorder immediately before main
815   *          consonant.
816   */
817
818  /* TODO */
819
820
821
822  /* Apply 'init' to the Left Matra if it's a word start. */
823  if (info[start].indic_position () == POS_PRE_M &&
824      (!start ||
825       !(FLAG (_hb_glyph_info_get_general_category (&info[start - 1])) &
826	 (FLAG (HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER) |
827	  FLAG (HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER) |
828	  FLAG (HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER) |
829	  FLAG (HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER) |
830	  FLAG (HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER) |
831	  FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) |
832	  FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) |
833	  FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)))))
834    info[start].mask |= other_mask_array[INIT];
835
836
837
838  /* Finish off the clusters and go home! */
839
840  if (!indic_options ().uniscribe_bug_compatible)
841  {
842    /* This is what Uniscribe does.  Ie. add cluster boundaries after Halant,ZWNJ.
843     * This means, half forms are submerged into the main consonants cluster.
844     * This is unnecessary, and makes cursor positioning harder, but that's what
845     * Uniscribe does. */
846    unsigned int cluster_start = start;
847    for (unsigned int i = start + 1; i < start_of_last_cluster; i++)
848      if (info[i - 1].indic_category() == OT_H && info[i].indic_category() == OT_ZWNJ) {
849        i++;
850	buffer->merge_clusters (cluster_start, i);
851	cluster_start = i;
852      }
853    start_of_last_cluster = cluster_start;
854  }
855
856  buffer->merge_clusters (start_of_last_cluster, end);
857}
858
859
860static void
861final_reordering (const hb_ot_map_t *map,
862		  hb_face_t *face HB_UNUSED,
863		  hb_buffer_t *buffer,
864		  void *user_data HB_UNUSED)
865{
866  unsigned int count = buffer->len;
867  if (!count) return;
868
869  hb_mask_t other_mask_array[ARRAY_LENGTH (indic_other_features)] = {0};
870  unsigned int num_masks = ARRAY_LENGTH (indic_other_features);
871  for (unsigned int i = 0; i < num_masks; i++)
872    other_mask_array[i] = map->get_1_mask (indic_other_features[i].tag);
873
874  hb_glyph_info_t *info = buffer->info;
875  unsigned int last = 0;
876  unsigned int last_syllable = info[0].syllable();
877  for (unsigned int i = 1; i < count; i++)
878    if (last_syllable != info[i].syllable()) {
879      final_reordering_syllable (buffer, other_mask_array, last, i);
880      last = i;
881      last_syllable = info[last].syllable();
882    }
883  final_reordering_syllable (buffer, other_mask_array, last, count);
884
885  HB_BUFFER_DEALLOCATE_VAR (buffer, indic_category);
886  HB_BUFFER_DEALLOCATE_VAR (buffer, indic_position);
887}
888
889
890
891