hb-ot-shape-normalize.cc revision c311d852080b50ffc85e80168de62abb05a6be59
1655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod/*
2655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * Copyright © 2011  Google, Inc.
3655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod *
4655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod *  This is part of HarfBuzz, a text shaping library.
5655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod *
6655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * Permission is hereby granted, without written agreement and without
7655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * license or royalty fees, to use, copy, modify, and distribute this
8655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * software and its documentation for any purpose, provided that the
9655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * above copyright notice and the following two paragraphs appear in
10655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * all copies of this software.
11655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod *
12655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * DAMAGE.
17655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod *
18655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
21655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod *
24655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * Google Author(s): Behdad Esfahbod
25655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod */
26655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod
27655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod#include "hb-ot-shape-private.hh"
285d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod#include "hb-ot-shape-complex-private.hh"
29655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod
30655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad EsfahbodHB_BEGIN_DECLS
31655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod
325d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod/*
335d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod * HIGHLEVEL DESIGN:
345d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *
355d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod * This file exports one main function: _hb_ot_shape_normalize().
365d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *
 * This function closely reflects the Unicode Normalization Algorithm,
 * yet it's different.  The shaper can either prefer decomposed (NFD) or
 * composed (NFC).
405d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *
 * In general what happens is that: each grapheme is decomposed in a chain
 * of 1:2 decompositions, marks reordered, and then recomposed if desired,
 * so far it's like Unicode Normalization.  However, the decomposition and
 * recomposition only happen if the font supports the resulting characters.
455d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *
465d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod * The goals are:
475d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *
485d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *   - Try to render all canonically equivalent strings similarly.  To really
495d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *     achieve this we have to always do the full decomposition and then
505d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *     selectively recompose from there.  It's kinda too expensive though, so
515d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *     we skip some cases.  For example, if composed is desired, we simply
525d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *     don't touch 1-character clusters that are supported by the font, even
535d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *     though their NFC may be different.
545d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *
 *   - When a font has a precomposed character for a sequence but the 'ccmp'
 *     feature in the font is not adequate, use the precomposed character
 *     which typically has better mark positioning.
585d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *
595d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *   - When a font does not support a character but supports its decomposition,
605d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *     well, use the decomposition.
615d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *
625d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *   - The Indic shaper requests decomposed output.  This will handle splitting
635d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *     matra for the Indic shaper.
645d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod */
655d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod
66c311d852080b50ffc85e80168de62abb05a6be59Behdad Esfahbodstatic void
67c311d852080b50ffc85e80168de62abb05a6be59Behdad Esfahbodoutput_glyph (hb_ot_shape_context_t *c,
68c311d852080b50ffc85e80168de62abb05a6be59Behdad Esfahbod	      hb_codepoint_t glyph)
69c311d852080b50ffc85e80168de62abb05a6be59Behdad Esfahbod{
70c311d852080b50ffc85e80168de62abb05a6be59Behdad Esfahbod  hb_buffer_t *buffer = c->buffer;
71c311d852080b50ffc85e80168de62abb05a6be59Behdad Esfahbod
72c311d852080b50ffc85e80168de62abb05a6be59Behdad Esfahbod  buffer->output_glyph (glyph);
73c311d852080b50ffc85e80168de62abb05a6be59Behdad Esfahbod  hb_glyph_info_set_unicode_props (&buffer->out_info[buffer->out_len - 1], buffer->unicode);
74c311d852080b50ffc85e80168de62abb05a6be59Behdad Esfahbod}
7545412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod
765c6f5982d78e2d7fadc2fbb8b4f3a4be9420c59aBehdad Esfahbodstatic bool
7745412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahboddecompose (hb_ot_shape_context_t *c,
784ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod	   bool shortest,
7945412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod	   hb_codepoint_t ab)
80655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod{
8145412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod  hb_codepoint_t a, b, glyph;
8245412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod
834ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod  if (!hb_unicode_decompose (c->buffer->unicode, ab, &a, &b) ||
84dcdc51cdc0ba9d9fb75f84dd5fa7a49aa0b24ea0Behdad Esfahbod      (b && !hb_font_get_glyph (c->font, b, 0, &glyph)))
854ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod    return FALSE;
8645412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod
874ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod  bool has_a = hb_font_get_glyph (c->font, a, 0, &glyph);
884ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod  if (shortest && has_a) {
894ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod    /* Output a and b */
90c311d852080b50ffc85e80168de62abb05a6be59Behdad Esfahbod    output_glyph (c, a);
91dcdc51cdc0ba9d9fb75f84dd5fa7a49aa0b24ea0Behdad Esfahbod    if (b)
92c311d852080b50ffc85e80168de62abb05a6be59Behdad Esfahbod      output_glyph (c, b);
934ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod    return TRUE;
944ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod  }
95655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod
964ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod  if (decompose (c, shortest, a)) {
97dcdc51cdc0ba9d9fb75f84dd5fa7a49aa0b24ea0Behdad Esfahbod    if (b)
98c311d852080b50ffc85e80168de62abb05a6be59Behdad Esfahbod      output_glyph (c, b);
994ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod    return TRUE;
1004ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod  }
10145412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod
1024ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod  if (has_a) {
103c311d852080b50ffc85e80168de62abb05a6be59Behdad Esfahbod    output_glyph (c, a);
104dcdc51cdc0ba9d9fb75f84dd5fa7a49aa0b24ea0Behdad Esfahbod    if (b)
105c311d852080b50ffc85e80168de62abb05a6be59Behdad Esfahbod      output_glyph (c, b);
10645412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod    return TRUE;
10745412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod  }
10845412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod
1094ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod  return FALSE;
1105c6f5982d78e2d7fadc2fbb8b4f3a4be9420c59aBehdad Esfahbod}
1115c6f5982d78e2d7fadc2fbb8b4f3a4be9420c59aBehdad Esfahbod
112c311d852080b50ffc85e80168de62abb05a6be59Behdad Esfahbodstatic void
1134ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahboddecompose_current_glyph (hb_ot_shape_context_t *c,
1144ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod			 bool shortest)
115d6b9c6d20041b4f4fa11befc179aee757c41904dBehdad Esfahbod{
116c311d852080b50ffc85e80168de62abb05a6be59Behdad Esfahbod  if (decompose (c, shortest, c->buffer->info[c->buffer->idx].codepoint))
1174ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod    c->buffer->skip_glyph ();
118c311d852080b50ffc85e80168de62abb05a6be59Behdad Esfahbod  else
1194ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod    c->buffer->next_glyph ();
120d6b9c6d20041b4f4fa11befc179aee757c41904dBehdad Esfahbod}
121d6b9c6d20041b4f4fa11befc179aee757c41904dBehdad Esfahbod
122c311d852080b50ffc85e80168de62abb05a6be59Behdad Esfahbodstatic void
1234ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahboddecompose_single_char_cluster (hb_ot_shape_context_t *c,
1244ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod			       bool will_recompose)
1255c6f5982d78e2d7fadc2fbb8b4f3a4be9420c59aBehdad Esfahbod{
1264ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod  hb_codepoint_t glyph;
1274ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod
1284ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod  /* If recomposing and font supports this, we're good to go */
1294ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod  if (will_recompose && hb_font_get_glyph (c->font, c->buffer->info[c->buffer->idx].codepoint, 0, &glyph)) {
1304ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod    c->buffer->next_glyph ();
131c311d852080b50ffc85e80168de62abb05a6be59Behdad Esfahbod    return;
1324ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod  }
1334ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod
134c311d852080b50ffc85e80168de62abb05a6be59Behdad Esfahbod  decompose_current_glyph (c, will_recompose);
135655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod}
136655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod
137c311d852080b50ffc85e80168de62abb05a6be59Behdad Esfahbodstatic void
1384ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahboddecompose_multi_char_cluster (hb_ot_shape_context_t *c,
1394ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod			      unsigned int end)
140655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod{
1415d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod  /* TODO Currently if there's a variation-selector we give-up, it's just too hard. */
1424ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod  for (unsigned int i = c->buffer->idx; i < end; i++)
143d6b9c6d20041b4f4fa11befc179aee757c41904dBehdad Esfahbod    if (unlikely (is_variation_selector (c->buffer->info[i].codepoint)))
144c311d852080b50ffc85e80168de62abb05a6be59Behdad Esfahbod      return;
145d6b9c6d20041b4f4fa11befc179aee757c41904dBehdad Esfahbod
1464ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod  while (c->buffer->idx < end)
147c311d852080b50ffc85e80168de62abb05a6be59Behdad Esfahbod    decompose_current_glyph (c, FALSE);
148655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod}
149655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod
15045412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbodvoid
1515d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod_hb_ot_shape_normalize (hb_ot_shape_context_t *c)
152655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod{
1535d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod  hb_buffer_t *buffer = c->buffer;
1545d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod  bool recompose = !hb_ot_shape_complex_prefer_decomposed (c->plan->shaper);
1554ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod  bool has_multichar_clusters = FALSE;
15634c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod  unsigned int count;
1575d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod
158c311d852080b50ffc85e80168de62abb05a6be59Behdad Esfahbod  /* We do a fairly straightforward yet custom normalization process in three
1595389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod   * separate rounds: decompose, reorder, recompose (if desired).  Currently
1605389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod   * this makes two buffer swaps.  We can make it faster by moving the last
1615389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod   * two rounds into the inner loop for the first round, but it's more readable
1625389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod   * this way. */
1635d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod
16434c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod
1654ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod  /* First round, decompose */
1664ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod
1675389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod  buffer->clear_output ();
16834c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod  count = buffer->len;
169468e9cb25c9bc14781b7013e447d763f93bf76a3Behdad Esfahbod  for (buffer->idx = 0; buffer->idx < count;)
1705d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod  {
171655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod    unsigned int end;
172468e9cb25c9bc14781b7013e447d763f93bf76a3Behdad Esfahbod    for (end = buffer->idx + 1; end < count; end++)
173468e9cb25c9bc14781b7013e447d763f93bf76a3Behdad Esfahbod      if (buffer->info[buffer->idx].cluster != buffer->info[end].cluster)
174655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod        break;
1755d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod
176468e9cb25c9bc14781b7013e447d763f93bf76a3Behdad Esfahbod    if (buffer->idx + 1 == end)
177c311d852080b50ffc85e80168de62abb05a6be59Behdad Esfahbod      decompose_single_char_cluster (c, recompose);
1784ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod    else {
179c311d852080b50ffc85e80168de62abb05a6be59Behdad Esfahbod      decompose_multi_char_cluster (c, end);
1804ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod      has_multichar_clusters = TRUE;
1814ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod    }
182655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod  }
183468e9cb25c9bc14781b7013e447d763f93bf76a3Behdad Esfahbod  buffer->swap_buffers ();
1844ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod
18534c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod
1864ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod  /* Technically speaking, two characters with ccc=0 may combine.  But all
1874ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod   * those cases are in languages that the indic module handles (which expects
1884ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod   * decomposed), or in Hangul jamo, which again, we want decomposed anyway.
1895389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod   * So we don't bother combining across cluster boundaries.
1905389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod   *
1915389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod   * TODO: Am I right about Hangul?  If I am, we should add a Hangul module
1925389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod   * that requests decomposed. */
1934ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod
1944ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod  if (!has_multichar_clusters)
1954ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod    return; /* Done! */
1964ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod
19734c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod
1984ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod  /* Second round, reorder (inplace) */
1994ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod
20034c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod  count = buffer->len;
20134c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod  for (unsigned int i = 0; i < count; i++)
20234c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod  {
20334c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod    if (buffer->info[i].combining_class() == 0)
20434c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod      continue;
20534c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod
20634c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod    unsigned int end;
20734c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod    for (end = i + 1; end < count; end++)
20834c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod      if (buffer->info[end].combining_class() == 0)
20934c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod        break;
21034c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod
21134c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod    /* We are going to do a bubble-sort.  Only do this if the
21234c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod     * sequence is short.  Doing it on long sequences can result
21334c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod     * in an O(n^2) DoS. */
21434c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod    if (end - i > 10) {
21534c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod      i = end;
21634c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod      continue;
21734c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod    }
21834c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod
21934c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod    unsigned int k = end - i - 1;
22034c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod    do {
22134c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod      hb_glyph_info_t *pinfo = buffer->info + i;
22234c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod      unsigned int new_k = 0;
22334c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod
22434c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod      for (unsigned int j = 0; j < k; j++)
22534c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod	if (pinfo[j].combining_class() > pinfo[j+1].combining_class()) {
22634c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod	  hb_glyph_info_t t;
22734c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod	  t = pinfo[j];
22834c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod	  pinfo[j] = pinfo[j + 1];
22934c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod	  pinfo[j + 1] = t;
23034c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod
23134c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod	  new_k = j;
23234c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod	}
23334c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod      k = new_k;
23434c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod    } while (k);
23534c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod
23634c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod    i = end;
23734c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod  }
23834c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod
2394ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod
2405389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod  if (!recompose)
2415389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod    return;
2425389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod
2434ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod  /* Third round, recompose */
24434c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod
2455389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod  /* As noted in the comment earlier, we don't try to combine
2465389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod   * ccc=0 chars with their previous Starter. */
2474ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod
2485389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod  buffer->clear_output ();
2495389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod  count = buffer->len;
2505389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod  unsigned int starter = 0;
2515389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod  buffer->next_glyph ();
2525389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod  while (buffer->idx < count)
2535389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod  {
2545389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod    if (buffer->info[buffer->idx].combining_class() == 0) {
2555389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod      starter = buffer->out_len;
2565389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod      buffer->next_glyph ();
2575389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod      continue;
2585389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod    }
2595389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod
2605389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod    hb_codepoint_t composed, glyph;
2615389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod    if ((buffer->out_info[buffer->out_len - 1].combining_class() >=
2625389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod	 buffer->info[buffer->idx].combining_class()) ||
2635389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod	!hb_unicode_compose (c->buffer->unicode,
2645389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod			     buffer->out_info[starter].codepoint,
2655389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod			     buffer->info[buffer->idx].codepoint,
2665389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod			     &composed) ||
2675389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod	!hb_font_get_glyph (c->font, composed, 0, &glyph))
2685389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod    {
2695389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod      /* Blocked, or doesn't compose. */
2705389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod      buffer->next_glyph ();
2715389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod      continue;
2725389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod    }
2735389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod
2745389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod    /* Composes. Modify starter and carry on. */
2755389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod    buffer->out_info[starter].codepoint = composed;
2765389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod    hb_glyph_info_set_unicode_props (&buffer->out_info[starter], buffer->unicode);
2774ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod
2785389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod    buffer->skip_glyph ();
2794ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod  }
2805389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod  buffer->swap_buffers ();
28134c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod
282655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod}
283655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod
284655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad EsfahbodHB_END_DECLS
285