hb-ot-shape-normalize.cc revision 84186a64004e5dcd2ce98b564d0e0a09aa5d68b2
1655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod/*
211138ccff71f442da1fcf64faa0e1d22e083e775Behdad Esfahbod * Copyright © 2011,2012  Google, Inc.
3655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod *
4655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod *  This is part of HarfBuzz, a text shaping library.
5655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod *
6655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * Permission is hereby granted, without written agreement and without
7655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * license or royalty fees, to use, copy, modify, and distribute this
8655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * software and its documentation for any purpose, provided that the
9655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * above copyright notice and the following two paragraphs appear in
10655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * all copies of this software.
11655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod *
12655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * DAMAGE.
17655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod *
18655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
21655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod *
24655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * Google Author(s): Behdad Esfahbod
25655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod */
26655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod
2711138ccff71f442da1fcf64faa0e1d22e083e775Behdad Esfahbod#include "hb-ot-shape-normalize-private.hh"
28655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod#include "hb-ot-shape-private.hh"
29655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod
30655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod
315d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod/*
325d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod * HIGHLEVEL DESIGN:
335d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *
345d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod * This file exports one main function: _hb_ot_shape_normalize().
355d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *
365d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod * This function closely reflects the Unicode Normalization Algorithm,
37c346671b6b9b05fa51b95c16212eb29ac69510faBehdad Esfahbod * yet it's different.
38c346671b6b9b05fa51b95c16212eb29ac69510faBehdad Esfahbod *
39c346671b6b9b05fa51b95c16212eb29ac69510faBehdad Esfahbod * Each shaper specifies whether it prefers decomposed (NFD) or composed (NFC).
40c346671b6b9b05fa51b95c16212eb29ac69510faBehdad Esfahbod * The logic however tries to use whatever the font can support.
415d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *
425d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod * In general what happens is that: each grapheme is decomposed in a chain
43947c9a778c0d4b428b58806f98c34ede59b7439cBehdad Esfahbod * of 1:2 decompositions, marks reordered, and then recomposed if desired,
445d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod * so far it's like Unicode Normalization.  However, the decomposition and
455d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod * recomposition only happens if the font supports the resulting characters.
465d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *
475d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod * The goals are:
485d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *
495d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *   - Try to render all canonically equivalent strings similarly.  To really
505d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *     achieve this we have to always do the full decomposition and then
515d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *     selectively recompose from there.  It's kinda too expensive though, so
525d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *     we skip some cases.  For example, if composed is desired, we simply
535d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *     don't touch 1-character clusters that are supported by the font, even
545d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *     though their NFC may be different.
555d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *
565d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *   - When a font has a precomposed character for a sequence but the 'ccmp'
57947c9a778c0d4b428b58806f98c34ede59b7439cBehdad Esfahbod *     feature in the font is not adequate, use the precomposed character
585d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *     which typically has better mark positioning.
595d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *
6055deff7595ef357d000fef83559c74c9f8acad00Behdad Esfahbod *   - When a font does not support a combining mark, but supports it precomposed
61c346671b6b9b05fa51b95c16212eb29ac69510faBehdad Esfahbod *     with previous base, use that.  This needs the itemizer to have this
62e3b2e077f549b04779c08a9fedb1f35b9f11075cBehdad Esfahbod *     knowledge too.  We need to provide assistance to the itemizer.
6355deff7595ef357d000fef83559c74c9f8acad00Behdad Esfahbod *
645d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *   - When a font does not support a character but supports its decomposition,
65378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod *     well, use the decomposition (preferring the canonical decomposition, but
6684186a64004e5dcd2ce98b564d0e0a09aa5d68b2Behdad Esfahbod *     falling back to the compatibility decomposition if necessary).  The
6784186a64004e5dcd2ce98b564d0e0a09aa5d68b2Behdad Esfahbod *     compatibility decomposition is really nice to have, for characters like
6884186a64004e5dcd2ce98b564d0e0a09aa5d68b2Behdad Esfahbod *     ellipsis, or various-sized space characters.
695d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *
7084186a64004e5dcd2ce98b564d0e0a09aa5d68b2Behdad Esfahbod *   - The complex shapers can customize the compose and decompose functions to
7184186a64004e5dcd2ce98b564d0e0a09aa5d68b2Behdad Esfahbod *     offload some of their requirements to the normalizer.  For example, the
7284186a64004e5dcd2ce98b564d0e0a09aa5d68b2Behdad Esfahbod *     Indic shaper may want to disallow recomposing of two matras.
7384186a64004e5dcd2ce98b564d0e0a09aa5d68b2Behdad Esfahbod *
7484186a64004e5dcd2ce98b564d0e0a09aa5d68b2Behdad Esfahbod *   - We try compatibility decomposition if decomposing through canonical
7584186a64004e5dcd2ce98b564d0e0a09aa5d68b2Behdad Esfahbod *     decomposition alone failed to find a sequence that the font supports.
7684186a64004e5dcd2ce98b564d0e0a09aa5d68b2Behdad Esfahbod *     We don't try compatibility decomposition recursively during the canonical
7784186a64004e5dcd2ce98b564d0e0a09aa5d68b2Behdad Esfahbod *     decomposition phase.  This has minimal impact.  There are only a handful
7884186a64004e5dcd2ce98b564d0e0a09aa5d68b2Behdad Esfahbod *     of Greek letter that have canonical decompositions that include characters
7984186a64004e5dcd2ce98b564d0e0a09aa5d68b2Behdad Esfahbod *     with compatibility decomposition.  Those can be found using this command:
8084186a64004e5dcd2ce98b564d0e0a09aa5d68b2Behdad Esfahbod *
8184186a64004e5dcd2ce98b564d0e0a09aa5d68b2Behdad Esfahbod *     egrep  "`echo -n ';('; grep ';<' UnicodeData.txt | cut -d';' -f1 | tr '\n' '|'; echo ') '`" UnicodeData.txt
825d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod */
835d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod
84c311d852080b50ffc85e80168de62abb05a6be59Behdad Esfahbodstatic void
859f377ed3210fe7d9f15e0c4f82020556f9a8f6f0Behdad Esfahbodoutput_glyph (hb_buffer_t *buffer, hb_codepoint_t glyph)
86c311d852080b50ffc85e80168de62abb05a6be59Behdad Esfahbod{
87c311d852080b50ffc85e80168de62abb05a6be59Behdad Esfahbod  buffer->output_glyph (glyph);
8899c2695759a6af855d565f4994bbdf220570bb48Behdad Esfahbod  _hb_glyph_info_set_unicode_props (&buffer->prev(), buffer->unicode);
89c311d852080b50ffc85e80168de62abb05a6be59Behdad Esfahbod}
9045412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod
915c6f5982d78e2d7fadc2fbb8b4f3a4be9420c59aBehdad Esfahbodstatic bool
9211138ccff71f442da1fcf64faa0e1d22e083e775Behdad Esfahboddecompose (hb_font_t *font, hb_buffer_t *buffer,
934ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod	   bool shortest,
9445412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod	   hb_codepoint_t ab)
95655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod{
9645412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod  hb_codepoint_t a, b, glyph;
9745412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod
9811138ccff71f442da1fcf64faa0e1d22e083e775Behdad Esfahbod  if (!hb_unicode_decompose (buffer->unicode, ab, &a, &b) ||
9911138ccff71f442da1fcf64faa0e1d22e083e775Behdad Esfahbod      (b && !hb_font_get_glyph (font, b, 0, &glyph)))
1000594a2448440208efa0acac9a5d8d52d43108289Behdad Esfahbod    return false;
10145412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod
10211138ccff71f442da1fcf64faa0e1d22e083e775Behdad Esfahbod  bool has_a = hb_font_get_glyph (font, a, 0, &glyph);
1034ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod  if (shortest && has_a) {
1044ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod    /* Output a and b */
1059f377ed3210fe7d9f15e0c4f82020556f9a8f6f0Behdad Esfahbod    output_glyph (buffer, a);
106dcdc51cdc0ba9d9fb75f84dd5fa7a49aa0b24ea0Behdad Esfahbod    if (b)
1079f377ed3210fe7d9f15e0c4f82020556f9a8f6f0Behdad Esfahbod      output_glyph (buffer, b);
1080594a2448440208efa0acac9a5d8d52d43108289Behdad Esfahbod    return true;
1094ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod  }
110655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod
11111138ccff71f442da1fcf64faa0e1d22e083e775Behdad Esfahbod  if (decompose (font, buffer, shortest, a)) {
112dcdc51cdc0ba9d9fb75f84dd5fa7a49aa0b24ea0Behdad Esfahbod    if (b)
1139f377ed3210fe7d9f15e0c4f82020556f9a8f6f0Behdad Esfahbod      output_glyph (buffer, b);
1140594a2448440208efa0acac9a5d8d52d43108289Behdad Esfahbod    return true;
1154ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod  }
11645412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod
1174ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod  if (has_a) {
1189f377ed3210fe7d9f15e0c4f82020556f9a8f6f0Behdad Esfahbod    output_glyph (buffer, a);
119dcdc51cdc0ba9d9fb75f84dd5fa7a49aa0b24ea0Behdad Esfahbod    if (b)
1209f377ed3210fe7d9f15e0c4f82020556f9a8f6f0Behdad Esfahbod      output_glyph (buffer, b);
1210594a2448440208efa0acac9a5d8d52d43108289Behdad Esfahbod    return true;
12245412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod  }
12345412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod
1240594a2448440208efa0acac9a5d8d52d43108289Behdad Esfahbod  return false;
1255c6f5982d78e2d7fadc2fbb8b4f3a4be9420c59aBehdad Esfahbod}
1265c6f5982d78e2d7fadc2fbb8b4f3a4be9420c59aBehdad Esfahbod
127378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbodstatic bool
128378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahboddecompose_compatibility (hb_font_t *font, hb_buffer_t *buffer,
129378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod			 hb_codepoint_t u)
130d6b9c6d20041b4f4fa11befc179aee757c41904dBehdad Esfahbod{
131378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod  unsigned int len, i;
132378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod  hb_codepoint_t decomposed[HB_UNICODE_MAX_DECOMPOSITION_LEN];
133378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod
134378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod  len = hb_unicode_decompose_compatibility (buffer->unicode, u, decomposed);
135378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod  if (!len)
136378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod    return false;
137378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod
138378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod  hb_codepoint_t glyph;
139378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod  for (i = 0; i < len; i++)
140378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod    if (!hb_font_get_glyph (font, decomposed[i], 0, &glyph))
141378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod      return false;
142378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod
143378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod  for (i = 0; i < len; i++)
144378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod    output_glyph (buffer, decomposed[i]);
145378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod
146378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod  return true;
147d6b9c6d20041b4f4fa11befc179aee757c41904dBehdad Esfahbod}
148d6b9c6d20041b4f4fa11befc179aee757c41904dBehdad Esfahbod
149c311d852080b50ffc85e80168de62abb05a6be59Behdad Esfahbodstatic void
150378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahboddecompose_current_character (hb_font_t *font, hb_buffer_t *buffer,
151378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod			     bool shortest)
1525c6f5982d78e2d7fadc2fbb8b4f3a4be9420c59aBehdad Esfahbod{
1534ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod  hb_codepoint_t glyph;
1544ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod
155378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod  /* Kind of a cute waterfall here... */
156378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod  if (shortest && hb_font_get_glyph (font, buffer->cur().codepoint, 0, &glyph))
157378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod    buffer->next_glyph ();
158378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod  else if (decompose (font, buffer, shortest, buffer->cur().codepoint))
159378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod    buffer->skip_glyph ();
160378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod  else if (!shortest && hb_font_get_glyph (font, buffer->cur().codepoint, 0, &glyph))
161378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod    buffer->next_glyph ();
162378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod  else if (decompose_compatibility (font, buffer, buffer->cur().codepoint))
163378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod    buffer->skip_glyph ();
164378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod  else
16511138ccff71f442da1fcf64faa0e1d22e083e775Behdad Esfahbod    buffer->next_glyph ();
166655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod}
167655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod
168c311d852080b50ffc85e80168de62abb05a6be59Behdad Esfahbodstatic void
16911138ccff71f442da1fcf64faa0e1d22e083e775Behdad Esfahboddecompose_multi_char_cluster (hb_font_t *font, hb_buffer_t *buffer,
1704ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod			      unsigned int end)
171655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod{
1725d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod  /* TODO Currently if there's a variation-selector we give-up, it's just too hard. */
17311138ccff71f442da1fcf64faa0e1d22e083e775Behdad Esfahbod  for (unsigned int i = buffer->idx; i < end; i++)
17411138ccff71f442da1fcf64faa0e1d22e083e775Behdad Esfahbod    if (unlikely (_hb_unicode_is_variation_selector (buffer->info[i].codepoint))) {
17511138ccff71f442da1fcf64faa0e1d22e083e775Behdad Esfahbod      while (buffer->idx < end)
17611138ccff71f442da1fcf64faa0e1d22e083e775Behdad Esfahbod	buffer->next_glyph ();
177c311d852080b50ffc85e80168de62abb05a6be59Behdad Esfahbod      return;
178af913c5788e600e36d29f44fe4e77db84cf8c442Behdad Esfahbod    }
179d6b9c6d20041b4f4fa11befc179aee757c41904dBehdad Esfahbod
18011138ccff71f442da1fcf64faa0e1d22e083e775Behdad Esfahbod  while (buffer->idx < end)
181378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod    decompose_current_character (font, buffer, false);
182655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod}
183655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod
18445d6f29f15f1d2323bcaa2498aed23ff0c8a1567Behdad Esfahbodstatic int
18545d6f29f15f1d2323bcaa2498aed23ff0c8a1567Behdad Esfahbodcompare_combining_class (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb)
18645d6f29f15f1d2323bcaa2498aed23ff0c8a1567Behdad Esfahbod{
187d1deaa2f5bd028e8076265cba92cffa4fa2834acBehdad Esfahbod  unsigned int a = _hb_glyph_info_get_modified_combining_class (pa);
188d1deaa2f5bd028e8076265cba92cffa4fa2834acBehdad Esfahbod  unsigned int b = _hb_glyph_info_get_modified_combining_class (pb);
18945d6f29f15f1d2323bcaa2498aed23ff0c8a1567Behdad Esfahbod
19045d6f29f15f1d2323bcaa2498aed23ff0c8a1567Behdad Esfahbod  return a < b ? -1 : a == b ? 0 : +1;
19145d6f29f15f1d2323bcaa2498aed23ff0c8a1567Behdad Esfahbod}
19245d6f29f15f1d2323bcaa2498aed23ff0c8a1567Behdad Esfahbod
19345412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbodvoid
19411138ccff71f442da1fcf64faa0e1d22e083e775Behdad Esfahbod_hb_ot_shape_normalize (hb_font_t *font, hb_buffer_t *buffer,
19511138ccff71f442da1fcf64faa0e1d22e083e775Behdad Esfahbod			hb_ot_shape_normalization_mode_t mode)
196655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod{
19711138ccff71f442da1fcf64faa0e1d22e083e775Behdad Esfahbod  bool recompose = mode != HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED;
1980594a2448440208efa0acac9a5d8d52d43108289Behdad Esfahbod  bool has_multichar_clusters = false;
19934c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod  unsigned int count;
2005d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod
201c311d852080b50ffc85e80168de62abb05a6be59Behdad Esfahbod  /* We do a fairly straightforward yet custom normalization process in three
2025389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod   * separate rounds: decompose, reorder, recompose (if desired).  Currently
2035389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod   * this makes two buffer swaps.  We can make it faster by moving the last
2045389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod   * two rounds into the inner loop for the first round, but it's more readable
2055389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod   * this way. */
2065d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod
20734c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod
2084ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod  /* First round, decompose */
2094ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod
2105389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod  buffer->clear_output ();
21134c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod  count = buffer->len;
212468e9cb25c9bc14781b7013e447d763f93bf76a3Behdad Esfahbod  for (buffer->idx = 0; buffer->idx < count;)
2135d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod  {
214655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod    unsigned int end;
215468e9cb25c9bc14781b7013e447d763f93bf76a3Behdad Esfahbod    for (end = buffer->idx + 1; end < count; end++)
21699c2695759a6af855d565f4994bbdf220570bb48Behdad Esfahbod      if (buffer->cur().cluster != buffer->info[end].cluster)
217655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod        break;
2185d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod
219468e9cb25c9bc14781b7013e447d763f93bf76a3Behdad Esfahbod    if (buffer->idx + 1 == end)
220378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod      decompose_current_character (font, buffer, recompose);
2214ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod    else {
22211138ccff71f442da1fcf64faa0e1d22e083e775Behdad Esfahbod      decompose_multi_char_cluster (font, buffer, end);
2230594a2448440208efa0acac9a5d8d52d43108289Behdad Esfahbod      has_multichar_clusters = true;
2244ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod    }
225655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod  }
226468e9cb25c9bc14781b7013e447d763f93bf76a3Behdad Esfahbod  buffer->swap_buffers ();
2274ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod
22834c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod
229968318455304804dc53045e8ba0cd4d76800c02dBehdad Esfahbod  if (mode != HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_FULL && !has_multichar_clusters)
2304ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod    return; /* Done! */
2314ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod
23234c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod
2334ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod  /* Second round, reorder (inplace) */
2344ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod
23534c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod  count = buffer->len;
23634c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod  for (unsigned int i = 0; i < count; i++)
23734c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod  {
238d1deaa2f5bd028e8076265cba92cffa4fa2834acBehdad Esfahbod    if (_hb_glyph_info_get_modified_combining_class (&buffer->info[i]) == 0)
23934c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod      continue;
24034c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod
24134c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod    unsigned int end;
24234c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod    for (end = i + 1; end < count; end++)
243d1deaa2f5bd028e8076265cba92cffa4fa2834acBehdad Esfahbod      if (_hb_glyph_info_get_modified_combining_class (&buffer->info[end]) == 0)
24434c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod        break;
24534c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod
24634c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod    /* We are going to do a bubble-sort.  Only do this if the
24734c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod     * sequence is short.  Doing it on long sequences can result
24834c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod     * in an O(n^2) DoS. */
24934c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod    if (end - i > 10) {
25034c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod      i = end;
25134c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod      continue;
25234c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod    }
25334c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod
25445d6f29f15f1d2323bcaa2498aed23ff0c8a1567Behdad Esfahbod    hb_bubble_sort (buffer->info + i, end - i, compare_combining_class);
25534c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod
25634c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod    i = end;
25734c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod  }
25834c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod
2594ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod
2605389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod  if (!recompose)
2615389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod    return;
2625389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod
2634ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod  /* Third round, recompose */
26434c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod
2655389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod  /* As noted in the comment earlier, we don't try to combine
2665389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod   * ccc=0 chars with their previous Starter. */
2674ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod
2685389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod  buffer->clear_output ();
2695389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod  count = buffer->len;
2705389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod  unsigned int starter = 0;
2715389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod  buffer->next_glyph ();
2725389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod  while (buffer->idx < count)
2735389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod  {
2745389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod    hb_codepoint_t composed, glyph;
275968318455304804dc53045e8ba0cd4d76800c02dBehdad Esfahbod    if (/* If mode is NOT COMPOSED_FULL (ie. it's COMPOSED_DIACRITICS), we don't try to
276968318455304804dc53045e8ba0cd4d76800c02dBehdad Esfahbod	 * compose a CCC=0 character with it's preceding starter. */
277968318455304804dc53045e8ba0cd4d76800c02dBehdad Esfahbod	(mode == HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_FULL ||
27899c2695759a6af855d565f4994bbdf220570bb48Behdad Esfahbod	 _hb_glyph_info_get_modified_combining_class (&buffer->cur()) != 0) &&
279968318455304804dc53045e8ba0cd4d76800c02dBehdad Esfahbod	/* If there's anything between the starter and this char, they should have CCC
280968318455304804dc53045e8ba0cd4d76800c02dBehdad Esfahbod	 * smaller than this character's. */
281968318455304804dc53045e8ba0cd4d76800c02dBehdad Esfahbod	(starter == buffer->out_len - 1 ||
28299c2695759a6af855d565f4994bbdf220570bb48Behdad Esfahbod	 _hb_glyph_info_get_modified_combining_class (&buffer->prev()) < _hb_glyph_info_get_modified_combining_class (&buffer->cur())) &&
283968318455304804dc53045e8ba0cd4d76800c02dBehdad Esfahbod	/* And compose. */
284e02d9257863b49e33ab5942971266349d3c548f6Behdad Esfahbod	hb_unicode_compose (buffer->unicode,
285e02d9257863b49e33ab5942971266349d3c548f6Behdad Esfahbod			    buffer->out_info[starter].codepoint,
28699c2695759a6af855d565f4994bbdf220570bb48Behdad Esfahbod			    buffer->cur().codepoint,
287e02d9257863b49e33ab5942971266349d3c548f6Behdad Esfahbod			    &composed) &&
288968318455304804dc53045e8ba0cd4d76800c02dBehdad Esfahbod	/* And the font has glyph for the composite. */
289e02d9257863b49e33ab5942971266349d3c548f6Behdad Esfahbod	hb_font_get_glyph (font, composed, 0, &glyph))
2905389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod    {
291bc8357ea7b4c0d7c715aae353176434fb9460205Behdad Esfahbod      /* Composes. */
292bc8357ea7b4c0d7c715aae353176434fb9460205Behdad Esfahbod      buffer->next_glyph (); /* Copy to out-buffer. */
293bc8357ea7b4c0d7c715aae353176434fb9460205Behdad Esfahbod      if (unlikely (buffer->in_error))
294bc8357ea7b4c0d7c715aae353176434fb9460205Behdad Esfahbod        return;
295bc8357ea7b4c0d7c715aae353176434fb9460205Behdad Esfahbod      buffer->merge_out_clusters (starter, buffer->out_len);
296bc8357ea7b4c0d7c715aae353176434fb9460205Behdad Esfahbod      buffer->out_len--; /* Remove the second composble. */
297bc8357ea7b4c0d7c715aae353176434fb9460205Behdad Esfahbod      buffer->out_info[starter].codepoint = composed; /* Modify starter and carry on. */
298d1deaa2f5bd028e8076265cba92cffa4fa2834acBehdad Esfahbod      _hb_glyph_info_set_unicode_props (&buffer->out_info[starter], buffer->unicode);
299e02d9257863b49e33ab5942971266349d3c548f6Behdad Esfahbod
3005389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod      continue;
3015389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod    }
3025389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod
303e02d9257863b49e33ab5942971266349d3c548f6Behdad Esfahbod    /* Blocked, or doesn't compose. */
304e02d9257863b49e33ab5942971266349d3c548f6Behdad Esfahbod    buffer->next_glyph ();
305968318455304804dc53045e8ba0cd4d76800c02dBehdad Esfahbod
30699c2695759a6af855d565f4994bbdf220570bb48Behdad Esfahbod    if (_hb_glyph_info_get_modified_combining_class (&buffer->prev()) == 0)
307968318455304804dc53045e8ba0cd4d76800c02dBehdad Esfahbod      starter = buffer->out_len - 1;
3084ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod  }
3095389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod  buffer->swap_buffers ();
31034c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod
311655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod}
312