hb-ot-shape-normalize.cc revision f4cb4762986a28634fa7de9b706f9d37859b881e
1655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod/*
211138ccff71f442da1fcf64faa0e1d22e083e775Behdad Esfahbod * Copyright © 2011,2012  Google, Inc.
3655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod *
4655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod *  This is part of HarfBuzz, a text shaping library.
5655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod *
6655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * Permission is hereby granted, without written agreement and without
7655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * license or royalty fees, to use, copy, modify, and distribute this
8655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * software and its documentation for any purpose, provided that the
9655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * above copyright notice and the following two paragraphs appear in
10655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * all copies of this software.
11655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod *
12655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * DAMAGE.
17655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod *
18655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
21655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod *
24655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * Google Author(s): Behdad Esfahbod
25655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod */
26655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod
2711138ccff71f442da1fcf64faa0e1d22e083e775Behdad Esfahbod#include "hb-ot-shape-normalize-private.hh"
28655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod#include "hb-ot-shape-private.hh"
29655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod
30655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod
315d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod/*
325d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod * HIGHLEVEL DESIGN:
335d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *
345d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod * This file exports one main function: _hb_ot_shape_normalize().
355d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *
365d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod * This function closely reflects the Unicode Normalization Algorithm,
37c346671b6b9b05fa51b95c16212eb29ac69510faBehdad Esfahbod * yet it's different.
38c346671b6b9b05fa51b95c16212eb29ac69510faBehdad Esfahbod *
39c346671b6b9b05fa51b95c16212eb29ac69510faBehdad Esfahbod * Each shaper specifies whether it prefers decomposed (NFD) or composed (NFC).
40c346671b6b9b05fa51b95c16212eb29ac69510faBehdad Esfahbod * The logic however tries to use whatever the font can support.
415d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *
425d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod * In general what happens is that: each grapheme is decomposed in a chain
43947c9a778c0d4b428b58806f98c34ede59b7439cBehdad Esfahbod * of 1:2 decompositions, marks reordered, and then recomposed if desired,
445d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod * so far it's like Unicode Normalization.  However, the decomposition and
455d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod * recomposition only happens if the font supports the resulting characters.
465d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *
475d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod * The goals are:
485d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *
495d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *   - Try to render all canonically equivalent strings similarly.  To really
505d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *     achieve this we have to always do the full decomposition and then
515d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *     selectively recompose from there.  It's kinda too expensive though, so
525d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *     we skip some cases.  For example, if composed is desired, we simply
535d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *     don't touch 1-character clusters that are supported by the font, even
545d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *     though their NFC may be different.
555d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *
565d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *   - When a font has a precomposed character for a sequence but the 'ccmp'
57947c9a778c0d4b428b58806f98c34ede59b7439cBehdad Esfahbod *     feature in the font is not adequate, use the precomposed character
585d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *     which typically has better mark positioning.
595d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *
6055deff7595ef357d000fef83559c74c9f8acad00Behdad Esfahbod *   - When a font does not support a combining mark, but supports it precomposed
61c346671b6b9b05fa51b95c16212eb29ac69510faBehdad Esfahbod *     with previous base, use that.  This needs the itemizer to have this
62e3b2e077f549b04779c08a9fedb1f35b9f11075cBehdad Esfahbod *     knowledge too.  We need to provide assistance to the itemizer.
6355deff7595ef357d000fef83559c74c9f8acad00Behdad Esfahbod *
645d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *   - When a font does not support a character but supports its decomposition,
65378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod *     well, use the decomposition (preferring the canonical decomposition, but
6684186a64004e5dcd2ce98b564d0e0a09aa5d68b2Behdad Esfahbod *     falling back to the compatibility decomposition if necessary).  The
6784186a64004e5dcd2ce98b564d0e0a09aa5d68b2Behdad Esfahbod *     compatibility decomposition is really nice to have, for characters like
6884186a64004e5dcd2ce98b564d0e0a09aa5d68b2Behdad Esfahbod *     ellipsis, or various-sized space characters.
695d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod *
7084186a64004e5dcd2ce98b564d0e0a09aa5d68b2Behdad Esfahbod *   - The complex shapers can customize the compose and decompose functions to
7184186a64004e5dcd2ce98b564d0e0a09aa5d68b2Behdad Esfahbod *     offload some of their requirements to the normalizer.  For example, the
7284186a64004e5dcd2ce98b564d0e0a09aa5d68b2Behdad Esfahbod *     Indic shaper may want to disallow recomposing of two matras.
7384186a64004e5dcd2ce98b564d0e0a09aa5d68b2Behdad Esfahbod *
7484186a64004e5dcd2ce98b564d0e0a09aa5d68b2Behdad Esfahbod *   - We try compatibility decomposition if decomposing through canonical
7584186a64004e5dcd2ce98b564d0e0a09aa5d68b2Behdad Esfahbod *     decomposition alone failed to find a sequence that the font supports.
7684186a64004e5dcd2ce98b564d0e0a09aa5d68b2Behdad Esfahbod *     We don't try compatibility decomposition recursively during the canonical
 *     decomposition phase.  This has minimal impact.  There are only a handful
 *     of Greek letters that have canonical decompositions that include characters
 *     with compatibility decomposition.  Those can be found using this command:
8084186a64004e5dcd2ce98b564d0e0a09aa5d68b2Behdad Esfahbod *
8184186a64004e5dcd2ce98b564d0e0a09aa5d68b2Behdad Esfahbod *     egrep  "`echo -n ';('; grep ';<' UnicodeData.txt | cut -d';' -f1 | tr '\n' '|'; echo ') '`" UnicodeData.txt
825d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod */
835d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod
84428dfcab6634ff264570a0a5d715efb8048c3db5Behdad Esfahbodstatic hb_bool_t
85428dfcab6634ff264570a0a5d715efb8048c3db5Behdad Esfahboddecompose_func (hb_unicode_funcs_t *unicode,
86428dfcab6634ff264570a0a5d715efb8048c3db5Behdad Esfahbod		hb_codepoint_t  ab,
87428dfcab6634ff264570a0a5d715efb8048c3db5Behdad Esfahbod		hb_codepoint_t *a,
88428dfcab6634ff264570a0a5d715efb8048c3db5Behdad Esfahbod		hb_codepoint_t *b)
89428dfcab6634ff264570a0a5d715efb8048c3db5Behdad Esfahbod{
900f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod  /* XXX FIXME, move these to complex shapers and propagage to normalizer.*/
910f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod  switch (ab) {
920f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    case 0x0AC9  : return false;
930f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod
940f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    case 0x0931  : return false;
950f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    case 0x0B94  : return false;
960f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod
970f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    /* These ones have Unicode decompositions, but we do it
980f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod     * this way to be close to what Uniscribe does. */
990f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    case 0x0DDA  : *a = 0x0DD9; *b= 0x0DDA; return true;
1000f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    case 0x0DDC  : *a = 0x0DD9; *b= 0x0DDC; return true;
1010f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    case 0x0DDD  : *a = 0x0DD9; *b= 0x0DDD; return true;
1020f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    case 0x0DDE  : *a = 0x0DD9; *b= 0x0DDE; return true;
1030f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod
1040f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    case 0x0F77  : *a = 0x0FB2; *b= 0x0F81; return true;
1050f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    case 0x0F79  : *a = 0x0FB3; *b= 0x0F81; return true;
1060f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    case 0x17BE  : *a = 0x17C1; *b= 0x17BE; return true;
1070f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    case 0x17BF  : *a = 0x17C1; *b= 0x17BF; return true;
1080f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    case 0x17C0  : *a = 0x17C1; *b= 0x17C0; return true;
1090f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    case 0x17C4  : *a = 0x17C1; *b= 0x17C4; return true;
1100f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    case 0x17C5  : *a = 0x17C1; *b= 0x17C5; return true;
1110f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    case 0x1925  : *a = 0x1920; *b= 0x1923; return true;
1120f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    case 0x1926  : *a = 0x1920; *b= 0x1924; return true;
1130f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    case 0x1B3C  : *a = 0x1B42; *b= 0x1B3C; return true;
1140f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    case 0x1112E  : *a = 0x11127; *b= 0x11131; return true;
1150f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    case 0x1112F  : *a = 0x11127; *b= 0x11132; return true;
1160f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod#if 0
1170f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    case 0x0B57  : *a = 0xno decomp, -> RIGHT; return true;
1180f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    case 0x1C29  : *a = 0xno decomp, -> LEFT; return true;
1190f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    case 0xA9C0  : *a = 0xno decomp, -> RIGHT; return true;
1200f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    case 0x111BF  : *a = 0xno decomp, -> ABOVE; return true;
1210f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod#endif
1220f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod  }
123428dfcab6634ff264570a0a5d715efb8048c3db5Behdad Esfahbod  return unicode->decompose (ab, a, b);
124428dfcab6634ff264570a0a5d715efb8048c3db5Behdad Esfahbod}
125428dfcab6634ff264570a0a5d715efb8048c3db5Behdad Esfahbod
126428dfcab6634ff264570a0a5d715efb8048c3db5Behdad Esfahbodstatic hb_bool_t
127428dfcab6634ff264570a0a5d715efb8048c3db5Behdad Esfahbodcompose_func (hb_unicode_funcs_t *unicode,
128428dfcab6634ff264570a0a5d715efb8048c3db5Behdad Esfahbod	      hb_codepoint_t  a,
129428dfcab6634ff264570a0a5d715efb8048c3db5Behdad Esfahbod	      hb_codepoint_t  b,
130428dfcab6634ff264570a0a5d715efb8048c3db5Behdad Esfahbod	      hb_codepoint_t *ab)
131428dfcab6634ff264570a0a5d715efb8048c3db5Behdad Esfahbod{
1320f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod  /* XXX, this belongs to indic normalizer. */
1330f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod  if ((FLAG (unicode->general_category (a)) &
1340f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod       (FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) |
1350f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) |
1360f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK))))
1370f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    return false;
1380f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod  /* XXX, add composition-exclusion exceptions to Indic shaper. */
1390f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod  if (a == 0x09AF && b == 0x09BC) { *ab = 0x09DF; return true; }
1400f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod
1410f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod  /* XXX, these belong to the hebew / default shaper. */
1420f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod  /* Hebrew presentation-form shaping.
1430f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod   * https://bugzilla.mozilla.org/show_bug.cgi?id=728866 */
1440f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod  // Hebrew presentation forms with dagesh, for characters 0x05D0..0x05EA;
1450f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod  // note that some letters do not have a dagesh presForm encoded
1460f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod  static const hb_codepoint_t sDageshForms[0x05EA - 0x05D0 + 1] = {
1470f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    0xFB30, // ALEF
1480f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    0xFB31, // BET
1490f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    0xFB32, // GIMEL
1500f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    0xFB33, // DALET
1510f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    0xFB34, // HE
1520f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    0xFB35, // VAV
1530f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    0xFB36, // ZAYIN
1540f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    0, // HET
1550f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    0xFB38, // TET
1560f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    0xFB39, // YOD
1570f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    0xFB3A, // FINAL KAF
1580f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    0xFB3B, // KAF
1590f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    0xFB3C, // LAMED
1600f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    0, // FINAL MEM
1610f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    0xFB3E, // MEM
1620f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    0, // FINAL NUN
1630f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    0xFB40, // NUN
1640f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    0xFB41, // SAMEKH
1650f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    0, // AYIN
1660f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    0xFB43, // FINAL PE
1670f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    0xFB44, // PE
1680f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    0, // FINAL TSADI
1690f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    0xFB46, // TSADI
1700f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    0xFB47, // QOF
1710f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    0xFB48, // RESH
1720f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    0xFB49, // SHIN
1730f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod    0xFB4A // TAV
1740f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod  };
1750f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod
1760f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod  hb_bool_t found = unicode->compose (a, b, ab);
1770f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod
1780f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod  if (!found && (b & ~0x7F) == 0x0580) {
1790f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod      // special-case Hebrew presentation forms that are excluded from
1800f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod      // standard normalization, but wanted for old fonts
1810f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod      switch (b) {
1820f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod      case 0x05B4: // HIRIQ
1830f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	  if (a == 0x05D9) { // YOD
1840f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	      *ab = 0xFB1D;
1850f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	      found = true;
1860f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	  }
1870f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	  break;
1880f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod      case 0x05B7: // patah
1890f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	  if (a == 0x05F2) { // YIDDISH YOD YOD
1900f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	      *ab = 0xFB1F;
1910f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	      found = true;
1920f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	  } else if (a == 0x05D0) { // ALEF
1930f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	      *ab = 0xFB2E;
1940f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	      found = true;
1950f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	  }
1960f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	  break;
1970f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod      case 0x05B8: // QAMATS
1980f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	  if (a == 0x05D0) { // ALEF
1990f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	      *ab = 0xFB2F;
2000f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	      found = true;
2010f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	  }
2020f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	  break;
2030f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod      case 0x05B9: // HOLAM
2040f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	  if (a == 0x05D5) { // VAV
2050f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	      *ab = 0xFB4B;
2060f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	      found = true;
2070f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	  }
2080f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	  break;
2090f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod      case 0x05BC: // DAGESH
2100f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	  if (a >= 0x05D0 && a <= 0x05EA) {
2110f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	      *ab = sDageshForms[a - 0x05D0];
2120f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	      found = (*ab != 0);
2130f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	  } else if (a == 0xFB2A) { // SHIN WITH SHIN DOT
2140f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	      *ab = 0xFB2C;
2150f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	      found = true;
2160f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	  } else if (a == 0xFB2B) { // SHIN WITH SIN DOT
2170f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	      *ab = 0xFB2D;
2180f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	      found = true;
2190f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	  }
2200f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	  break;
2210f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod      case 0x05BF: // RAFE
2220f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	  switch (a) {
2230f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	  case 0x05D1: // BET
2240f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	      *ab = 0xFB4C;
2250f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	      found = true;
2260f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	      break;
2270f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	  case 0x05DB: // KAF
2280f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	      *ab = 0xFB4D;
2290f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	      found = true;
2300f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	      break;
2310f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	  case 0x05E4: // PE
2320f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	      *ab = 0xFB4E;
2330f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	      found = true;
2340f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	      break;
2350f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	  }
2360f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	  break;
2370f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod      case 0x05C1: // SHIN DOT
2380f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	  if (a == 0x05E9) { // SHIN
2390f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	      *ab = 0xFB2A;
2400f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	      found = true;
2410f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	  } else if (a == 0xFB49) { // SHIN WITH DAGESH
2420f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	      *ab = 0xFB2C;
2430f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	      found = true;
2440f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	  }
2450f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	  break;
2460f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod      case 0x05C2: // SIN DOT
2470f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	  if (a == 0x05E9) { // SHIN
2480f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	      *ab = 0xFB2B;
2490f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	      found = true;
2500f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	  } else if (a == 0xFB49) { // SHIN WITH DAGESH
2510f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	      *ab = 0xFB2D;
2520f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	      found = true;
2530f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	  }
2540f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod	  break;
2550f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod      }
2560f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod  }
2570f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod
2580f8881d6bbf6cd59938315eeff9b71cfc736aa4eBehdad Esfahbod  return found;
259428dfcab6634ff264570a0a5d715efb8048c3db5Behdad Esfahbod}
260428dfcab6634ff264570a0a5d715efb8048c3db5Behdad Esfahbod
261b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod
262b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbodstatic inline void
263b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbodset_glyph (hb_glyph_info_t &info, hb_font_t *font)
264b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod{
26507d682806349aee81f53114778ce0beb23909ed7Behdad Esfahbod  font->get_glyph (info.codepoint, 0, &info.glyph_index());
266b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod}
267b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod
2688d1eef3f32fb539de2a72804fa3834acc18daab5Behdad Esfahbodstatic inline void
269b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbodoutput_char (hb_buffer_t *buffer, hb_codepoint_t unichar, hb_codepoint_t glyph)
270c311d852080b50ffc85e80168de62abb05a6be59Behdad Esfahbod{
271b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod  buffer->cur().glyph_index() = glyph;
2728d1eef3f32fb539de2a72804fa3834acc18daab5Behdad Esfahbod  buffer->output_glyph (unichar);
27399c2695759a6af855d565f4994bbdf220570bb48Behdad Esfahbod  _hb_glyph_info_set_unicode_props (&buffer->prev(), buffer->unicode);
274c311d852080b50ffc85e80168de62abb05a6be59Behdad Esfahbod}
27545412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod
2768d1eef3f32fb539de2a72804fa3834acc18daab5Behdad Esfahbodstatic inline void
277b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbodnext_char (hb_buffer_t *buffer, hb_codepoint_t glyph)
2788d1eef3f32fb539de2a72804fa3834acc18daab5Behdad Esfahbod{
279b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod  buffer->cur().glyph_index() = glyph;
2808d1eef3f32fb539de2a72804fa3834acc18daab5Behdad Esfahbod  buffer->next_glyph ();
2818d1eef3f32fb539de2a72804fa3834acc18daab5Behdad Esfahbod}
2828d1eef3f32fb539de2a72804fa3834acc18daab5Behdad Esfahbod
2838d1eef3f32fb539de2a72804fa3834acc18daab5Behdad Esfahbodstatic inline void
2848d1eef3f32fb539de2a72804fa3834acc18daab5Behdad Esfahbodskip_char (hb_buffer_t *buffer)
2858d1eef3f32fb539de2a72804fa3834acc18daab5Behdad Esfahbod{
2868d1eef3f32fb539de2a72804fa3834acc18daab5Behdad Esfahbod  buffer->skip_glyph ();
2878d1eef3f32fb539de2a72804fa3834acc18daab5Behdad Esfahbod}
2888d1eef3f32fb539de2a72804fa3834acc18daab5Behdad Esfahbod
289f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod/* Returns 0 if didn't decompose, number of resulting characters otherwise. */
290f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbodstatic inline unsigned int
29107d682806349aee81f53114778ce0beb23909ed7Behdad Esfahboddecompose (hb_font_t *font, hb_buffer_t *buffer, bool shortest, hb_codepoint_t ab)
292655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod{
293b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod  hb_codepoint_t a, b, a_glyph, b_glyph;
29445412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod
295428dfcab6634ff264570a0a5d715efb8048c3db5Behdad Esfahbod  if (!decompose_func (buffer->unicode, ab, &a, &b) ||
296b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod      (b && !font->get_glyph (b, 0, &b_glyph)))
297f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod    return 0;
29845412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod
299b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod  bool has_a = font->get_glyph (a, 0, &a_glyph);
3004ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod  if (shortest && has_a) {
3014ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod    /* Output a and b */
302b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod    output_char (buffer, a, a_glyph);
303f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod    if (likely (b)) {
304b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod      output_char (buffer, b, b_glyph);
305f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod      return 2;
306f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod    }
307f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod    return 1;
3084ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod  }
309655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod
310f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod  unsigned int ret;
311f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod  if ((ret = decompose (font, buffer, shortest, a))) {
312f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod    if (b) {
313b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod      output_char (buffer, b, b_glyph);
314f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod      return ret + 1;
315f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod    }
316f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod    return ret;
3174ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod  }
31845412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod
3194ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod  if (has_a) {
320b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod    output_char (buffer, a, a_glyph);
321f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod    if (likely (b)) {
322b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod      output_char (buffer, b, b_glyph);
323f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod      return 2;
324f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod    }
325f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod    return 1;
32645412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod  }
32745412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod
328f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod  return 0;
3295c6f5982d78e2d7fadc2fbb8b4f3a4be9420c59aBehdad Esfahbod}
3305c6f5982d78e2d7fadc2fbb8b4f3a4be9420c59aBehdad Esfahbod
331f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod/* Returns 0 if didn't decompose, number of resulting characters otherwise. */
332f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbodstatic inline bool
33307d682806349aee81f53114778ce0beb23909ed7Behdad Esfahboddecompose_compatibility (hb_font_t *font, hb_buffer_t *buffer, hb_codepoint_t u)
334d6b9c6d20041b4f4fa11befc179aee757c41904dBehdad Esfahbod{
335378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod  unsigned int len, i;
336378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod  hb_codepoint_t decomposed[HB_UNICODE_MAX_DECOMPOSITION_LEN];
337b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod  hb_codepoint_t glyphs[HB_UNICODE_MAX_DECOMPOSITION_LEN];
338378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod
339208f70f0553d73d2908b21b9552298029482a8b9Behdad Esfahbod  len = buffer->unicode->decompose_compatibility (u, decomposed);
340378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod  if (!len)
341f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod    return 0;
342378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod
343378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod  for (i = 0; i < len; i++)
344b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod    if (!font->get_glyph (decomposed[i], 0, &glyphs[i]))
345f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod      return 0;
346378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod
347378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod  for (i = 0; i < len; i++)
348b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod    output_char (buffer, decomposed[i], glyphs[i]);
349378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod
350f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod  return len;
351d6b9c6d20041b4f4fa11befc179aee757c41904dBehdad Esfahbod}
352d6b9c6d20041b4f4fa11befc179aee757c41904dBehdad Esfahbod
353f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod/* Returns true if recomposition may be benefitial. */
354f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbodstatic inline bool
35507d682806349aee81f53114778ce0beb23909ed7Behdad Esfahboddecompose_current_character (hb_font_t *font, hb_buffer_t *buffer, bool shortest)
3565c6f5982d78e2d7fadc2fbb8b4f3a4be9420c59aBehdad Esfahbod{
3574ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod  hb_codepoint_t glyph;
358f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod  unsigned int len = 1;
3594ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod
360378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod  /* Kind of a cute waterfall here... */
3618fbfda920e0b3bb4ab7afb732826026964b79be9Behdad Esfahbod  if (shortest && font->get_glyph (buffer->cur().codepoint, 0, &glyph))
362b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod    next_char (buffer, glyph);
363f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod  else if ((len = decompose (font, buffer, shortest, buffer->cur().codepoint)))
3648d1eef3f32fb539de2a72804fa3834acc18daab5Behdad Esfahbod    skip_char (buffer);
3658fbfda920e0b3bb4ab7afb732826026964b79be9Behdad Esfahbod  else if (!shortest && font->get_glyph (buffer->cur().codepoint, 0, &glyph))
366b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod    next_char (buffer, glyph);
367f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod  else if ((len = decompose_compatibility (font, buffer, buffer->cur().codepoint)))
3688d1eef3f32fb539de2a72804fa3834acc18daab5Behdad Esfahbod    skip_char (buffer);
36907d682806349aee81f53114778ce0beb23909ed7Behdad Esfahbod  else
37007d682806349aee81f53114778ce0beb23909ed7Behdad Esfahbod    next_char (buffer, glyph); /* glyph is initialized in earlier branches. */
371f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod
372f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod  /*
373f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod   * A recomposition would only be useful if we decomposed into at least three
374f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod   * characters...
375f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod   */
376f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod  return len > 2;
377b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod}
378b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod
379b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbodstatic inline void
38007d682806349aee81f53114778ce0beb23909ed7Behdad Esfahbodhandle_variation_selector_cluster (hb_font_t *font, hb_buffer_t *buffer, unsigned int end)
381b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod{
382b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod  for (; buffer->idx < end - 1;) {
383b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod    if (unlikely (buffer->unicode->is_variation_selector (buffer->cur(+1).codepoint))) {
384b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod      /* The next two lines are some ugly lines... But work. */
385b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod      font->get_glyph (buffer->cur().codepoint, buffer->cur(+1).codepoint, &buffer->cur().glyph_index());
386b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod      buffer->replace_glyphs (2, 1, &buffer->cur().codepoint);
387b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod    } else {
388b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod      set_glyph (buffer->cur(), font);
389b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod      buffer->next_glyph ();
390b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod    }
391b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod  }
392b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod  if (likely (buffer->idx < end)) {
393b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod    set_glyph (buffer->cur(), font);
394b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod    buffer->next_glyph ();
395b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod  }
396655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod}
397655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod
398f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod/* Returns true if recomposition may be benefitial. */
399f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbodstatic inline bool
40007d682806349aee81f53114778ce0beb23909ed7Behdad Esfahboddecompose_multi_char_cluster (hb_font_t *font, hb_buffer_t *buffer, unsigned int end)
401655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod{
4025d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod  /* TODO Currently if there's a variation-selector we give-up, it's just too hard. */
40311138ccff71f442da1fcf64faa0e1d22e083e775Behdad Esfahbod  for (unsigned int i = buffer->idx; i < end; i++)
404208f70f0553d73d2908b21b9552298029482a8b9Behdad Esfahbod    if (unlikely (buffer->unicode->is_variation_selector (buffer->info[i].codepoint))) {
405b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod      handle_variation_selector_cluster (font, buffer, end);
406f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod      return false;
407af913c5788e600e36d29f44fe4e77db84cf8c442Behdad Esfahbod    }
408d6b9c6d20041b4f4fa11befc179aee757c41904dBehdad Esfahbod
40911138ccff71f442da1fcf64faa0e1d22e083e775Behdad Esfahbod  while (buffer->idx < end)
410378d279bbf692195c4654e312dae854ab3be04cfBehdad Esfahbod    decompose_current_character (font, buffer, false);
411f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod  /* We can be smarter here and only return true if there are at least two ccc!=0 marks.
412f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod   * But does not matter. */
413f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod  return true;
414f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod}
415f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod
416f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbodstatic inline bool
417f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahboddecompose_cluster (hb_font_t *font, hb_buffer_t *buffer, bool recompose, unsigned int end)
418f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod{
419f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod  if (likely (buffer->idx + 1 == end))
420f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod    return decompose_current_character (font, buffer, recompose);
421f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod  else
422f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod    return decompose_multi_char_cluster (font, buffer, end);
423655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod}
424655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod
425f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod
42645d6f29f15f1d2323bcaa2498aed23ff0c8a1567Behdad Esfahbodstatic int
42745d6f29f15f1d2323bcaa2498aed23ff0c8a1567Behdad Esfahbodcompare_combining_class (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb)
42845d6f29f15f1d2323bcaa2498aed23ff0c8a1567Behdad Esfahbod{
429d1deaa2f5bd028e8076265cba92cffa4fa2834acBehdad Esfahbod  unsigned int a = _hb_glyph_info_get_modified_combining_class (pa);
430d1deaa2f5bd028e8076265cba92cffa4fa2834acBehdad Esfahbod  unsigned int b = _hb_glyph_info_get_modified_combining_class (pb);
43145d6f29f15f1d2323bcaa2498aed23ff0c8a1567Behdad Esfahbod
43245d6f29f15f1d2323bcaa2498aed23ff0c8a1567Behdad Esfahbod  return a < b ? -1 : a == b ? 0 : +1;
43345d6f29f15f1d2323bcaa2498aed23ff0c8a1567Behdad Esfahbod}
43445d6f29f15f1d2323bcaa2498aed23ff0c8a1567Behdad Esfahbod
435f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod
43645412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbodvoid
43711138ccff71f442da1fcf64faa0e1d22e083e775Behdad Esfahbod_hb_ot_shape_normalize (hb_font_t *font, hb_buffer_t *buffer,
43811138ccff71f442da1fcf64faa0e1d22e083e775Behdad Esfahbod			hb_ot_shape_normalization_mode_t mode)
439655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod{
44011138ccff71f442da1fcf64faa0e1d22e083e775Behdad Esfahbod  bool recompose = mode != HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED;
441f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod  bool can_use_recompose = false;
44234c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod  unsigned int count;
4435d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod
444c311d852080b50ffc85e80168de62abb05a6be59Behdad Esfahbod  /* We do a fairly straightforward yet custom normalization process in three
4455389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod   * separate rounds: decompose, reorder, recompose (if desired).  Currently
4465389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod   * this makes two buffer swaps.  We can make it faster by moving the last
4475389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod   * two rounds into the inner loop for the first round, but it's more readable
4485389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod   * this way. */
4495d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod
45034c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod
4514ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod  /* First round, decompose */
4524ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod
4535389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod  buffer->clear_output ();
45434c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod  count = buffer->len;
455468e9cb25c9bc14781b7013e447d763f93bf76a3Behdad Esfahbod  for (buffer->idx = 0; buffer->idx < count;)
4565d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod  {
457655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod    unsigned int end;
458468e9cb25c9bc14781b7013e447d763f93bf76a3Behdad Esfahbod    for (end = buffer->idx + 1; end < count; end++)
45999c2695759a6af855d565f4994bbdf220570bb48Behdad Esfahbod      if (buffer->cur().cluster != buffer->info[end].cluster)
460655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod        break;
4615d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod
462f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod    can_use_recompose = decompose_cluster (font, buffer, recompose, end) || can_use_recompose;
463655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod  }
464468e9cb25c9bc14781b7013e447d763f93bf76a3Behdad Esfahbod  buffer->swap_buffers ();
4654ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod
46634c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod
467f4cb4762986a28634fa7de9b706f9d37859b881eBehdad Esfahbod  if (mode != HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_FULL && !can_use_recompose)
4684ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod    return; /* Done! */
4694ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod
47034c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod
4714ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod  /* Second round, reorder (inplace) */
4724ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod
47334c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod  count = buffer->len;
47434c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod  for (unsigned int i = 0; i < count; i++)
47534c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod  {
476d1deaa2f5bd028e8076265cba92cffa4fa2834acBehdad Esfahbod    if (_hb_glyph_info_get_modified_combining_class (&buffer->info[i]) == 0)
47734c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod      continue;
47834c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod
47934c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod    unsigned int end;
48034c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod    for (end = i + 1; end < count; end++)
481d1deaa2f5bd028e8076265cba92cffa4fa2834acBehdad Esfahbod      if (_hb_glyph_info_get_modified_combining_class (&buffer->info[end]) == 0)
48234c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod        break;
48334c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod
48434c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod    /* We are going to do a bubble-sort.  Only do this if the
48534c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod     * sequence is short.  Doing it on long sequences can result
48634c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod     * in an O(n^2) DoS. */
48734c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod    if (end - i > 10) {
48834c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod      i = end;
48934c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod      continue;
49034c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod    }
49134c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod
49245d6f29f15f1d2323bcaa2498aed23ff0c8a1567Behdad Esfahbod    hb_bubble_sort (buffer->info + i, end - i, compare_combining_class);
49334c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod
49434c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod    i = end;
49534c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod  }
49634c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod
4974ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod
4985389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod  if (!recompose)
4995389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod    return;
5005389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod
5014ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod  /* Third round, recompose */
50234c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod
5035389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod  /* As noted in the comment earlier, we don't try to combine
5045389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod   * ccc=0 chars with their previous Starter. */
5054ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod
5065389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod  buffer->clear_output ();
5075389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod  count = buffer->len;
5085389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod  unsigned int starter = 0;
509b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod  buffer->next_glyph ();
5105389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod  while (buffer->idx < count)
5115389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod  {
5125389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod    hb_codepoint_t composed, glyph;
513968318455304804dc53045e8ba0cd4d76800c02dBehdad Esfahbod    if (/* If mode is NOT COMPOSED_FULL (ie. it's COMPOSED_DIACRITICS), we don't try to
514968318455304804dc53045e8ba0cd4d76800c02dBehdad Esfahbod	 * compose a CCC=0 character with it's preceding starter. */
515968318455304804dc53045e8ba0cd4d76800c02dBehdad Esfahbod	(mode == HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_FULL ||
51699c2695759a6af855d565f4994bbdf220570bb48Behdad Esfahbod	 _hb_glyph_info_get_modified_combining_class (&buffer->cur()) != 0) &&
517968318455304804dc53045e8ba0cd4d76800c02dBehdad Esfahbod	/* If there's anything between the starter and this char, they should have CCC
518968318455304804dc53045e8ba0cd4d76800c02dBehdad Esfahbod	 * smaller than this character's. */
519968318455304804dc53045e8ba0cd4d76800c02dBehdad Esfahbod	(starter == buffer->out_len - 1 ||
52099c2695759a6af855d565f4994bbdf220570bb48Behdad Esfahbod	 _hb_glyph_info_get_modified_combining_class (&buffer->prev()) < _hb_glyph_info_get_modified_combining_class (&buffer->cur())) &&
521968318455304804dc53045e8ba0cd4d76800c02dBehdad Esfahbod	/* And compose. */
522428dfcab6634ff264570a0a5d715efb8048c3db5Behdad Esfahbod	compose_func (buffer->unicode,
523428dfcab6634ff264570a0a5d715efb8048c3db5Behdad Esfahbod		      buffer->out_info[starter].codepoint,
524428dfcab6634ff264570a0a5d715efb8048c3db5Behdad Esfahbod		      buffer->cur().codepoint,
525428dfcab6634ff264570a0a5d715efb8048c3db5Behdad Esfahbod		      &composed) &&
526968318455304804dc53045e8ba0cd4d76800c02dBehdad Esfahbod	/* And the font has glyph for the composite. */
5278fbfda920e0b3bb4ab7afb732826026964b79be9Behdad Esfahbod	font->get_glyph (composed, 0, &glyph))
5285389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod    {
529bc8357ea7b4c0d7c715aae353176434fb9460205Behdad Esfahbod      /* Composes. */
530b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod      buffer->next_glyph (); /* Copy to out-buffer. */
531bc8357ea7b4c0d7c715aae353176434fb9460205Behdad Esfahbod      if (unlikely (buffer->in_error))
532bc8357ea7b4c0d7c715aae353176434fb9460205Behdad Esfahbod        return;
533bc8357ea7b4c0d7c715aae353176434fb9460205Behdad Esfahbod      buffer->merge_out_clusters (starter, buffer->out_len);
5348d1eef3f32fb539de2a72804fa3834acc18daab5Behdad Esfahbod      buffer->out_len--; /* Remove the second composable. */
535bc8357ea7b4c0d7c715aae353176434fb9460205Behdad Esfahbod      buffer->out_info[starter].codepoint = composed; /* Modify starter and carry on. */
536b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod      set_glyph (buffer->out_info[starter], font);
537d1deaa2f5bd028e8076265cba92cffa4fa2834acBehdad Esfahbod      _hb_glyph_info_set_unicode_props (&buffer->out_info[starter], buffer->unicode);
538e02d9257863b49e33ab5942971266349d3c548f6Behdad Esfahbod
5395389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod      continue;
5405389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod    }
5415389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod
542e02d9257863b49e33ab5942971266349d3c548f6Behdad Esfahbod    /* Blocked, or doesn't compose. */
543b00321ea78793d9b3592b5173a9800e6322424feBehdad Esfahbod    buffer->next_glyph ();
544968318455304804dc53045e8ba0cd4d76800c02dBehdad Esfahbod
54599c2695759a6af855d565f4994bbdf220570bb48Behdad Esfahbod    if (_hb_glyph_info_get_modified_combining_class (&buffer->prev()) == 0)
546968318455304804dc53045e8ba0cd4d76800c02dBehdad Esfahbod      starter = buffer->out_len - 1;
5474ff0d2d9dfc4f7e4880a4e964ca9872624508ea0Behdad Esfahbod  }
5485389ff4dbc46c76c9483e3c95f22524b60e21166Behdad Esfahbod  buffer->swap_buffers ();
54934c22f816808d061a980cffca12de03beb437fa0Behdad Esfahbod
550655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod}
551