hb-ot-shape-normalize.cc revision 45412523dc295cb5ee12e096bfacb282cc925843
1655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod/*
2655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * Copyright © 2011  Google, Inc.
3655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod *
4655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod *  This is part of HarfBuzz, a text shaping library.
5655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod *
6655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * Permission is hereby granted, without written agreement and without
7655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * license or royalty fees, to use, copy, modify, and distribute this
8655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * software and its documentation for any purpose, provided that the
9655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * above copyright notice and the following two paragraphs appear in
10655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * all copies of this software.
11655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod *
12655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * DAMAGE.
17655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod *
18655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
21655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod *
24655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod * Google Author(s): Behdad Esfahbod
25655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod */
26655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod
27655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod#include "hb-ot-shape-private.hh"
285d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod#include "hb-ot-shape-complex-private.hh"
29655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod
30655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad EsfahbodHB_BEGIN_DECLS
31655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod
/*
 * HIGHLEVEL DESIGN:
 *
 * This file exports one main function: _hb_ot_shape_normalize().
 *
 * This function closely reflects the Unicode Normalization Algorithm,
 * yet it's different.  The shaper can either prefer decomposed (NFD) or
 * composed (NFC).
 *
 * In general what happens is that: each grapheme is decomposed in a chain
 * of 1:2 decompositions, marks reordered, and then recomposed if desired;
 * so far it's like Unicode Normalization.  However, the decomposition and
 * recomposition only happens if the font supports the resulting characters.
 *
 * The goals are:
 *
 *   - Try to render all canonically equivalent strings similarly.  To really
 *     achieve this we have to always do the full decomposition and then
 *     selectively recompose from there.  It's kinda too expensive though, so
 *     we skip some cases.  For example, if composed is desired, we simply
 *     don't touch 1-character clusters that are supported by the font, even
 *     though their NFC may be different.
 *
 *   - When a font has a precomposed character for a sequence but the 'ccmp'
 *     feature in the font is not adequate, use the precomposed character
 *     which typically has better mark positioning.
 *
 *   - When a font does not support a character but supports its decomposition,
 *     well, use the decomposition.
 *
 *   - The Indic shaper requests decomposed output.  This will handle splitting
 *     matra for the Indic shaper.
 */
655d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod
6645412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod
675c6f5982d78e2d7fadc2fbb8b4f3a4be9420c59aBehdad Esfahbodstatic bool
6845412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahboddecompose (hb_ot_shape_context_t *c,
6945412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod	   bool recompose,
7045412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod	   hb_codepoint_t ab)
71655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod{
7245412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod  hb_codepoint_t a, b, glyph;
7345412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod  bool has_this = hb_font_get_glyph (c->font, ab, 0, &glyph);
7445412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod
7545412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod  /* If recomposing and the single char is supported by the font, we're good. */
7645412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod  if (recompose && has_this)
7745412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod    return TRUE;
7845412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod
7945412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod  if (hb_unicode_decompose (c->buffer->unicode, ab, &a, &b) &&
8045412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod      hb_font_get_glyph (c->font, b, 0, &glyph) &&
8145412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod      decompose (c, recompose, a))
8245412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod  {
8345412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod    /* Successfully decomposed. */
8445412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod
8545412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod    if (recompose) {
8645412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod      /* Try composing b with base if not blocked */
87655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod
8845412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod    }
8945412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod
9045412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod    return TRUE;
9145412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod  }
9245412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod
9345412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod  return has_this;
945c6f5982d78e2d7fadc2fbb8b4f3a4be9420c59aBehdad Esfahbod}
955c6f5982d78e2d7fadc2fbb8b4f3a4be9420c59aBehdad Esfahbod
965c6f5982d78e2d7fadc2fbb8b4f3a4be9420c59aBehdad Esfahbodstatic bool
97d6b9c6d20041b4f4fa11befc179aee757c41904dBehdad Esfahboddecompose_single_char_cluster (hb_ot_shape_context_t *c,
985d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod			       bool recompose,
99d6b9c6d20041b4f4fa11befc179aee757c41904dBehdad Esfahbod			       unsigned int i)
100d6b9c6d20041b4f4fa11befc179aee757c41904dBehdad Esfahbod{
10145412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod//  c->buffer->copy ();
10245412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod//  bool ret = decompose (c, recompose, c->buffer->info[i].codepoint);
10345412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod//  c->buffer->skip ();
10445412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod//  return ret;
105d6b9c6d20041b4f4fa11befc179aee757c41904dBehdad Esfahbod  return FALSE;
106d6b9c6d20041b4f4fa11befc179aee757c41904dBehdad Esfahbod}
107d6b9c6d20041b4f4fa11befc179aee757c41904dBehdad Esfahbod
10845412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbodstatic void
1095c6f5982d78e2d7fadc2fbb8b4f3a4be9420c59aBehdad Esfahbodhandle_single_char_cluster (hb_ot_shape_context_t *c,
1105d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod			    bool recompose,
1115c6f5982d78e2d7fadc2fbb8b4f3a4be9420c59aBehdad Esfahbod			    unsigned int i)
1125c6f5982d78e2d7fadc2fbb8b4f3a4be9420c59aBehdad Esfahbod{
113655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod  /* Decompose */
11445412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod  decompose_single_char_cluster (c, recompose, i);
115655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod}
116655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod
11745412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbodstatic void
118655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbodhandle_multi_char_cluster (hb_ot_shape_context_t *c,
1195d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod			   bool recompose,
120d6b9c6d20041b4f4fa11befc179aee757c41904dBehdad Esfahbod			   unsigned int start,
121655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod			   unsigned int end)
122655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod{
1235d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod  /* TODO Currently if there's a variation-selector we give-up, it's just too hard. */
124d6b9c6d20041b4f4fa11befc179aee757c41904dBehdad Esfahbod  for (unsigned int i = start; i < end; i++)
125d6b9c6d20041b4f4fa11befc179aee757c41904dBehdad Esfahbod    if (unlikely (is_variation_selector (c->buffer->info[i].codepoint)))
12645412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod      return;
127d6b9c6d20041b4f4fa11befc179aee757c41904dBehdad Esfahbod
128655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod}
129655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod
13045412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbodvoid
1315d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod_hb_ot_shape_normalize (hb_ot_shape_context_t *c)
132655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod{
1335d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod  hb_buffer_t *buffer = c->buffer;
1345d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod  bool recompose = !hb_ot_shape_complex_prefer_decomposed (c->plan->shaper);
1355d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod
1365d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod  buffer->clear_output ();
1375d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod
1385d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod  unsigned int count = buffer->len;
1395d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod  for (buffer->i = 0; buffer->i < count;)
1405d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod  {
1415c6f5982d78e2d7fadc2fbb8b4f3a4be9420c59aBehdad Esfahbod
142655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod    unsigned int end;
1435d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod    for (end = buffer->i + 1; end < count; end++)
1445d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod      if (buffer->info[buffer->i].cluster != buffer->info[end].cluster)
145655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod        break;
1465d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod
1475d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod    if (buffer->i + 1 == end)
14845412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod      handle_single_char_cluster (c, recompose, buffer->i);
149655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod    else
15045412523dc295cb5ee12e096bfacb282cc925843Behdad Esfahbod      handle_multi_char_cluster (c, recompose, buffer->i, end);
1515d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod    while (buffer->i < end)
1525d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod      c->buffer->next_glyph ();
153655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod  }
1545c6f5982d78e2d7fadc2fbb8b4f3a4be9420c59aBehdad Esfahbod
1555d90a342e319068716429bf7af76c3896b61a0e5Behdad Esfahbod  buffer->swap ();
156655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod}
157655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad Esfahbod
158655586fe5e1fadf2a2ef7826e61ee9a445ffa37aBehdad EsfahbodHB_END_DECLS
159