/*
 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
 *
 * This is part of HarfBuzz, an OpenType Layout engine library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 */
245569331642446be05292e3e1f8a51218827168cdclaireho
255569331642446be05292e3e1f8a51218827168cdclaireho#include "harfbuzz-shaper.h"
265569331642446be05292e3e1f8a51218827168cdclaireho#include "harfbuzz-shaper-private.h"
275569331642446be05292e3e1f8a51218827168cdclaireho
285569331642446be05292e3e1f8a51218827168cdclaireho#include <assert.h>
295569331642446be05292e3e1f8a51218827168cdclaireho#include <stdio.h>
305569331642446be05292e3e1f8a51218827168cdclaireho
/*
//  Vocabulary
//      Base ->         A consonant or an independent vowel in its full (not subscript) form. It is the
//                      center of the syllable; it can be surrounded by coeng (subscript) consonants, vowels,
//                      split vowels, signs... but there is only one base in a syllable, and it has to be
//                      coded as the first character of the syllable.
//      split vowel --> A vowel that has two parts placed separately (e.g. before and after the consonant).
//                      The Khmer language has five of them. Khmer split vowels either have one part before
//                      the base and one after the base, or a part before the base and a part above the base.
//                      The first part of all Khmer split vowels is the same character, identical to
//                      the glyph of Khmer dependent vowel SRA EI.
//      coeng -->  Modifier used in Khmer to construct coeng (subscript) consonants.
//                 Unlike Indian languages, the coeng modifies the consonant that follows it,
//                 not the one preceding it. Each consonant has two forms, the base form and the subscript
//                 form; the base form is the normal one (using the consonant's code point), the subscript
//                 form is displayed when the combination coeng + consonant is encountered.
//      Consonant of type 1 -> A consonant whose subscript form only occupies space under a base consonant.
//      Consonant of type 2 -> Its subscript form occupies space under and before the base (only one, RO).
//      Consonant of type 3 -> Its subscript form occupies space under and after the base
//                             (KHO, CHHO, THHO, BA, YO, SA).
//      Consonant shifter -> Khmer has two series of consonants. The same dependent vowel has different sounds
//                           if it is attached to a consonant of the first series or a consonant of the second
//                           series. Most consonants have an equivalent in the other series, but some of them
//                           exist only in one series (for example SA). If we want to use the consonant SA with
//                           a vowel sound that corresponds to a vowel accompanying a consonant of the other
//                           series, then we need to use a consonant shifter: TRIISAP or MUSIKATOAN
//                           (x17C9 and x17CA). TRIISAP changes a first series consonant to a second series
//                           sound and MUSIKATOAN a second series consonant to have a first series vowel sound.
//                           Consonant shifters are both normally superscript marks, but, when they are
//                           followed by a superscript, they change shape and take the form of the subscript
//                           dependent vowel SRA U. If they are in the same syllable as a coeng consonant,
//                           Unicode 3.0 says that they should be typed before the coeng. Unicode 4.0 breaks
//                           the standard and says that it should be placed after the coeng consonant.
//      Dependent vowel ->   In Khmer, dependent vowels can be placed above, below, before or after the base.
//                           Each vowel has its own position. Only one vowel per syllable is allowed.
//      Signs            ->  Khmer has above signs and post signs. Only one above sign and/or one post sign
//                           are allowed in a syllable.
//
//
//   Order is important here! The order of these values must be the same as the order of the columns
//   in each horizontal line of the state table for Khmer (see khmerStateTable).
*/
enum KhmerCharClassValues {
    CC_RESERVED             =  0, /* Character that does not combine into a Khmer syllable */
    CC_CONSONANT            =  1, /* Consonant of type 1 or independent vowel */
    CC_CONSONANT2           =  2, /* Consonant of type 2 */
    CC_CONSONANT3           =  3, /* Consonant of type 3 */
    CC_ZERO_WIDTH_NJ_MARK   =  4, /* Zero width non joiner character (0x200C) */
    CC_CONSONANT_SHIFTER    =  5, /* Consonant (register) shifter */
    CC_ROBAT                =  6, /* Khmer special diacritic accent - treated differently in state table */
    CC_COENG                =  7, /* Subscript consonant combining character */
    CC_DEPENDENT_VOWEL      =  8, /* Dependent vowel (split vowels included) */
    CC_SIGN_ABOVE           =  9, /* Sign placed above the base */
    CC_SIGN_AFTER           = 10, /* Sign placed after the base */
    CC_ZERO_WIDTH_J_MARK    = 11, /* Zero width joiner character */
    CC_COUNT                = 12  /* This is the number of character classes (state table width) */
};
875569331642446be05292e3e1f8a51218827168cdclaireho
885569331642446be05292e3e1f8a51218827168cdclaireho
/*
//  Flag bits that are ORed on top of a KhmerCharClassValues value to form a
//  complete character class. The low 16 bits (CF_CLASS_MASK) hold the class
//  value, bits 16-19 (CF_POS_MASK) hold the position relative to the base and
//  the bits from 0x01000000 upwards are boolean shortcuts.
*/
enum KhmerCharClassFlags {
    CF_CLASS_MASK    = 0x0000FFFF,  /* extracts the KhmerCharClassValues part of a class */

    CF_CONSONANT     = 0x01000000,  /* flag to speed up comparing */
    CF_SPLIT_VOWEL   = 0x02000000,  /* flag for a split vowel -> the first part is added in front of the syllable */
    CF_DOTTED_CIRCLE = 0x04000000,  /* add a dotted circle if a character with this flag is the first in a syllable */
    CF_COENG         = 0x08000000,  /* flag to speed up comparing */
    CF_SHIFTER       = 0x10000000,  /* flag to speed up comparing */
    CF_ABOVE_VOWEL   = 0x20000000,  /* flag to speed up comparing */

    /* position flags */
    CF_POS_BEFORE    = 0x00080000,
    CF_POS_BELOW     = 0x00040000,
    CF_POS_ABOVE     = 0x00020000,
    CF_POS_AFTER     = 0x00010000,
    CF_POS_MASK      = 0x000f0000   /* union of the four position flags */
};
1065569331642446be05292e3e1f8a51218827168cdclaireho
1075569331642446be05292e3e1f8a51218827168cdclaireho
/* Characters that get referred to by name (values are UTF-16 code units) */
enum KhmerChar {
    C_SIGN_ZWNJ     = 0x200C,   /* zero width non joiner */
    C_SIGN_ZWJ      = 0x200D,   /* zero width joiner */
    C_RO            = 0x179A,   /* the only consonant of type 2 */
    C_VOWEL_AA      = 0x17B6,
    C_SIGN_NIKAHIT  = 0x17C6,
    C_VOWEL_E       = 0x17C1,
    C_COENG         = 0x17D2    /* subscript consonant combining character */
};
1185569331642446be05292e3e1f8a51218827168cdclaireho
1195569331642446be05292e3e1f8a51218827168cdclaireho
1205569331642446be05292e3e1f8a51218827168cdclaireho/*
1215569331642446be05292e3e1f8a51218827168cdclaireho//  simple classes, they are used in the statetable (in this file) to control the length of a syllable
1225569331642446be05292e3e1f8a51218827168cdclaireho//  they are also used to know where a character should be placed (location in reference to the base character)
1235569331642446be05292e3e1f8a51218827168cdclaireho//  and also to know if a character, when independently displayed, should be displayed with a dotted-circle to
1245569331642446be05292e3e1f8a51218827168cdclaireho//  indicate error in syllable construction
1255569331642446be05292e3e1f8a51218827168cdclaireho*/
1265569331642446be05292e3e1f8a51218827168cdclairehoenum {
1275569331642446be05292e3e1f8a51218827168cdclaireho    _xx = CC_RESERVED,
1285569331642446be05292e3e1f8a51218827168cdclaireho    _sa = CC_SIGN_ABOVE | CF_DOTTED_CIRCLE | CF_POS_ABOVE,
1295569331642446be05292e3e1f8a51218827168cdclaireho    _sp = CC_SIGN_AFTER | CF_DOTTED_CIRCLE| CF_POS_AFTER,
1305569331642446be05292e3e1f8a51218827168cdclaireho    _c1 = CC_CONSONANT | CF_CONSONANT,
1315569331642446be05292e3e1f8a51218827168cdclaireho    _c2 = CC_CONSONANT2 | CF_CONSONANT,
1325569331642446be05292e3e1f8a51218827168cdclaireho    _c3 = CC_CONSONANT3 | CF_CONSONANT,
1335569331642446be05292e3e1f8a51218827168cdclaireho    _rb = CC_ROBAT | CF_POS_ABOVE | CF_DOTTED_CIRCLE,
1345569331642446be05292e3e1f8a51218827168cdclaireho    _cs = CC_CONSONANT_SHIFTER | CF_DOTTED_CIRCLE | CF_SHIFTER,
1355569331642446be05292e3e1f8a51218827168cdclaireho    _dl = CC_DEPENDENT_VOWEL | CF_POS_BEFORE | CF_DOTTED_CIRCLE,
1365569331642446be05292e3e1f8a51218827168cdclaireho    _db = CC_DEPENDENT_VOWEL | CF_POS_BELOW | CF_DOTTED_CIRCLE,
1375569331642446be05292e3e1f8a51218827168cdclaireho    _da = CC_DEPENDENT_VOWEL | CF_POS_ABOVE | CF_DOTTED_CIRCLE | CF_ABOVE_VOWEL,
1385569331642446be05292e3e1f8a51218827168cdclaireho    _dr = CC_DEPENDENT_VOWEL | CF_POS_AFTER | CF_DOTTED_CIRCLE,
1395569331642446be05292e3e1f8a51218827168cdclaireho    _co = CC_COENG | CF_COENG | CF_DOTTED_CIRCLE,
1405569331642446be05292e3e1f8a51218827168cdclaireho
1415569331642446be05292e3e1f8a51218827168cdclaireho    /* split vowel */
1425569331642446be05292e3e1f8a51218827168cdclaireho    _va = _da | CF_SPLIT_VOWEL,
1435569331642446be05292e3e1f8a51218827168cdclaireho    _vr = _dr | CF_SPLIT_VOWEL
1445569331642446be05292e3e1f8a51218827168cdclaireho};
1455569331642446be05292e3e1f8a51218827168cdclaireho
1465569331642446be05292e3e1f8a51218827168cdclaireho
/*
//   Character class: a character class value (KhmerCharClassValues)
//   ORed with character class flags (KhmerCharClassFlags).
*/
typedef unsigned long KhmerCharClass;
1525569331642446be05292e3e1f8a51218827168cdclaireho
1535569331642446be05292e3e1f8a51218827168cdclaireho
1545569331642446be05292e3e1f8a51218827168cdclaireho/*
1555569331642446be05292e3e1f8a51218827168cdclaireho//  Character class tables
1565569331642446be05292e3e1f8a51218827168cdclaireho//  _xx character does not combine into syllable, such as numbers, puntuation marks, non-Khmer signs...
1575569331642446be05292e3e1f8a51218827168cdclaireho//  _sa Sign placed above the base
1585569331642446be05292e3e1f8a51218827168cdclaireho//  _sp Sign placed after the base
1595569331642446be05292e3e1f8a51218827168cdclaireho//  _c1 Consonant of type 1 or independent vowel (independent vowels behave as type 1 consonants)
1605569331642446be05292e3e1f8a51218827168cdclaireho//  _c2 Consonant of type 2 (only RO)
1615569331642446be05292e3e1f8a51218827168cdclaireho//  _c3 Consonant of type 3
1625569331642446be05292e3e1f8a51218827168cdclaireho//  _rb Khmer sign robat u17CC. combining mark for subscript consonants
1635569331642446be05292e3e1f8a51218827168cdclaireho//  _cd Consonant-shifter
1645569331642446be05292e3e1f8a51218827168cdclaireho//  _dl Dependent vowel placed before the base (left of the base)
1655569331642446be05292e3e1f8a51218827168cdclaireho//  _db Dependent vowel placed below the base
1665569331642446be05292e3e1f8a51218827168cdclaireho//  _da Dependent vowel placed above the base
1675569331642446be05292e3e1f8a51218827168cdclaireho//  _dr Dependent vowel placed behind the base (right of the base)
1685569331642446be05292e3e1f8a51218827168cdclaireho//  _co Khmer combining mark COENG u17D2, combines with the consonant or independent vowel following
1695569331642446be05292e3e1f8a51218827168cdclaireho//      it to create a subscript consonant or independent vowel
1705569331642446be05292e3e1f8a51218827168cdclaireho//  _va Khmer split vowel in which the first part is before the base and the second one above the base
1715569331642446be05292e3e1f8a51218827168cdclaireho//  _vr Khmer split vowel in which the first part is before the base and the second one behind (right of) the base
1725569331642446be05292e3e1f8a51218827168cdclaireho*/
1735569331642446be05292e3e1f8a51218827168cdclairehostatic const KhmerCharClass khmerCharClasses[] = {
1745569331642446be05292e3e1f8a51218827168cdclaireho    _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _c1, /* 1780 - 178F */
1755569331642446be05292e3e1f8a51218827168cdclaireho    _c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c2, _c1, _c1, _c1, _c3, _c3, /* 1790 - 179F */
1765569331642446be05292e3e1f8a51218827168cdclaireho    _c1, _c3, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, /* 17A0 - 17AF */
1775569331642446be05292e3e1f8a51218827168cdclaireho    _c1, _c1, _c1, _c1, _dr, _dr, _dr, _da, _da, _da, _da, _db, _db, _db, _va, _vr, /* 17B0 - 17BF */
1785569331642446be05292e3e1f8a51218827168cdclaireho    _vr, _dl, _dl, _dl, _vr, _vr, _sa, _sp, _sp, _cs, _cs, _sa, _rb, _sa, _sa, _sa, /* 17C0 - 17CF */
1795569331642446be05292e3e1f8a51218827168cdclaireho    _sa, _sa, _co, _sa, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _sa, _xx, _xx  /* 17D0 - 17DF */
1805569331642446be05292e3e1f8a51218827168cdclaireho};
1815569331642446be05292e3e1f8a51218827168cdclaireho
/*
//  This enum must reflect the range of khmerCharClasses: the first and the
//  last code point (both inclusive) that have an entry in that table.
*/
enum KhmerCharClassesRange {
    KhmerFirstChar = 0x1780,
    KhmerLastChar  = 0x17df
};
1875569331642446be05292e3e1f8a51218827168cdclaireho
1885569331642446be05292e3e1f8a51218827168cdclaireho/*
1895569331642446be05292e3e1f8a51218827168cdclaireho//  Below we define how a character in the input string is either in the khmerCharClasses table
1905569331642446be05292e3e1f8a51218827168cdclaireho//  (in which case we get its type back), a ZWJ or ZWNJ (two characters that may appear
1915569331642446be05292e3e1f8a51218827168cdclaireho//  within the syllable, but are not in the table) we also get their type back, or an unknown object
1925569331642446be05292e3e1f8a51218827168cdclaireho//  in which case we get _xx (CC_RESERVED) back
1935569331642446be05292e3e1f8a51218827168cdclaireho*/
1945569331642446be05292e3e1f8a51218827168cdclairehostatic KhmerCharClass getKhmerCharClass(HB_UChar16 uc)
1955569331642446be05292e3e1f8a51218827168cdclaireho{
1965569331642446be05292e3e1f8a51218827168cdclaireho    if (uc == C_SIGN_ZWJ) {
1975569331642446be05292e3e1f8a51218827168cdclaireho        return CC_ZERO_WIDTH_J_MARK;
1985569331642446be05292e3e1f8a51218827168cdclaireho    }
1995569331642446be05292e3e1f8a51218827168cdclaireho
2005569331642446be05292e3e1f8a51218827168cdclaireho    if (uc == C_SIGN_ZWNJ) {
2015569331642446be05292e3e1f8a51218827168cdclaireho        return CC_ZERO_WIDTH_NJ_MARK;
2025569331642446be05292e3e1f8a51218827168cdclaireho    }
2035569331642446be05292e3e1f8a51218827168cdclaireho
2045569331642446be05292e3e1f8a51218827168cdclaireho    if (uc < KhmerFirstChar || uc > KhmerLastChar) {
2055569331642446be05292e3e1f8a51218827168cdclaireho        return CC_RESERVED;
2065569331642446be05292e3e1f8a51218827168cdclaireho    }
2075569331642446be05292e3e1f8a51218827168cdclaireho
2085569331642446be05292e3e1f8a51218827168cdclaireho    return khmerCharClasses[uc - KhmerFirstChar];
2095569331642446be05292e3e1f8a51218827168cdclaireho}
2105569331642446be05292e3e1f8a51218827168cdclaireho
2115569331642446be05292e3e1f8a51218827168cdclaireho
2125569331642446be05292e3e1f8a51218827168cdclaireho/*
2135569331642446be05292e3e1f8a51218827168cdclaireho//  The stateTable is used to calculate the end (the length) of a well
2145569331642446be05292e3e1f8a51218827168cdclaireho//  formed Khmer Syllable.
2155569331642446be05292e3e1f8a51218827168cdclaireho//
2165569331642446be05292e3e1f8a51218827168cdclaireho//  Each horizontal line is ordered exactly the same way as the values in KhmerClassTable
2175569331642446be05292e3e1f8a51218827168cdclaireho//  CharClassValues. This coincidence of values allows the follow up of the table.
2185569331642446be05292e3e1f8a51218827168cdclaireho//
2195569331642446be05292e3e1f8a51218827168cdclaireho//  Each line corresponds to a state, which does not necessarily need to be a type
2205569331642446be05292e3e1f8a51218827168cdclaireho//  of component... for example, state 2 is a base, with is always a first character
2215569331642446be05292e3e1f8a51218827168cdclaireho//  in the syllable, but the state could be produced a consonant of any type when
2225569331642446be05292e3e1f8a51218827168cdclaireho//  it is the first character that is analysed (in ground state).
2235569331642446be05292e3e1f8a51218827168cdclaireho//
2245569331642446be05292e3e1f8a51218827168cdclaireho//  Differentiating 3 types of consonants is necessary in order to
2255569331642446be05292e3e1f8a51218827168cdclaireho//  forbid the use of certain combinations, such as having a second
2265569331642446be05292e3e1f8a51218827168cdclaireho//  coeng after a coeng RO,
2275569331642446be05292e3e1f8a51218827168cdclaireho//  The inexistent possibility of having a type 3 after another type 3 is permitted,
2285569331642446be05292e3e1f8a51218827168cdclaireho//  eliminating it would very much complicate the table, and it does not create typing
2295569331642446be05292e3e1f8a51218827168cdclaireho//  problems, as the case above.
2305569331642446be05292e3e1f8a51218827168cdclaireho//
2315569331642446be05292e3e1f8a51218827168cdclaireho//  The table is quite complex, in order to limit the number of coeng consonants
2325569331642446be05292e3e1f8a51218827168cdclaireho//  to 2 (by means of the table).
2335569331642446be05292e3e1f8a51218827168cdclaireho//
2345569331642446be05292e3e1f8a51218827168cdclaireho//  There a peculiarity, as far as Unicode is concerned:
2355569331642446be05292e3e1f8a51218827168cdclaireho//  - The consonant-shifter is considered in two possible different
2365569331642446be05292e3e1f8a51218827168cdclaireho//    locations, the one considered in Unicode 3.0 and the one considered in
2375569331642446be05292e3e1f8a51218827168cdclaireho//    Unicode 4.0. (there is a backwards compatibility problem in this standard).
2385569331642446be05292e3e1f8a51218827168cdclaireho//
2395569331642446be05292e3e1f8a51218827168cdclaireho//
2405569331642446be05292e3e1f8a51218827168cdclaireho//  xx    independent character, such as a number, punctuation sign or non-khmer char
2415569331642446be05292e3e1f8a51218827168cdclaireho//
2425569331642446be05292e3e1f8a51218827168cdclaireho//  c1    Khmer consonant of type 1 or an independent vowel
2435569331642446be05292e3e1f8a51218827168cdclaireho//        that is, a letter in which the subscript for is only under the
2445569331642446be05292e3e1f8a51218827168cdclaireho//        base, not taking any space to the right or to the left
2455569331642446be05292e3e1f8a51218827168cdclaireho//
2465569331642446be05292e3e1f8a51218827168cdclaireho//  c2    Khmer consonant of type 2, the coeng form takes space under
2475569331642446be05292e3e1f8a51218827168cdclaireho//        and to the left of the base (only RO is of this type)
2485569331642446be05292e3e1f8a51218827168cdclaireho//
2495569331642446be05292e3e1f8a51218827168cdclaireho//  c3    Khmer consonant of type 3. Its subscript form takes space under
2505569331642446be05292e3e1f8a51218827168cdclaireho//        and to the right of the base.
2515569331642446be05292e3e1f8a51218827168cdclaireho//
2525569331642446be05292e3e1f8a51218827168cdclaireho//  cs    Khmer consonant shifter
2535569331642446be05292e3e1f8a51218827168cdclaireho//
2545569331642446be05292e3e1f8a51218827168cdclaireho//  rb    Khmer robat
2555569331642446be05292e3e1f8a51218827168cdclaireho//
2565569331642446be05292e3e1f8a51218827168cdclaireho//  co    coeng character (u17D2)
2575569331642446be05292e3e1f8a51218827168cdclaireho//
2585569331642446be05292e3e1f8a51218827168cdclaireho//  dv    dependent vowel (including split vowels, they are treated in the same way).
2595569331642446be05292e3e1f8a51218827168cdclaireho//        even if dv is not defined above, the component that is really tested for is
2605569331642446be05292e3e1f8a51218827168cdclaireho//        KhmerClassTable::CC_DEPENDENT_VOWEL, which is common to all dependent vowels
2615569331642446be05292e3e1f8a51218827168cdclaireho//
2625569331642446be05292e3e1f8a51218827168cdclaireho//  zwj   Zero Width joiner
2635569331642446be05292e3e1f8a51218827168cdclaireho//
2645569331642446be05292e3e1f8a51218827168cdclaireho//  zwnj  Zero width non joiner
2655569331642446be05292e3e1f8a51218827168cdclaireho//
2665569331642446be05292e3e1f8a51218827168cdclaireho//  sa    above sign
2675569331642446be05292e3e1f8a51218827168cdclaireho//
2685569331642446be05292e3e1f8a51218827168cdclaireho//  sp    post sign
2695569331642446be05292e3e1f8a51218827168cdclaireho//
2705569331642446be05292e3e1f8a51218827168cdclaireho//  there are lines with equal content but for an easier understanding
2715569331642446be05292e3e1f8a51218827168cdclaireho//  (and maybe change in the future) we did not join them
2725569331642446be05292e3e1f8a51218827168cdclaireho*/
2735569331642446be05292e3e1f8a51218827168cdclairehostatic const signed char khmerStateTable[][CC_COUNT] =
2745569331642446be05292e3e1f8a51218827168cdclaireho{
2755569331642446be05292e3e1f8a51218827168cdclaireho    /* xx  c1  c2  c3 zwnj cs  rb  co  dv  sa  sp zwj */
2765569331642446be05292e3e1f8a51218827168cdclaireho    { 1,  2,  2,  2,  1,  1,  1,  6,  1,  1,  1,  2}, /*  0 - ground state */
2775569331642446be05292e3e1f8a51218827168cdclaireho    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, /*  1 - exit state (or sign to the right of the syllable) */
2785569331642446be05292e3e1f8a51218827168cdclaireho    {-1, -1, -1, -1,  3,  4,  5,  6, 16, 17,  1, -1}, /*  2 - Base consonant */
2795569331642446be05292e3e1f8a51218827168cdclaireho    {-1, -1, -1, -1, -1,  4, -1, -1, 16, -1, -1, -1}, /*  3 - First ZWNJ before a register shifter It can only be followed by a shifter or a vowel */
2805569331642446be05292e3e1f8a51218827168cdclaireho    {-1, -1, -1, -1, 15, -1, -1,  6, 16, 17,  1, 14}, /*  4 - First register shifter */
2815569331642446be05292e3e1f8a51218827168cdclaireho    {-1, -1, -1, -1, -1, -1, -1, -1, 20, -1,  1, -1}, /*  5 - Robat */
2825569331642446be05292e3e1f8a51218827168cdclaireho    {-1,  7,  8,  9, -1, -1, -1, -1, -1, -1, -1, -1}, /*  6 - First Coeng */
2835569331642446be05292e3e1f8a51218827168cdclaireho    {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17,  1, 14}, /*  7 - First consonant of type 1 after coeng */
2845569331642446be05292e3e1f8a51218827168cdclaireho    {-1, -1, -1, -1, 12, 13, -1, -1, 16, 17,  1, 14}, /*  8 - First consonant of type 2 after coeng */
2855569331642446be05292e3e1f8a51218827168cdclaireho    {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17,  1, 14}, /*  9 - First consonant or type 3 after ceong */
2865569331642446be05292e3e1f8a51218827168cdclaireho    {-1, 11, 11, 11, -1, -1, -1, -1, -1, -1, -1, -1}, /* 10 - Second Coeng (no register shifter before) */
2875569331642446be05292e3e1f8a51218827168cdclaireho    {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17,  1, 14}, /* 11 - Second coeng consonant (or ind. vowel) no register shifter before */
2885569331642446be05292e3e1f8a51218827168cdclaireho    {-1, -1, -1, -1, -1, 13, -1, -1, 16, -1, -1, -1}, /* 12 - Second ZWNJ before a register shifter */
2895569331642446be05292e3e1f8a51218827168cdclaireho    {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17,  1, 14}, /* 13 - Second register shifter */
2905569331642446be05292e3e1f8a51218827168cdclaireho    {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1}, /* 14 - ZWJ before vowel */
2915569331642446be05292e3e1f8a51218827168cdclaireho    {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1}, /* 15 - ZWNJ before vowel */
2925569331642446be05292e3e1f8a51218827168cdclaireho    {-1, -1, -1, -1, -1, -1, -1, -1, -1, 17,  1, 18}, /* 16 - dependent vowel */
2935569331642446be05292e3e1f8a51218827168cdclaireho    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1, 18}, /* 17 - sign above */
2945569331642446be05292e3e1f8a51218827168cdclaireho    {-1, -1, -1, -1, -1, -1, -1, 19, -1, -1, -1, -1}, /* 18 - ZWJ after vowel */
2955569331642446be05292e3e1f8a51218827168cdclaireho    {-1,  1, -1,  1, -1, -1, -1, -1, -1, -1, -1, -1}, /* 19 - Third coeng */
2965569331642446be05292e3e1f8a51218827168cdclaireho    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1, -1}, /* 20 - dependent vowel after a Robat */
2975569331642446be05292e3e1f8a51218827168cdclaireho};
2985569331642446be05292e3e1f8a51218827168cdclaireho
2995569331642446be05292e3e1f8a51218827168cdclaireho
/*
//  Debug tracing. KHDEBUG is used like printf: KHDEBUG("fmt", args...);
//
//  When KHMER_DEBUG is not defined the call is compiled but never executed
//  (its arguments are not evaluated). The disabled form is written as
//  "if (1) {} else printf" rather than "if (0) printf" so that the hidden
//  'if' already owns an 'else' and cannot capture the 'else' of an enclosing
//  if/else statement written around a KHDEBUG call.
//
//  NOTE(review): qDebug is not declared anywhere in this file; presumably the
//  build supplies it when KHMER_DEBUG is enabled - confirm before enabling.
*/
/*  #define KHMER_DEBUG */
#ifdef KHMER_DEBUG
#define KHDEBUG qDebug
#else
#define KHDEBUG if (1) {} else printf
#endif
3065569331642446be05292e3e1f8a51218827168cdclaireho
3075569331642446be05292e3e1f8a51218827168cdclaireho/*
3085569331642446be05292e3e1f8a51218827168cdclaireho//  Given an input string of characters and a location in which to start looking
3095569331642446be05292e3e1f8a51218827168cdclaireho//  calculate, using the state table, which one is the last character of the syllable
3105569331642446be05292e3e1f8a51218827168cdclaireho//  that starts in the starting position.
3115569331642446be05292e3e1f8a51218827168cdclaireho*/
3125569331642446be05292e3e1f8a51218827168cdclairehostatic int khmer_nextSyllableBoundary(const HB_UChar16 *s, int start, int end, HB_Bool *invalid)
3135569331642446be05292e3e1f8a51218827168cdclaireho{
3145569331642446be05292e3e1f8a51218827168cdclaireho    const HB_UChar16 *uc = s + start;
3155569331642446be05292e3e1f8a51218827168cdclaireho    int state = 0;
3165569331642446be05292e3e1f8a51218827168cdclaireho    int pos = start;
3175569331642446be05292e3e1f8a51218827168cdclaireho    *invalid = FALSE;
3185569331642446be05292e3e1f8a51218827168cdclaireho
3195569331642446be05292e3e1f8a51218827168cdclaireho    while (pos < end) {
3205569331642446be05292e3e1f8a51218827168cdclaireho        KhmerCharClass charClass = getKhmerCharClass(*uc);
3215569331642446be05292e3e1f8a51218827168cdclaireho        if (pos == start) {
3225569331642446be05292e3e1f8a51218827168cdclaireho            *invalid = (charClass > 0) && ! (charClass & CF_CONSONANT);
3235569331642446be05292e3e1f8a51218827168cdclaireho        }
3245569331642446be05292e3e1f8a51218827168cdclaireho        state = khmerStateTable[state][charClass & CF_CLASS_MASK];
3255569331642446be05292e3e1f8a51218827168cdclaireho
3265569331642446be05292e3e1f8a51218827168cdclaireho        KHDEBUG("state[%d]=%d class=%8lx (uc=%4x)", pos - start, state,
3275569331642446be05292e3e1f8a51218827168cdclaireho                charClass, *uc );
3285569331642446be05292e3e1f8a51218827168cdclaireho
3295569331642446be05292e3e1f8a51218827168cdclaireho        if (state < 0) {
3305569331642446be05292e3e1f8a51218827168cdclaireho            break;
3315569331642446be05292e3e1f8a51218827168cdclaireho        }
3325569331642446be05292e3e1f8a51218827168cdclaireho        ++uc;
3335569331642446be05292e3e1f8a51218827168cdclaireho        ++pos;
3345569331642446be05292e3e1f8a51218827168cdclaireho    }
3355569331642446be05292e3e1f8a51218827168cdclaireho    return pos;
3365569331642446be05292e3e1f8a51218827168cdclaireho}
3375569331642446be05292e3e1f8a51218827168cdclaireho
3385569331642446be05292e3e1f8a51218827168cdclaireho#ifndef NO_OPENTYPE
3395569331642446be05292e3e1f8a51218827168cdclairehostatic const HB_OpenTypeFeature khmer_features[] = {
3405569331642446be05292e3e1f8a51218827168cdclaireho    { HB_MAKE_TAG( 'p', 'r', 'e', 'f' ), PreFormProperty },
3415569331642446be05292e3e1f8a51218827168cdclaireho    { HB_MAKE_TAG( 'b', 'l', 'w', 'f' ), BelowFormProperty },
3425569331642446be05292e3e1f8a51218827168cdclaireho    { HB_MAKE_TAG( 'a', 'b', 'v', 'f' ), AboveFormProperty },
3435569331642446be05292e3e1f8a51218827168cdclaireho    { HB_MAKE_TAG( 'p', 's', 't', 'f' ), PostFormProperty },
3445569331642446be05292e3e1f8a51218827168cdclaireho    { HB_MAKE_TAG( 'p', 'r', 'e', 's' ), PreSubstProperty },
3455569331642446be05292e3e1f8a51218827168cdclaireho    { HB_MAKE_TAG( 'b', 'l', 'w', 's' ), BelowSubstProperty },
3465569331642446be05292e3e1f8a51218827168cdclaireho    { HB_MAKE_TAG( 'a', 'b', 'v', 's' ), AboveSubstProperty },
3475569331642446be05292e3e1f8a51218827168cdclaireho    { HB_MAKE_TAG( 'p', 's', 't', 's' ), PostSubstProperty },
3485569331642446be05292e3e1f8a51218827168cdclaireho    { HB_MAKE_TAG( 'c', 'l', 'i', 'g' ), CligProperty },
3495569331642446be05292e3e1f8a51218827168cdclaireho    { 0, 0 }
3505569331642446be05292e3e1f8a51218827168cdclaireho};
3515569331642446be05292e3e1f8a51218827168cdclaireho#endif
3525569331642446be05292e3e1f8a51218827168cdclaireho
3535569331642446be05292e3e1f8a51218827168cdclaireho
3545569331642446be05292e3e1f8a51218827168cdclairehostatic HB_Bool khmer_shape_syllable(HB_Bool openType, HB_ShaperItem *item)
3555569331642446be05292e3e1f8a51218827168cdclaireho{
3565569331642446be05292e3e1f8a51218827168cdclaireho/*    KHDEBUG("syllable from %d len %d, str='%s'", item->from, item->length,
3575569331642446be05292e3e1f8a51218827168cdclaireho  	    item->string->mid(item->from, item->length).toUtf8().data()); */
3585569331642446be05292e3e1f8a51218827168cdclaireho
3595569331642446be05292e3e1f8a51218827168cdclaireho    int len = 0;
3605569331642446be05292e3e1f8a51218827168cdclaireho    int syllableEnd = item->item.pos + item->item.length;
3615569331642446be05292e3e1f8a51218827168cdclaireho    unsigned short reordered[16];
3625569331642446be05292e3e1f8a51218827168cdclaireho    unsigned char properties[16];
3635569331642446be05292e3e1f8a51218827168cdclaireho    enum {
3645569331642446be05292e3e1f8a51218827168cdclaireho	AboveForm = 0x01,
3655569331642446be05292e3e1f8a51218827168cdclaireho	PreForm = 0x02,
3665569331642446be05292e3e1f8a51218827168cdclaireho	PostForm = 0x04,
3675569331642446be05292e3e1f8a51218827168cdclaireho	BelowForm = 0x08
3685569331642446be05292e3e1f8a51218827168cdclaireho    };
3695569331642446be05292e3e1f8a51218827168cdclaireho#ifndef NO_OPENTYPE
3705569331642446be05292e3e1f8a51218827168cdclaireho    const int availableGlyphs = item->num_glyphs;
3715569331642446be05292e3e1f8a51218827168cdclaireho#endif
3725569331642446be05292e3e1f8a51218827168cdclaireho    int coengRo;
3735569331642446be05292e3e1f8a51218827168cdclaireho    int i;
3745569331642446be05292e3e1f8a51218827168cdclaireho
3755569331642446be05292e3e1f8a51218827168cdclaireho    /* according to the specs this is the max length one can get
3765569331642446be05292e3e1f8a51218827168cdclaireho       ### the real value should be smaller */
3775569331642446be05292e3e1f8a51218827168cdclaireho    assert(item->item.length < 13);
3785569331642446be05292e3e1f8a51218827168cdclaireho
3795569331642446be05292e3e1f8a51218827168cdclaireho    memset(properties, 0, 16*sizeof(unsigned char));
3805569331642446be05292e3e1f8a51218827168cdclaireho
3815569331642446be05292e3e1f8a51218827168cdclaireho#ifdef KHMER_DEBUG
3825569331642446be05292e3e1f8a51218827168cdclaireho    qDebug("original:");
3835569331642446be05292e3e1f8a51218827168cdclaireho    for (int i = from; i < syllableEnd; i++) {
3845569331642446be05292e3e1f8a51218827168cdclaireho        qDebug("    %d: %4x", i, string[i]);
3855569331642446be05292e3e1f8a51218827168cdclaireho    }
3865569331642446be05292e3e1f8a51218827168cdclaireho#endif
3875569331642446be05292e3e1f8a51218827168cdclaireho
3885569331642446be05292e3e1f8a51218827168cdclaireho    /*
3895569331642446be05292e3e1f8a51218827168cdclaireho    // write a pre vowel or the pre part of a split vowel first
3905569331642446be05292e3e1f8a51218827168cdclaireho    // and look out for coeng + ro. RO is the only vowel of type 2, and
3915569331642446be05292e3e1f8a51218827168cdclaireho    // therefore the only one that requires saving space before the base.
3925569331642446be05292e3e1f8a51218827168cdclaireho    */
3935569331642446be05292e3e1f8a51218827168cdclaireho    coengRo = -1;  /* There is no Coeng Ro, if found this value will change */
3945569331642446be05292e3e1f8a51218827168cdclaireho    for (i = item->item.pos; i < syllableEnd; i += 1) {
3955569331642446be05292e3e1f8a51218827168cdclaireho        KhmerCharClass charClass = getKhmerCharClass(item->string[i]);
3965569331642446be05292e3e1f8a51218827168cdclaireho
3975569331642446be05292e3e1f8a51218827168cdclaireho        /* if a split vowel, write the pre part. In Khmer the pre part
3985569331642446be05292e3e1f8a51218827168cdclaireho           is the same for all split vowels, same glyph as pre vowel C_VOWEL_E */
3995569331642446be05292e3e1f8a51218827168cdclaireho        if (charClass & CF_SPLIT_VOWEL) {
4005569331642446be05292e3e1f8a51218827168cdclaireho            reordered[len] = C_VOWEL_E;
4015569331642446be05292e3e1f8a51218827168cdclaireho            properties[len] = PreForm;
4025569331642446be05292e3e1f8a51218827168cdclaireho            ++len;
4035569331642446be05292e3e1f8a51218827168cdclaireho            break; /* there can be only one vowel */
4045569331642446be05292e3e1f8a51218827168cdclaireho        }
4055569331642446be05292e3e1f8a51218827168cdclaireho        /* if a vowel with pos before write it out */
4065569331642446be05292e3e1f8a51218827168cdclaireho        if (charClass & CF_POS_BEFORE) {
4075569331642446be05292e3e1f8a51218827168cdclaireho            reordered[len] = item->string[i];
4085569331642446be05292e3e1f8a51218827168cdclaireho            properties[len] = PreForm;
4095569331642446be05292e3e1f8a51218827168cdclaireho            ++len;
4105569331642446be05292e3e1f8a51218827168cdclaireho            break; /* there can be only one vowel */
4115569331642446be05292e3e1f8a51218827168cdclaireho        }
4125569331642446be05292e3e1f8a51218827168cdclaireho        /* look for coeng + ro and remember position
4135569331642446be05292e3e1f8a51218827168cdclaireho           works because coeng + ro is always in front of a vowel (if there is a vowel)
4145569331642446be05292e3e1f8a51218827168cdclaireho           and because CC_CONSONANT2 is enough to identify it, as it is the only consonant
4155569331642446be05292e3e1f8a51218827168cdclaireho           with this flag */
4165569331642446be05292e3e1f8a51218827168cdclaireho        if ( (charClass & CF_COENG) && (i + 1 < syllableEnd) &&
4175569331642446be05292e3e1f8a51218827168cdclaireho              ( (getKhmerCharClass(item->string[i+1]) & CF_CLASS_MASK) == CC_CONSONANT2) ) {
4185569331642446be05292e3e1f8a51218827168cdclaireho            coengRo = i;
4195569331642446be05292e3e1f8a51218827168cdclaireho        }
4205569331642446be05292e3e1f8a51218827168cdclaireho    }
4215569331642446be05292e3e1f8a51218827168cdclaireho
4225569331642446be05292e3e1f8a51218827168cdclaireho    /* write coeng + ro if found */
4235569331642446be05292e3e1f8a51218827168cdclaireho    if (coengRo > -1) {
4245569331642446be05292e3e1f8a51218827168cdclaireho        reordered[len] = C_COENG;
4255569331642446be05292e3e1f8a51218827168cdclaireho        properties[len] = PreForm;
4265569331642446be05292e3e1f8a51218827168cdclaireho        ++len;
4275569331642446be05292e3e1f8a51218827168cdclaireho        reordered[len] = C_RO;
4285569331642446be05292e3e1f8a51218827168cdclaireho        properties[len] = PreForm;
4295569331642446be05292e3e1f8a51218827168cdclaireho        ++len;
4305569331642446be05292e3e1f8a51218827168cdclaireho    }
4315569331642446be05292e3e1f8a51218827168cdclaireho
4325569331642446be05292e3e1f8a51218827168cdclaireho    /*
4335569331642446be05292e3e1f8a51218827168cdclaireho       shall we add a dotted circle?
4345569331642446be05292e3e1f8a51218827168cdclaireho       If in the position in which the base should be (first char in the string) there is
4355569331642446be05292e3e1f8a51218827168cdclaireho       a character that has the Dotted circle flag (a character that cannot be a base)
4365569331642446be05292e3e1f8a51218827168cdclaireho       then write a dotted circle */
4375569331642446be05292e3e1f8a51218827168cdclaireho    if (getKhmerCharClass(item->string[item->item.pos]) & CF_DOTTED_CIRCLE) {
4385569331642446be05292e3e1f8a51218827168cdclaireho        reordered[len] = C_DOTTED_CIRCLE;
4395569331642446be05292e3e1f8a51218827168cdclaireho        ++len;
4405569331642446be05292e3e1f8a51218827168cdclaireho    }
4415569331642446be05292e3e1f8a51218827168cdclaireho
4425569331642446be05292e3e1f8a51218827168cdclaireho    /* copy what is left to the output, skipping before vowels and
4435569331642446be05292e3e1f8a51218827168cdclaireho       coeng Ro if they are present */
4445569331642446be05292e3e1f8a51218827168cdclaireho    for (i = item->item.pos; i < syllableEnd; i += 1) {
4455569331642446be05292e3e1f8a51218827168cdclaireho        HB_UChar16 uc = item->string[i];
4465569331642446be05292e3e1f8a51218827168cdclaireho        KhmerCharClass charClass = getKhmerCharClass(uc);
4475569331642446be05292e3e1f8a51218827168cdclaireho
4485569331642446be05292e3e1f8a51218827168cdclaireho        /* skip a before vowel, it was already processed */
4495569331642446be05292e3e1f8a51218827168cdclaireho        if (charClass & CF_POS_BEFORE) {
4505569331642446be05292e3e1f8a51218827168cdclaireho            continue;
4515569331642446be05292e3e1f8a51218827168cdclaireho        }
4525569331642446be05292e3e1f8a51218827168cdclaireho
4535569331642446be05292e3e1f8a51218827168cdclaireho        /* skip coeng + ro, it was already processed */
4545569331642446be05292e3e1f8a51218827168cdclaireho        if (i == coengRo) {
4555569331642446be05292e3e1f8a51218827168cdclaireho            i += 1;
4565569331642446be05292e3e1f8a51218827168cdclaireho            continue;
4575569331642446be05292e3e1f8a51218827168cdclaireho        }
4585569331642446be05292e3e1f8a51218827168cdclaireho
4595569331642446be05292e3e1f8a51218827168cdclaireho        switch (charClass & CF_POS_MASK)
4605569331642446be05292e3e1f8a51218827168cdclaireho        {
4615569331642446be05292e3e1f8a51218827168cdclaireho            case CF_POS_ABOVE :
4625569331642446be05292e3e1f8a51218827168cdclaireho                reordered[len] = uc;
4635569331642446be05292e3e1f8a51218827168cdclaireho                properties[len] = AboveForm;
4645569331642446be05292e3e1f8a51218827168cdclaireho                ++len;
4655569331642446be05292e3e1f8a51218827168cdclaireho                break;
4665569331642446be05292e3e1f8a51218827168cdclaireho
4675569331642446be05292e3e1f8a51218827168cdclaireho            case CF_POS_AFTER :
4685569331642446be05292e3e1f8a51218827168cdclaireho                reordered[len] = uc;
4695569331642446be05292e3e1f8a51218827168cdclaireho                properties[len] = PostForm;
4705569331642446be05292e3e1f8a51218827168cdclaireho                ++len;
4715569331642446be05292e3e1f8a51218827168cdclaireho                break;
4725569331642446be05292e3e1f8a51218827168cdclaireho
4735569331642446be05292e3e1f8a51218827168cdclaireho            case CF_POS_BELOW :
4745569331642446be05292e3e1f8a51218827168cdclaireho                reordered[len] = uc;
4755569331642446be05292e3e1f8a51218827168cdclaireho                properties[len] = BelowForm;
4765569331642446be05292e3e1f8a51218827168cdclaireho                ++len;
4775569331642446be05292e3e1f8a51218827168cdclaireho                break;
4785569331642446be05292e3e1f8a51218827168cdclaireho
4795569331642446be05292e3e1f8a51218827168cdclaireho            default:
4805569331642446be05292e3e1f8a51218827168cdclaireho                /* assign the correct flags to a coeng consonant
4815569331642446be05292e3e1f8a51218827168cdclaireho                   Consonants of type 3 are taged as Post forms and those type 1 as below forms */
4825569331642446be05292e3e1f8a51218827168cdclaireho                if ( (charClass & CF_COENG) && i + 1 < syllableEnd ) {
4835569331642446be05292e3e1f8a51218827168cdclaireho                    unsigned char property = (getKhmerCharClass(item->string[i+1]) & CF_CLASS_MASK) == CC_CONSONANT3 ?
4845569331642446be05292e3e1f8a51218827168cdclaireho                                              PostForm : BelowForm;
4855569331642446be05292e3e1f8a51218827168cdclaireho                    reordered[len] = uc;
4865569331642446be05292e3e1f8a51218827168cdclaireho                    properties[len] = property;
4875569331642446be05292e3e1f8a51218827168cdclaireho                    ++len;
4885569331642446be05292e3e1f8a51218827168cdclaireho                    i += 1;
4895569331642446be05292e3e1f8a51218827168cdclaireho                    reordered[len] = item->string[i];
4905569331642446be05292e3e1f8a51218827168cdclaireho                    properties[len] = property;
4915569331642446be05292e3e1f8a51218827168cdclaireho                    ++len;
4925569331642446be05292e3e1f8a51218827168cdclaireho                    break;
4935569331642446be05292e3e1f8a51218827168cdclaireho                }
4945569331642446be05292e3e1f8a51218827168cdclaireho
4955569331642446be05292e3e1f8a51218827168cdclaireho                /* if a shifter is followed by an above vowel change the shifter to below form,
4965569331642446be05292e3e1f8a51218827168cdclaireho                   an above vowel can have two possible positions i + 1 or i + 3
4975569331642446be05292e3e1f8a51218827168cdclaireho                   (position i+1 corresponds to unicode 3, position i+3 to Unicode 4)
4985569331642446be05292e3e1f8a51218827168cdclaireho                   and there is an extra rule for C_VOWEL_AA + C_SIGN_NIKAHIT also for two
4995569331642446be05292e3e1f8a51218827168cdclaireho                   different positions, right after the shifter or after a vowel (Unicode 4) */
5005569331642446be05292e3e1f8a51218827168cdclaireho                if ( (charClass & CF_SHIFTER) && (i + 1 < syllableEnd) ) {
5015569331642446be05292e3e1f8a51218827168cdclaireho                    if (getKhmerCharClass(item->string[i+1]) & CF_ABOVE_VOWEL ) {
5025569331642446be05292e3e1f8a51218827168cdclaireho                        reordered[len] = uc;
5035569331642446be05292e3e1f8a51218827168cdclaireho                        properties[len] = BelowForm;
5045569331642446be05292e3e1f8a51218827168cdclaireho                        ++len;
5055569331642446be05292e3e1f8a51218827168cdclaireho                        break;
5065569331642446be05292e3e1f8a51218827168cdclaireho                    }
5075569331642446be05292e3e1f8a51218827168cdclaireho                    if (i + 2 < syllableEnd &&
5085569331642446be05292e3e1f8a51218827168cdclaireho                        (item->string[i+1] == C_VOWEL_AA) &&
5095569331642446be05292e3e1f8a51218827168cdclaireho                        (item->string[i+2] == C_SIGN_NIKAHIT) )
5105569331642446be05292e3e1f8a51218827168cdclaireho                    {
5115569331642446be05292e3e1f8a51218827168cdclaireho                        reordered[len] = uc;
5125569331642446be05292e3e1f8a51218827168cdclaireho                        properties[len] = BelowForm;
5135569331642446be05292e3e1f8a51218827168cdclaireho                        ++len;
5145569331642446be05292e3e1f8a51218827168cdclaireho                        break;
5155569331642446be05292e3e1f8a51218827168cdclaireho                    }
5165569331642446be05292e3e1f8a51218827168cdclaireho                    if (i + 3 < syllableEnd && (getKhmerCharClass(item->string[i+3]) & CF_ABOVE_VOWEL) ) {
5175569331642446be05292e3e1f8a51218827168cdclaireho                        reordered[len] = uc;
5185569331642446be05292e3e1f8a51218827168cdclaireho                        properties[len] = BelowForm;
5195569331642446be05292e3e1f8a51218827168cdclaireho                        ++len;
5205569331642446be05292e3e1f8a51218827168cdclaireho                        break;
5215569331642446be05292e3e1f8a51218827168cdclaireho                    }
5225569331642446be05292e3e1f8a51218827168cdclaireho                    if (i + 4 < syllableEnd &&
5235569331642446be05292e3e1f8a51218827168cdclaireho                        (item->string[i+3] == C_VOWEL_AA) &&
5245569331642446be05292e3e1f8a51218827168cdclaireho                        (item->string[i+4] == C_SIGN_NIKAHIT) )
5255569331642446be05292e3e1f8a51218827168cdclaireho                    {
5265569331642446be05292e3e1f8a51218827168cdclaireho                        reordered[len] = uc;
5275569331642446be05292e3e1f8a51218827168cdclaireho                        properties[len] = BelowForm;
5285569331642446be05292e3e1f8a51218827168cdclaireho                        ++len;
5295569331642446be05292e3e1f8a51218827168cdclaireho                        break;
5305569331642446be05292e3e1f8a51218827168cdclaireho                    }
5315569331642446be05292e3e1f8a51218827168cdclaireho                }
5325569331642446be05292e3e1f8a51218827168cdclaireho
5335569331642446be05292e3e1f8a51218827168cdclaireho                /* default - any other characters */
5345569331642446be05292e3e1f8a51218827168cdclaireho                reordered[len] = uc;
5355569331642446be05292e3e1f8a51218827168cdclaireho                ++len;
5365569331642446be05292e3e1f8a51218827168cdclaireho                break;
5375569331642446be05292e3e1f8a51218827168cdclaireho        } /* switch */
5385569331642446be05292e3e1f8a51218827168cdclaireho    } /* for */
5395569331642446be05292e3e1f8a51218827168cdclaireho
5405569331642446be05292e3e1f8a51218827168cdclaireho    if (!item->font->klass->convertStringToGlyphIndices(item->font,
5415569331642446be05292e3e1f8a51218827168cdclaireho                                                        reordered, len,
5425569331642446be05292e3e1f8a51218827168cdclaireho                                                        item->glyphs, &item->num_glyphs,
5435569331642446be05292e3e1f8a51218827168cdclaireho                                                        item->item.bidiLevel % 2))
5445569331642446be05292e3e1f8a51218827168cdclaireho        return FALSE;
5455569331642446be05292e3e1f8a51218827168cdclaireho
5465569331642446be05292e3e1f8a51218827168cdclaireho
5475569331642446be05292e3e1f8a51218827168cdclaireho    KHDEBUG("after shaping: len=%d", len);
5485569331642446be05292e3e1f8a51218827168cdclaireho    for (i = 0; i < len; i++) {
5495569331642446be05292e3e1f8a51218827168cdclaireho	item->attributes[i].mark = FALSE;
5505569331642446be05292e3e1f8a51218827168cdclaireho	item->attributes[i].clusterStart = FALSE;
5515569331642446be05292e3e1f8a51218827168cdclaireho	item->attributes[i].justification = 0;
5525569331642446be05292e3e1f8a51218827168cdclaireho	item->attributes[i].zeroWidth = FALSE;
5535569331642446be05292e3e1f8a51218827168cdclaireho	KHDEBUG("    %d: %4x property=%x", i, reordered[i], properties[i]);
5545569331642446be05292e3e1f8a51218827168cdclaireho    }
5555569331642446be05292e3e1f8a51218827168cdclaireho
5565569331642446be05292e3e1f8a51218827168cdclaireho    /* now we have the syllable in the right order, and can start running it through open type. */
5575569331642446be05292e3e1f8a51218827168cdclaireho
5585569331642446be05292e3e1f8a51218827168cdclaireho#ifndef NO_OPENTYPE
5595569331642446be05292e3e1f8a51218827168cdclaireho    if (openType) {
5605569331642446be05292e3e1f8a51218827168cdclaireho 	hb_uint32 where[16];
5615569331642446be05292e3e1f8a51218827168cdclaireho        for (i = 0; i < len; ++i) {
5625569331642446be05292e3e1f8a51218827168cdclaireho            where[i] = ~(PreSubstProperty
5635569331642446be05292e3e1f8a51218827168cdclaireho                         | BelowSubstProperty
5645569331642446be05292e3e1f8a51218827168cdclaireho                         | AboveSubstProperty
5655569331642446be05292e3e1f8a51218827168cdclaireho                         | PostSubstProperty
5665569331642446be05292e3e1f8a51218827168cdclaireho                         | CligProperty
5675569331642446be05292e3e1f8a51218827168cdclaireho                         | PositioningProperties);
5685569331642446be05292e3e1f8a51218827168cdclaireho            if (properties[i] == PreForm)
5695569331642446be05292e3e1f8a51218827168cdclaireho                where[i] &= ~PreFormProperty;
5705569331642446be05292e3e1f8a51218827168cdclaireho            else if (properties[i] == BelowForm)
5715569331642446be05292e3e1f8a51218827168cdclaireho                where[i] &= ~BelowFormProperty;
5725569331642446be05292e3e1f8a51218827168cdclaireho            else if (properties[i] == AboveForm)
5735569331642446be05292e3e1f8a51218827168cdclaireho                where[i] &= ~AboveFormProperty;
5745569331642446be05292e3e1f8a51218827168cdclaireho            else if (properties[i] == PostForm)
5755569331642446be05292e3e1f8a51218827168cdclaireho                where[i] &= ~PostFormProperty;
5765569331642446be05292e3e1f8a51218827168cdclaireho        }
5775569331642446be05292e3e1f8a51218827168cdclaireho
5785569331642446be05292e3e1f8a51218827168cdclaireho        HB_OpenTypeShape(item, where);
5795569331642446be05292e3e1f8a51218827168cdclaireho        if (!HB_OpenTypePosition(item, availableGlyphs, /*doLogClusters*/FALSE))
5805569331642446be05292e3e1f8a51218827168cdclaireho            return FALSE;
5815569331642446be05292e3e1f8a51218827168cdclaireho    } else
5825569331642446be05292e3e1f8a51218827168cdclaireho#endif
5835569331642446be05292e3e1f8a51218827168cdclaireho    {
5845569331642446be05292e3e1f8a51218827168cdclaireho	KHDEBUG("Not using openType");
5855569331642446be05292e3e1f8a51218827168cdclaireho        HB_HeuristicPosition(item);
5865569331642446be05292e3e1f8a51218827168cdclaireho    }
5875569331642446be05292e3e1f8a51218827168cdclaireho
5885569331642446be05292e3e1f8a51218827168cdclaireho    item->attributes[0].clusterStart = TRUE;
5895569331642446be05292e3e1f8a51218827168cdclaireho    return TRUE;
5905569331642446be05292e3e1f8a51218827168cdclaireho}
5915569331642446be05292e3e1f8a51218827168cdclaireho
5925569331642446be05292e3e1f8a51218827168cdclairehoHB_Bool HB_KhmerShape(HB_ShaperItem *item)
5935569331642446be05292e3e1f8a51218827168cdclaireho{
5945569331642446be05292e3e1f8a51218827168cdclaireho    HB_Bool openType = FALSE;
5955569331642446be05292e3e1f8a51218827168cdclaireho    unsigned short *logClusters = item->log_clusters;
5965569331642446be05292e3e1f8a51218827168cdclaireho    int i;
5975569331642446be05292e3e1f8a51218827168cdclaireho
5985569331642446be05292e3e1f8a51218827168cdclaireho    HB_ShaperItem syllable = *item;
5995569331642446be05292e3e1f8a51218827168cdclaireho    int first_glyph = 0;
6005569331642446be05292e3e1f8a51218827168cdclaireho
6015569331642446be05292e3e1f8a51218827168cdclaireho    int sstart = item->item.pos;
6025569331642446be05292e3e1f8a51218827168cdclaireho    int end = sstart + item->item.length;
6035569331642446be05292e3e1f8a51218827168cdclaireho
6045569331642446be05292e3e1f8a51218827168cdclaireho    assert(item->item.script == HB_Script_Khmer);
6055569331642446be05292e3e1f8a51218827168cdclaireho
6065569331642446be05292e3e1f8a51218827168cdclaireho#ifndef NO_OPENTYPE
6075569331642446be05292e3e1f8a51218827168cdclaireho    openType = HB_SelectScript(item, khmer_features);
6085569331642446be05292e3e1f8a51218827168cdclaireho#endif
6095569331642446be05292e3e1f8a51218827168cdclaireho
6105569331642446be05292e3e1f8a51218827168cdclaireho    KHDEBUG("khmer_shape: from %d length %d", item->item.pos, item->item.length);
6115569331642446be05292e3e1f8a51218827168cdclaireho    while (sstart < end) {
6125569331642446be05292e3e1f8a51218827168cdclaireho        HB_Bool invalid;
6135569331642446be05292e3e1f8a51218827168cdclaireho        int send = khmer_nextSyllableBoundary(item->string, sstart, end, &invalid);
6145569331642446be05292e3e1f8a51218827168cdclaireho        KHDEBUG("syllable from %d, length %d, invalid=%s", sstart, send-sstart,
6155569331642446be05292e3e1f8a51218827168cdclaireho               invalid ? "TRUE" : "FALSE");
6165569331642446be05292e3e1f8a51218827168cdclaireho        syllable.item.pos = sstart;
6175569331642446be05292e3e1f8a51218827168cdclaireho        syllable.item.length = send-sstart;
6185569331642446be05292e3e1f8a51218827168cdclaireho        syllable.glyphs = item->glyphs + first_glyph;
6195569331642446be05292e3e1f8a51218827168cdclaireho        syllable.attributes = item->attributes + first_glyph;
6205569331642446be05292e3e1f8a51218827168cdclaireho        syllable.offsets = item->offsets + first_glyph;
6215569331642446be05292e3e1f8a51218827168cdclaireho        syllable.advances = item->advances + first_glyph;
6225569331642446be05292e3e1f8a51218827168cdclaireho        syllable.num_glyphs = item->num_glyphs - first_glyph;
6235569331642446be05292e3e1f8a51218827168cdclaireho        if (!khmer_shape_syllable(openType, &syllable)) {
6245569331642446be05292e3e1f8a51218827168cdclaireho            KHDEBUG("syllable shaping failed, syllable requests %d glyphs", syllable.num_glyphs);
6255569331642446be05292e3e1f8a51218827168cdclaireho            item->num_glyphs += syllable.num_glyphs;
6265569331642446be05292e3e1f8a51218827168cdclaireho            return FALSE;
6275569331642446be05292e3e1f8a51218827168cdclaireho        }
6285569331642446be05292e3e1f8a51218827168cdclaireho        /* fix logcluster array */
6295569331642446be05292e3e1f8a51218827168cdclaireho        KHDEBUG("syllable:");
6305569331642446be05292e3e1f8a51218827168cdclaireho        for (i = first_glyph; i < first_glyph + (int)syllable.num_glyphs; ++i)
6315569331642446be05292e3e1f8a51218827168cdclaireho            KHDEBUG("        %d -> glyph %x", i, item->glyphs[i]);
6325569331642446be05292e3e1f8a51218827168cdclaireho        KHDEBUG("    logclusters:");
6335569331642446be05292e3e1f8a51218827168cdclaireho        for (i = sstart; i < send; ++i) {
6345569331642446be05292e3e1f8a51218827168cdclaireho            KHDEBUG("        %d -> glyph %d", i, first_glyph);
6355569331642446be05292e3e1f8a51218827168cdclaireho            logClusters[i-item->item.pos] = first_glyph;
6365569331642446be05292e3e1f8a51218827168cdclaireho        }
6375569331642446be05292e3e1f8a51218827168cdclaireho        sstart = send;
6385569331642446be05292e3e1f8a51218827168cdclaireho        first_glyph += syllable.num_glyphs;
6395569331642446be05292e3e1f8a51218827168cdclaireho    }
6405569331642446be05292e3e1f8a51218827168cdclaireho    item->num_glyphs = first_glyph;
6415569331642446be05292e3e1f8a51218827168cdclaireho    return TRUE;
6425569331642446be05292e3e1f8a51218827168cdclaireho}
6435569331642446be05292e3e1f8a51218827168cdclaireho
6445569331642446be05292e3e1f8a51218827168cdclairehovoid HB_KhmerAttributes(HB_Script script, const HB_UChar16 *text, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes)
6455569331642446be05292e3e1f8a51218827168cdclaireho{
6465569331642446be05292e3e1f8a51218827168cdclaireho    int end = from + len;
6475569331642446be05292e3e1f8a51218827168cdclaireho    const HB_UChar16 *uc = text + from;
6485569331642446be05292e3e1f8a51218827168cdclaireho    hb_uint32 i = 0;
6495569331642446be05292e3e1f8a51218827168cdclaireho    HB_UNUSED(script);
6505569331642446be05292e3e1f8a51218827168cdclaireho    attributes += from;
6515569331642446be05292e3e1f8a51218827168cdclaireho    while ( i < len ) {
6525569331642446be05292e3e1f8a51218827168cdclaireho	HB_Bool invalid;
6535569331642446be05292e3e1f8a51218827168cdclaireho	hb_uint32 boundary = khmer_nextSyllableBoundary( text, from+i, end, &invalid ) - from;
6545569331642446be05292e3e1f8a51218827168cdclaireho
6555569331642446be05292e3e1f8a51218827168cdclaireho	attributes[i].charStop = TRUE;
6565569331642446be05292e3e1f8a51218827168cdclaireho
6575569331642446be05292e3e1f8a51218827168cdclaireho	if ( boundary > len-1 ) boundary = len;
6585569331642446be05292e3e1f8a51218827168cdclaireho	i++;
6595569331642446be05292e3e1f8a51218827168cdclaireho	while ( i < boundary ) {
6605569331642446be05292e3e1f8a51218827168cdclaireho	    attributes[i].charStop = FALSE;
6615569331642446be05292e3e1f8a51218827168cdclaireho	    ++uc;
6625569331642446be05292e3e1f8a51218827168cdclaireho	    ++i;
6635569331642446be05292e3e1f8a51218827168cdclaireho	}
6645569331642446be05292e3e1f8a51218827168cdclaireho	assert( i == boundary );
6655569331642446be05292e3e1f8a51218827168cdclaireho    }
6665569331642446be05292e3e1f8a51218827168cdclaireho}
6675569331642446be05292e3e1f8a51218827168cdclaireho
668