15569331642446be05292e3e1f8a51218827168cdclaireho/* 25569331642446be05292e3e1f8a51218827168cdclaireho * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies) 35569331642446be05292e3e1f8a51218827168cdclaireho * 45569331642446be05292e3e1f8a51218827168cdclaireho * This is part of HarfBuzz, an OpenType Layout engine library. 55569331642446be05292e3e1f8a51218827168cdclaireho * 65569331642446be05292e3e1f8a51218827168cdclaireho * Permission is hereby granted, without written agreement and without 75569331642446be05292e3e1f8a51218827168cdclaireho * license or royalty fees, to use, copy, modify, and distribute this 85569331642446be05292e3e1f8a51218827168cdclaireho * software and its documentation for any purpose, provided that the 95569331642446be05292e3e1f8a51218827168cdclaireho * above copyright notice and the following two paragraphs appear in 105569331642446be05292e3e1f8a51218827168cdclaireho * all copies of this software. 115569331642446be05292e3e1f8a51218827168cdclaireho * 125569331642446be05292e3e1f8a51218827168cdclaireho * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 135569331642446be05292e3e1f8a51218827168cdclaireho * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 145569331642446be05292e3e1f8a51218827168cdclaireho * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 155569331642446be05292e3e1f8a51218827168cdclaireho * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 165569331642446be05292e3e1f8a51218827168cdclaireho * DAMAGE. 175569331642446be05292e3e1f8a51218827168cdclaireho * 185569331642446be05292e3e1f8a51218827168cdclaireho * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 195569331642446be05292e3e1f8a51218827168cdclaireho * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 205569331642446be05292e3e1f8a51218827168cdclaireho * FITNESS FOR A PARTICULAR PURPOSE. 
THE SOFTWARE PROVIDED HEREUNDER IS 215569331642446be05292e3e1f8a51218827168cdclaireho * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 225569331642446be05292e3e1f8a51218827168cdclaireho * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 235569331642446be05292e3e1f8a51218827168cdclaireho */ 245569331642446be05292e3e1f8a51218827168cdclaireho 255569331642446be05292e3e1f8a51218827168cdclaireho#include "harfbuzz-shaper.h" 265569331642446be05292e3e1f8a51218827168cdclaireho#include "harfbuzz-shaper-private.h" 275569331642446be05292e3e1f8a51218827168cdclaireho 285569331642446be05292e3e1f8a51218827168cdclaireho#include <assert.h> 295569331642446be05292e3e1f8a51218827168cdclaireho#include <stdio.h> 305569331642446be05292e3e1f8a51218827168cdclaireho 315569331642446be05292e3e1f8a51218827168cdclaireho/* 325569331642446be05292e3e1f8a51218827168cdclaireho// Vocabulary 335569331642446be05292e3e1f8a51218827168cdclaireho// Base -> A consonant or an independent vowel in its full (not subscript) form. It is the 345569331642446be05292e3e1f8a51218827168cdclaireho// center of the syllable, it can be surrounded by coeng (subscript) consonants, vowels, 355569331642446be05292e3e1f8a51218827168cdclaireho// split vowels, signs... but there is only one base in a syllable, it has to be coded as 365569331642446be05292e3e1f8a51218827168cdclaireho// the first character of the syllable. 375569331642446be05292e3e1f8a51218827168cdclaireho// split vowel --> vowel that has two parts placed separately (e.g. Before and after the consonant). 385569331642446be05292e3e1f8a51218827168cdclaireho// Khmer language has five of them. Khmer split vowels either have one part before the 395569331642446be05292e3e1f8a51218827168cdclaireho// base and one after the base or they have a part before the base and a part above the base. 
//               The first part of all Khmer split vowels is the same character, identical to
//               the glyph of Khmer dependent vowel SRA EI
// coeng --> modifier used in Khmer to construct coeng (subscript) consonants
//               Unlike in Indian languages, the coeng modifies the consonant that follows it,
//               not the one preceding it. Each consonant has two forms, the base form and the subscript form;
//               the base form is the normal one (using the consonant's code-point), the subscript form is
//               displayed when the combination coeng + consonant is encountered.
// Consonant of type 1 -> A consonant whose subscript form only occupies space under a base consonant
// Consonant of type 2 -> Its subscript form occupies space under and before the base (only one, RO)
// Consonant of type 3 -> Its subscript form occupies space under and after the base (KHO, CHHO, THHO, BA, YO, SA)
// Consonant shifter -> Khmer has two series of consonants. The same dependent vowel has different sounds
//               if it is attached to a consonant of the first series or a consonant of the second series.
//               Most consonants have an equivalent in the other series, but some of them exist only in
//               one series (for example SA). If we want to use the consonant SA with a vowel sound that
//               can only be done with a vowel sound that corresponds to a vowel accompanying a consonant
//               of the other series, then we need to use a consonant shifter: TRIISAP or MUSIKATOAN
//               (x17C9 and x17CA). TRIISAP changes a first series consonant to second series sound and
//               MUSIKATOAN a second series consonant to have a first series vowel sound.
//               Consonant shifters are both normally superscript marks, but, when they are followed by a
//               superscript, they change shape and take the form of subscript dependent vowel SRA U.
//               If they are in the same syllable as a coeng consonant, Unicode 3.0 says that they
//               should be typed before the coeng. Unicode 4.0 breaks the standard and says that it should
//               be placed after the coeng consonant.
// Dependent vowel ->   In Khmer dependent vowels can be placed above, below, before or after the base.
//               Each vowel has its own position. Only one vowel per syllable is allowed.
// Signs     ->  Khmer has above signs and post signs. Only one above sign and/or one post sign are
//               allowed in a syllable.
//
//
// order is important here!
// This order must be the same that is found in each horizontal
// line in the statetable for Khmer (see khmerStateTable). */
enum KhmerCharClassValues {
    CC_RESERVED             =  0,
    CC_CONSONANT            =  1, /* Consonant of type 1 or independent vowel */
    CC_CONSONANT2           =  2, /* Consonant of type 2 */
    CC_CONSONANT3           =  3, /* Consonant of type 3 */
    CC_ZERO_WIDTH_NJ_MARK   =  4, /* Zero Width non joiner character (0x200C) */
    CC_CONSONANT_SHIFTER    =  5,
    CC_ROBAT                =  6, /* Khmer special diacritic accent -treated differently in state table */
    CC_COENG                =  7, /* Subscript consonant combining character */
    CC_DEPENDENT_VOWEL      =  8,
    CC_SIGN_ABOVE           =  9,
    CC_SIGN_AFTER           = 10,
    CC_ZERO_WIDTH_J_MARK    = 11, /* Zero width joiner character */
    CC_COUNT                = 12  /* This is the number of character classes */
};


/*
// Flags ORed on top of a KhmerCharClassValues value. The low 16 bits
// (CF_CLASS_MASK) hold the class value itself; the position flags live in
// bits 16-19 (CF_POS_MASK) and the remaining flags in the top byte.
*/
enum KhmerCharClassFlags {
    CF_CLASS_MASK    = 0x0000FFFF,

    CF_CONSONANT     = 0x01000000,  /* flag to speed up comparing */
    CF_SPLIT_VOWEL   = 0x02000000,  /* flag for a split vowel -> the first part is added in front of the syllable */
    CF_DOTTED_CIRCLE = 0x04000000,  /* add a dotted circle if a character with this flag is the first in a syllable */
    CF_COENG         = 0x08000000,  /* flag to speed up comparing */
    CF_SHIFTER       = 0x10000000,  /* flag to speed up comparing */
    CF_ABOVE_VOWEL   = 0x20000000,  /* flag to speed up comparing */

    /* position flags */
    CF_POS_BEFORE    = 0x00080000,
    CF_POS_BELOW     = 0x00040000,
    CF_POS_ABOVE     = 0x00020000,
    CF_POS_AFTER     = 0x00010000,
    CF_POS_MASK      = 0x000f0000
};


/* Characters that get referred to by name */
enum KhmerChar {
    C_SIGN_ZWNJ     = 0x200C,
    C_SIGN_ZWJ      = 0x200D,
    C_RO            = 0x179A,
    C_VOWEL_AA      = 0x17B6,
    C_SIGN_NIKAHIT  = 0x17C6,
    C_VOWEL_E       = 0x17C1,
    C_COENG         = 0x17D2
};


/*
// simple classes, they are used in the statetable (in this file) to control the length of a syllable
// they are also used to know where a character should be placed (location in reference to the base character)
// and also to know if a character, when independently displayed, should be displayed with a dotted-circle to
// indicate error in syllable construction
*/
enum {
    _xx = CC_RESERVED,
    _sa = CC_SIGN_ABOVE | CF_DOTTED_CIRCLE | CF_POS_ABOVE,
    _sp = CC_SIGN_AFTER | CF_DOTTED_CIRCLE| CF_POS_AFTER,
    _c1 = CC_CONSONANT | CF_CONSONANT,
    _c2 = CC_CONSONANT2 | CF_CONSONANT,
    _c3 = CC_CONSONANT3 | CF_CONSONANT,
    _rb = CC_ROBAT | CF_POS_ABOVE | CF_DOTTED_CIRCLE,
    _cs = CC_CONSONANT_SHIFTER | CF_DOTTED_CIRCLE | CF_SHIFTER,
    _dl = CC_DEPENDENT_VOWEL | CF_POS_BEFORE | CF_DOTTED_CIRCLE,
    _db = CC_DEPENDENT_VOWEL | CF_POS_BELOW | CF_DOTTED_CIRCLE,
    _da = CC_DEPENDENT_VOWEL | CF_POS_ABOVE | CF_DOTTED_CIRCLE | CF_ABOVE_VOWEL,
    _dr = CC_DEPENDENT_VOWEL | CF_POS_AFTER | CF_DOTTED_CIRCLE,
    _co = CC_COENG | CF_COENG | CF_DOTTED_CIRCLE,

    /* split vowel */
    _va = _da | CF_SPLIT_VOWEL,
    _vr = _dr | CF_SPLIT_VOWEL
};


/*
// Character class: a character class value
// ORed with character class flags.
*/
typedef unsigned long KhmerCharClass;


/*
// Character class tables
// _xx character does not combine into syllable, such as numbers, punctuation marks, non-Khmer signs...
// _sa Sign placed above the base
// _sp Sign placed after the base
// _c1 Consonant of type 1 or independent vowel (independent vowels behave as type 1 consonants)
// _c2 Consonant of type 2 (only RO)
// _c3 Consonant of type 3
// _rb Khmer sign robat u17CC. combining mark for subscript consonants
// _cs Consonant-shifter
// _dl Dependent vowel placed before the base (left of the base)
// _db Dependent vowel placed below the base
// _da Dependent vowel placed above the base
// _dr Dependent vowel placed behind the base (right of the base)
// _co Khmer combining mark COENG u17D2, combines with the consonant or independent vowel following
//     it to create a subscript consonant or independent vowel
// _va Khmer split vowel in which the first part is before the base and the second one above the base
// _vr Khmer split vowel in which the first part is before the base and the second one behind (right of) the base
*/
static const KhmerCharClass khmerCharClasses[] = {
    _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _c1, /* 1780 - 178F */
    _c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c2, _c1, _c1, _c1, _c3, _c3, /* 1790 - 179F */
    _c1, _c3, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, /* 17A0 - 17AF */
    _c1, _c1, _c1, _c1, _dr, _dr, _dr, _da, _da, _da, _da, _db, _db, _db, _va, _vr, /* 17B0 - 17BF */
    _vr, _dl, _dl, _dl, _vr, _vr, _sa, _sp, _sp, _cs, _cs, _sa, _rb, _sa, _sa, _sa, /* 17C0 - 17CF */
    _sa, _sa, _co, _sa, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _sa, _xx, _xx  /* 17D0 - 17DF */
};

/* this enum must reflect the range of khmerCharClasses */
enum KhmerCharClassesRange {
    KhmerFirstChar = 0x1780,
    KhmerLastChar  = 0x17df
};

/*
// Compile-time check of the invariant stated above: the table must hold exactly
// one entry per code point in [KhmerFirstChar, KhmerLastChar]. If the two ever
// drift apart the array size below becomes negative and compilation fails.
*/
typedef char khmerCharClassesRangeCheck[
    (sizeof(khmerCharClasses) / sizeof(khmerCharClasses[0])
        == (unsigned)(KhmerLastChar - KhmerFirstChar + 1)) ? 1 : -1];

/*
// Below we define how a character in the input string is either in the khmerCharClasses table
// (in which case we get its type back), a ZWJ or ZWNJ (two characters that may appear
// within the syllable, but are not in the table) we also get their type back, or an unknown object
// in which case we get _xx (CC_RESERVED) back
*/
1945569331642446be05292e3e1f8a51218827168cdclairehostatic KhmerCharClass getKhmerCharClass(HB_UChar16 uc) 1955569331642446be05292e3e1f8a51218827168cdclaireho{ 1965569331642446be05292e3e1f8a51218827168cdclaireho if (uc == C_SIGN_ZWJ) { 1975569331642446be05292e3e1f8a51218827168cdclaireho return CC_ZERO_WIDTH_J_MARK; 1985569331642446be05292e3e1f8a51218827168cdclaireho } 1995569331642446be05292e3e1f8a51218827168cdclaireho 2005569331642446be05292e3e1f8a51218827168cdclaireho if (uc == C_SIGN_ZWNJ) { 2015569331642446be05292e3e1f8a51218827168cdclaireho return CC_ZERO_WIDTH_NJ_MARK; 2025569331642446be05292e3e1f8a51218827168cdclaireho } 2035569331642446be05292e3e1f8a51218827168cdclaireho 2045569331642446be05292e3e1f8a51218827168cdclaireho if (uc < KhmerFirstChar || uc > KhmerLastChar) { 2055569331642446be05292e3e1f8a51218827168cdclaireho return CC_RESERVED; 2065569331642446be05292e3e1f8a51218827168cdclaireho } 2075569331642446be05292e3e1f8a51218827168cdclaireho 2085569331642446be05292e3e1f8a51218827168cdclaireho return khmerCharClasses[uc - KhmerFirstChar]; 2095569331642446be05292e3e1f8a51218827168cdclaireho} 2105569331642446be05292e3e1f8a51218827168cdclaireho 2115569331642446be05292e3e1f8a51218827168cdclaireho 2125569331642446be05292e3e1f8a51218827168cdclaireho/* 2135569331642446be05292e3e1f8a51218827168cdclaireho// The stateTable is used to calculate the end (the length) of a well 2145569331642446be05292e3e1f8a51218827168cdclaireho// formed Khmer Syllable. 2155569331642446be05292e3e1f8a51218827168cdclaireho// 2165569331642446be05292e3e1f8a51218827168cdclaireho// Each horizontal line is ordered exactly the same way as the values in KhmerClassTable 2175569331642446be05292e3e1f8a51218827168cdclaireho// CharClassValues. This coincidence of values allows the follow up of the table. 
2185569331642446be05292e3e1f8a51218827168cdclaireho// 2195569331642446be05292e3e1f8a51218827168cdclaireho// Each line corresponds to a state, which does not necessarily need to be a type 2205569331642446be05292e3e1f8a51218827168cdclaireho// of component... for example, state 2 is a base, with is always a first character 2215569331642446be05292e3e1f8a51218827168cdclaireho// in the syllable, but the state could be produced a consonant of any type when 2225569331642446be05292e3e1f8a51218827168cdclaireho// it is the first character that is analysed (in ground state). 2235569331642446be05292e3e1f8a51218827168cdclaireho// 2245569331642446be05292e3e1f8a51218827168cdclaireho// Differentiating 3 types of consonants is necessary in order to 2255569331642446be05292e3e1f8a51218827168cdclaireho// forbid the use of certain combinations, such as having a second 2265569331642446be05292e3e1f8a51218827168cdclaireho// coeng after a coeng RO, 2275569331642446be05292e3e1f8a51218827168cdclaireho// The inexistent possibility of having a type 3 after another type 3 is permitted, 2285569331642446be05292e3e1f8a51218827168cdclaireho// eliminating it would very much complicate the table, and it does not create typing 2295569331642446be05292e3e1f8a51218827168cdclaireho// problems, as the case above. 2305569331642446be05292e3e1f8a51218827168cdclaireho// 2315569331642446be05292e3e1f8a51218827168cdclaireho// The table is quite complex, in order to limit the number of coeng consonants 2325569331642446be05292e3e1f8a51218827168cdclaireho// to 2 (by means of the table). 
2335569331642446be05292e3e1f8a51218827168cdclaireho// 2345569331642446be05292e3e1f8a51218827168cdclaireho// There a peculiarity, as far as Unicode is concerned: 2355569331642446be05292e3e1f8a51218827168cdclaireho// - The consonant-shifter is considered in two possible different 2365569331642446be05292e3e1f8a51218827168cdclaireho// locations, the one considered in Unicode 3.0 and the one considered in 2375569331642446be05292e3e1f8a51218827168cdclaireho// Unicode 4.0. (there is a backwards compatibility problem in this standard). 2385569331642446be05292e3e1f8a51218827168cdclaireho// 2395569331642446be05292e3e1f8a51218827168cdclaireho// 2405569331642446be05292e3e1f8a51218827168cdclaireho// xx independent character, such as a number, punctuation sign or non-khmer char 2415569331642446be05292e3e1f8a51218827168cdclaireho// 2425569331642446be05292e3e1f8a51218827168cdclaireho// c1 Khmer consonant of type 1 or an independent vowel 2435569331642446be05292e3e1f8a51218827168cdclaireho// that is, a letter in which the subscript for is only under the 2445569331642446be05292e3e1f8a51218827168cdclaireho// base, not taking any space to the right or to the left 2455569331642446be05292e3e1f8a51218827168cdclaireho// 2465569331642446be05292e3e1f8a51218827168cdclaireho// c2 Khmer consonant of type 2, the coeng form takes space under 2475569331642446be05292e3e1f8a51218827168cdclaireho// and to the left of the base (only RO is of this type) 2485569331642446be05292e3e1f8a51218827168cdclaireho// 2495569331642446be05292e3e1f8a51218827168cdclaireho// c3 Khmer consonant of type 3. Its subscript form takes space under 2505569331642446be05292e3e1f8a51218827168cdclaireho// and to the right of the base. 
2515569331642446be05292e3e1f8a51218827168cdclaireho// 2525569331642446be05292e3e1f8a51218827168cdclaireho// cs Khmer consonant shifter 2535569331642446be05292e3e1f8a51218827168cdclaireho// 2545569331642446be05292e3e1f8a51218827168cdclaireho// rb Khmer robat 2555569331642446be05292e3e1f8a51218827168cdclaireho// 2565569331642446be05292e3e1f8a51218827168cdclaireho// co coeng character (u17D2) 2575569331642446be05292e3e1f8a51218827168cdclaireho// 2585569331642446be05292e3e1f8a51218827168cdclaireho// dv dependent vowel (including split vowels, they are treated in the same way). 2595569331642446be05292e3e1f8a51218827168cdclaireho// even if dv is not defined above, the component that is really tested for is 2605569331642446be05292e3e1f8a51218827168cdclaireho// KhmerClassTable::CC_DEPENDENT_VOWEL, which is common to all dependent vowels 2615569331642446be05292e3e1f8a51218827168cdclaireho// 2625569331642446be05292e3e1f8a51218827168cdclaireho// zwj Zero Width joiner 2635569331642446be05292e3e1f8a51218827168cdclaireho// 2645569331642446be05292e3e1f8a51218827168cdclaireho// zwnj Zero width non joiner 2655569331642446be05292e3e1f8a51218827168cdclaireho// 2665569331642446be05292e3e1f8a51218827168cdclaireho// sa above sign 2675569331642446be05292e3e1f8a51218827168cdclaireho// 2685569331642446be05292e3e1f8a51218827168cdclaireho// sp post sign 2695569331642446be05292e3e1f8a51218827168cdclaireho// 2705569331642446be05292e3e1f8a51218827168cdclaireho// there are lines with equal content but for an easier understanding 2715569331642446be05292e3e1f8a51218827168cdclaireho// (and maybe change in the future) we did not join them 2725569331642446be05292e3e1f8a51218827168cdclaireho*/ 2735569331642446be05292e3e1f8a51218827168cdclairehostatic const signed char khmerStateTable[][CC_COUNT] = 2745569331642446be05292e3e1f8a51218827168cdclaireho{ 2755569331642446be05292e3e1f8a51218827168cdclaireho /* xx c1 c2 c3 zwnj cs rb co dv sa sp zwj */ 2765569331642446be05292e3e1f8a51218827168cdclaireho { 1, 
2, 2, 2, 1, 1, 1, 6, 1, 1, 1, 2}, /* 0 - ground state */ 2775569331642446be05292e3e1f8a51218827168cdclaireho {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, /* 1 - exit state (or sign to the right of the syllable) */ 2785569331642446be05292e3e1f8a51218827168cdclaireho {-1, -1, -1, -1, 3, 4, 5, 6, 16, 17, 1, -1}, /* 2 - Base consonant */ 2795569331642446be05292e3e1f8a51218827168cdclaireho {-1, -1, -1, -1, -1, 4, -1, -1, 16, -1, -1, -1}, /* 3 - First ZWNJ before a register shifter It can only be followed by a shifter or a vowel */ 2805569331642446be05292e3e1f8a51218827168cdclaireho {-1, -1, -1, -1, 15, -1, -1, 6, 16, 17, 1, 14}, /* 4 - First register shifter */ 2815569331642446be05292e3e1f8a51218827168cdclaireho {-1, -1, -1, -1, -1, -1, -1, -1, 20, -1, 1, -1}, /* 5 - Robat */ 2825569331642446be05292e3e1f8a51218827168cdclaireho {-1, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, -1}, /* 6 - First Coeng */ 2835569331642446be05292e3e1f8a51218827168cdclaireho {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17, 1, 14}, /* 7 - First consonant of type 1 after coeng */ 2845569331642446be05292e3e1f8a51218827168cdclaireho {-1, -1, -1, -1, 12, 13, -1, -1, 16, 17, 1, 14}, /* 8 - First consonant of type 2 after coeng */ 2855569331642446be05292e3e1f8a51218827168cdclaireho {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17, 1, 14}, /* 9 - First consonant or type 3 after ceong */ 2865569331642446be05292e3e1f8a51218827168cdclaireho {-1, 11, 11, 11, -1, -1, -1, -1, -1, -1, -1, -1}, /* 10 - Second Coeng (no register shifter before) */ 2875569331642446be05292e3e1f8a51218827168cdclaireho {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17, 1, 14}, /* 11 - Second coeng consonant (or ind. 
vowel) no register shifter before */ 2885569331642446be05292e3e1f8a51218827168cdclaireho {-1, -1, -1, -1, -1, 13, -1, -1, 16, -1, -1, -1}, /* 12 - Second ZWNJ before a register shifter */ 2895569331642446be05292e3e1f8a51218827168cdclaireho {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17, 1, 14}, /* 13 - Second register shifter */ 2905569331642446be05292e3e1f8a51218827168cdclaireho {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1}, /* 14 - ZWJ before vowel */ 2915569331642446be05292e3e1f8a51218827168cdclaireho {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1}, /* 15 - ZWNJ before vowel */ 2925569331642446be05292e3e1f8a51218827168cdclaireho {-1, -1, -1, -1, -1, -1, -1, -1, -1, 17, 1, 18}, /* 16 - dependent vowel */ 2935569331642446be05292e3e1f8a51218827168cdclaireho {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 18}, /* 17 - sign above */ 2945569331642446be05292e3e1f8a51218827168cdclaireho {-1, -1, -1, -1, -1, -1, -1, 19, -1, -1, -1, -1}, /* 18 - ZWJ after vowel */ 2955569331642446be05292e3e1f8a51218827168cdclaireho {-1, 1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1}, /* 19 - Third coeng */ 2965569331642446be05292e3e1f8a51218827168cdclaireho {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1}, /* 20 - dependent vowel after a Robat */ 2975569331642446be05292e3e1f8a51218827168cdclaireho}; 2985569331642446be05292e3e1f8a51218827168cdclaireho 2995569331642446be05292e3e1f8a51218827168cdclaireho 3005569331642446be05292e3e1f8a51218827168cdclaireho/* #define KHMER_DEBUG */ 3015569331642446be05292e3e1f8a51218827168cdclaireho#ifdef KHMER_DEBUG 3025569331642446be05292e3e1f8a51218827168cdclaireho#define KHDEBUG qDebug 3035569331642446be05292e3e1f8a51218827168cdclaireho#else 3045569331642446be05292e3e1f8a51218827168cdclaireho#define KHDEBUG if(0) printf 3055569331642446be05292e3e1f8a51218827168cdclaireho#endif 3065569331642446be05292e3e1f8a51218827168cdclaireho 3075569331642446be05292e3e1f8a51218827168cdclaireho/* 3085569331642446be05292e3e1f8a51218827168cdclaireho// Given an input string of characters 
and a location in which to start looking 3095569331642446be05292e3e1f8a51218827168cdclaireho// calculate, using the state table, which one is the last character of the syllable 3105569331642446be05292e3e1f8a51218827168cdclaireho// that starts in the starting position. 3115569331642446be05292e3e1f8a51218827168cdclaireho*/ 3125569331642446be05292e3e1f8a51218827168cdclairehostatic int khmer_nextSyllableBoundary(const HB_UChar16 *s, int start, int end, HB_Bool *invalid) 3135569331642446be05292e3e1f8a51218827168cdclaireho{ 3145569331642446be05292e3e1f8a51218827168cdclaireho const HB_UChar16 *uc = s + start; 3155569331642446be05292e3e1f8a51218827168cdclaireho int state = 0; 3165569331642446be05292e3e1f8a51218827168cdclaireho int pos = start; 3175569331642446be05292e3e1f8a51218827168cdclaireho *invalid = FALSE; 3185569331642446be05292e3e1f8a51218827168cdclaireho 3195569331642446be05292e3e1f8a51218827168cdclaireho while (pos < end) { 3205569331642446be05292e3e1f8a51218827168cdclaireho KhmerCharClass charClass = getKhmerCharClass(*uc); 3215569331642446be05292e3e1f8a51218827168cdclaireho if (pos == start) { 3225569331642446be05292e3e1f8a51218827168cdclaireho *invalid = (charClass > 0) && ! 
(charClass & CF_CONSONANT); 3235569331642446be05292e3e1f8a51218827168cdclaireho } 3245569331642446be05292e3e1f8a51218827168cdclaireho state = khmerStateTable[state][charClass & CF_CLASS_MASK]; 3255569331642446be05292e3e1f8a51218827168cdclaireho 3265569331642446be05292e3e1f8a51218827168cdclaireho KHDEBUG("state[%d]=%d class=%8lx (uc=%4x)", pos - start, state, 3275569331642446be05292e3e1f8a51218827168cdclaireho charClass, *uc ); 3285569331642446be05292e3e1f8a51218827168cdclaireho 3295569331642446be05292e3e1f8a51218827168cdclaireho if (state < 0) { 3305569331642446be05292e3e1f8a51218827168cdclaireho break; 3315569331642446be05292e3e1f8a51218827168cdclaireho } 3325569331642446be05292e3e1f8a51218827168cdclaireho ++uc; 3335569331642446be05292e3e1f8a51218827168cdclaireho ++pos; 3345569331642446be05292e3e1f8a51218827168cdclaireho } 3355569331642446be05292e3e1f8a51218827168cdclaireho return pos; 3365569331642446be05292e3e1f8a51218827168cdclaireho} 3375569331642446be05292e3e1f8a51218827168cdclaireho 3385569331642446be05292e3e1f8a51218827168cdclaireho#ifndef NO_OPENTYPE 3395569331642446be05292e3e1f8a51218827168cdclairehostatic const HB_OpenTypeFeature khmer_features[] = { 3405569331642446be05292e3e1f8a51218827168cdclaireho { HB_MAKE_TAG( 'p', 'r', 'e', 'f' ), PreFormProperty }, 3415569331642446be05292e3e1f8a51218827168cdclaireho { HB_MAKE_TAG( 'b', 'l', 'w', 'f' ), BelowFormProperty }, 3425569331642446be05292e3e1f8a51218827168cdclaireho { HB_MAKE_TAG( 'a', 'b', 'v', 'f' ), AboveFormProperty }, 3435569331642446be05292e3e1f8a51218827168cdclaireho { HB_MAKE_TAG( 'p', 's', 't', 'f' ), PostFormProperty }, 3445569331642446be05292e3e1f8a51218827168cdclaireho { HB_MAKE_TAG( 'p', 'r', 'e', 's' ), PreSubstProperty }, 3455569331642446be05292e3e1f8a51218827168cdclaireho { HB_MAKE_TAG( 'b', 'l', 'w', 's' ), BelowSubstProperty }, 3465569331642446be05292e3e1f8a51218827168cdclaireho { HB_MAKE_TAG( 'a', 'b', 'v', 's' ), AboveSubstProperty }, 3475569331642446be05292e3e1f8a51218827168cdclaireho 
{ HB_MAKE_TAG( 'p', 's', 't', 's' ), PostSubstProperty }, 3485569331642446be05292e3e1f8a51218827168cdclaireho { HB_MAKE_TAG( 'c', 'l', 'i', 'g' ), CligProperty }, 3495569331642446be05292e3e1f8a51218827168cdclaireho { 0, 0 } 3505569331642446be05292e3e1f8a51218827168cdclaireho}; 3515569331642446be05292e3e1f8a51218827168cdclaireho#endif 3525569331642446be05292e3e1f8a51218827168cdclaireho 3535569331642446be05292e3e1f8a51218827168cdclaireho 3545569331642446be05292e3e1f8a51218827168cdclairehostatic HB_Bool khmer_shape_syllable(HB_Bool openType, HB_ShaperItem *item) 3555569331642446be05292e3e1f8a51218827168cdclaireho{ 3565569331642446be05292e3e1f8a51218827168cdclaireho/* KHDEBUG("syllable from %d len %d, str='%s'", item->from, item->length, 3575569331642446be05292e3e1f8a51218827168cdclaireho item->string->mid(item->from, item->length).toUtf8().data()); */ 3585569331642446be05292e3e1f8a51218827168cdclaireho 3595569331642446be05292e3e1f8a51218827168cdclaireho int len = 0; 3605569331642446be05292e3e1f8a51218827168cdclaireho int syllableEnd = item->item.pos + item->item.length; 3615569331642446be05292e3e1f8a51218827168cdclaireho unsigned short reordered[16]; 3625569331642446be05292e3e1f8a51218827168cdclaireho unsigned char properties[16]; 3635569331642446be05292e3e1f8a51218827168cdclaireho enum { 3645569331642446be05292e3e1f8a51218827168cdclaireho AboveForm = 0x01, 3655569331642446be05292e3e1f8a51218827168cdclaireho PreForm = 0x02, 3665569331642446be05292e3e1f8a51218827168cdclaireho PostForm = 0x04, 3675569331642446be05292e3e1f8a51218827168cdclaireho BelowForm = 0x08 3685569331642446be05292e3e1f8a51218827168cdclaireho }; 3695569331642446be05292e3e1f8a51218827168cdclaireho#ifndef NO_OPENTYPE 3705569331642446be05292e3e1f8a51218827168cdclaireho const int availableGlyphs = item->num_glyphs; 3715569331642446be05292e3e1f8a51218827168cdclaireho#endif 3725569331642446be05292e3e1f8a51218827168cdclaireho int coengRo; 3735569331642446be05292e3e1f8a51218827168cdclaireho int i; 
3745569331642446be05292e3e1f8a51218827168cdclaireho 3755569331642446be05292e3e1f8a51218827168cdclaireho /* according to the specs this is the max length one can get 3765569331642446be05292e3e1f8a51218827168cdclaireho ### the real value should be smaller */ 3775569331642446be05292e3e1f8a51218827168cdclaireho assert(item->item.length < 13); 3785569331642446be05292e3e1f8a51218827168cdclaireho 3795569331642446be05292e3e1f8a51218827168cdclaireho memset(properties, 0, 16*sizeof(unsigned char)); 3805569331642446be05292e3e1f8a51218827168cdclaireho 3815569331642446be05292e3e1f8a51218827168cdclaireho#ifdef KHMER_DEBUG 3825569331642446be05292e3e1f8a51218827168cdclaireho qDebug("original:"); 3835569331642446be05292e3e1f8a51218827168cdclaireho for (int i = from; i < syllableEnd; i++) { 3845569331642446be05292e3e1f8a51218827168cdclaireho qDebug(" %d: %4x", i, string[i]); 3855569331642446be05292e3e1f8a51218827168cdclaireho } 3865569331642446be05292e3e1f8a51218827168cdclaireho#endif 3875569331642446be05292e3e1f8a51218827168cdclaireho 3885569331642446be05292e3e1f8a51218827168cdclaireho /* 3895569331642446be05292e3e1f8a51218827168cdclaireho // write a pre vowel or the pre part of a split vowel first 3905569331642446be05292e3e1f8a51218827168cdclaireho // and look out for coeng + ro. RO is the only vowel of type 2, and 3915569331642446be05292e3e1f8a51218827168cdclaireho // therefore the only one that requires saving space before the base. 3925569331642446be05292e3e1f8a51218827168cdclaireho */ 3935569331642446be05292e3e1f8a51218827168cdclaireho coengRo = -1; /* There is no Coeng Ro, if found this value will change */ 3945569331642446be05292e3e1f8a51218827168cdclaireho for (i = item->item.pos; i < syllableEnd; i += 1) { 3955569331642446be05292e3e1f8a51218827168cdclaireho KhmerCharClass charClass = getKhmerCharClass(item->string[i]); 3965569331642446be05292e3e1f8a51218827168cdclaireho 3975569331642446be05292e3e1f8a51218827168cdclaireho /* if a split vowel, write the pre part. 
In Khmer the pre part 3985569331642446be05292e3e1f8a51218827168cdclaireho is the same for all split vowels, same glyph as pre vowel C_VOWEL_E */ 3995569331642446be05292e3e1f8a51218827168cdclaireho if (charClass & CF_SPLIT_VOWEL) { 4005569331642446be05292e3e1f8a51218827168cdclaireho reordered[len] = C_VOWEL_E; 4015569331642446be05292e3e1f8a51218827168cdclaireho properties[len] = PreForm; 4025569331642446be05292e3e1f8a51218827168cdclaireho ++len; 4035569331642446be05292e3e1f8a51218827168cdclaireho break; /* there can be only one vowel */ 4045569331642446be05292e3e1f8a51218827168cdclaireho } 4055569331642446be05292e3e1f8a51218827168cdclaireho /* if a vowel with pos before write it out */ 4065569331642446be05292e3e1f8a51218827168cdclaireho if (charClass & CF_POS_BEFORE) { 4075569331642446be05292e3e1f8a51218827168cdclaireho reordered[len] = item->string[i]; 4085569331642446be05292e3e1f8a51218827168cdclaireho properties[len] = PreForm; 4095569331642446be05292e3e1f8a51218827168cdclaireho ++len; 4105569331642446be05292e3e1f8a51218827168cdclaireho break; /* there can be only one vowel */ 4115569331642446be05292e3e1f8a51218827168cdclaireho } 4125569331642446be05292e3e1f8a51218827168cdclaireho /* look for coeng + ro and remember position 4135569331642446be05292e3e1f8a51218827168cdclaireho works because coeng + ro is always in front of a vowel (if there is a vowel) 4145569331642446be05292e3e1f8a51218827168cdclaireho and because CC_CONSONANT2 is enough to identify it, as it is the only consonant 4155569331642446be05292e3e1f8a51218827168cdclaireho with this flag */ 4165569331642446be05292e3e1f8a51218827168cdclaireho if ( (charClass & CF_COENG) && (i + 1 < syllableEnd) && 4175569331642446be05292e3e1f8a51218827168cdclaireho ( (getKhmerCharClass(item->string[i+1]) & CF_CLASS_MASK) == CC_CONSONANT2) ) { 4185569331642446be05292e3e1f8a51218827168cdclaireho coengRo = i; 4195569331642446be05292e3e1f8a51218827168cdclaireho } 4205569331642446be05292e3e1f8a51218827168cdclaireho } 
4215569331642446be05292e3e1f8a51218827168cdclaireho 4225569331642446be05292e3e1f8a51218827168cdclaireho /* write coeng + ro if found */ 4235569331642446be05292e3e1f8a51218827168cdclaireho if (coengRo > -1) { 4245569331642446be05292e3e1f8a51218827168cdclaireho reordered[len] = C_COENG; 4255569331642446be05292e3e1f8a51218827168cdclaireho properties[len] = PreForm; 4265569331642446be05292e3e1f8a51218827168cdclaireho ++len; 4275569331642446be05292e3e1f8a51218827168cdclaireho reordered[len] = C_RO; 4285569331642446be05292e3e1f8a51218827168cdclaireho properties[len] = PreForm; 4295569331642446be05292e3e1f8a51218827168cdclaireho ++len; 4305569331642446be05292e3e1f8a51218827168cdclaireho } 4315569331642446be05292e3e1f8a51218827168cdclaireho 4325569331642446be05292e3e1f8a51218827168cdclaireho /* 4335569331642446be05292e3e1f8a51218827168cdclaireho shall we add a dotted circle? 4345569331642446be05292e3e1f8a51218827168cdclaireho If in the position in which the base should be (first char in the string) there is 4355569331642446be05292e3e1f8a51218827168cdclaireho a character that has the Dotted circle flag (a character that cannot be a base) 4365569331642446be05292e3e1f8a51218827168cdclaireho then write a dotted circle */ 4375569331642446be05292e3e1f8a51218827168cdclaireho if (getKhmerCharClass(item->string[item->item.pos]) & CF_DOTTED_CIRCLE) { 4385569331642446be05292e3e1f8a51218827168cdclaireho reordered[len] = C_DOTTED_CIRCLE; 4395569331642446be05292e3e1f8a51218827168cdclaireho ++len; 4405569331642446be05292e3e1f8a51218827168cdclaireho } 4415569331642446be05292e3e1f8a51218827168cdclaireho 4425569331642446be05292e3e1f8a51218827168cdclaireho /* copy what is left to the output, skipping before vowels and 4435569331642446be05292e3e1f8a51218827168cdclaireho coeng Ro if they are present */ 4445569331642446be05292e3e1f8a51218827168cdclaireho for (i = item->item.pos; i < syllableEnd; i += 1) { 4455569331642446be05292e3e1f8a51218827168cdclaireho HB_UChar16 uc = item->string[i]; 
4465569331642446be05292e3e1f8a51218827168cdclaireho KhmerCharClass charClass = getKhmerCharClass(uc); 4475569331642446be05292e3e1f8a51218827168cdclaireho 4485569331642446be05292e3e1f8a51218827168cdclaireho /* skip a before vowel, it was already processed */ 4495569331642446be05292e3e1f8a51218827168cdclaireho if (charClass & CF_POS_BEFORE) { 4505569331642446be05292e3e1f8a51218827168cdclaireho continue; 4515569331642446be05292e3e1f8a51218827168cdclaireho } 4525569331642446be05292e3e1f8a51218827168cdclaireho 4535569331642446be05292e3e1f8a51218827168cdclaireho /* skip coeng + ro, it was already processed */ 4545569331642446be05292e3e1f8a51218827168cdclaireho if (i == coengRo) { 4555569331642446be05292e3e1f8a51218827168cdclaireho i += 1; 4565569331642446be05292e3e1f8a51218827168cdclaireho continue; 4575569331642446be05292e3e1f8a51218827168cdclaireho } 4585569331642446be05292e3e1f8a51218827168cdclaireho 4595569331642446be05292e3e1f8a51218827168cdclaireho switch (charClass & CF_POS_MASK) 4605569331642446be05292e3e1f8a51218827168cdclaireho { 4615569331642446be05292e3e1f8a51218827168cdclaireho case CF_POS_ABOVE : 4625569331642446be05292e3e1f8a51218827168cdclaireho reordered[len] = uc; 4635569331642446be05292e3e1f8a51218827168cdclaireho properties[len] = AboveForm; 4645569331642446be05292e3e1f8a51218827168cdclaireho ++len; 4655569331642446be05292e3e1f8a51218827168cdclaireho break; 4665569331642446be05292e3e1f8a51218827168cdclaireho 4675569331642446be05292e3e1f8a51218827168cdclaireho case CF_POS_AFTER : 4685569331642446be05292e3e1f8a51218827168cdclaireho reordered[len] = uc; 4695569331642446be05292e3e1f8a51218827168cdclaireho properties[len] = PostForm; 4705569331642446be05292e3e1f8a51218827168cdclaireho ++len; 4715569331642446be05292e3e1f8a51218827168cdclaireho break; 4725569331642446be05292e3e1f8a51218827168cdclaireho 4735569331642446be05292e3e1f8a51218827168cdclaireho case CF_POS_BELOW : 4745569331642446be05292e3e1f8a51218827168cdclaireho reordered[len] = uc; 
4755569331642446be05292e3e1f8a51218827168cdclaireho properties[len] = BelowForm; 4765569331642446be05292e3e1f8a51218827168cdclaireho ++len; 4775569331642446be05292e3e1f8a51218827168cdclaireho break; 4785569331642446be05292e3e1f8a51218827168cdclaireho 4795569331642446be05292e3e1f8a51218827168cdclaireho default: 4805569331642446be05292e3e1f8a51218827168cdclaireho /* assign the correct flags to a coeng consonant 4815569331642446be05292e3e1f8a51218827168cdclaireho Consonants of type 3 are taged as Post forms and those type 1 as below forms */ 4825569331642446be05292e3e1f8a51218827168cdclaireho if ( (charClass & CF_COENG) && i + 1 < syllableEnd ) { 4835569331642446be05292e3e1f8a51218827168cdclaireho unsigned char property = (getKhmerCharClass(item->string[i+1]) & CF_CLASS_MASK) == CC_CONSONANT3 ? 4845569331642446be05292e3e1f8a51218827168cdclaireho PostForm : BelowForm; 4855569331642446be05292e3e1f8a51218827168cdclaireho reordered[len] = uc; 4865569331642446be05292e3e1f8a51218827168cdclaireho properties[len] = property; 4875569331642446be05292e3e1f8a51218827168cdclaireho ++len; 4885569331642446be05292e3e1f8a51218827168cdclaireho i += 1; 4895569331642446be05292e3e1f8a51218827168cdclaireho reordered[len] = item->string[i]; 4905569331642446be05292e3e1f8a51218827168cdclaireho properties[len] = property; 4915569331642446be05292e3e1f8a51218827168cdclaireho ++len; 4925569331642446be05292e3e1f8a51218827168cdclaireho break; 4935569331642446be05292e3e1f8a51218827168cdclaireho } 4945569331642446be05292e3e1f8a51218827168cdclaireho 4955569331642446be05292e3e1f8a51218827168cdclaireho /* if a shifter is followed by an above vowel change the shifter to below form, 4965569331642446be05292e3e1f8a51218827168cdclaireho an above vowel can have two possible positions i + 1 or i + 3 4975569331642446be05292e3e1f8a51218827168cdclaireho (position i+1 corresponds to unicode 3, position i+3 to Unicode 4) 4985569331642446be05292e3e1f8a51218827168cdclaireho and there is an extra rule for C_VOWEL_AA + 
C_SIGN_NIKAHIT also for two 4995569331642446be05292e3e1f8a51218827168cdclaireho different positions, right after the shifter or after a vowel (Unicode 4) */ 5005569331642446be05292e3e1f8a51218827168cdclaireho if ( (charClass & CF_SHIFTER) && (i + 1 < syllableEnd) ) { 5015569331642446be05292e3e1f8a51218827168cdclaireho if (getKhmerCharClass(item->string[i+1]) & CF_ABOVE_VOWEL ) { 5025569331642446be05292e3e1f8a51218827168cdclaireho reordered[len] = uc; 5035569331642446be05292e3e1f8a51218827168cdclaireho properties[len] = BelowForm; 5045569331642446be05292e3e1f8a51218827168cdclaireho ++len; 5055569331642446be05292e3e1f8a51218827168cdclaireho break; 5065569331642446be05292e3e1f8a51218827168cdclaireho } 5075569331642446be05292e3e1f8a51218827168cdclaireho if (i + 2 < syllableEnd && 5085569331642446be05292e3e1f8a51218827168cdclaireho (item->string[i+1] == C_VOWEL_AA) && 5095569331642446be05292e3e1f8a51218827168cdclaireho (item->string[i+2] == C_SIGN_NIKAHIT) ) 5105569331642446be05292e3e1f8a51218827168cdclaireho { 5115569331642446be05292e3e1f8a51218827168cdclaireho reordered[len] = uc; 5125569331642446be05292e3e1f8a51218827168cdclaireho properties[len] = BelowForm; 5135569331642446be05292e3e1f8a51218827168cdclaireho ++len; 5145569331642446be05292e3e1f8a51218827168cdclaireho break; 5155569331642446be05292e3e1f8a51218827168cdclaireho } 5165569331642446be05292e3e1f8a51218827168cdclaireho if (i + 3 < syllableEnd && (getKhmerCharClass(item->string[i+3]) & CF_ABOVE_VOWEL) ) { 5175569331642446be05292e3e1f8a51218827168cdclaireho reordered[len] = uc; 5185569331642446be05292e3e1f8a51218827168cdclaireho properties[len] = BelowForm; 5195569331642446be05292e3e1f8a51218827168cdclaireho ++len; 5205569331642446be05292e3e1f8a51218827168cdclaireho break; 5215569331642446be05292e3e1f8a51218827168cdclaireho } 5225569331642446be05292e3e1f8a51218827168cdclaireho if (i + 4 < syllableEnd && 5235569331642446be05292e3e1f8a51218827168cdclaireho (item->string[i+3] == C_VOWEL_AA) && 
5245569331642446be05292e3e1f8a51218827168cdclaireho (item->string[i+4] == C_SIGN_NIKAHIT) ) 5255569331642446be05292e3e1f8a51218827168cdclaireho { 5265569331642446be05292e3e1f8a51218827168cdclaireho reordered[len] = uc; 5275569331642446be05292e3e1f8a51218827168cdclaireho properties[len] = BelowForm; 5285569331642446be05292e3e1f8a51218827168cdclaireho ++len; 5295569331642446be05292e3e1f8a51218827168cdclaireho break; 5305569331642446be05292e3e1f8a51218827168cdclaireho } 5315569331642446be05292e3e1f8a51218827168cdclaireho } 5325569331642446be05292e3e1f8a51218827168cdclaireho 5335569331642446be05292e3e1f8a51218827168cdclaireho /* default - any other characters */ 5345569331642446be05292e3e1f8a51218827168cdclaireho reordered[len] = uc; 5355569331642446be05292e3e1f8a51218827168cdclaireho ++len; 5365569331642446be05292e3e1f8a51218827168cdclaireho break; 5375569331642446be05292e3e1f8a51218827168cdclaireho } /* switch */ 5385569331642446be05292e3e1f8a51218827168cdclaireho } /* for */ 5395569331642446be05292e3e1f8a51218827168cdclaireho 5405569331642446be05292e3e1f8a51218827168cdclaireho if (!item->font->klass->convertStringToGlyphIndices(item->font, 5415569331642446be05292e3e1f8a51218827168cdclaireho reordered, len, 5425569331642446be05292e3e1f8a51218827168cdclaireho item->glyphs, &item->num_glyphs, 5435569331642446be05292e3e1f8a51218827168cdclaireho item->item.bidiLevel % 2)) 5445569331642446be05292e3e1f8a51218827168cdclaireho return FALSE; 5455569331642446be05292e3e1f8a51218827168cdclaireho 5465569331642446be05292e3e1f8a51218827168cdclaireho 5475569331642446be05292e3e1f8a51218827168cdclaireho KHDEBUG("after shaping: len=%d", len); 5485569331642446be05292e3e1f8a51218827168cdclaireho for (i = 0; i < len; i++) { 5495569331642446be05292e3e1f8a51218827168cdclaireho item->attributes[i].mark = FALSE; 5505569331642446be05292e3e1f8a51218827168cdclaireho item->attributes[i].clusterStart = FALSE; 5515569331642446be05292e3e1f8a51218827168cdclaireho item->attributes[i].justification = 
0; 5525569331642446be05292e3e1f8a51218827168cdclaireho item->attributes[i].zeroWidth = FALSE; 5535569331642446be05292e3e1f8a51218827168cdclaireho KHDEBUG(" %d: %4x property=%x", i, reordered[i], properties[i]); 5545569331642446be05292e3e1f8a51218827168cdclaireho } 5555569331642446be05292e3e1f8a51218827168cdclaireho 5565569331642446be05292e3e1f8a51218827168cdclaireho /* now we have the syllable in the right order, and can start running it through open type. */ 5575569331642446be05292e3e1f8a51218827168cdclaireho 5585569331642446be05292e3e1f8a51218827168cdclaireho#ifndef NO_OPENTYPE 5595569331642446be05292e3e1f8a51218827168cdclaireho if (openType) { 5605569331642446be05292e3e1f8a51218827168cdclaireho hb_uint32 where[16]; 5615569331642446be05292e3e1f8a51218827168cdclaireho for (i = 0; i < len; ++i) { 5625569331642446be05292e3e1f8a51218827168cdclaireho where[i] = ~(PreSubstProperty 5635569331642446be05292e3e1f8a51218827168cdclaireho | BelowSubstProperty 5645569331642446be05292e3e1f8a51218827168cdclaireho | AboveSubstProperty 5655569331642446be05292e3e1f8a51218827168cdclaireho | PostSubstProperty 5665569331642446be05292e3e1f8a51218827168cdclaireho | CligProperty 5675569331642446be05292e3e1f8a51218827168cdclaireho | PositioningProperties); 5685569331642446be05292e3e1f8a51218827168cdclaireho if (properties[i] == PreForm) 5695569331642446be05292e3e1f8a51218827168cdclaireho where[i] &= ~PreFormProperty; 5705569331642446be05292e3e1f8a51218827168cdclaireho else if (properties[i] == BelowForm) 5715569331642446be05292e3e1f8a51218827168cdclaireho where[i] &= ~BelowFormProperty; 5725569331642446be05292e3e1f8a51218827168cdclaireho else if (properties[i] == AboveForm) 5735569331642446be05292e3e1f8a51218827168cdclaireho where[i] &= ~AboveFormProperty; 5745569331642446be05292e3e1f8a51218827168cdclaireho else if (properties[i] == PostForm) 5755569331642446be05292e3e1f8a51218827168cdclaireho where[i] &= ~PostFormProperty; 5765569331642446be05292e3e1f8a51218827168cdclaireho } 
5775569331642446be05292e3e1f8a51218827168cdclaireho 5785569331642446be05292e3e1f8a51218827168cdclaireho HB_OpenTypeShape(item, where); 5795569331642446be05292e3e1f8a51218827168cdclaireho if (!HB_OpenTypePosition(item, availableGlyphs, /*doLogClusters*/FALSE)) 5805569331642446be05292e3e1f8a51218827168cdclaireho return FALSE; 5815569331642446be05292e3e1f8a51218827168cdclaireho } else 5825569331642446be05292e3e1f8a51218827168cdclaireho#endif 5835569331642446be05292e3e1f8a51218827168cdclaireho { 5845569331642446be05292e3e1f8a51218827168cdclaireho KHDEBUG("Not using openType"); 5855569331642446be05292e3e1f8a51218827168cdclaireho HB_HeuristicPosition(item); 5865569331642446be05292e3e1f8a51218827168cdclaireho } 5875569331642446be05292e3e1f8a51218827168cdclaireho 5885569331642446be05292e3e1f8a51218827168cdclaireho item->attributes[0].clusterStart = TRUE; 5895569331642446be05292e3e1f8a51218827168cdclaireho return TRUE; 5905569331642446be05292e3e1f8a51218827168cdclaireho} 5915569331642446be05292e3e1f8a51218827168cdclaireho 5925569331642446be05292e3e1f8a51218827168cdclairehoHB_Bool HB_KhmerShape(HB_ShaperItem *item) 5935569331642446be05292e3e1f8a51218827168cdclaireho{ 5945569331642446be05292e3e1f8a51218827168cdclaireho HB_Bool openType = FALSE; 5955569331642446be05292e3e1f8a51218827168cdclaireho unsigned short *logClusters = item->log_clusters; 5965569331642446be05292e3e1f8a51218827168cdclaireho int i; 5975569331642446be05292e3e1f8a51218827168cdclaireho 5985569331642446be05292e3e1f8a51218827168cdclaireho HB_ShaperItem syllable = *item; 5995569331642446be05292e3e1f8a51218827168cdclaireho int first_glyph = 0; 6005569331642446be05292e3e1f8a51218827168cdclaireho 6015569331642446be05292e3e1f8a51218827168cdclaireho int sstart = item->item.pos; 6025569331642446be05292e3e1f8a51218827168cdclaireho int end = sstart + item->item.length; 6035569331642446be05292e3e1f8a51218827168cdclaireho 6045569331642446be05292e3e1f8a51218827168cdclaireho assert(item->item.script == HB_Script_Khmer); 
6055569331642446be05292e3e1f8a51218827168cdclaireho 6065569331642446be05292e3e1f8a51218827168cdclaireho#ifndef NO_OPENTYPE 6075569331642446be05292e3e1f8a51218827168cdclaireho openType = HB_SelectScript(item, khmer_features); 6085569331642446be05292e3e1f8a51218827168cdclaireho#endif 6095569331642446be05292e3e1f8a51218827168cdclaireho 6105569331642446be05292e3e1f8a51218827168cdclaireho KHDEBUG("khmer_shape: from %d length %d", item->item.pos, item->item.length); 6115569331642446be05292e3e1f8a51218827168cdclaireho while (sstart < end) { 6125569331642446be05292e3e1f8a51218827168cdclaireho HB_Bool invalid; 6135569331642446be05292e3e1f8a51218827168cdclaireho int send = khmer_nextSyllableBoundary(item->string, sstart, end, &invalid); 6145569331642446be05292e3e1f8a51218827168cdclaireho KHDEBUG("syllable from %d, length %d, invalid=%s", sstart, send-sstart, 6155569331642446be05292e3e1f8a51218827168cdclaireho invalid ? "TRUE" : "FALSE"); 6165569331642446be05292e3e1f8a51218827168cdclaireho syllable.item.pos = sstart; 6175569331642446be05292e3e1f8a51218827168cdclaireho syllable.item.length = send-sstart; 6185569331642446be05292e3e1f8a51218827168cdclaireho syllable.glyphs = item->glyphs + first_glyph; 6195569331642446be05292e3e1f8a51218827168cdclaireho syllable.attributes = item->attributes + first_glyph; 6205569331642446be05292e3e1f8a51218827168cdclaireho syllable.offsets = item->offsets + first_glyph; 6215569331642446be05292e3e1f8a51218827168cdclaireho syllable.advances = item->advances + first_glyph; 6225569331642446be05292e3e1f8a51218827168cdclaireho syllable.num_glyphs = item->num_glyphs - first_glyph; 6235569331642446be05292e3e1f8a51218827168cdclaireho if (!khmer_shape_syllable(openType, &syllable)) { 6245569331642446be05292e3e1f8a51218827168cdclaireho KHDEBUG("syllable shaping failed, syllable requests %d glyphs", syllable.num_glyphs); 6255569331642446be05292e3e1f8a51218827168cdclaireho item->num_glyphs += syllable.num_glyphs; 
6265569331642446be05292e3e1f8a51218827168cdclaireho return FALSE; 6275569331642446be05292e3e1f8a51218827168cdclaireho } 6285569331642446be05292e3e1f8a51218827168cdclaireho /* fix logcluster array */ 6295569331642446be05292e3e1f8a51218827168cdclaireho KHDEBUG("syllable:"); 6305569331642446be05292e3e1f8a51218827168cdclaireho for (i = first_glyph; i < first_glyph + (int)syllable.num_glyphs; ++i) 6315569331642446be05292e3e1f8a51218827168cdclaireho KHDEBUG(" %d -> glyph %x", i, item->glyphs[i]); 6325569331642446be05292e3e1f8a51218827168cdclaireho KHDEBUG(" logclusters:"); 6335569331642446be05292e3e1f8a51218827168cdclaireho for (i = sstart; i < send; ++i) { 6345569331642446be05292e3e1f8a51218827168cdclaireho KHDEBUG(" %d -> glyph %d", i, first_glyph); 6355569331642446be05292e3e1f8a51218827168cdclaireho logClusters[i-item->item.pos] = first_glyph; 6365569331642446be05292e3e1f8a51218827168cdclaireho } 6375569331642446be05292e3e1f8a51218827168cdclaireho sstart = send; 6385569331642446be05292e3e1f8a51218827168cdclaireho first_glyph += syllable.num_glyphs; 6395569331642446be05292e3e1f8a51218827168cdclaireho } 6405569331642446be05292e3e1f8a51218827168cdclaireho item->num_glyphs = first_glyph; 6415569331642446be05292e3e1f8a51218827168cdclaireho return TRUE; 6425569331642446be05292e3e1f8a51218827168cdclaireho} 6435569331642446be05292e3e1f8a51218827168cdclaireho 6445569331642446be05292e3e1f8a51218827168cdclairehovoid HB_KhmerAttributes(HB_Script script, const HB_UChar16 *text, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes) 6455569331642446be05292e3e1f8a51218827168cdclaireho{ 6465569331642446be05292e3e1f8a51218827168cdclaireho int end = from + len; 6475569331642446be05292e3e1f8a51218827168cdclaireho const HB_UChar16 *uc = text + from; 6485569331642446be05292e3e1f8a51218827168cdclaireho hb_uint32 i = 0; 6495569331642446be05292e3e1f8a51218827168cdclaireho HB_UNUSED(script); 6505569331642446be05292e3e1f8a51218827168cdclaireho attributes += from; 
6515569331642446be05292e3e1f8a51218827168cdclaireho while ( i < len ) { 6525569331642446be05292e3e1f8a51218827168cdclaireho HB_Bool invalid; 6535569331642446be05292e3e1f8a51218827168cdclaireho hb_uint32 boundary = khmer_nextSyllableBoundary( text, from+i, end, &invalid ) - from; 6545569331642446be05292e3e1f8a51218827168cdclaireho 6555569331642446be05292e3e1f8a51218827168cdclaireho attributes[i].charStop = TRUE; 6565569331642446be05292e3e1f8a51218827168cdclaireho 6575569331642446be05292e3e1f8a51218827168cdclaireho if ( boundary > len-1 ) boundary = len; 6585569331642446be05292e3e1f8a51218827168cdclaireho i++; 6595569331642446be05292e3e1f8a51218827168cdclaireho while ( i < boundary ) { 6605569331642446be05292e3e1f8a51218827168cdclaireho attributes[i].charStop = FALSE; 6615569331642446be05292e3e1f8a51218827168cdclaireho ++uc; 6625569331642446be05292e3e1f8a51218827168cdclaireho ++i; 6635569331642446be05292e3e1f8a51218827168cdclaireho } 6645569331642446be05292e3e1f8a51218827168cdclaireho assert( i == boundary ); 6655569331642446be05292e3e1f8a51218827168cdclaireho } 6665569331642446be05292e3e1f8a51218827168cdclaireho} 6675569331642446be05292e3e1f8a51218827168cdclaireho 668