1/*
2 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
3 *
4 * This is part of HarfBuzz, an OpenType Layout engine library.
5 *
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
11 *
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16 * DAMAGE.
17 *
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23 */
24
25#include "harfbuzz-shaper.h"
26#include "harfbuzz-shaper-private.h"
27
28#include <assert.h>
29#include <stdio.h>
30
31/*
32//  Vocabulary
33//      Base ->         A consonant or an independent vowel in its full (not subscript) form. It is the
34//                      center of the syllable, it can be surrounded by coeng (subscript) consonants, vowels,
35//                      split vowels, signs... but there is only one base in a syllable, it has to be coded as
36//                      the first character of the syllable.
37//      split vowel --> vowel that has two parts placed separately (e.g. Before and after the consonant).
38//                      Khmer language has five of them. Khmer split vowels either have one part before the
39//                      base and one after the base or they have a part before the base and a part above the base.
40//                      The first part of all Khmer split vowels is the same character, identical to
41//                      the glyph of Khmer dependent vowel SRA EI
42//      coeng -->  modifier used in Khmer to construct coeng (subscript) consonants
43//                 Differently than indian languages, the coeng modifies the consonant that follows it,
44//                 not the one preceding it  Each consonant has two forms, the base form and the subscript form
45//                 the base form is the normal one (using the consonants code-point), the subscript form is
46//                 displayed when the combination coeng + consonant is encountered.
//      Consonant of type 1 -> A consonant whose subscript form only occupies space under a base consonant
48//      Consonant of type 2.-> Its subscript form occupies space under and before the base (only one, RO)
49//      Consonant of Type 3 -> Its subscript form occupies space under and after the base (KHO, CHHO, THHO, BA, YO, SA)
//      Consonant shifter -> Khmer has two series of consonants. The same dependent vowel has different sounds
//                           if it is attached to a consonant of the first series or a consonant of the second series.
//                           Most consonants have an equivalent in the other series, but some of them exist only in
//                           one series (for example SA). If we want to use the consonant SA with a vowel sound
//                           that corresponds to a vowel accompanying a consonant of the other series,
//                           then we need to use a consonant shifter: TRIISAP or MUSIKATOAN
//                           (x17C9 and x17CA). TRIISAP changes a first series consonant to second series sound and
//                           MUSIKATOAN a second series consonant to have a first series vowel sound.
//                           Consonant shifters are both normally superscript marks, but, when they are followed by a
//                           superscript, they change shape and take the form of subscript dependent vowel SRA U.
//                           If they are in the same syllable as a coeng consonant, Unicode 3.0 says that they
//                           should be typed before the coeng. Unicode 4.0 breaks the standard and says that it should
//                           be placed after the coeng consonant.
63//      Dependent vowel ->   In khmer dependent vowels can be placed above, below, before or after the base
64//                           Each vowel has its own position. Only one vowel per syllable is allowed.
65//      Signs            ->  Khmer has above signs and post signs. Only one above sign and/or one post sign are
66//                           Allowed in a syllable.
67//
68//
69//   order is important here! This order must be the same that is found in each horizontal
70//   line in the statetable for Khmer (see khmerStateTable) .
71*/
/*
 * Character class values.  The numeric value of each class is the column
 * index used for that class in khmerStateTable, so the order must not change.
 * Only the first enumerator is given a value; the rest count up from it.
 */
enum KhmerCharClassValues {
    CC_RESERVED = 0,        /*  0: character that does not combine into a syllable */
    CC_CONSONANT,           /*  1: consonant of type 1 or independent vowel */
    CC_CONSONANT2,          /*  2: consonant of type 2 */
    CC_CONSONANT3,          /*  3: consonant of type 3 */
    CC_ZERO_WIDTH_NJ_MARK,  /*  4: zero width non joiner character (0x200C) */
    CC_CONSONANT_SHIFTER,   /*  5: consonant shifter */
    CC_ROBAT,               /*  6: Khmer special diacritic accent - treated differently in state table */
    CC_COENG,               /*  7: subscript consonant combining character */
    CC_DEPENDENT_VOWEL,     /*  8: dependent vowel */
    CC_SIGN_ABOVE,          /*  9: sign placed above the base */
    CC_SIGN_AFTER,          /* 10: sign placed after the base */
    CC_ZERO_WIDTH_J_MARK,   /* 11: zero width joiner character */
    CC_COUNT                /* 12: this is the number of character classes */
};
87
88
/*
 * Flags ORed on top of a character class value.  The class value lives in the
 * low 16 bits, the position flags in bits 16-19 and the property flags in
 * bits 24-29.
 */
enum KhmerCharClassFlags {
    CF_CLASS_MASK    = 0xFFFF,      /* extracts the KhmerCharClassValues part */

    /* position flags */
    CF_POS_AFTER     = 1 << 16,
    CF_POS_ABOVE     = 1 << 17,
    CF_POS_BELOW     = 1 << 18,
    CF_POS_BEFORE    = 1 << 19,
    CF_POS_MASK      = CF_POS_AFTER | CF_POS_ABOVE | CF_POS_BELOW | CF_POS_BEFORE,

    CF_CONSONANT     = 1 << 24,     /* flag to speed up comparing */
    CF_SPLIT_VOWEL   = 1 << 25,     /* flag for a split vowel -> the first part is added in front of the syllable */
    CF_DOTTED_CIRCLE = 1 << 26,     /* add a dotted circle if a character with this flag is the first in a syllable */
    CF_COENG         = 1 << 27,     /* flag to speed up comparing */
    CF_SHIFTER       = 1 << 28,     /* flag to speed up comparing */
    CF_ABOVE_VOWEL   = 1 << 29      /* flag to speed up comparing */
};
106
107
108/* Characters that get referred to by name */
/* Code points the shaper refers to by name, listed in code point order. */
enum KhmerChar {
    C_RO            = 0x179A,   /* consonant RO */
    C_VOWEL_AA      = 0x17B6,   /* dependent vowel AA */
    C_VOWEL_E       = 0x17C1,   /* dependent vowel E, also the pre part of every split vowel */
    C_SIGN_NIKAHIT  = 0x17C6,   /* sign NIKAHIT */
    C_COENG         = 0x17D2,   /* COENG, the subscript combining mark */
    C_SIGN_ZWNJ     = 0x200C,   /* zero width non joiner */
    C_SIGN_ZWJ      = 0x200D    /* zero width joiner */
};
118
119
120/*
121//  simple classes, they are used in the statetable (in this file) to control the length of a syllable
122//  they are also used to know where a character should be placed (location in reference to the base character)
123//  and also to know if a character, when independently displayed, should be displayed with a dotted-circle to
124//  indicate error in syllable construction
125*/
126enum {
127    _xx = CC_RESERVED,
128    _sa = CC_SIGN_ABOVE | CF_DOTTED_CIRCLE | CF_POS_ABOVE,
129    _sp = CC_SIGN_AFTER | CF_DOTTED_CIRCLE| CF_POS_AFTER,
130    _c1 = CC_CONSONANT | CF_CONSONANT,
131    _c2 = CC_CONSONANT2 | CF_CONSONANT,
132    _c3 = CC_CONSONANT3 | CF_CONSONANT,
133    _rb = CC_ROBAT | CF_POS_ABOVE | CF_DOTTED_CIRCLE,
134    _cs = CC_CONSONANT_SHIFTER | CF_DOTTED_CIRCLE | CF_SHIFTER,
135    _dl = CC_DEPENDENT_VOWEL | CF_POS_BEFORE | CF_DOTTED_CIRCLE,
136    _db = CC_DEPENDENT_VOWEL | CF_POS_BELOW | CF_DOTTED_CIRCLE,
137    _da = CC_DEPENDENT_VOWEL | CF_POS_ABOVE | CF_DOTTED_CIRCLE | CF_ABOVE_VOWEL,
138    _dr = CC_DEPENDENT_VOWEL | CF_POS_AFTER | CF_DOTTED_CIRCLE,
139    _co = CC_COENG | CF_COENG | CF_DOTTED_CIRCLE,
140
141    /* split vowel */
142    _va = _da | CF_SPLIT_VOWEL,
143    _vr = _dr | CF_SPLIT_VOWEL
144};
145
146
/*
//   Character class: a character class value (KhmerCharClassValues, kept in
//   the bits selected by CF_CLASS_MASK) ORed with character class flags
//   (KhmerCharClassFlags).
*/
typedef unsigned long KhmerCharClass;
152
153
154/*
155//  Character class tables
//  _xx character does not combine into syllable, such as numbers, punctuation marks, non-Khmer signs...
157//  _sa Sign placed above the base
158//  _sp Sign placed after the base
159//  _c1 Consonant of type 1 or independent vowel (independent vowels behave as type 1 consonants)
160//  _c2 Consonant of type 2 (only RO)
161//  _c3 Consonant of type 3
162//  _rb Khmer sign robat u17CC. combining mark for subscript consonants
//  _cs Consonant-shifter
164//  _dl Dependent vowel placed before the base (left of the base)
165//  _db Dependent vowel placed below the base
166//  _da Dependent vowel placed above the base
167//  _dr Dependent vowel placed behind the base (right of the base)
168//  _co Khmer combining mark COENG u17D2, combines with the consonant or independent vowel following
169//      it to create a subscript consonant or independent vowel
170//  _va Khmer split vowel in which the first part is before the base and the second one above the base
171//  _vr Khmer split vowel in which the first part is before the base and the second one behind (right of) the base
172*/
/*
 * Class of every code point from KhmerFirstChar to KhmerLastChar, indexed by
 * (code point - KhmerFirstChar): 6 rows of 16 entries, one row per 0x10 code
 * points as noted at the end of each row.
 */
static const KhmerCharClass khmerCharClasses[] = {
    _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _c1, /* 1780 - 178F */
    _c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c2, _c1, _c1, _c1, _c3, _c3, /* 1790 - 179F */
    _c1, _c3, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, /* 17A0 - 17AF */
    _c1, _c1, _c1, _c1, _dr, _dr, _dr, _da, _da, _da, _da, _db, _db, _db, _va, _vr, /* 17B0 - 17BF */
    _vr, _dl, _dl, _dl, _vr, _vr, _sa, _sp, _sp, _cs, _cs, _sa, _rb, _sa, _sa, _sa, /* 17C0 - 17CF */
    _sa, _sa, _co, _sa, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _sa, _xx, _xx  /* 17D0 - 17DF */
};
181
182/* this enum must reflect the range of khmerCharClasses */
enum KhmerCharClassesRange {
    /* code point of khmerCharClasses[0] */
    KhmerFirstChar = 0x1780,
    /* code point of the last khmerCharClasses entry (inclusive) */
    KhmerLastChar  = 0x17DF
};
187
188/*
189//  Below we define how a character in the input string is either in the khmerCharClasses table
190//  (in which case we get its type back), a ZWJ or ZWNJ (two characters that may appear
191//  within the syllable, but are not in the table) we also get their type back, or an unknown object
192//  in which case we get _xx (CC_RESERVED) back
193*/
194static KhmerCharClass getKhmerCharClass(HB_UChar16 uc)
195{
196    if (uc == C_SIGN_ZWJ) {
197        return CC_ZERO_WIDTH_J_MARK;
198    }
199
200    if (uc == C_SIGN_ZWNJ) {
201        return CC_ZERO_WIDTH_NJ_MARK;
202    }
203
204    if (uc < KhmerFirstChar || uc > KhmerLastChar) {
205        return CC_RESERVED;
206    }
207
208    return khmerCharClasses[uc - KhmerFirstChar];
209}
210
211
212/*
213//  The stateTable is used to calculate the end (the length) of a well
214//  formed Khmer Syllable.
215//
216//  Each horizontal line is ordered exactly the same way as the values in KhmerClassTable
217//  CharClassValues. This coincidence of values allows the follow up of the table.
218//
219//  Each line corresponds to a state, which does not necessarily need to be a type
//  of component... for example, state 2 is a base, which is always the first character
//  in the syllable, but the state could be produced by a consonant of any type when
//  it is the first character that is analysed (in ground state).
223//
224//  Differentiating 3 types of consonants is necessary in order to
225//  forbid the use of certain combinations, such as having a second
226//  coeng after a coeng RO,
227//  The inexistent possibility of having a type 3 after another type 3 is permitted,
228//  eliminating it would very much complicate the table, and it does not create typing
229//  problems, as the case above.
230//
231//  The table is quite complex, in order to limit the number of coeng consonants
232//  to 2 (by means of the table).
233//
234//  There a peculiarity, as far as Unicode is concerned:
235//  - The consonant-shifter is considered in two possible different
236//    locations, the one considered in Unicode 3.0 and the one considered in
237//    Unicode 4.0. (there is a backwards compatibility problem in this standard).
238//
239//
240//  xx    independent character, such as a number, punctuation sign or non-khmer char
241//
242//  c1    Khmer consonant of type 1 or an independent vowel
//        that is, a letter in which the subscript form is only under the
//        base, not taking any space to the right or to the left
245//
246//  c2    Khmer consonant of type 2, the coeng form takes space under
247//        and to the left of the base (only RO is of this type)
248//
249//  c3    Khmer consonant of type 3. Its subscript form takes space under
250//        and to the right of the base.
251//
252//  cs    Khmer consonant shifter
253//
254//  rb    Khmer robat
255//
256//  co    coeng character (u17D2)
257//
258//  dv    dependent vowel (including split vowels, they are treated in the same way).
259//        even if dv is not defined above, the component that is really tested for is
260//        KhmerClassTable::CC_DEPENDENT_VOWEL, which is common to all dependent vowels
261//
262//  zwj   Zero Width joiner
263//
264//  zwnj  Zero width non joiner
265//
266//  sa    above sign
267//
268//  sp    post sign
269//
270//  there are lines with equal content but for an easier understanding
271//  (and maybe change in the future) we did not join them
272*/
/*
 * Syllable state machine: khmerStateTable[state][class] is the next state,
 * where class is a KhmerCharClassValues value (one column per class, CC_COUNT
 * columns).  A negative entry means the character does not continue the
 * current syllable.  Row 0 (ground state) has no negative entry, so the first
 * character examined is always accepted.
 */
static const signed char khmerStateTable[][CC_COUNT] =
{
    /* xx  c1  c2  c3 zwnj cs  rb  co  dv  sa  sp zwj */
    { 1,  2,  2,  2,  1,  1,  1,  6,  1,  1,  1,  2}, /*  0 - ground state */
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, /*  1 - exit state (or sign to the right of the syllable) */
    {-1, -1, -1, -1,  3,  4,  5,  6, 16, 17,  1, -1}, /*  2 - Base consonant */
    {-1, -1, -1, -1, -1,  4, -1, -1, 16, -1, -1, -1}, /*  3 - First ZWNJ before a register shifter It can only be followed by a shifter or a vowel */
    {-1, -1, -1, -1, 15, -1, -1,  6, 16, 17,  1, 14}, /*  4 - First register shifter */
    {-1, -1, -1, -1, -1, -1, -1, -1, 20, -1,  1, -1}, /*  5 - Robat */
    {-1,  7,  8,  9, -1, -1, -1, -1, -1, -1, -1, -1}, /*  6 - First Coeng */
    {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17,  1, 14}, /*  7 - First consonant of type 1 after coeng */
    {-1, -1, -1, -1, 12, 13, -1, -1, 16, 17,  1, 14}, /*  8 - First consonant of type 2 after coeng */
    {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17,  1, 14}, /*  9 - First consonant of type 3 after coeng */
    {-1, 11, 11, 11, -1, -1, -1, -1, -1, -1, -1, -1}, /* 10 - Second Coeng (no register shifter before) */
    {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17,  1, 14}, /* 11 - Second coeng consonant (or ind. vowel) no register shifter before */
    {-1, -1, -1, -1, -1, 13, -1, -1, 16, -1, -1, -1}, /* 12 - Second ZWNJ before a register shifter */
    {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17,  1, 14}, /* 13 - Second register shifter */
    {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1}, /* 14 - ZWJ before vowel */
    {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1}, /* 15 - ZWNJ before vowel */
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, 17,  1, 18}, /* 16 - dependent vowel */
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1, 18}, /* 17 - sign above */
    {-1, -1, -1, -1, -1, -1, -1, 19, -1, -1, -1, -1}, /* 18 - ZWJ after vowel */
    {-1,  1, -1,  1, -1, -1, -1, -1, -1, -1, -1, -1}, /* 19 - Third coeng */
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1, -1}, /* 20 - dependent vowel after a Robat */
};
298
299
/*
 * Debug tracing.  Uncomment the define below to route KHDEBUG(...) to qDebug.
 * Otherwise the call is still compiled (so its arguments stay type-checked)
 * but never executed.
 *
 * The disabled form is "if (1) {} else printf" rather than "if (0) printf":
 * the inner "if" already owns an "else", so a KHDEBUG(...) used as the
 * unbraced body of an outer if/else cannot capture the outer "else".
 */
/*  #define KHMER_DEBUG */
#ifdef KHMER_DEBUG
#define KHDEBUG qDebug
#else
#define KHDEBUG if (1) {} else printf
#endif
306
307/*
308//  Given an input string of characters and a location in which to start looking
309//  calculate, using the state table, which one is the last character of the syllable
310//  that starts in the starting position.
311*/
312static int khmer_nextSyllableBoundary(const HB_UChar16 *s, int start, int end, HB_Bool *invalid)
313{
314    const HB_UChar16 *uc = s + start;
315    int state = 0;
316    int pos = start;
317    *invalid = FALSE;
318
319    while (pos < end) {
320        KhmerCharClass charClass = getKhmerCharClass(*uc);
321        if (pos == start) {
322            *invalid = (charClass > 0) && ! (charClass & CF_CONSONANT);
323        }
324        state = khmerStateTable[state][charClass & CF_CLASS_MASK];
325
326        KHDEBUG("state[%d]=%d class=%8lx (uc=%4x)", pos - start, state,
327                charClass, *uc );
328
329        if (state < 0) {
330            break;
331        }
332        ++uc;
333        ++pos;
334    }
335    return pos;
336}
337
#ifndef NO_OPENTYPE
/*
 * OpenType feature tags paired with the property constant associated with
 * each one; the list ends with a { 0, 0 } entry.  The table is handed to
 * HB_SelectScript by HB_KhmerShape.
 * NOTE(review): the exact meaning of the second field is defined by
 * HB_OpenTypeFeature in the shaper headers - confirm there.
 */
static const HB_OpenTypeFeature khmer_features[] = {
    { HB_MAKE_TAG( 'p', 'r', 'e', 'f' ), PreFormProperty },
    { HB_MAKE_TAG( 'b', 'l', 'w', 'f' ), BelowFormProperty },
    { HB_MAKE_TAG( 'a', 'b', 'v', 'f' ), AboveFormProperty },
    { HB_MAKE_TAG( 'p', 's', 't', 'f' ), PostFormProperty },
    { HB_MAKE_TAG( 'p', 'r', 'e', 's' ), PreSubstProperty },
    { HB_MAKE_TAG( 'b', 'l', 'w', 's' ), BelowSubstProperty },
    { HB_MAKE_TAG( 'a', 'b', 'v', 's' ), AboveSubstProperty },
    { HB_MAKE_TAG( 'p', 's', 't', 's' ), PostSubstProperty },
    { HB_MAKE_TAG( 'c', 'l', 'i', 'g' ), CligProperty },
    { 0, 0 }
};
#endif
352
353
354static HB_Bool khmer_shape_syllable(HB_Bool openType, HB_ShaperItem *item)
355{
356/*    KHDEBUG("syllable from %d len %d, str='%s'", item->from, item->length,
357  	    item->string->mid(item->from, item->length).toUtf8().data()); */
358
359    int len = 0;
360    int syllableEnd = item->item.pos + item->item.length;
361    unsigned short reordered[16];
362    unsigned char properties[16];
363    enum {
364	AboveForm = 0x01,
365	PreForm = 0x02,
366	PostForm = 0x04,
367	BelowForm = 0x08
368    };
369#ifndef NO_OPENTYPE
370    const int availableGlyphs = item->num_glyphs;
371#endif
372    int coengRo;
373    int i;
374
375    /* according to the specs this is the max length one can get
376       ### the real value should be smaller */
377    assert(item->item.length < 13);
378
379    memset(properties, 0, 16*sizeof(unsigned char));
380
381#ifdef KHMER_DEBUG
382    qDebug("original:");
383    for (int i = from; i < syllableEnd; i++) {
384        qDebug("    %d: %4x", i, string[i]);
385    }
386#endif
387
388    /*
389    // write a pre vowel or the pre part of a split vowel first
390    // and look out for coeng + ro. RO is the only vowel of type 2, and
391    // therefore the only one that requires saving space before the base.
392    */
393    coengRo = -1;  /* There is no Coeng Ro, if found this value will change */
394    for (i = item->item.pos; i < syllableEnd; i += 1) {
395        KhmerCharClass charClass = getKhmerCharClass(item->string[i]);
396
397        /* if a split vowel, write the pre part. In Khmer the pre part
398           is the same for all split vowels, same glyph as pre vowel C_VOWEL_E */
399        if (charClass & CF_SPLIT_VOWEL) {
400            reordered[len] = C_VOWEL_E;
401            properties[len] = PreForm;
402            ++len;
403            break; /* there can be only one vowel */
404        }
405        /* if a vowel with pos before write it out */
406        if (charClass & CF_POS_BEFORE) {
407            reordered[len] = item->string[i];
408            properties[len] = PreForm;
409            ++len;
410            break; /* there can be only one vowel */
411        }
412        /* look for coeng + ro and remember position
413           works because coeng + ro is always in front of a vowel (if there is a vowel)
414           and because CC_CONSONANT2 is enough to identify it, as it is the only consonant
415           with this flag */
416        if ( (charClass & CF_COENG) && (i + 1 < syllableEnd) &&
417              ( (getKhmerCharClass(item->string[i+1]) & CF_CLASS_MASK) == CC_CONSONANT2) ) {
418            coengRo = i;
419        }
420    }
421
422    /* write coeng + ro if found */
423    if (coengRo > -1) {
424        reordered[len] = C_COENG;
425        properties[len] = PreForm;
426        ++len;
427        reordered[len] = C_RO;
428        properties[len] = PreForm;
429        ++len;
430    }
431
432    /*
433       shall we add a dotted circle?
434       If in the position in which the base should be (first char in the string) there is
435       a character that has the Dotted circle flag (a character that cannot be a base)
436       then write a dotted circle */
437    if (getKhmerCharClass(item->string[item->item.pos]) & CF_DOTTED_CIRCLE) {
438        reordered[len] = C_DOTTED_CIRCLE;
439        ++len;
440    }
441
442    /* copy what is left to the output, skipping before vowels and
443       coeng Ro if they are present */
444    for (i = item->item.pos; i < syllableEnd; i += 1) {
445        HB_UChar16 uc = item->string[i];
446        KhmerCharClass charClass = getKhmerCharClass(uc);
447
448        /* skip a before vowel, it was already processed */
449        if (charClass & CF_POS_BEFORE) {
450            continue;
451        }
452
453        /* skip coeng + ro, it was already processed */
454        if (i == coengRo) {
455            i += 1;
456            continue;
457        }
458
459        switch (charClass & CF_POS_MASK)
460        {
461            case CF_POS_ABOVE :
462                reordered[len] = uc;
463                properties[len] = AboveForm;
464                ++len;
465                break;
466
467            case CF_POS_AFTER :
468                reordered[len] = uc;
469                properties[len] = PostForm;
470                ++len;
471                break;
472
473            case CF_POS_BELOW :
474                reordered[len] = uc;
475                properties[len] = BelowForm;
476                ++len;
477                break;
478
479            default:
480                /* assign the correct flags to a coeng consonant
481                   Consonants of type 3 are taged as Post forms and those type 1 as below forms */
482                if ( (charClass & CF_COENG) && i + 1 < syllableEnd ) {
483                    unsigned char property = (getKhmerCharClass(item->string[i+1]) & CF_CLASS_MASK) == CC_CONSONANT3 ?
484                                              PostForm : BelowForm;
485                    reordered[len] = uc;
486                    properties[len] = property;
487                    ++len;
488                    i += 1;
489                    reordered[len] = item->string[i];
490                    properties[len] = property;
491                    ++len;
492                    break;
493                }
494
495                /* if a shifter is followed by an above vowel change the shifter to below form,
496                   an above vowel can have two possible positions i + 1 or i + 3
497                   (position i+1 corresponds to unicode 3, position i+3 to Unicode 4)
498                   and there is an extra rule for C_VOWEL_AA + C_SIGN_NIKAHIT also for two
499                   different positions, right after the shifter or after a vowel (Unicode 4) */
500                if ( (charClass & CF_SHIFTER) && (i + 1 < syllableEnd) ) {
501                    if (getKhmerCharClass(item->string[i+1]) & CF_ABOVE_VOWEL ) {
502                        reordered[len] = uc;
503                        properties[len] = BelowForm;
504                        ++len;
505                        break;
506                    }
507                    if (i + 2 < syllableEnd &&
508                        (item->string[i+1] == C_VOWEL_AA) &&
509                        (item->string[i+2] == C_SIGN_NIKAHIT) )
510                    {
511                        reordered[len] = uc;
512                        properties[len] = BelowForm;
513                        ++len;
514                        break;
515                    }
516                    if (i + 3 < syllableEnd && (getKhmerCharClass(item->string[i+3]) & CF_ABOVE_VOWEL) ) {
517                        reordered[len] = uc;
518                        properties[len] = BelowForm;
519                        ++len;
520                        break;
521                    }
522                    if (i + 4 < syllableEnd &&
523                        (item->string[i+3] == C_VOWEL_AA) &&
524                        (item->string[i+4] == C_SIGN_NIKAHIT) )
525                    {
526                        reordered[len] = uc;
527                        properties[len] = BelowForm;
528                        ++len;
529                        break;
530                    }
531                }
532
533                /* default - any other characters */
534                reordered[len] = uc;
535                ++len;
536                break;
537        } /* switch */
538    } /* for */
539
540    if (!item->font->klass->convertStringToGlyphIndices(item->font,
541                                                        reordered, len,
542                                                        item->glyphs, &item->num_glyphs,
543                                                        item->item.bidiLevel % 2))
544        return FALSE;
545
546
547    KHDEBUG("after shaping: len=%d", len);
548    for (i = 0; i < len; i++) {
549	item->attributes[i].mark = FALSE;
550	item->attributes[i].clusterStart = FALSE;
551	item->attributes[i].justification = 0;
552	item->attributes[i].zeroWidth = FALSE;
553	KHDEBUG("    %d: %4x property=%x", i, reordered[i], properties[i]);
554    }
555
556    /* now we have the syllable in the right order, and can start running it through open type. */
557
558#ifndef NO_OPENTYPE
559    if (openType) {
560 	hb_uint32 where[16];
561        for (i = 0; i < len; ++i) {
562            where[i] = ~(PreSubstProperty
563                         | BelowSubstProperty
564                         | AboveSubstProperty
565                         | PostSubstProperty
566                         | CligProperty
567                         | PositioningProperties);
568            if (properties[i] == PreForm)
569                where[i] &= ~PreFormProperty;
570            else if (properties[i] == BelowForm)
571                where[i] &= ~BelowFormProperty;
572            else if (properties[i] == AboveForm)
573                where[i] &= ~AboveFormProperty;
574            else if (properties[i] == PostForm)
575                where[i] &= ~PostFormProperty;
576        }
577
578        HB_OpenTypeShape(item, where);
579        if (!HB_OpenTypePosition(item, availableGlyphs, /*doLogClusters*/FALSE))
580            return FALSE;
581    } else
582#endif
583    {
584	KHDEBUG("Not using openType");
585        HB_HeuristicPosition(item);
586    }
587
588    item->attributes[0].clusterStart = TRUE;
589    return TRUE;
590}
591
592HB_Bool HB_KhmerShape(HB_ShaperItem *item)
593{
594    HB_Bool openType = FALSE;
595    unsigned short *logClusters = item->log_clusters;
596    int i;
597
598    HB_ShaperItem syllable = *item;
599    int first_glyph = 0;
600
601    int sstart = item->item.pos;
602    int end = sstart + item->item.length;
603
604    assert(item->item.script == HB_Script_Khmer);
605
606#ifndef NO_OPENTYPE
607    openType = HB_SelectScript(item, khmer_features);
608#endif
609
610    KHDEBUG("khmer_shape: from %d length %d", item->item.pos, item->item.length);
611    while (sstart < end) {
612        HB_Bool invalid;
613        int send = khmer_nextSyllableBoundary(item->string, sstart, end, &invalid);
614        KHDEBUG("syllable from %d, length %d, invalid=%s", sstart, send-sstart,
615               invalid ? "TRUE" : "FALSE");
616        syllable.item.pos = sstart;
617        syllable.item.length = send-sstart;
618        syllable.glyphs = item->glyphs + first_glyph;
619        syllable.attributes = item->attributes + first_glyph;
620        syllable.offsets = item->offsets + first_glyph;
621        syllable.advances = item->advances + first_glyph;
622        syllable.num_glyphs = item->num_glyphs - first_glyph;
623        if (!khmer_shape_syllable(openType, &syllable)) {
624            KHDEBUG("syllable shaping failed, syllable requests %d glyphs", syllable.num_glyphs);
625            item->num_glyphs += syllable.num_glyphs;
626            return FALSE;
627        }
628        /* fix logcluster array */
629        KHDEBUG("syllable:");
630        for (i = first_glyph; i < first_glyph + (int)syllable.num_glyphs; ++i)
631            KHDEBUG("        %d -> glyph %x", i, item->glyphs[i]);
632        KHDEBUG("    logclusters:");
633        for (i = sstart; i < send; ++i) {
634            KHDEBUG("        %d -> glyph %d", i, first_glyph);
635            logClusters[i-item->item.pos] = first_glyph;
636        }
637        sstart = send;
638        first_glyph += syllable.num_glyphs;
639    }
640    item->num_glyphs = first_glyph;
641    return TRUE;
642}
643
644void HB_KhmerAttributes(HB_Script script, const HB_UChar16 *text, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes)
645{
646    int end = from + len;
647    const HB_UChar16 *uc = text + from;
648    hb_uint32 i = 0;
649    HB_UNUSED(script);
650    attributes += from;
651    while ( i < len ) {
652	HB_Bool invalid;
653	hb_uint32 boundary = khmer_nextSyllableBoundary( text, from+i, end, &invalid ) - from;
654
655	attributes[i].charStop = TRUE;
656
657	if ( boundary > len-1 ) boundary = len;
658	i++;
659	while ( i < boundary ) {
660	    attributes[i].charStop = FALSE;
661	    ++uc;
662	    ++i;
663	}
664	assert( i == boundary );
665    }
666}
667
668