1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * (C) Copyright IBM Corp. 1998-2005 - All Rights Reserved
3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This file is a modification of the ICU file IndicReordering.h
5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * by Jens Herden and Javier Sola for Khmer language
6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifndef __KHMERREORDERING_H
10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define __KHMERREORDERING_H
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * \file
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * \internal
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "LETypes.h"
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "OpenTypeTables.h"
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass LEGlyphStorage;
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
// Vocabulary
//     Base ->         A consonant or an independent vowel in its full (not subscript) form. It is the
//                     center of the syllable; it can be surrounded by coeng (subscript) consonants, vowels,
//                     split vowels, signs... but there is only one base in a syllable, and it has to be
//                     coded as the first character of the syllable.
//     split vowel --> A vowel that has two parts placed separately (e.g. before and after the consonant).
//                     The Khmer language has five of them. Khmer split vowels either have one part before the
//                     base and one after the base, or they have a part before the base and a part above the base.
//                     The first part of all Khmer split vowels is the same character, identical to
//                     the glyph of the Khmer dependent vowel SRA EI.
//     coeng -->  Modifier used in Khmer to construct coeng (subscript) consonants.
//                Unlike in Indian languages, the coeng modifies the consonant that follows it,
//                not the one preceding it. Each consonant has two forms, the base form and the subscript form:
//                the base form is the normal one (using the consonant's code point), and the subscript form is
//                displayed when the combination coeng + consonant is encountered.
//     Consonant of type 1 -> A consonant whose subscript form only occupies space under a base consonant.
//     Consonant of type 2 -> Its subscript form occupies space under and before the base (only one, RO).
//     Consonant of type 3 -> Its subscript form occupies space under and after the base (KHO, CHHO, THHO, BA, YO, SA).
//     Consonant shifter -> Khmer has two series of consonants. The same dependent vowel has different sounds
//                          depending on whether it is attached to a consonant of the first series or to a
//                          consonant of the second series. Most consonants have an equivalent in the other
//                          series, but some of them exist only in one series (for example SA). If we want to
//                          use the consonant SA with a vowel sound that corresponds to a vowel accompanying a
//                          consonant of the other series, then we need to use a consonant shifter: TRIISAP
//                          (x17CA) or MUSIKATOAN (x17C9). TRIISAP changes a first series consonant to have a
//                          second series vowel sound, and MUSIKATOAN changes a second series consonant to have
//                          a first series vowel sound.
//                          Consonant shifters are both normally superscript marks, but when they are followed
//                          by a superscript, they change shape and take the form of the subscript dependent
//                          vowel SRA U. If they are in the same syllable as a coeng consonant, Unicode 3.0 says
//                          that they should be typed before the coeng. Unicode 4.0 breaks the standard and says
//                          that they should be placed after the coeng consonant.
//     Dependent vowel ->   In Khmer, dependent vowels can be placed above, below, before or after the base.
//                          Each vowel has its own position. Only one vowel per syllable is allowed.
//     Signs            ->  Khmer has above signs and post signs. Only one above sign and/or one post sign is
//                          allowed in a syllable.
//
//
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustruct KhmerClassTable    // This list must include all types of components that can be used inside a syllable
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    enum CharClassValues  // order is important here! This order must be the same that is found in each horizontal
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                          // line in the statetable for Khmer (file KhmerReordering.cpp).
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        CC_RESERVED             =  0,
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        CC_CONSONANT            =  1, // consonant of type 1 or independent vowel
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        CC_CONSONANT2           =  2, // Consonant of type 2
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        CC_CONSONANT3           =  3, // Consonant of type 3
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        CC_ZERO_WIDTH_NJ_MARK   =  4, // Zero Width non joiner character (0x200C)
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        CC_CONSONANT_SHIFTER    =  5,
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        CC_ROBAT                =  6, // Khmer special diacritic accent -treated differently in state table
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        CC_COENG                =  7, // Subscript consonant combining character
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        CC_DEPENDENT_VOWEL      =  8,
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        CC_SIGN_ABOVE           =  9,
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        CC_SIGN_AFTER           = 10,
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        CC_ZERO_WIDTH_J_MARK    = 11, // Zero width joiner character
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        CC_COUNT                = 12  // This is the number of character classes
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    };
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    enum CharClassFlags
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        CF_CLASS_MASK    = 0x0000FFFF,
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        CF_CONSONANT     = 0x01000000,  // flag to speed up comparing
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        CF_SPLIT_VOWEL   = 0x02000000,  // flag for a split vowel -> the first part is added in front of the syllable
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        CF_DOTTED_CIRCLE = 0x04000000,  // add a dotted circle if a character with this flag is the first in a syllable
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        CF_COENG         = 0x08000000,  // flag to speed up comparing
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        CF_SHIFTER       = 0x10000000,  // flag to speed up comparing
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        CF_ABOVE_VOWEL   = 0x20000000,  // flag to speed up comparing
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // position flags
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        CF_POS_BEFORE    = 0x00080000,
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        CF_POS_BELOW     = 0x00040000,
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        CF_POS_ABOVE     = 0x00020000,
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        CF_POS_AFTER     = 0x00010000,
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        CF_POS_MASK      = 0x000f0000
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    };
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    typedef le_uint32 CharClass;
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    typedef le_int32 ScriptFlags;
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    LEUnicode firstChar;   // for Khmer this will become x1780
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    LEUnicode lastChar;    //  and this x17DF
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const CharClass *classTable;
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    CharClass getCharClass(LEUnicode ch) const;
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    static const KhmerClassTable *getKhmerClassTable();
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass KhmerReordering /* not : public UObject because all methods are static */ {
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic:
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    static le_int32 reorder(const LEUnicode *theChars, le_int32 charCount, le_int32 scriptCode,
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        LEUnicode *outChars, LEGlyphStorage &glyphStorage);
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    static const FeatureMap *getFeatureMap(le_int32 &count);
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprivate:
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // do not instantiate
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    KhmerReordering();
125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    static le_int32 findSyllable(const KhmerClassTable *classTable, const LEUnicode *chars, le_int32 prev, le_int32 charCount);
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
133