1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * (C) Copyright IBM Corp. 1998-2005 - All Rights Reserved 3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This file is a modification of the ICU file IndicReordering.h 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * by Jens Herden and Javier Sola for Khmer language 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifndef __KHMERREORDERING_H 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define __KHMERREORDERING_H 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * \file 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * \internal 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "LETypes.h" 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "OpenTypeTables.h" 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass LEGlyphStorage; 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Vocabulary 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Base -> A consonant or an independent vowel in its full (not subscript) form. It is the 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// center of the syllable, it can be souranded by coeng (subscript) consonants, vowels, 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// split vowels, signs... but there is only one base in a syllable, it has to be coded as 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// the first character of the syllable. 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// split vowel --> vowel that has two parts placed separately (e.g. Before and after the consonant). 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Khmer language has five of them. Khmer split vowels either have one part before the 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// base and one after the base or they have a part before the base and a part above the base. 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// The first part of all Khmer split vowels is the same character, identical to 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// the glyph of Khmer dependent vowel SRA EI 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// coeng --> modifier used in Khmer to construct coeng (subscript) consonants 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Differently than indian languages, the coeng modifies the consonant that follows it, 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// not the one preceding it Each consonant has two forms, the base form and the subscript form 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// the base form is the normal one (using the consonants code-point), the subscript form is 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// displayed when the combination coeng + consonant is encountered. 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Consonant of type 1 -> A consonant which has subscript for that only occupies space under a base consonant 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Consonant of type 2.-> Its subscript form occupies space under and before the base (only one, RO) 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Consonant of Type 3 -> Its subscript form occupies space under and after the base (KHO, CHHO, THHO, BA, YO, SA) 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Consonant shifter -> Khmer has to series of consonants. The same dependent vowel has different sounds 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// if it is attached to a consonant of the first series or a consonant of the second series 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Most consonants have an equivalent in the other series, but some of theme exist only in 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// one series (for example SA). If we want to use the consonant SA with a vowel sound that 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// can only be done with a vowel sound that corresponds to a vowel accompanying a consonant 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// of the other series, then we need to use a consonant shifter: TRIISAP or MUSIKATOAN 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// x17C9 y x17CA. TRIISAP changes a first series consonant to second series sound and 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// MUSIKATOAN a second series consonant to have a first series vowel sound. 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Consonant shifter are both normally supercript marks, but, when they are followed by a 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// superscript, they change shape and take the form of subscript dependent vowel SRA U. 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// If they are in the same syllable as a coeng consonant, Unicode 3.0 says that they 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// should be typed before the coeng. Unicode 4.0 breaks the standard and says that it should 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// be placed after the coeng consonant. 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Dependent vowel -> In khmer dependent vowels can be placed above, below, before or after the base 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Each vowel has its own position. Only one vowel per syllable is allowed. 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Signs -> Khmer has above signs and post signs. Only one above sign and/or one post sign are 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Allowed in a syllable. 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustruct KhmerClassTable // This list must include all types of components that can be used inside a syllable 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru enum CharClassValues // order is important here! This order must be the same that is found in each horizontal 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // line in the statetable for Khmer (file KhmerReordering.cpp). 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CC_RESERVED = 0, 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CC_CONSONANT = 1, // consonant of type 1 or independent vowel 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CC_CONSONANT2 = 2, // Consonant of type 2 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CC_CONSONANT3 = 3, // Consonant of type 3 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CC_ZERO_WIDTH_NJ_MARK = 4, // Zero Width non joiner character (0x200C) 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CC_CONSONANT_SHIFTER = 5, 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CC_ROBAT = 6, // Khmer special diacritic accent -treated differently in state table 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CC_COENG = 7, // Subscript consonant combining character 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CC_DEPENDENT_VOWEL = 8, 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CC_SIGN_ABOVE = 9, 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CC_SIGN_AFTER = 10, 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CC_ZERO_WIDTH_J_MARK = 11, // Zero width joiner character 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CC_COUNT = 12 // This is the number of character classes 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }; 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru enum CharClassFlags 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CF_CLASS_MASK = 0x0000FFFF, 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CF_CONSONANT = 0x01000000, // flag to speed up comparing 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CF_SPLIT_VOWEL = 0x02000000, // flag for a split vowel -> the first part is added in front of the syllable 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CF_DOTTED_CIRCLE = 0x04000000, // add a dotted circle if a character with this flag is the first in a syllable 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CF_COENG = 0x08000000, // flag to speed up comparing 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CF_SHIFTER = 0x10000000, // flag to speed up comparing 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CF_ABOVE_VOWEL = 0x20000000, // flag to speed up comparing 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // position flags 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CF_POS_BEFORE = 0x00080000, 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CF_POS_BELOW = 0x00040000, 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CF_POS_ABOVE = 0x00020000, 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CF_POS_AFTER = 0x00010000, 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CF_POS_MASK = 0x000f0000 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }; 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru typedef le_uint32 CharClass; 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru typedef le_int32 ScriptFlags; 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru LEUnicode firstChar; // for Khmer this will become x1780 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru LEUnicode lastChar; // and this x17DF 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const CharClass *classTable; 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CharClass getCharClass(LEUnicode ch) const; 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru static const KhmerClassTable *getKhmerClassTable(); 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass KhmerReordering /* not : public UObject because all methods are static */ { 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic: 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru static le_int32 reorder(const LEUnicode *theChars, le_int32 charCount, le_int32 scriptCode, 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru LEUnicode *outChars, LEGlyphStorage &glyphStorage); 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru static const FeatureMap *getFeatureMap(le_int32 &count); 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprivate: 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // do not instantiate 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru KhmerReordering(); 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru static le_int32 findSyllable(const KhmerClassTable *classTable, const LEUnicode *chars, le_int32 prev, le_int32 charCount); 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 133