/*
 *
 * (C) Copyright IBM Corp. 1998-2007 - All Rights Reserved
 *
 * This file is a modification of the ICU file IndicReordering.cpp
 * by Jens Herden and Javier Sola for Khmer language
 *
 */

#include "LETypes.h"
#include "OpenTypeTables.h"
#include "KhmerReordering.h"
#include "LEGlyphStorage.h"


U_NAMESPACE_BEGIN

// Characters that get referred to by name...
19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruenum 20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru C_SIGN_ZWNJ = 0x200C, 22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru C_SIGN_ZWJ = 0x200D, 23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru C_DOTTED_CIRCLE = 0x25CC, 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru C_RO = 0x179A, 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru C_VOWEL_AA = 0x17B6, 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru C_SIGN_NIKAHIT = 0x17C6, 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru C_VOWEL_E = 0x17C1, 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru C_COENG = 0x17D2 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruenum 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // simple classes, they are used in the statetable (in this file) to control the length of a syllable 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // they are also used to know where a character should be placed (location in reference to the base character) 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // and also to know if a character, when independtly displayed, should be displayed with a dotted-circle to 37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // indicate error in syllable construction 38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _xx = KhmerClassTable::CC_RESERVED, 39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _sa = KhmerClassTable::CC_SIGN_ABOVE | KhmerClassTable::CF_DOTTED_CIRCLE | KhmerClassTable::CF_POS_ABOVE, 
40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _sp = KhmerClassTable::CC_SIGN_AFTER | KhmerClassTable::CF_DOTTED_CIRCLE| KhmerClassTable::CF_POS_AFTER, 41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _c1 = KhmerClassTable::CC_CONSONANT | KhmerClassTable::CF_CONSONANT, 42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _c2 = KhmerClassTable::CC_CONSONANT2 | KhmerClassTable::CF_CONSONANT, 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _c3 = KhmerClassTable::CC_CONSONANT3 | KhmerClassTable::CF_CONSONANT, 44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _rb = KhmerClassTable::CC_ROBAT | KhmerClassTable::CF_POS_ABOVE | KhmerClassTable::CF_DOTTED_CIRCLE, 45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _cs = KhmerClassTable::CC_CONSONANT_SHIFTER | KhmerClassTable::CF_DOTTED_CIRCLE | KhmerClassTable::CF_SHIFTER, 46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _dl = KhmerClassTable::CC_DEPENDENT_VOWEL | KhmerClassTable::CF_POS_BEFORE | KhmerClassTable::CF_DOTTED_CIRCLE, 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _db = KhmerClassTable::CC_DEPENDENT_VOWEL | KhmerClassTable::CF_POS_BELOW | KhmerClassTable::CF_DOTTED_CIRCLE, 48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _da = KhmerClassTable::CC_DEPENDENT_VOWEL | KhmerClassTable::CF_POS_ABOVE | KhmerClassTable::CF_DOTTED_CIRCLE | KhmerClassTable::CF_ABOVE_VOWEL, 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _dr = KhmerClassTable::CC_DEPENDENT_VOWEL | KhmerClassTable::CF_POS_AFTER | KhmerClassTable::CF_DOTTED_CIRCLE, 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _co = KhmerClassTable::CC_COENG | KhmerClassTable::CF_COENG | KhmerClassTable::CF_DOTTED_CIRCLE, 51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // split vowel 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _va = _da | 
KhmerClassTable::CF_SPLIT_VOWEL, 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _vr = _dr | KhmerClassTable::CF_SPLIT_VOWEL 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Character class tables 59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// _xx character does not combine into syllable, such as numbers, puntuation marks, non-Khmer signs... 60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// _sa Sign placed above the base 61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// _sp Sign placed after the base 62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// _c1 Consonant of type 1 or independent vowel (independent vowels behave as type 1 consonants) 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// _c2 Consonant of type 2 (only RO) 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// _c3 Consonant of type 3 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// _rb Khmer sign robat u17CC. 
combining mark for subscript consonants 66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// _cd Consonant-shifter 67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// _dl Dependent vowel placed before the base (left of the base) 68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// _db Dependent vowel placed below the base 69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// _da Dependent vowel placed above the base 70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// _dr Dependent vowel placed behind the base (right of the base) 71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// _co Khmer combining mark COENG u17D2, combines with the consonant or independent vowel following 72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// it to create a subscript consonant or independent vowel 73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// _va Khmer split vowel in wich the first part is before the base and the second one above the base 74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// _vr Khmer split vowel in wich the first part is before the base and the second one behind (right of) the base 75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const KhmerClassTable::CharClass khmerCharClasses[] = 77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _c1, // 1780 - 178F 79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c2, _c1, _c1, _c1, _c3, _c3, // 1790 - 179F 80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _c1, _c3, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, // 17A0 - 17AF 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _c1, _c1, 
_c1, _c1, _dr, _dr, _dr, _da, _da, _da, _da, _db, _db, _db, _va, _vr, // 17B0 - 17BF 82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _vr, _dl, _dl, _dl, _vr, _vr, _sa, _sp, _sp, _cs, _cs, _sa, _rb, _sa, _sa, _sa, // 17C0 - 17CF 83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _sa, _sa, _co, _sa, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _sa, _xx, _xx, // 17D0 - 17DF 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Khmer Class Tables 89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// The range of characters defined in the above table is defined here. 
FOr Khmer 1780 to 17DF 93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Even if the Khmer range is bigger, all other characters are not combinable, and therefore treated 94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// as _xx 95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const KhmerClassTable khmerClassTable = {0x1780, 0x17df, khmerCharClasses}; 96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Below we define how a character in the input string is either in the khmerCharClasses table 99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// (in which case we get its type back), a ZWJ or ZWNJ (two characters that may appear 100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// within the syllable, but are not in the table) we also get their type back, or an unknown object 101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// in which case we get _xx (CC_RESERVED) back 102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruKhmerClassTable::CharClass KhmerClassTable::getCharClass(LEUnicode ch) const 103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (ch == C_SIGN_ZWJ) { 106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return CC_ZERO_WIDTH_J_MARK; 107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (ch == C_SIGN_ZWNJ) { 110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return CC_ZERO_WIDTH_NJ_MARK; 111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (ch < firstChar || ch > lastChar) { 114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return CC_RESERVED; 115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return classTable[ch - firstChar]; 118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruconst KhmerClassTable *KhmerClassTable::getKhmerClassTable() 121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return &khmerClassTable; 123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass KhmerReorderingOutput : public UMemory { 128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruprivate: 129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru le_int32 fSyllableCount; 130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru le_int32 fOutIndex; 131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru LEUnicode *fOutChars; 132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru LEGlyphStorage &fGlyphStorage; 134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querupublic: 137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru KhmerReorderingOutput(LEUnicode *outChars, LEGlyphStorage &glyphStorage) 
138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru : fSyllableCount(0), fOutIndex(0), fOutChars(outChars), fGlyphStorage(glyphStorage) 139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // nothing else to do... 141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ~KhmerReorderingOutput() 144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // nothing to do here... 146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru void reset() 149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSyllableCount += 1; 151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru void writeChar(LEUnicode ch, le_uint32 charIndex, FeatureMask charFeatures) 154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru LEErrorCode success = LE_NO_ERROR; 156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fOutChars[fOutIndex] = ch; 158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fGlyphStorage.setCharIndex(fOutIndex, charIndex, success); 160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fGlyphStorage.setAuxData(fOutIndex, charFeatures | (fSyllableCount & LE_GLYPH_GROUP_MASK), success); 
161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fOutIndex += 1; 163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru le_int32 getOutputIndex() 166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return fOutIndex; 168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define blwfFeatureTag LE_BLWF_FEATURE_TAG 173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define pstfFeatureTag LE_PSTF_FEATURE_TAG 174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define presFeatureTag LE_PRES_FEATURE_TAG 175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define blwsFeatureTag LE_BLWS_FEATURE_TAG 176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define abvsFeatureTag LE_ABVS_FEATURE_TAG 177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define pstsFeatureTag LE_PSTS_FEATURE_TAG 178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define blwmFeatureTag LE_BLWM_FEATURE_TAG 180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define abvmFeatureTag LE_ABVM_FEATURE_TAG 181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define distFeatureTag LE_DIST_FEATURE_TAG 182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define prefFeatureTag LE_PREF_FEATURE_TAG 184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru#define abvfFeatureTag LE_ABVF_FEATURE_TAG 185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define cligFeatureTag LE_CLIG_FEATURE_TAG 186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define mkmkFeatureTag LE_MKMK_FEATURE_TAG 187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define prefFeatureMask 0x80000000UL 189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define blwfFeatureMask 0x40000000UL 190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define abvfFeatureMask 0x20000000UL 191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define pstfFeatureMask 0x10000000UL 192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define presFeatureMask 0x08000000UL 193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define blwsFeatureMask 0x04000000UL 194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define abvsFeatureMask 0x02000000UL 195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define pstsFeatureMask 0x01000000UL 196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define cligFeatureMask 0x00800000UL 197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define distFeatureMask 0x00400000UL 198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define blwmFeatureMask 0x00200000UL 199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define abvmFeatureMask 0x00100000UL 200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define mkmkFeatureMask 0x00080000UL 201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define tagPref (prefFeatureMask | presFeatureMask | cligFeatureMask | distFeatureMask) 203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define tagAbvf (abvfFeatureMask | abvsFeatureMask | cligFeatureMask | distFeatureMask | abvmFeatureMask | 
mkmkFeatureMask) 204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define tagPstf (blwfFeatureMask | blwsFeatureMask | prefFeatureMask | presFeatureMask | pstfFeatureMask | pstsFeatureMask | cligFeatureMask | distFeatureMask | blwmFeatureMask) 205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define tagBlwf (blwfFeatureMask | blwsFeatureMask | cligFeatureMask | distFeatureMask | blwmFeatureMask | mkmkFeatureMask) 206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define tagDefault (prefFeatureMask | blwfFeatureMask | presFeatureMask | blwsFeatureMask | cligFeatureMask | distFeatureMask | abvmFeatureMask | blwmFeatureMask | mkmkFeatureMask) 207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// These are in the order in which the features need to be applied 211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// for correct processing 212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const FeatureMap featureMap[] = 213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Shaping features 215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru {prefFeatureTag, prefFeatureMask}, 216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru {blwfFeatureTag, blwfFeatureMask}, 217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru {abvfFeatureTag, abvfFeatureMask}, 218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru {pstfFeatureTag, pstfFeatureMask}, 219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru {presFeatureTag, presFeatureMask}, 220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru {blwsFeatureTag, blwsFeatureMask}, 221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
{abvsFeatureTag, abvsFeatureMask}, 222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru {pstsFeatureTag, pstsFeatureMask}, 223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru {cligFeatureTag, cligFeatureMask}, 224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Positioning features 226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru {distFeatureTag, distFeatureMask}, 227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru {blwmFeatureTag, blwmFeatureMask}, 228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru {abvmFeatureTag, abvmFeatureMask}, 229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru {mkmkFeatureTag, mkmkFeatureMask}, 230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const le_int32 featureMapCount = LE_ARRAY_SIZE(featureMap); 233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// The stateTable is used to calculate the end (the length) of a well 235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// formed Khmer Syllable. 236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Each horizontal line is ordered exactly the same way as the values in KhmerClassTable 238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// CharClassValues in KhmerReordering.h This coincidence of values allows the 239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// follow up of the table. 
//
// Each line corresponds to a state, which does not necessarily need to be a type
// of component... for example, state 2 is a base, which is always a first character
// in the syllable, but the state could be produced by a consonant of any type when
// it is the first character that is analysed (in ground state).
//
// Differentiating 3 types of consonants is necessary in order to
// forbid the use of certain combinations, such as having a second
// coeng after a coeng RO,
// The inexistent possibility of having a type 3 after another type 3 is permitted,
// eliminating it would very much complicate the table, and it does not create typing
// problems, as the case above.
//
// The table is quite complex, in order to limit the number of coeng consonants
// to 2 (by means of the table).
//
// There is a peculiarity, as far as Unicode is concerned:
// - The consonant-shifter is considered in two possible different
// locations, the one considered in Unicode 3.0 and the one considered in
// Unicode 4.0. (there is a backwards compatibility problem in this standard).


// xx    independent character, such as a number, punctuation sign or non-khmer char
//
// c1    Khmer consonant of type 1 or an independent vowel
//       that is, a letter in which the subscript form is only under the
//       base, not taking any space to the right or to the left
//
// c2    Khmer consonant of type 2, the coeng form takes space under
//       and to the left of the base (only RO is of this type)
//
// c3    Khmer consonant of type 3. Its subscript form takes space under
//       and to the right of the base.
273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// cs Khmer consonant shifter 275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// rb Khmer robat 277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// co coeng character (u17D2) 279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// dv dependent vowel (including split vowels, they are treated in the same way). 281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// even if dv is not defined above, the component that is really tested for is 282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// KhmerClassTable::CC_DEPENDENT_VOWEL, which is common to all dependent vowels 283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// zwj Zero Width joiner 285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// zwnj Zero width non joiner 287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// sa above sign 289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// sp post sign 291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// there are lines with equal content but for an easier understanding 293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// (and maybe change in the future) we did not join them 294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic 
const le_int8 khmerStateTable[][KhmerClassTable::CC_COUNT] = 296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// xx c1 c2 c3 zwnj cs rb co dv sa sp zwj 299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1, 2, 2, 2, 1, 1, 1, 6, 1, 1, 1, 2}, // 0 - ground state 300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 1 - exit state (or sign to the right of the syllable) 301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru {-1, -1, -1, -1, 3, 4, 5, 6, 16, 17, 1, -1}, // 2 - Base consonant 302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru {-1, -1, -1, -1, -1, 4, -1, -1, 16, -1, -1, -1}, // 3 - First ZWNJ before a register shifter 303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // It can only be followed by a shifter or a vowel 304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru {-1, -1, -1, -1, 15, -1, -1, 6, 16, 17, 1, 14}, // 4 - First register shifter 305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru {-1, -1, -1, -1, -1, -1, -1, -1, 20, -1, 1, -1}, // 5 - Robat 306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru {-1, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, -1}, // 6 - First Coeng 307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17, 1, 14}, // 7 - First consonant of type 1 after coeng 308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru {-1, -1, -1, -1, 12, 13, -1, -1, 16, 17, 1, 14}, // 8 - First consonant of type 2 after coeng 309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17, 1, 14}, // 9 - First consonant or type 3 after ceong 310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru {-1, 11, 11, 11, -1, -1, -1, -1, -1, -1, -1, -1}, // 10 - Second Coeng (no register 
shifter before) 311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17, 1, 14}, // 11 - Second coeng consonant (or ind. vowel) no register shifter before 312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru {-1, -1, -1, -1, -1, 13, -1, -1, 16, -1, -1, -1}, // 12 - Second ZWNJ before a register shifter 313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17, 1, 14}, // 13 - Second register shifter 314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1}, // 14 - ZWJ before vowel 315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1}, // 15 - ZWNJ before vowel 316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru {-1, -1, -1, -1, -1, -1, -1, -1, -1, 17, 1, 18}, // 16 - dependent vowel 317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 18}, // 17 - sign above 318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru {-1, -1, -1, -1, -1, -1, -1, 19, -1, -1, -1, -1}, // 18 - ZWJ after vowel 319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru {-1, 1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1}, // 19 - Third coeng 320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1}, // 20 - dependent vowel after a Robat 321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruconst FeatureMap *KhmerReordering::getFeatureMap(le_int32 &count) 326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count = 
featureMapCount; 328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return featureMap; 330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Given an input string of characters and a location in which to start looking 334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// calculate, using the state table, which one is the last character of the syllable 335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// that starts in the starting position. 336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querule_int32 KhmerReordering::findSyllable(const KhmerClassTable *classTable, const LEUnicode *chars, le_int32 prev, le_int32 charCount) 337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru le_int32 cursor = prev; 339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru le_int8 state = 0; 340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (cursor < charCount) { 342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru KhmerClassTable::CharClass charClass = (classTable->getCharClass(chars[cursor]) & KhmerClassTable::CF_CLASS_MASK); 343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru state = khmerStateTable[state][charClass]; 345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (state < 0) { 347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cursor += 1; 351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return cursor; 354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// This is the real reordering function as applied to the Khmer language 358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querule_int32 KhmerReordering::reorder(const LEUnicode *chars, le_int32 charCount, le_int32 /*scriptCode*/, 360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru LEUnicode *outChars, LEGlyphStorage &glyphStorage) 361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const KhmerClassTable *classTable = KhmerClassTable::getKhmerClassTable(); 363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru KhmerReorderingOutput output(outChars, glyphStorage); 365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru KhmerClassTable::CharClass charClass; 366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru le_int32 i, prev = 0, coengRo; 367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This loop only exits when we reach the end of a run, which may contain 370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // several syllables. 
371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (prev < charCount) { 372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru le_int32 syllable = findSyllable(classTable, chars, prev, charCount); 373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru output.reset(); 375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // write a pre vowel or the pre part of a split vowel first 377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // and look out for coeng + ro. RO is the only vowel of type 2, and 378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // therefore the only one that requires saving space before the base. 379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coengRo = -1; // There is no Coeng Ro, if found this value will change 380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i = prev; i < syllable; i += 1) { 381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru charClass = classTable->getCharClass(chars[i]); 382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // if a split vowel, write the pre part. 
In Khmer the pre part 384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // is the same for all split vowels, same glyph as pre vowel C_VOWEL_E 385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (charClass & KhmerClassTable::CF_SPLIT_VOWEL) { 386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru output.writeChar(C_VOWEL_E, i, tagPref); 387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; // there can be only one vowel 388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // if a vowel with pos before write it out 391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (charClass & KhmerClassTable::CF_POS_BEFORE) { 392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru output.writeChar(chars[i], i, tagPref); 393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; // there can be only one vowel 394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // look for coeng + ro and remember position 397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // works because coeng + ro is always in front of a vowel (if there is a vowel) 398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // and because CC_CONSONANT2 is enough to identify it, as it is the only consonant 399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // with this flag 400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ( (charClass & KhmerClassTable::CF_COENG) && (i + 1 < syllable) && 401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ( (classTable->getCharClass(chars[i + 1]) & KhmerClassTable::CF_CLASS_MASK) == KhmerClassTable::CC_CONSONANT2) ) 402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru { 403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru coengRo = i; 404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // write coeng + ro if found 408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (coengRo > -1) { 409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru output.writeChar(C_COENG, coengRo, tagPref); 410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru output.writeChar(C_RO, coengRo + 1, tagPref); 411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // shall we add a dotted circle? 414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If in the position in which the base should be (first char in the string) there is 415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // a character that has the Dotted circle flag (a character that cannot be a base) 416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // then write a dotted circle 417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (classTable->getCharClass(chars[prev]) & KhmerClassTable::CF_DOTTED_CIRCLE) { 418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru output.writeChar(C_DOTTED_CIRCLE, prev, tagDefault); 419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // copy what is left to the output, skipping before vowels and coeng Ro if they are present 422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i = prev; i < syllable; i += 1) { 423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru charClass 
= classTable->getCharClass(chars[i]); 424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // skip a before vowel, it was already processed 426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (charClass & KhmerClassTable::CF_POS_BEFORE) { 427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // skip coeng + ro, it was already processed 431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (i == coengRo) { 432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i += 1; 433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch (charClass & KhmerClassTable::CF_POS_MASK) { 437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case KhmerClassTable::CF_POS_ABOVE : 438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru output.writeChar(chars[i], i, tagAbvf); 439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case KhmerClassTable::CF_POS_AFTER : 442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru output.writeChar(chars[i], i, tagPstf); 443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case KhmerClassTable::CF_POS_BELOW : 446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru output.writeChar(chars[i], i, tagBlwf); 
447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru default: 450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // assign the correct flags to a coeng consonant 451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Consonants of type 3 are taged as Post forms and those type 1 as below forms 452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ( (charClass & KhmerClassTable::CF_COENG) && i + 1 < syllable ) { 453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ( (classTable->getCharClass(chars[i + 1]) & KhmerClassTable::CF_CLASS_MASK) 454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru == KhmerClassTable::CC_CONSONANT3) { 455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru output.writeChar(chars[i], i, tagPstf); 456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i += 1; 457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru output.writeChar(chars[i], i, tagPstf); 458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru output.writeChar(chars[i], i, tagBlwf); 461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i += 1; 462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru output.writeChar(chars[i], i, tagBlwf); 463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // if a shifter is followed by an above vowel change the shifter to below form, 467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // an above vowel can have two possible positions i + 1 or i + 3 
468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // (position i+1 corresponds to unicode 3, position i+3 to Unicode 4) 469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // and there is an extra rule for C_VOWEL_AA + C_SIGN_NIKAHIT also for two 470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // different positions, right after the shifter or after a vowel (Unicode 4) 471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ( (charClass & KhmerClassTable::CF_SHIFTER) && (i + 1 < syllable) ) { 472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((classTable->getCharClass(chars[i + 1]) & KhmerClassTable::CF_ABOVE_VOWEL) 473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru || (i + 2 < syllable 474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru && ( (classTable->getCharClass(chars[i + 1]) & KhmerClassTable::CF_CLASS_MASK) == C_VOWEL_AA) 475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru && ( (classTable->getCharClass(chars[i + 2]) & KhmerClassTable::CF_CLASS_MASK) == C_SIGN_NIKAHIT)) 476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru || (i + 3 < syllable && (classTable->getCharClass(chars[i + 3]) & KhmerClassTable::CF_ABOVE_VOWEL)) 477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru || (i + 4 < syllable 478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru && ( (classTable->getCharClass(chars[i + 3]) & KhmerClassTable::CF_CLASS_MASK) == C_VOWEL_AA) 479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru && ( (classTable->getCharClass(chars[i + 4]) & KhmerClassTable::CF_CLASS_MASK) == C_SIGN_NIKAHIT) ) ) 480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru output.writeChar(chars[i], i, tagBlwf); 482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // default - any other characters 487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru output.writeChar(chars[i], i, tagDefault); 488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } // switch 490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } // for 491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru prev = syllable; // move the pointer to the start of next syllable 493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return output.getOutputIndex(); 496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_END 500