1/*
2/ *
3 * (C) Copyright IBM Corp. 1998-2009 - All Rights Reserved
4 *
5 */
6
7#include "LETypes.h"
8#include "OpenTypeTables.h"
9#include "OpenTypeUtilities.h"
10#include "IndicReordering.h"
11#include "LEGlyphStorage.h"
12#include "MPreFixups.h"
13
14U_NAMESPACE_BEGIN
15
// Short local aliases for the LE_*_FEATURE_TAG constants (defined elsewhere).
// The feature-map tables in this file pair each alias with one feature mask bit.
#define loclFeatureTag  LE_LOCL_FEATURE_TAG
#define initFeatureTag  LE_INIT_FEATURE_TAG
#define nuktFeatureTag  LE_NUKT_FEATURE_TAG
#define akhnFeatureTag  LE_AKHN_FEATURE_TAG
#define rphfFeatureTag  LE_RPHF_FEATURE_TAG
#define rkrfFeatureTag  LE_RKRF_FEATURE_TAG
#define blwfFeatureTag  LE_BLWF_FEATURE_TAG
#define halfFeatureTag  LE_HALF_FEATURE_TAG
#define pstfFeatureTag  LE_PSTF_FEATURE_TAG
#define vatuFeatureTag  LE_VATU_FEATURE_TAG
#define presFeatureTag  LE_PRES_FEATURE_TAG
#define blwsFeatureTag  LE_BLWS_FEATURE_TAG
#define abvsFeatureTag  LE_ABVS_FEATURE_TAG
#define pstsFeatureTag  LE_PSTS_FEATURE_TAG
#define halnFeatureTag  LE_HALN_FEATURE_TAG
#define cjctFeatureTag  LE_CJCT_FEATURE_TAG
#define blwmFeatureTag  LE_BLWM_FEATURE_TAG
#define abvmFeatureTag  LE_ABVM_FEATURE_TAG
#define distFeatureTag  LE_DIST_FEATURE_TAG
#define caltFeatureTag  LE_CALT_FEATURE_TAG
#define kernFeatureTag  LE_KERN_FEATURE_TAG
37
// Feature mask bits: one distinct bit per feature, allocated from bit 31 downwards.
#define loclFeatureMask         0x80000000UL
#define rphfFeatureMask         0x40000000UL
#define blwfFeatureMask         0x20000000UL
#define halfFeatureMask         0x10000000UL
#define pstfFeatureMask         0x08000000UL
#define nuktFeatureMask         0x04000000UL
#define akhnFeatureMask         0x02000000UL
#define vatuFeatureMask         0x01000000UL
#define presFeatureMask         0x00800000UL
#define blwsFeatureMask         0x00400000UL
#define abvsFeatureMask         0x00200000UL
#define pstsFeatureMask         0x00100000UL
#define halnFeatureMask         0x00080000UL
#define blwmFeatureMask         0x00040000UL
#define abvmFeatureMask         0x00020000UL
#define distFeatureMask         0x00010000UL
#define initFeatureMask         0x00008000UL
#define cjctFeatureMask         0x00004000UL
#define rkrfFeatureMask         0x00002000UL
#define caltFeatureMask         0x00001000UL
#define kernFeatureMask         0x00000800UL

// Syllable structure bits (they occupy the bits below the feature masks)
#define baseConsonantMask       0x00000400UL
#define consonantMask           0x00000200UL
#define halfConsonantMask       0x00000100UL
#define rephConsonantMask       0x00000080UL
#define matraMask               0x00000040UL
#define vowelModifierMask       0x00000020UL
#define markPositionMask        0x00000018UL

// The four values that fit in the two markPositionMask bits
#define postBasePosition        0x00000000UL
#define preBasePosition         0x00000008UL
#define aboveBasePosition       0x00000010UL
#define belowBasePosition       0x00000018UL

#define repositionedGlyphMask   0x00000002UL

// Named groups of feature bits
#define basicShapingFormsMask \
    ( loclFeatureMask | nuktFeatureMask | akhnFeatureMask | rkrfFeatureMask | blwfFeatureMask | halfFeatureMask | vatuFeatureMask | cjctFeatureMask )
#define positioningFormsMask \
    ( kernFeatureMask | distFeatureMask | abvmFeatureMask | blwmFeatureMask )
#define presentationFormsMask \
    ( presFeatureMask | abvsFeatureMask | blwsFeatureMask | pstsFeatureMask | halnFeatureMask | caltFeatureMask )
79
80
#define C_MALAYALAM_VOWEL_SIGN_U    0x0D41
#define C_DOTTED_CIRCLE             0x25CC
#define NO_GLYPH                    0xFFFF

// The right value for MAX_CONSONANTS_PER_SYLLABLE is debated: ticket 5588 says ISCII calls for 4,
// but 5 appears to be more consistent with XP.
#define MAX_CONSONANTS_PER_SYLLABLE 5

#define INDIC_BLOCK_SIZE            0x7F
90
91class IndicReorderingOutput : public UMemory {
92private:
93    le_int32   fSyllableCount;
94    le_int32   fOutIndex;
95    LEUnicode *fOutChars;
96
97    LEGlyphStorage &fGlyphStorage;
98
99    LEUnicode   fMpre;
100    le_int32    fMpreIndex;
101
102    LEUnicode   fMbelow;
103    le_int32    fMbelowIndex;
104
105    LEUnicode   fMabove;
106    le_int32    fMaboveIndex;
107
108    LEUnicode   fMpost;
109    le_int32    fMpostIndex;
110
111    LEUnicode   fLengthMark;
112    le_int32    fLengthMarkIndex;
113
114    LEUnicode   fAlLakuna;
115    le_int32    fAlLakunaIndex;
116
117    FeatureMask fMatraFeatures;
118
119    le_int32    fMPreOutIndex;
120    MPreFixups *fMPreFixups;
121
122    LEUnicode   fVMabove;
123    LEUnicode   fVMpost;
124    le_int32    fVMIndex;
125    FeatureMask fVMFeatures;
126
127    LEUnicode   fSMabove;
128    LEUnicode   fSMbelow;
129    le_int32    fSMIndex;
130    FeatureMask fSMFeatures;
131
132    LEUnicode   fPreBaseConsonant;
133    LEUnicode   fPreBaseVirama;
134    le_int32    fPBCIndex;
135    FeatureMask fPBCFeatures;
136
137    void saveMatra(LEUnicode matra, le_int32 matraIndex, IndicClassTable::CharClass matraClass)
138    {
139        // FIXME: check if already set, or if not a matra...
140        if (IndicClassTable::isLengthMark(matraClass)) {
141            fLengthMark = matra;
142            fLengthMarkIndex = matraIndex;
143        } else if (IndicClassTable::isAlLakuna(matraClass)) {
144            fAlLakuna = matra;
145            fAlLakunaIndex = matraIndex;
146        } else {
147            switch (matraClass & CF_POS_MASK) {
148            case CF_POS_BEFORE:
149                fMpre = matra;
150                fMpreIndex = matraIndex;
151                break;
152
153            case CF_POS_BELOW:
154                fMbelow = matra;
155                fMbelowIndex = matraIndex;
156                break;
157
158            case CF_POS_ABOVE:
159                fMabove = matra;
160                fMaboveIndex = matraIndex;
161                break;
162
163            case CF_POS_AFTER:
164                fMpost = matra;
165                fMpostIndex = matraIndex;
166                break;
167
168            default:
169                // can't get here...
170                break;
171           }
172        }
173    }
174
175public:
176    IndicReorderingOutput(LEUnicode *outChars, LEGlyphStorage &glyphStorage, MPreFixups *mpreFixups)
177        : fSyllableCount(0), fOutIndex(0), fOutChars(outChars), fGlyphStorage(glyphStorage),
178          fMpre(0), fMpreIndex(0), fMbelow(0), fMbelowIndex(0), fMabove(0), fMaboveIndex(0),
179          fMpost(0), fMpostIndex(0), fLengthMark(0), fLengthMarkIndex(0), fAlLakuna(0), fAlLakunaIndex(0),
180          fMatraFeatures(0), fMPreOutIndex(-1), fMPreFixups(mpreFixups),
181          fVMabove(0), fVMpost(0), fVMIndex(0), fVMFeatures(0),
182          fSMabove(0), fSMbelow(0), fSMIndex(0), fSMFeatures(0),
183          fPreBaseConsonant(0), fPreBaseVirama(0), fPBCIndex(0), fPBCFeatures(0)
184    {
185        // nothing else to do...
186    }
187
188    ~IndicReorderingOutput()
189    {
190        // nothing to do here...
191    }
192
193    void reset()
194    {
195        fSyllableCount += 1;
196
197        fMpre = fMbelow = fMabove = fMpost = fLengthMark = fAlLakuna = 0;
198        fMPreOutIndex = -1;
199
200        fVMabove = fVMpost  = 0;
201        fSMabove = fSMbelow = 0;
202
203        fPreBaseConsonant = fPreBaseVirama = 0;
204    }
205
206    void writeChar(LEUnicode ch, le_uint32 charIndex, FeatureMask charFeatures)
207    {
208        LEErrorCode success = LE_NO_ERROR;
209
210        fOutChars[fOutIndex] = ch;
211
212        fGlyphStorage.setCharIndex(fOutIndex, charIndex, success);
213        fGlyphStorage.setAuxData(fOutIndex, charFeatures | (fSyllableCount & LE_GLYPH_GROUP_MASK), success);
214
215        fOutIndex += 1;
216    }
217
218    void setFeatures ( le_uint32 charIndex, FeatureMask charFeatures)
219    {
220        LEErrorCode success = LE_NO_ERROR;
221
222        fGlyphStorage.setAuxData( charIndex, charFeatures, success );
223
224    }
225
226    FeatureMask getFeatures ( le_uint32 charIndex )
227    {
228        LEErrorCode success = LE_NO_ERROR;
229        return fGlyphStorage.getAuxData(charIndex,success);
230    }
231
232	void decomposeReorderMatras ( const IndicClassTable *classTable, le_int32 beginSyllable, le_int32 nextSyllable, le_int32 inv_count ) {
233		le_int32 i;
234        LEErrorCode success = LE_NO_ERROR;
235
236		for ( i = beginSyllable ; i < nextSyllable ; i++ ) {
237			if ( classTable->isMatra(fOutChars[i+inv_count])) {
238				IndicClassTable::CharClass matraClass = classTable->getCharClass(fOutChars[i+inv_count]);
239				if ( classTable->isSplitMatra(matraClass)) {
240					le_int32 saveIndex = fGlyphStorage.getCharIndex(i+inv_count,success);
241					le_uint32 saveAuxData = fGlyphStorage.getAuxData(i+inv_count,success);
242                    const SplitMatra *splitMatra = classTable->getSplitMatra(matraClass);
243                    int j;
244                    for (j = 0 ; *(splitMatra)[j] != 0 ; j++) {
245                        LEUnicode piece = (*splitMatra)[j];
246						if ( j == 0 ) {
247							fOutChars[i+inv_count] = piece;
248							matraClass = classTable->getCharClass(piece);
249						} else {
250							insertCharacter(piece,i+1+inv_count,saveIndex,saveAuxData);
251							nextSyllable++;
252						}
253 				    }
254				}
255
256				if ((matraClass & CF_POS_MASK) == CF_POS_BEFORE) {
257                    moveCharacter(i+inv_count,beginSyllable+inv_count);
258				}
259			}
260		}
261	}
262
263	void moveCharacter( le_int32 fromPosition, le_int32 toPosition ) {
264		le_int32 i,saveIndex;
265		le_uint32 saveAuxData;
266		LEUnicode saveChar = fOutChars[fromPosition];
267	    LEErrorCode success = LE_NO_ERROR;
268		LEErrorCode success2 = LE_NO_ERROR;
269		saveIndex = fGlyphStorage.getCharIndex(fromPosition,success);
270        saveAuxData = fGlyphStorage.getAuxData(fromPosition,success);
271
272		if ( fromPosition > toPosition ) {
273			for ( i = fromPosition ; i > toPosition ; i-- ) {
274				fOutChars[i] = fOutChars[i-1];
275				fGlyphStorage.setCharIndex(i,fGlyphStorage.getCharIndex(i-1,success2),success);
276				fGlyphStorage.setAuxData(i,fGlyphStorage.getAuxData(i-1,success2), success);
277
278			}
279		} else {
280			for ( i = fromPosition ; i < toPosition ; i++ ) {
281				fOutChars[i] = fOutChars[i+1];
282				fGlyphStorage.setCharIndex(i,fGlyphStorage.getCharIndex(i+1,success2),success);
283				fGlyphStorage.setAuxData(i,fGlyphStorage.getAuxData(i+1,success2), success);
284			}
285
286		}
287		fOutChars[toPosition] = saveChar;
288		fGlyphStorage.setCharIndex(toPosition,saveIndex,success);
289		fGlyphStorage.setAuxData(toPosition,saveAuxData,success);
290
291	}
292	void insertCharacter( LEUnicode ch, le_int32 toPosition, le_int32 charIndex, le_uint32 auxData ) {
293	    LEErrorCode success = LE_NO_ERROR;
294        le_int32 i;
295		fOutIndex += 1;
296
297		for ( i = fOutIndex ; i > toPosition ; i--) {
298				fOutChars[i] = fOutChars[i-1];
299				fGlyphStorage.setCharIndex(i,fGlyphStorage.getCharIndex(i-1,success),success);
300				fGlyphStorage.setAuxData(i,fGlyphStorage.getAuxData(i-1,success), success);
301		}
302
303		fOutChars[toPosition] = ch;
304		fGlyphStorage.setCharIndex(toPosition,charIndex,success);
305		fGlyphStorage.setAuxData(toPosition,auxData,success);
306
307	}
308	void removeCharacter( le_int32 fromPosition ) {
309	    LEErrorCode success = LE_NO_ERROR;
310        le_int32 i;
311		fOutIndex -= 1;
312
313		for ( i = fromPosition ; i < fOutIndex ; i--) {
314				fOutChars[i] = fOutChars[i+1];
315				fGlyphStorage.setCharIndex(i,fGlyphStorage.getCharIndex(i+1,success),success);
316				fGlyphStorage.setAuxData(i,fGlyphStorage.getAuxData(i+1,success), success);
317		}
318	}
319
320    le_bool noteMatra(const IndicClassTable *classTable, LEUnicode matra, le_uint32 matraIndex, FeatureMask matraFeatures, le_bool wordStart)
321    {
322        IndicClassTable::CharClass matraClass = classTable->getCharClass(matra);
323
324        fMatraFeatures  = matraFeatures;
325
326        if (wordStart) {
327            fMatraFeatures |= initFeatureMask;
328        }
329
330        if (IndicClassTable::isMatra(matraClass)) {
331            if (IndicClassTable::isSplitMatra(matraClass)) {
332                const SplitMatra *splitMatra = classTable->getSplitMatra(matraClass);
333                int i;
334
335                for (i = 0; i < 3 && (*splitMatra)[i] != 0; i += 1) {
336                    LEUnicode piece = (*splitMatra)[i];
337                    IndicClassTable::CharClass pieceClass = classTable->getCharClass(piece);
338
339                    saveMatra(piece, matraIndex, pieceClass);
340                }
341            } else {
342                saveMatra(matra, matraIndex, matraClass);
343            }
344
345            return TRUE;
346        }
347
348        return FALSE;
349    }
350
351    void noteVowelModifier(const IndicClassTable *classTable, LEUnicode vowelModifier, le_uint32 vowelModifierIndex, FeatureMask vowelModifierFeatures)
352    {
353        IndicClassTable::CharClass vmClass = classTable->getCharClass(vowelModifier);
354
355        fVMIndex = vowelModifierIndex;
356        fVMFeatures  = vowelModifierFeatures;
357
358        if (IndicClassTable::isVowelModifier(vmClass)) {
359           switch (vmClass & CF_POS_MASK) {
360           case CF_POS_ABOVE:
361               fVMabove = vowelModifier;
362               break;
363
364           case CF_POS_AFTER:
365               fVMpost = vowelModifier;
366               break;
367
368           default:
369               // FIXME: this is an error...
370               break;
371           }
372        }
373    }
374
375    void noteStressMark(const IndicClassTable *classTable, LEUnicode stressMark, le_uint32 stressMarkIndex, FeatureMask stressMarkFeatures)
376    {
377       IndicClassTable::CharClass smClass = classTable->getCharClass(stressMark);
378
379        fSMIndex = stressMarkIndex;
380        fSMFeatures  = stressMarkFeatures;
381
382        if (IndicClassTable::isStressMark(smClass)) {
383            switch (smClass & CF_POS_MASK) {
384            case CF_POS_ABOVE:
385                fSMabove = stressMark;
386                break;
387
388            case CF_POS_BELOW:
389                fSMbelow = stressMark;
390                break;
391
392            default:
393                // FIXME: this is an error...
394                break;
395           }
396        }
397    }
398
399    void notePreBaseConsonant(le_uint32 index,LEUnicode PBConsonant, LEUnicode PBVirama, FeatureMask features)
400    {
401        fPBCIndex = index;
402        fPreBaseConsonant = PBConsonant;
403        fPreBaseVirama = PBVirama;
404        fPBCFeatures = features;
405    }
406
407    void noteBaseConsonant()
408    {
409        if (fMPreFixups != NULL && fMPreOutIndex >= 0) {
410            fMPreFixups->add(fOutIndex, fMPreOutIndex);
411        }
412    }
413
414    // Handles Al-Lakuna in Sinhala split vowels.
415    void writeAlLakuna()
416    {
417        if (fAlLakuna != 0) {
418            writeChar(fAlLakuna, fAlLakunaIndex, fMatraFeatures);
419        }
420    }
421
422    void writeMpre()
423    {
424        if (fMpre != 0) {
425            fMPreOutIndex = fOutIndex;
426            writeChar(fMpre, fMpreIndex, fMatraFeatures);
427        }
428    }
429
430    void writeMbelow()
431    {
432        if (fMbelow != 0) {
433            writeChar(fMbelow, fMbelowIndex, fMatraFeatures);
434        }
435    }
436
437    void writeMabove()
438    {
439        if (fMabove != 0) {
440            writeChar(fMabove, fMaboveIndex, fMatraFeatures);
441        }
442    }
443
444    void writeMpost()
445    {
446        if (fMpost != 0) {
447            writeChar(fMpost, fMpostIndex, fMatraFeatures);
448        }
449    }
450
451    void writeLengthMark()
452    {
453        if (fLengthMark != 0) {
454            writeChar(fLengthMark, fLengthMarkIndex, fMatraFeatures);
455        }
456    }
457
458    void writeVMabove()
459    {
460        if (fVMabove != 0) {
461            writeChar(fVMabove, fVMIndex, fVMFeatures);
462        }
463    }
464
465    void writeVMpost()
466    {
467        if (fVMpost != 0) {
468            writeChar(fVMpost, fVMIndex, fVMFeatures);
469        }
470    }
471
472    void writeSMabove()
473    {
474        if (fSMabove != 0) {
475            writeChar(fSMabove, fSMIndex, fSMFeatures);
476        }
477    }
478
479    void writeSMbelow()
480    {
481        if (fSMbelow != 0) {
482            writeChar(fSMbelow, fSMIndex, fSMFeatures);
483        }
484    }
485
486    void writePreBaseConsonant()
487    {
488        // The TDIL spec says that consonant + virama + RRA should produce a rakar in Malayalam.  However,
489        // it seems that almost none of the fonts for Malayalam are set up to handle this.
490        // So, we're going to force the issue here by using the rakar as defined with RA in most fonts.
491
492        if (fPreBaseConsonant == 0x0d31) { // RRA
493            fPreBaseConsonant = 0x0d30; // RA
494        }
495
496        if (fPreBaseConsonant != 0) {
497            writeChar(fPreBaseConsonant, fPBCIndex, fPBCFeatures);
498            writeChar(fPreBaseVirama,fPBCIndex-1,fPBCFeatures);
499        }
500    }
501
502    le_int32 getOutputIndex()
503    {
504        return fOutIndex;
505    }
506};
507
508
509
// TODO: Find better names for these!
// Nested feature sets: each tagArrayN adds one feature to tagArrayN+1
// (pstf, then half, then blwf, then rphf), so tagArray0 is the largest set.
#define tagArray4 (loclFeatureMask | nuktFeatureMask | akhnFeatureMask | vatuFeatureMask | \
                   presFeatureMask | blwsFeatureMask | abvsFeatureMask | pstsFeatureMask | \
                   halnFeatureMask | blwmFeatureMask | abvmFeatureMask | distFeatureMask)
#define tagArray3 (pstfFeatureMask | tagArray4)
#define tagArray2 (halfFeatureMask | tagArray3)
#define tagArray1 (blwfFeatureMask | tagArray2)
#define tagArray0 (rphfFeatureMask | tagArray1)
516
517static const FeatureMap featureMap[] = {
518    {loclFeatureTag, loclFeatureMask},
519    {initFeatureTag, initFeatureMask},
520    {nuktFeatureTag, nuktFeatureMask},
521    {akhnFeatureTag, akhnFeatureMask},
522    {rphfFeatureTag, rphfFeatureMask},
523    {blwfFeatureTag, blwfFeatureMask},
524    {halfFeatureTag, halfFeatureMask},
525    {pstfFeatureTag, pstfFeatureMask},
526    {vatuFeatureTag, vatuFeatureMask},
527    {presFeatureTag, presFeatureMask},
528    {blwsFeatureTag, blwsFeatureMask},
529    {abvsFeatureTag, abvsFeatureMask},
530    {pstsFeatureTag, pstsFeatureMask},
531    {halnFeatureTag, halnFeatureMask},
532    {blwmFeatureTag, blwmFeatureMask},
533    {abvmFeatureTag, abvmFeatureMask},
534    {distFeatureTag, distFeatureMask}
535};
536
537static const le_int32 featureCount = LE_ARRAY_SIZE(featureMap);
538
539static const FeatureMap v2FeatureMap[] = {
540	{loclFeatureTag, loclFeatureMask},
541    {nuktFeatureTag, nuktFeatureMask},
542    {akhnFeatureTag, akhnFeatureMask},
543    {rphfFeatureTag, rphfFeatureMask},
544	{rkrfFeatureTag, rkrfFeatureMask},
545	{blwfFeatureTag, blwfFeatureMask},
546    {halfFeatureTag, halfFeatureMask},
547    {vatuFeatureTag, vatuFeatureMask},
548    {cjctFeatureTag, cjctFeatureMask},
549    {presFeatureTag, presFeatureMask},
550    {abvsFeatureTag, abvsFeatureMask},
551    {blwsFeatureTag, blwsFeatureMask},
552    {pstsFeatureTag, pstsFeatureMask},
553	{halnFeatureTag, halnFeatureMask},
554	{caltFeatureTag, caltFeatureMask},
555    {kernFeatureTag, kernFeatureMask},
556    {distFeatureTag, distFeatureMask},
557    {abvmFeatureTag, abvmFeatureMask},
558    {blwmFeatureTag, blwmFeatureMask}
559};
560
561static const le_int32 v2FeatureMapCount = LE_ARRAY_SIZE(v2FeatureMap);
562
563static const le_int8 stateTable[][CC_COUNT] =
564{
565//   xx  vm  sm  iv  i2  i3  ct  cn  nu  dv  s1  s2  s3  vr  zw  al
566    { 1,  6,  1,  5,  8, 11,  3,  2,  1,  5,  9,  5,  5,  1,  1,  1}, //  0 - ground state
567    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, //  1 - exit state
568    {-1,  6,  1, -1, -1, -1, -1, -1, -1,  5,  9,  5,  5,  4, 12, -1}, //  2 - consonant with nukta
569    {-1,  6,  1, -1, -1, -1, -1, -1,  2,  5,  9,  5,  5,  4, 12, 13}, //  3 - consonant
570    {-1, -1, -1, -1, -1, -1,  3,  2, -1, -1, -1, -1, -1, -1,  7, -1}, //  4 - consonant virama
571    {-1,  6,  1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, //  5 - dependent vowels
572    {-1, -1,  1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, //  6 - vowel mark
573    {-1, -1, -1, -1, -1, -1,  3,  2, -1, -1, -1, -1, -1, -1, -1, -1}, //  7 - consonant virama ZWJ, consonant ZWJ virama
574    {-1,  6,  1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  4, -1, -1}, //  8 - independent vowels that can take a virama
575    {-1,  6,  1, -1, -1, -1, -1, -1, -1, -1, -1, 10,  5, -1, -1, -1}, //  9 - first part of split vowel
576    {-1,  6,  1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  5, -1, -1, -1}, // 10 - second part of split vowel
577    {-1,  6,  1, -1, -1, -1, -1, -1, -1,  5,  9,  5,  5,  4, -1, -1}, // 11 - independent vowels that can take an iv
578    {-1, -1,  1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  7, -1,  7}, // 12 - consonant ZWJ (TODO: Take everything else that can be after a consonant?)
579    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  7, -1}  // 13 - consonant al-lakuna ZWJ consonant
580};
581
582
583const FeatureMap *IndicReordering::getFeatureMap(le_int32 &count)
584{
585    count = featureCount;
586
587    return featureMap;
588}
589
590const FeatureMap *IndicReordering::getv2FeatureMap(le_int32 &count)
591{
592    count = v2FeatureMapCount;
593
594    return v2FeatureMap;
595}
596
597le_int32 IndicReordering::findSyllable(const IndicClassTable *classTable, const LEUnicode *chars, le_int32 prev, le_int32 charCount)
598{
599    le_int32 cursor = prev;
600    le_int8 state = 0;
601    le_int8 consonant_count = 0;
602
603    while (cursor < charCount) {
604        IndicClassTable::CharClass charClass = classTable->getCharClass(chars[cursor]);
605
606        if ( IndicClassTable::isConsonant(charClass) ) {
607            consonant_count++;
608            if ( consonant_count > MAX_CONSONANTS_PER_SYLLABLE ) {
609                break;
610            }
611        }
612
613        state = stateTable[state][charClass & CF_CLASS_MASK];
614
615        if (state < 0) {
616            break;
617        }
618
619        cursor += 1;
620    }
621
622    return cursor;
623}
624
625le_int32 IndicReordering::reorder(const LEUnicode *chars, le_int32 charCount, le_int32 scriptCode,
626                                  LEUnicode *outChars, LEGlyphStorage &glyphStorage,
627                                  MPreFixups **outMPreFixups, LEErrorCode& success)
628{
629    if (LE_FAILURE(success)) {
630        return 0;
631    }
632
633    MPreFixups *mpreFixups = NULL;
634    const IndicClassTable *classTable = IndicClassTable::getScriptClassTable(scriptCode);
635
636    if (classTable->scriptFlags & SF_MPRE_FIXUP) {
637        mpreFixups = new MPreFixups(charCount);
638        if (mpreFixups == NULL) {
639            success = LE_MEMORY_ALLOCATION_ERROR;
640            return 0;
641        }
642    }
643
644    IndicReorderingOutput output(outChars, glyphStorage, mpreFixups);
645    le_int32 i, prev = 0;
646    le_bool lastInWord = FALSE;
647
648    while (prev < charCount) {
649        le_int32 syllable = findSyllable(classTable, chars, prev, charCount);
650        le_int32 matra, markStart = syllable;
651
652        output.reset();
653
654        if (classTable->isStressMark(chars[markStart - 1])) {
655            markStart -= 1;
656            output.noteStressMark(classTable, chars[markStart], markStart, tagArray1);
657        }
658
659        if (markStart != prev && classTable->isVowelModifier(chars[markStart - 1])) {
660            markStart -= 1;
661            output.noteVowelModifier(classTable, chars[markStart], markStart, tagArray1);
662        }
663
664        matra = markStart - 1;
665
666        while (output.noteMatra(classTable, chars[matra], matra, tagArray1, !lastInWord) && matra != prev) {
667            matra -= 1;
668        }
669
670        lastInWord = TRUE;
671
672        switch (classTable->getCharClass(chars[prev]) & CF_CLASS_MASK) {
673        case CC_RESERVED:
674            lastInWord = FALSE;
675            /* fall through */
676
677        case CC_INDEPENDENT_VOWEL:
678        case CC_ZERO_WIDTH_MARK:
679            for (i = prev; i < syllable; i += 1) {
680                output.writeChar(chars[i], i, tagArray1);
681            }
682
683            break;
684
685        case CC_AL_LAKUNA:
686        case CC_NUKTA:
687            output.writeChar(C_DOTTED_CIRCLE, prev, tagArray1);
688            output.writeChar(chars[prev], prev, tagArray1);
689            break;
690
691        case CC_VIRAMA:
692            // A lone virama is illegal unless it follows a
693            // MALAYALAM_VOWEL_SIGN_U. Such a usage is called
694            // "samvruthokaram".
695            if (chars[prev - 1] != C_MALAYALAM_VOWEL_SIGN_U) {
696                output.writeChar(C_DOTTED_CIRCLE, prev, tagArray1);
697            }
698
699            output.writeChar(chars[prev], prev, tagArray1);
700            break;
701
702        case CC_DEPENDENT_VOWEL:
703        case CC_SPLIT_VOWEL_PIECE_1:
704        case CC_SPLIT_VOWEL_PIECE_2:
705        case CC_SPLIT_VOWEL_PIECE_3:
706        case CC_VOWEL_MODIFIER:
707        case CC_STRESS_MARK:
708            output.writeMpre();
709
710            output.writeChar(C_DOTTED_CIRCLE, prev, tagArray1);
711
712            output.writeMbelow();
713            output.writeSMbelow();
714            output.writeMabove();
715
716            if ((classTable->scriptFlags & SF_MATRAS_AFTER_BASE) != 0) {
717                output.writeMpost();
718            }
719
720            if ((classTable->scriptFlags & SF_REPH_AFTER_BELOW) != 0) {
721                output.writeVMabove();
722                output.writeSMabove(); // FIXME: there are no SM's in these scripts...
723            }
724
725            if ((classTable->scriptFlags & SF_MATRAS_AFTER_BASE) == 0) {
726                output.writeMpost();
727            }
728
729            output.writeLengthMark();
730            output.writeAlLakuna();
731
732            if ((classTable->scriptFlags & SF_REPH_AFTER_BELOW) == 0) {
733                output.writeVMabove();
734                output.writeSMabove();
735            }
736
737            output.writeVMpost();
738            break;
739
740        case CC_INDEPENDENT_VOWEL_2:
741        case CC_INDEPENDENT_VOWEL_3:
742        case CC_CONSONANT:
743        case CC_CONSONANT_WITH_NUKTA:
744        {
745            le_uint32 length = markStart - prev;
746            le_int32  lastConsonant = markStart - 1;
747            le_int32  baseLimit = prev;
748
749            // Check for REPH at front of syllable
750            if (length > 2 && classTable->isReph(chars[prev]) && classTable->isVirama(chars[prev + 1]) && chars[prev + 2] != C_SIGN_ZWNJ) {
751                baseLimit += 2;
752
753                // Check for eyelash RA, if the script supports it
754                if ((classTable->scriptFlags & SF_EYELASH_RA) != 0 &&
755                    chars[baseLimit] == C_SIGN_ZWJ) {
756                    if (length > 3) {
757                        baseLimit += 1;
758                    } else {
759                        baseLimit -= 2;
760                    }
761                }
762            }
763
764            while (lastConsonant > baseLimit && !classTable->isConsonant(chars[lastConsonant])) {
765                lastConsonant -= 1;
766            }
767
768
769            IndicClassTable::CharClass charClass = CC_RESERVED;
770            IndicClassTable::CharClass nextClass = CC_RESERVED;
771            le_int32 baseConsonant = lastConsonant;
772            le_int32 postBase = lastConsonant + 1;
773            le_int32 postBaseLimit = classTable->scriptFlags & SF_POST_BASE_LIMIT_MASK;
774            le_bool  seenVattu = FALSE;
775            le_bool  seenBelowBaseForm = FALSE;
776            le_bool  seenPreBaseForm = FALSE;
777            le_bool  hasNukta = FALSE;
778            le_bool  hasBelowBaseForm = FALSE;
779            le_bool  hasPostBaseForm = FALSE;
780            le_bool  hasPreBaseForm = FALSE;
781
782            if (postBase < markStart && classTable->isNukta(chars[postBase])) {
783                charClass = CC_NUKTA;
784                postBase += 1;
785            }
786
787            while (baseConsonant > baseLimit) {
788                nextClass = charClass;
789                hasNukta  = IndicClassTable::isNukta(nextClass);
790                charClass = classTable->getCharClass(chars[baseConsonant]);
791
792                hasBelowBaseForm = IndicClassTable::hasBelowBaseForm(charClass) && !hasNukta;
793                hasPostBaseForm  = IndicClassTable::hasPostBaseForm(charClass)  && !hasNukta;
794                hasPreBaseForm = IndicClassTable::hasPreBaseForm(charClass) && !hasNukta;
795
796                if (IndicClassTable::isConsonant(charClass)) {
797                    if (postBaseLimit == 0 || seenVattu ||
798                        (baseConsonant > baseLimit && !classTable->isVirama(chars[baseConsonant - 1])) ||
799                        !(hasBelowBaseForm || hasPostBaseForm || hasPreBaseForm)) {
800                        break;
801                    }
802
803                    // Note any pre-base consonants
804                    if ( baseConsonant == lastConsonant && lastConsonant > 0 &&
805                         hasPreBaseForm && classTable->isVirama(chars[baseConsonant - 1])) {
806                        output.notePreBaseConsonant(lastConsonant,chars[lastConsonant],chars[lastConsonant-1],tagArray2);
807                        seenPreBaseForm = TRUE;
808
809                    }
810                    // consonants with nuktas are never vattus
811                    seenVattu = IndicClassTable::isVattu(charClass) && !hasNukta;
812
813                    // consonants with nuktas never have below- or post-base forms
814                    if (hasPostBaseForm) {
815                        if (seenBelowBaseForm) {
816                            break;
817                        }
818
819                        postBase = baseConsonant;
820                    } else if (hasBelowBaseForm) {
821                        seenBelowBaseForm = TRUE;
822                    }
823
824                    postBaseLimit -= 1;
825                }
826
827                baseConsonant -= 1;
828            }
829
830            // Write Mpre
831            output.writeMpre();
832
833            // Write eyelash RA
834            // NOTE: baseLimit == prev + 3 iff eyelash RA present...
835            if (baseLimit == prev + 3) {
836                output.writeChar(chars[prev], prev, tagArray2);
837                output.writeChar(chars[prev + 1], prev + 1, tagArray2);
838                output.writeChar(chars[prev + 2], prev + 2, tagArray2);
839            }
840
841            // write any pre-base consonants
842            output.writePreBaseConsonant();
843
844            le_bool supressVattu = TRUE;
845
846            for (i = baseLimit; i < baseConsonant; i += 1) {
847                LEUnicode ch = chars[i];
848                // Don't put 'pstf' or 'blwf' on anything before the base consonant.
849                FeatureMask features = tagArray1 & ~( pstfFeatureMask | blwfFeatureMask );
850
851                charClass = classTable->getCharClass(ch);
852                nextClass = classTable->getCharClass(chars[i + 1]);
853                hasNukta  = IndicClassTable::isNukta(nextClass);
854
855                if (IndicClassTable::isConsonant(charClass)) {
856                    if (IndicClassTable::isVattu(charClass) && !hasNukta && supressVattu) {
857                        features = tagArray4;
858                    }
859
860                    supressVattu = IndicClassTable::isVattu(charClass) && !hasNukta;
861                } else if (IndicClassTable::isVirama(charClass) && chars[i + 1] == C_SIGN_ZWNJ)
862                {
863                    features = tagArray4;
864                }
865
866                output.writeChar(ch, i, features);
867            }
868
869            le_int32 bcSpan = baseConsonant + 1;
870
871            if (bcSpan < markStart && classTable->isNukta(chars[bcSpan])) {
872                bcSpan += 1;
873            }
874
875            if (baseConsonant == lastConsonant && bcSpan < markStart &&
876                 (classTable->isVirama(chars[bcSpan]) || classTable->isAlLakuna(chars[bcSpan]))) {
877                bcSpan += 1;
878
879                if (bcSpan < markStart && chars[bcSpan] == C_SIGN_ZWNJ) {
880                    bcSpan += 1;
881                }
882            }
883
884            // note the base consonant for post-GSUB fixups
885            output.noteBaseConsonant();
886
887            // write base consonant
888            for (i = baseConsonant; i < bcSpan; i += 1) {
889                output.writeChar(chars[i], i, tagArray4);
890            }
891
892            if ((classTable->scriptFlags & SF_MATRAS_AFTER_BASE) != 0) {
893                output.writeMbelow();
894                output.writeSMbelow(); // FIXME: there are no SMs in these scripts...
895                output.writeMabove();
896                output.writeMpost();
897            }
898
899            // write below-base consonants
900            if (baseConsonant != lastConsonant && !seenPreBaseForm) {
901                for (i = bcSpan + 1; i < postBase; i += 1) {
902                    output.writeChar(chars[i], i, tagArray1);
903                }
904
905                if (postBase > lastConsonant) {
906                    // write halant that was after base consonant
907                    output.writeChar(chars[bcSpan], bcSpan, tagArray1);
908                }
909            }
910
911            // write Mbelow, SMbelow, Mabove
912            if ((classTable->scriptFlags & SF_MATRAS_AFTER_BASE) == 0) {
913                output.writeMbelow();
914                output.writeSMbelow();
915                output.writeMabove();
916            }
917
918            if ((classTable->scriptFlags & SF_REPH_AFTER_BELOW) != 0) {
919                if (baseLimit == prev + 2) {
920                    output.writeChar(chars[prev], prev, tagArray0);
921                    output.writeChar(chars[prev + 1], prev + 1, tagArray0);
922                }
923
924                output.writeVMabove();
925                output.writeSMabove(); // FIXME: there are no SM's in these scripts...
926            }
927
928            // write post-base consonants
929            // FIXME: does this put the right tags on post-base consonants?
930            if (baseConsonant != lastConsonant && !seenPreBaseForm) {
931                if (postBase <= lastConsonant) {
932                    for (i = postBase; i <= lastConsonant; i += 1) {
933                        output.writeChar(chars[i], i, tagArray3);
934                    }
935
936                    // write halant that was after base consonant
937                    output.writeChar(chars[bcSpan], bcSpan, tagArray1);
938                }
939
940                // write the training halant, if there is one
941                if (lastConsonant < matra && classTable->isVirama(chars[matra])) {
942                    output.writeChar(chars[matra], matra, tagArray4);
943                }
944            }
945
946            // write Mpost
947            if ((classTable->scriptFlags & SF_MATRAS_AFTER_BASE) == 0) {
948                output.writeMpost();
949            }
950
951            output.writeLengthMark();
952            output.writeAlLakuna();
953
954            // write reph
955            if ((classTable->scriptFlags & SF_REPH_AFTER_BELOW) == 0) {
956                if (baseLimit == prev + 2) {
957                    output.writeChar(chars[prev], prev, tagArray0);
958                    output.writeChar(chars[prev + 1], prev + 1, tagArray0);
959                }
960
961                output.writeVMabove();
962                output.writeSMabove();
963            }
964
965            output.writeVMpost();
966
967            break;
968        }
969
970        default:
971            break;
972        }
973
974        prev = syllable;
975    }
976
977    *outMPreFixups = mpreFixups;
978
979    return output.getOutputIndex();
980}
981
982void IndicReordering::adjustMPres(MPreFixups *mpreFixups, LEGlyphStorage &glyphStorage, LEErrorCode& success)
983{
984    if (mpreFixups != NULL) {
985        mpreFixups->apply(glyphStorage, success);
986
987        delete mpreFixups;
988    }
989}
990
991void IndicReordering::applyPresentationForms(LEGlyphStorage &glyphStorage, le_int32 count)
992{
993    LEErrorCode success = LE_NO_ERROR;
994
995//  This sets us up for 2nd pass of glyph substitution as well as setting the feature masks for the
996//  GPOS table lookups
997
998    for ( le_int32 i = 0 ; i < count ; i++ ) {
999        glyphStorage.setAuxData(i, ( presentationFormsMask | positioningFormsMask ), success);
1000    }
1001
1002}
1003void IndicReordering::finalReordering(LEGlyphStorage &glyphStorage, le_int32 count)
1004{
1005    LEErrorCode success = LE_NO_ERROR;
1006
1007    // Reposition REPH as appropriate
1008
1009    for ( le_int32 i = 0 ; i < count ; i++ ) {
1010
1011        le_int32 tmpAuxData = glyphStorage.getAuxData(i,success);
1012        LEGlyphID tmpGlyph = glyphStorage.getGlyphID(i,success);
1013
1014        if ( ( tmpGlyph != NO_GLYPH ) && (tmpAuxData & rephConsonantMask) && !(tmpAuxData & repositionedGlyphMask))  {
1015
1016            le_bool targetPositionFound = false;
1017            le_int32 targetPosition = i+1;
1018            le_int32 baseConsonantData;
1019
1020            while (!targetPositionFound) {
1021                tmpGlyph = glyphStorage.getGlyphID(targetPosition,success);
1022                tmpAuxData = glyphStorage.getAuxData(targetPosition,success);
1023
1024                if ( tmpAuxData & baseConsonantMask ) {
1025                    baseConsonantData = tmpAuxData;
1026                    targetPositionFound = true;
1027                } else {
1028                    targetPosition++;
1029                }
1030            }
1031
1032            // Make sure we are not putting the reph into an empty hole
1033
1034            le_bool targetPositionHasGlyph = false;
1035            while (!targetPositionHasGlyph) {
1036                tmpGlyph = glyphStorage.getGlyphID(targetPosition,success);
1037                if ( tmpGlyph != NO_GLYPH ) {
1038                    targetPositionHasGlyph = true;
1039                } else {
1040                    targetPosition--;
1041                }
1042            }
1043
1044            // Make sure that REPH is positioned after any above base or post base matras
1045            //
1046            le_bool checkMatraDone = false;
1047            le_int32 checkMatraPosition = targetPosition+1;
1048            while ( !checkMatraDone ) {
1049               tmpAuxData = glyphStorage.getAuxData(checkMatraPosition,success);
1050               if ( checkMatraPosition >= count || ( (tmpAuxData ^ baseConsonantData) & LE_GLYPH_GROUP_MASK)) {
1051                   checkMatraDone = true;
1052                   continue;
1053               }
1054               if ( (tmpAuxData & matraMask) &&
1055                    (((tmpAuxData & markPositionMask) == aboveBasePosition) ||
1056                      ((tmpAuxData & markPositionMask) == postBasePosition))) {
1057                   targetPosition = checkMatraPosition;
1058               }
1059               checkMatraPosition++;
1060            }
1061
1062            glyphStorage.moveGlyph(i,targetPosition,repositionedGlyphMask);
1063        }
1064    }
1065}
1066
1067
1068le_int32 IndicReordering::v2process(const LEUnicode *chars, le_int32 charCount, le_int32 scriptCode,
1069                                  LEUnicode *outChars, LEGlyphStorage &glyphStorage)
1070{
1071    const IndicClassTable *classTable = IndicClassTable::getScriptClassTable(scriptCode);
1072
1073    DynamicProperties dynProps[INDIC_BLOCK_SIZE];
1074    IndicReordering::getDynamicProperties(dynProps,classTable);
1075
1076    IndicReorderingOutput output(outChars, glyphStorage, NULL);
1077    le_int32 i, firstConsonant, baseConsonant, secondConsonant, inv_count = 0, beginSyllable = 0;
1078    //le_bool lastInWord = FALSE;
1079
1080    while (beginSyllable < charCount) {
1081        le_int32 nextSyllable = findSyllable(classTable, chars, beginSyllable, charCount);
1082
1083        output.reset();
1084
1085		// Find the First Consonant
1086		for ( firstConsonant = beginSyllable ; firstConsonant < nextSyllable ; firstConsonant++ ) {
1087			 if ( classTable->isConsonant(chars[firstConsonant]) ) {
1088					break;
1089				}
1090		}
1091
1092        // Find the base consonant
1093
1094        baseConsonant = nextSyllable - 1;
1095        secondConsonant = firstConsonant;
1096
1097        // TODO: Use Dynamic Properties for hasBelowBaseForm and hasPostBaseForm()
1098
1099        while ( baseConsonant > firstConsonant ) {
1100            if ( classTable->isConsonant(chars[baseConsonant]) &&
1101                 !classTable->hasBelowBaseForm(chars[baseConsonant]) &&
1102                 !classTable->hasPostBaseForm(chars[baseConsonant]) ) {
1103                break;
1104            }
1105            else {
1106                if ( classTable->isConsonant(chars[baseConsonant]) ) {
1107                    secondConsonant = baseConsonant;
1108                }
1109                baseConsonant--;
1110            }
1111        }
1112
1113        // If the syllable starts with Ra + Halant ( in a script that has Reph ) and has more than one
1114        // consonant, Ra is excluced from candidates for base consonants
1115
1116        if ( classTable->isReph(chars[beginSyllable]) &&
1117             beginSyllable+1 < nextSyllable && classTable->isVirama(chars[beginSyllable+1]) &&
1118             secondConsonant != firstConsonant) {
1119            baseConsonant = secondConsonant;
1120        }
1121
1122	    // Populate the output
1123		for ( i = beginSyllable ; i < nextSyllable ; i++ ) {
1124
1125            // Handle invalid combinartions
1126
1127            if ( classTable->isVirama(chars[beginSyllable]) ||
1128			     classTable->isMatra(chars[beginSyllable]) ||
1129			     classTable->isVowelModifier(chars[beginSyllable]) ||
1130			     classTable->isNukta(chars[beginSyllable]) ) {
1131                     output.writeChar(C_DOTTED_CIRCLE,beginSyllable,basicShapingFormsMask);
1132                     inv_count++;
1133            }
1134             output.writeChar(chars[i],i, basicShapingFormsMask);
1135
1136        }
1137
1138        // Adjust features and set syllable structure bits
1139
1140        for ( i = beginSyllable ; i < nextSyllable ; i++ ) {
1141
1142            FeatureMask outMask = output.getFeatures(i+inv_count);
1143            FeatureMask saveMask = outMask;
1144
1145            // Since reph can only validly occur at the beginning of a syllable
1146            // We only apply it to the first 2 characters in the syllable, to keep it from
1147            // conflicting with other features ( i.e. rkrf )
1148
1149            // TODO : Use the dynamic property for determining isREPH
1150            if ( i == beginSyllable && i < baseConsonant && classTable->isReph(chars[i]) &&
1151                 i+1 < nextSyllable && classTable->isVirama(chars[i+1])) {
1152                outMask |= rphfFeatureMask;
1153                outMask |= rephConsonantMask;
1154                output.setFeatures(i+1+inv_count,outMask);
1155
1156            }
1157
1158            if ( i == baseConsonant ) {
1159                outMask |= baseConsonantMask;
1160            }
1161
1162            if ( classTable->isMatra(chars[i])) {
1163                    outMask |= matraMask;
1164                    if ( classTable->hasAboveBaseForm(chars[i])) {
1165                        outMask |= aboveBasePosition;
1166                    } else if ( classTable->hasBelowBaseForm(chars[i])) {
1167                        outMask |= belowBasePosition;
1168                    }
1169            }
1170
1171            // Don't apply half form to virama that stands alone at the end of a syllable
1172            // to prevent half forms from forming when syllable ends with virama
1173
1174            if ( classTable->isVirama(chars[i]) && (i+1 == nextSyllable) ) {
1175                outMask ^= halfFeatureMask;
1176                if ( classTable->isConsonant(chars[i-1]) ) {
1177                    FeatureMask tmp = output.getFeatures(i-1+inv_count);
1178                    tmp ^= halfFeatureMask;
1179                    output.setFeatures(i-1+inv_count,tmp);
1180                }
1181            }
1182
1183            if ( outMask != saveMask ) {
1184                output.setFeatures(i+inv_count,outMask);
1185            }
1186		}
1187
1188	    output.decomposeReorderMatras(classTable,beginSyllable,nextSyllable,inv_count);
1189
1190        beginSyllable = nextSyllable;
1191	}
1192
1193
1194    return output.getOutputIndex();
1195}
1196
1197
1198void IndicReordering::getDynamicProperties( DynamicProperties *, const IndicClassTable *classTable ) {
1199
1200
1201    LEUnicode currentChar;
1202    LEUnicode virama;
1203    LEUnicode workChars[2];
1204    LEGlyphStorage workGlyphs;
1205
1206    IndicReorderingOutput workOutput(workChars, workGlyphs, NULL);
1207
1208    //le_int32 offset = 0;
1209
1210    // First find the relevant virama for the script we are dealing with
1211
1212    for ( currentChar = classTable->firstChar ; currentChar <= classTable->lastChar ; currentChar++ ) {
1213        if ( classTable->isVirama(currentChar)) {
1214            virama = currentChar;
1215            break;
1216        }
1217    }
1218
1219    for ( currentChar = classTable->firstChar ; currentChar <= classTable->lastChar ; currentChar++ ) {
1220        if ( classTable->isConsonant(currentChar)) {
1221            workOutput.reset();
1222        }
1223    }
1224
1225
1226}
1227
1228U_NAMESPACE_END
1229