harfbuzz-indic.cpp revision db0ba8cf3183e1c6296b2d1bde2af3f83e1affc7
1/*
2 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
3 *
4 * This is part of HarfBuzz, an OpenType Layout engine library.
5 *
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
11 *
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16 * DAMAGE.
17 *
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23 */
24
25#include "harfbuzz-shaper.h"
26#include "harfbuzz-shaper-private.h"
27
28#include <assert.h>
29#include <stdio.h>
30
// Expands to an int with only bit number `bit` set; used below to turn a
// Unicode category value into a mask that can be tested against a category set.
#define FLAG(bit) (1 << (bit))
32
33static HB_Bool isLetter(HB_UChar16 ucs)
34{
35    const int test = FLAG(HB_Letter_Uppercase) |
36                     FLAG(HB_Letter_Lowercase) |
37                     FLAG(HB_Letter_Titlecase) |
38                     FLAG(HB_Letter_Modifier) |
39                     FLAG(HB_Letter_Other);
40    return !!(FLAG(HB_GetUnicodeCharCategory(ucs)) & test);
41}
42
43static HB_Bool isMark(HB_UChar16 ucs)
44{
45    const int test = FLAG(HB_Mark_NonSpacing) |
46                     FLAG(HB_Mark_SpacingCombining) |
47                     FLAG(HB_Mark_Enclosing);
48    return !!(FLAG(HB_GetUnicodeCharCategory(ucs)) & test);
49}
50
// Classification of a code point as stored in indicForms and returned by
// form(). The numeric values are the bytes stored in that table.
enum Form {
    Invalid          = 0x0,
    UnknownForm      = Invalid,  // alias: shares the value of Invalid
    Consonant        = 1,
    Nukta            = 2,
    Halant           = 3,
    Matra            = 4,
    VowelMark        = 5,
    StressMark       = 6,
    IndependentVowel = 7,
    LengthMark       = 8,
    Control          = 9,        // form() returns this for U+200C and U+200D
    Other            = 10
};
65
66static const unsigned char indicForms[0xe00-0x900] = {
67    // Devangari
68    Invalid, VowelMark, VowelMark, VowelMark,
69    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
70    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
71    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
72
73    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
74    IndependentVowel, Consonant, Consonant, Consonant,
75    Consonant, Consonant, Consonant, Consonant,
76    Consonant, Consonant, Consonant, Consonant,
77
78    Consonant, Consonant, Consonant, Consonant,
79    Consonant, Consonant, Consonant, Consonant,
80    Consonant, Consonant, Consonant, Consonant,
81    Consonant, Consonant, Consonant, Consonant,
82
83    Consonant, Consonant, Consonant, Consonant,
84    Consonant, Consonant, Consonant, Consonant,
85    Consonant, Consonant, UnknownForm, UnknownForm,
86    Nukta, Other, Matra, Matra,
87
88    Matra, Matra, Matra, Matra,
89    Matra, Matra, Matra, Matra,
90    Matra, Matra, Matra, Matra,
91    Matra, Halant, UnknownForm, UnknownForm,
92
93    Other, StressMark, StressMark, StressMark,
94    StressMark, UnknownForm, UnknownForm, UnknownForm,
95    Consonant, Consonant, Consonant, Consonant,
96    Consonant, Consonant, Consonant, Consonant,
97
98    IndependentVowel, IndependentVowel, VowelMark, VowelMark,
99    Other, Other, Other, Other,
100    Other, Other, Other, Other,
101    Other, Other, Other, Other,
102
103    Other, Other, Other, Other,
104    Other, Other, Other, Other,
105    Other, Other, Other, Consonant,
106    Consonant, Consonant /* ??? */, Consonant, Consonant,
107
108    // Bengali
109    Invalid, VowelMark, VowelMark, VowelMark,
110    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
111    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
112    IndependentVowel, Invalid, Invalid, IndependentVowel,
113
114    IndependentVowel, Invalid, Invalid, IndependentVowel,
115    IndependentVowel, Consonant, Consonant, Consonant,
116    Consonant, Consonant, Consonant, Consonant,
117    Consonant, Consonant, Consonant, Consonant,
118
119    Consonant, Consonant, Consonant, Consonant,
120    Consonant, Consonant, Consonant, Consonant,
121    Consonant, Invalid, Consonant, Consonant,
122    Consonant, Consonant, Consonant, Consonant,
123
124    Consonant, Invalid, Consonant, Invalid,
125    Invalid, Invalid, Consonant, Consonant,
126    Consonant, Consonant, UnknownForm, UnknownForm,
127    Nukta, Other, Matra, Matra,
128
129    Matra, Matra, Matra, Matra,
130    Matra, Invalid, Invalid, Matra,
131    Matra, Invalid, Invalid, Matra,
132    Matra, Halant, Consonant, UnknownForm,
133
134    Invalid, Invalid, Invalid, Invalid,
135    Invalid, Invalid, Invalid, VowelMark,
136    Invalid, Invalid, Invalid, Invalid,
137    Consonant, Consonant, Invalid, Consonant,
138
139    IndependentVowel, IndependentVowel, VowelMark, VowelMark,
140    Other, Other, Other, Other,
141    Other, Other, Other, Other,
142    Other, Other, Other, Other,
143
144    Consonant, Consonant, Other, Other,
145    Other, Other, Other, Other,
146    Other, Other, Other, Other,
147    Other, Other, Other, Other,
148
149    // Gurmukhi
150    Invalid, VowelMark, VowelMark, VowelMark,
151    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
152    IndependentVowel, IndependentVowel, IndependentVowel, Invalid,
153    Invalid, Invalid, Invalid, IndependentVowel,
154
155    IndependentVowel, Invalid, Invalid, IndependentVowel,
156    IndependentVowel, Consonant, Consonant, Consonant,
157    Consonant, Consonant, Consonant, Consonant,
158    Consonant, Consonant, Consonant, Consonant,
159
160    Consonant, Consonant, Consonant, Consonant,
161    Consonant, Consonant, Consonant, Consonant,
162    Consonant, Invalid, Consonant, Consonant,
163    Consonant, Consonant, Consonant, Consonant,
164
165    Consonant, Invalid, Consonant, Consonant,
166    Invalid, Consonant, Consonant, Invalid,
167    Consonant, Consonant, UnknownForm, UnknownForm,
168    Nukta, Other, Matra, Matra,
169
170    Matra, Matra, Matra, Invalid,
171    Invalid, Invalid, Invalid, Matra,
172    Matra, Invalid, Invalid, Matra,
173    Matra, Halant, UnknownForm, UnknownForm,
174
175    Invalid, Invalid, Invalid, Invalid,
176    Invalid, UnknownForm, UnknownForm, UnknownForm,
177    Invalid, Consonant, Consonant, Consonant,
178    Consonant, Invalid, Consonant, Invalid,
179
180    Other, Other, Invalid, Invalid,
181    Other, Other, Other, Other,
182    Other, Other, Other, Other,
183    Other, Other, Other, Other,
184
185    StressMark, StressMark, Consonant, Consonant,
186    Other, Other, Other, Other,
187    Other, Other, Other, Other,
188    Other, Other, Other, Other,
189
190    // Gujarati
191    Invalid, VowelMark, VowelMark, VowelMark,
192    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
193    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
194    IndependentVowel, IndependentVowel, Invalid, IndependentVowel,
195
196    IndependentVowel, IndependentVowel, Invalid, IndependentVowel,
197    IndependentVowel, Consonant, Consonant, Consonant,
198    Consonant, Consonant, Consonant, Consonant,
199    Consonant, Consonant, Consonant, Consonant,
200
201    Consonant, Consonant, Consonant, Consonant,
202    Consonant, Consonant, Consonant, Consonant,
203    Consonant, Invalid, Consonant, Consonant,
204    Consonant, Consonant, Consonant, Consonant,
205
206    Consonant, Invalid, Consonant, Consonant,
207    Invalid, Consonant, Consonant, Consonant,
208    Consonant, Consonant, UnknownForm, UnknownForm,
209    Nukta, Other, Matra, Matra,
210
211    Matra, Matra, Matra, Matra,
212    Matra, Matra, Invalid, Matra,
213    Matra, Matra, Invalid, Matra,
214    Matra, Halant, UnknownForm, UnknownForm,
215
216    Other, UnknownForm, UnknownForm, UnknownForm,
217    UnknownForm, UnknownForm, UnknownForm, UnknownForm,
218    UnknownForm, UnknownForm, UnknownForm, UnknownForm,
219    UnknownForm, UnknownForm, UnknownForm, UnknownForm,
220
221    IndependentVowel, IndependentVowel, VowelMark, VowelMark,
222    Other, Other, Other, Other,
223    Other, Other, Other, Other,
224    Other, Other, Other, Other,
225
226    Other, Other, Other, Other,
227    Other, Other, Other, Other,
228    Other, Other, Other, Other,
229    Other, Other, Other, Other,
230
231    // Oriya
232    Invalid, VowelMark, VowelMark, VowelMark,
233    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
234    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
235    IndependentVowel, Invalid, Invalid, IndependentVowel,
236
237    IndependentVowel, Invalid, Invalid, IndependentVowel,
238    IndependentVowel, Consonant, Consonant, Consonant,
239    Consonant, Consonant, Consonant, Consonant,
240    Consonant, Consonant, Consonant, Consonant,
241
242    Consonant, Consonant, Consonant, Consonant,
243    Consonant, Consonant, Consonant, Consonant,
244    Consonant, Invalid, Consonant, Consonant,
245    Consonant, Consonant, Consonant, Consonant,
246
247    Consonant, Invalid, Consonant, Consonant,
248    Invalid, Consonant, Consonant, Consonant,
249    Consonant, Consonant, UnknownForm, UnknownForm,
250    Nukta, Other, Matra, Matra,
251
252    Matra, Matra, Matra, Matra,
253    Invalid, Invalid, Invalid, Matra,
254    Matra, Invalid, Invalid, Matra,
255    Matra, Halant, UnknownForm, UnknownForm,
256
257    Other, Invalid, Invalid, Invalid,
258    Invalid, UnknownForm, LengthMark, LengthMark,
259    Invalid, Invalid, Invalid, Invalid,
260    Consonant, Consonant, Invalid, Consonant,
261
262    IndependentVowel, IndependentVowel, Invalid, Invalid,
263    Invalid, Invalid, Other, Other,
264    Other, Other, Other, Other,
265    Other, Other, Other, Other,
266
267    Other, Consonant, Other, Other,
268    Other, Other, Other, Other,
269    Other, Other, Other, Other,
270    Other, Other, Other, Other,
271
272    //Tamil
273    Invalid, Invalid, VowelMark, Other,
274    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
275    IndependentVowel, IndependentVowel, IndependentVowel, Invalid,
276    Invalid, Invalid, IndependentVowel, IndependentVowel,
277
278    IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
279    IndependentVowel, Consonant, Invalid, Invalid,
280    Invalid, Consonant, Consonant, Invalid,
281    Consonant, Invalid, Consonant, Consonant,
282
283    Invalid, Invalid, Invalid, Consonant,
284    Consonant, Invalid, Invalid, Invalid,
285    Consonant, Consonant, Consonant, Invalid,
286    Invalid, Invalid, Consonant, Consonant,
287
288    Consonant, Consonant, Consonant, Consonant,
289    Consonant, Consonant, Consonant, Consonant,
290    Consonant, Consonant, UnknownForm, UnknownForm,
291    Invalid, Invalid, Matra, Matra,
292
293    Matra, Matra, Matra, Invalid,
294    Invalid, Invalid, Matra, Matra,
295    Matra, Invalid, Matra, Matra,
296    Matra, Halant, Invalid, Invalid,
297
298    Invalid, Invalid, Invalid, Invalid,
299    Invalid, Invalid, Invalid, LengthMark,
300    Invalid, Invalid, Invalid, Invalid,
301    Invalid, Invalid, Invalid, Invalid,
302
303    Invalid, Invalid, Invalid, Invalid,
304    Invalid, Invalid, Other, Other,
305    Other, Other, Other, Other,
306    Other, Other, Other, Other,
307
308    Other, Other, Other, Other,
309    Other, Other, Other, Other,
310    Other, Other, Other, Other,
311    Other, Other, Other, Other,
312
313    // Telugu
314    Invalid, VowelMark, VowelMark, VowelMark,
315    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
316    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
317    IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
318
319    IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
320    IndependentVowel, Consonant, Consonant, Consonant,
321    Consonant, Consonant, Consonant, Consonant,
322    Consonant, Consonant, Consonant, Consonant,
323
324    Consonant, Consonant, Consonant, Consonant,
325    Consonant, Consonant, Consonant, Consonant,
326    Consonant, Invalid, Consonant, Consonant,
327    Consonant, Consonant, Consonant, Consonant,
328
329    Consonant, Consonant, Consonant, Consonant,
330    Invalid, Consonant, Consonant, Consonant,
331    Consonant, Consonant, UnknownForm, UnknownForm,
332    Invalid, Invalid, Matra, Matra,
333
334    Matra, Matra, Matra, Matra,
335    Matra, Invalid, Matra, Matra,
336    Matra, Invalid, Matra, Matra,
337    Matra, Halant, Invalid, Invalid,
338
339    Invalid, Invalid, Invalid, Invalid,
340    Invalid, LengthMark, Matra, Invalid,
341    Invalid, Invalid, Invalid, Invalid,
342    Invalid, Invalid, Invalid, Invalid,
343
344    IndependentVowel, IndependentVowel, Invalid, Invalid,
345    Invalid, Invalid, Other, Other,
346    Other, Other, Other, Other,
347    Other, Other, Other, Other,
348
349    Other, Other, Other, Other,
350    Other, Other, Other, Other,
351    Other, Other, Other, Other,
352    Other, Other, Other, Other,
353
354    // Kannada
355    Invalid, Invalid, VowelMark, VowelMark,
356    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
357    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
358    IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
359
360    IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
361    IndependentVowel, Consonant, Consonant, Consonant,
362    Consonant, Consonant, Consonant, Consonant,
363    Consonant, Consonant, Consonant, Consonant,
364
365    Consonant, Consonant, Consonant, Consonant,
366    Consonant, Consonant, Consonant, Consonant,
367    Consonant, Invalid, Consonant, Consonant,
368    Consonant, Consonant, Consonant, Consonant,
369
370    Consonant, Consonant, Consonant, Consonant,
371    Invalid, Consonant, Consonant, Consonant,
372    Consonant, Consonant, UnknownForm, UnknownForm,
373    Nukta, Other, Matra, Matra,
374
375    Matra, Matra, Matra, Matra,
376    Matra, Invalid, Matra, Matra,
377    Matra, Invalid, Matra, Matra,
378    Matra, Halant, Invalid, Invalid,
379
380    Invalid, Invalid, Invalid, Invalid,
381    Invalid, LengthMark, LengthMark, Invalid,
382    Invalid, Invalid, Invalid, Invalid,
383    Invalid, Invalid, Consonant, Invalid,
384
385    IndependentVowel, IndependentVowel, VowelMark, VowelMark,
386    Invalid, Invalid, Other, Other,
387    Other, Other, Other, Other,
388    Other, Other, Other, Other,
389
390    Other, Other, Other, Other,
391    Other, Other, Other, Other,
392    Other, Other, Other, Other,
393    Other, Other, Other, Other,
394
395    // Malayalam
396    Invalid, Invalid, VowelMark, VowelMark,
397    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
398    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
399    IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
400
401    IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
402    IndependentVowel, Consonant, Consonant, Consonant,
403    Consonant, Consonant, Consonant, Consonant,
404    Consonant, Consonant, Consonant, Consonant,
405
406    Consonant, Consonant, Consonant, Consonant,
407    Consonant, Consonant, Consonant, Consonant,
408    Consonant, Invalid, Consonant, Consonant,
409    Consonant, Consonant, Consonant, Consonant,
410
411    Consonant, Consonant, Consonant, Consonant,
412    Consonant, Consonant, Consonant, Consonant,
413    Consonant, Consonant, UnknownForm, UnknownForm,
414    Invalid, Invalid, Matra, Matra,
415
416    Matra, Matra, Matra, Matra,
417    Invalid, Invalid, Matra, Matra,
418    Matra, Invalid, Matra, Matra,
419    Matra, Halant, Invalid, Invalid,
420
421    Invalid, Invalid, Invalid, Invalid,
422    Invalid, Invalid, Invalid, Matra,
423    Invalid, Invalid, Invalid, Invalid,
424    Invalid, Invalid, Invalid, Invalid,
425
426    IndependentVowel, IndependentVowel, Invalid, Invalid,
427    Invalid, Invalid, Other, Other,
428    Other, Other, Other, Other,
429    Other, Other, Other, Other,
430
431    Other, Other, Other, Other,
432    Other, Other, Other, Other,
433    Other, Other, Other, Other,
434    Other, Other, Other, Other,
435
436    // Sinhala
437    Invalid, Invalid, VowelMark, VowelMark,
438    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
439    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
440    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
441
442    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
443    IndependentVowel, IndependentVowel, IndependentVowel, Invalid,
444    Invalid, Invalid, Consonant, Consonant,
445    Consonant, Consonant, Consonant, Consonant,
446
447    Consonant, Consonant, Consonant, Consonant,
448    Consonant, Consonant, Consonant, Consonant,
449    Consonant, Consonant, Consonant, Consonant,
450    Consonant, Consonant, Consonant, Consonant,
451
452    Consonant, Consonant, Invalid, Consonant,
453    Consonant, Consonant, Consonant, Consonant,
454    Consonant, Consonant, Consonant, Consonant,
455    Invalid, Consonant, Invalid, Invalid,
456
457    Consonant, Consonant, Consonant, Consonant,
458    Consonant, Consonant, Consonant, Invalid,
459    Invalid, Invalid, Halant, Invalid,
460    Invalid, Invalid, Invalid, Matra,
461
462    Matra, Matra, Matra, Matra,
463    Matra, Invalid, Matra, Invalid,
464    Matra, Matra, Matra, Matra,
465    Matra, Matra, Matra, Matra,
466
467    Invalid, Invalid, Invalid, Invalid,
468    Invalid, Invalid, Invalid, Invalid,
469    Invalid, Invalid, Invalid, Invalid,
470    Invalid, Invalid, Invalid, Invalid,
471
472    Invalid, Invalid, Matra, Matra,
473    Other, Other, Other, Other,
474    Other, Other, Other, Other,
475    Other, Other, Other, Other,
476};
477
// Position value of a code point as stored in indicPosition and returned by
// indic_position(). The numeric values are the bytes stored in that table.
enum Position {
    None    = 0,  // also returned for code points outside the table
    Pre     = 1,
    Above   = 2,
    Below   = 3,
    Post    = 4,
    Split   = 5,
    Base    = 6,
    Reph    = 7,
    Vattu   = 8,
    Inherit = 9
};
490
491static const unsigned char indicPosition[0xe00-0x900] = {
492    // Devanagari
493    None, Above, Above, Post,
494    None, None, None, None,
495    None, None, None, None,
496    None, None, None, None,
497
498    None, None, None, None,
499    None, None, None, None,
500    None, None, None, None,
501    None, None, None, None,
502
503    None, None, None, None,
504    None, None, None, None,
505    None, None, None, None,
506    None, None, None, None,
507
508    Below, None, None, None,
509    None, None, None, None,
510    None, None, None, None,
511    None, None, Post, Pre,
512
513    Post, Below, Below, Below,
514    Below, Above, Above, Above,
515    Above, Post, Post, Post,
516    Post, None, None, None,
517
518    None, Above, Below, Above,
519    Above, None, None, None,
520    None, None, None, None,
521    None, None, None, None,
522
523    None, None, Below, Below,
524    None, None, None, None,
525    None, None, None, None,
526    None, None, None, None,
527
528    None, None, None, None,
529    None, None, None, None,
530    None, None, None, None,
531    None, None, None, None,
532
533    // Bengali
534    None, Above, Post, Post,
535    None, None, None, None,
536    None, None, None, None,
537    None, None, None, None,
538
539    None, None, None, None,
540    None, None, None, None,
541    None, None, None, None,
542    None, None, None, None,
543
544    None, None, None, None,
545    None, None, None, None,
546    None, None, None, None,
547    Below, None, None, Post,
548
549    Below, None, None, None,
550    None, None, None, None,
551    None, None, None, None,
552    Below, None, Post, Pre,
553
554    Post, Below, Below, Below,
555    Below, None, None, Pre,
556    Pre, None, None, Split,
557    Split, Below, None, None,
558
559    None, None, None, None,
560    None, None, None, Post,
561    None, None, None, None,
562    None, None, None, None,
563
564    None, None, Below, Below,
565    None, None, None, None,
566    None, None, None, None,
567    None, None, None, None,
568
569    Below, None, None, None,
570    None, None, None, None,
571    None, None, None, None,
572    None, None, None, None,
573
574    // Gurmukhi
575    None, Above, Above, Post,
576    None, None, None, None,
577    None, None, None, None,
578    None, None, None, None,
579
580    None, None, None, None,
581    None, None, None, None,
582    None, None, None, None,
583    None, None, None, None,
584
585    None, None, None, None,
586    None, None, None, None,
587    None, None, None, None,
588    None, None, None, Post,
589
590    Below, None, None, None,
591    None, Below, None, None,
592    None, Below, None, None,
593    Below, None, Post, Pre,
594
595    Post, Below, Below, None,
596    None, None, None, Above,
597    Above, None, None, Above,
598    Above, None, None, None,
599
600    None, None, None, None,
601    None, None, None, None,
602    None, None, None, None,
603    None, None, None, None,
604
605    None, None, None, None,
606    None, None, None, None,
607    None, None, None, None,
608    None, None, None, None,
609
610    Above, Above, None, None,
611    None, None, None, None,
612    None, None, None, None,
613    None, None, None, None,
614
615    // Gujarati
616    None, Above, Above, Post,
617    None, None, None, None,
618    None, None, None, None,
619    None, None, None, None,
620
621    None, None, None, None,
622    None, None, None, None,
623    None, None, None, None,
624    None, None, None, None,
625
626    None, None, None, None,
627    None, None, None, None,
628    None, None, None, None,
629    None, None, None, None,
630
631    Below, None, None, None,
632    None, None, None, None,
633    None, None, None, None,
634    None, None, Post, Pre,
635
636    Post, Below, Below, Below,
637    Below, Above, None, Above,
638    Above, Post, None, Post,
639    Post, None, None, None,
640
641    None, None, None, None,
642    None, None, None, None,
643    None, None, None, None,
644    None, None, None, None,
645
646    None, None, Below, Below,
647    None, None, None, None,
648    None, None, None, None,
649    None, None, None, None,
650
651    None, None, None, None,
652    None, None, None, None,
653    None, None, None, None,
654    None, None, None, None,
655
656    // Oriya
657    None, Above, Post, Post,
658    None, None, None, None,
659    None, None, None, None,
660    None, None, None, None,
661
662    None, None, None, None,
663    None, None, None, None,
664    None, None, None, None,
665    None, None, None, None,
666
667    None, None, None, None,
668    Below, None, None, None,
669    Below, None, None, None,
670    Below, Below, Below, Post,
671
672    Below, None, Below, Below,
673    None, None, None, None,
674    None, None, None, None,
675    None, None, Post, Above,
676
677    Post, Below, Below, Below,
678    None, None, None, Pre,
679    Split, None, None, Split,
680    Split, None, None, None,
681
682    None, None, None, None,
683    None, None, Above, Post,
684    None, None, None, None,
685    None, None, None, Post,
686
687    None, None, None, None,
688    None, None, None, None,
689    None, None, None, None,
690    None, None, None, None,
691
692    None, Below, None, None,
693    None, None, None, None,
694    None, None, None, None,
695    None, None, None, None,
696
697    // Tamil
698    None, None, Above, None,
699    None, None, None, None,
700    None, None, None, None,
701    None, None, None, None,
702
703    None, None, None, None,
704    None, None, None, None,
705    None, None, None, None,
706    None, None, None, None,
707
708    None, None, None, None,
709    None, None, None, None,
710    None, None, None, None,
711    None, None, None, None,
712
713    None, None, None, None,
714    None, None, None, None,
715    None, None, None, None,
716    None, None, Post, Post,
717
718    Above, Below, Below, None,
719    None, None, Pre, Pre,
720    Pre, None, Split, Split,
721    Split, Halant, None, None,
722
723    None, None, None, None,
724    None, None, None, Post,
725    None, None, None, None,
726    None, None, None, None,
727
728    None, None, None, None,
729    None, None, None, None,
730    None, None, None, None,
731    None, None, None, None,
732
733    None, None, None, None,
734    None, None, None, None,
735    None, None, None, None,
736    None, None, None, None,
737
738    // Telugu
739    None, Post, Post, Post,
740    None, None, None, None,
741    None, None, None, None,
742    None, None, None, None,
743
744    None, None, None, None,
745    None, Below, Below, Below,
746    Below, Below, Below, Below,
747    Below, Below, Below, Below,
748
749    Below, Below, Below, Below,
750    Below, Below, Below, Below,
751    Below, None, Below, Below,
752    Below, Below, Below, Below,
753
754    Below, None, Below, Below,
755    None, Below, Below, Below,
756    Below, Below, None, None,
757    None, None, Post, Above,
758
759    Above, Post, Post, Post,
760    Post, None, Above, Above,
761    Split, None, Post, Above,
762    Above, Halant, None, None,
763
764    None, None, None, None,
765    None, Above, Below, None,
766    None, None, None, None,
767    None, None, None, None,
768
769    None, None, None, None,
770    None, None, None, None,
771    None, None, None, None,
772    None, None, None, None,
773
774    None, None, None, None,
775    None, None, None, None,
776    None, None, None, None,
777    None, None, None, None,
778
779    // Kannada
780    None, None, Post, Post,
781    None, None, None, None,
782    None, None, None, None,
783    None, None, None, None,
784
785    None, None, None, None,
786    None, Below, Below, Below,
787    Below, Below, Below, Below,
788    Below, Below, Below, Below,
789
790    Below, Below, Below, Below,
791    Below, Below, Below, Below,
792    Below, Below, Below, Below,
793    Below, Below, Below, Below,
794
795    Below, None, Below, Below,
796    None, Below, Below, Below,
797    Below, Below, None, None,
798    None, None, Post, Above,
799
800    Split, Post, Post, Post,
801    Post, None, Above, Split,
802    Split, None, Split, Split,
803    Above, Halant, None, None,
804
805    None, None, None, None,
806    None, Post, Post, None,
807    None, None, None, None,
808    None, None, Below, None,
809
810    None, None, Below, Below,
811    None, None, None, None,
812    None, None, None, None,
813    None, None, None, None,
814
815    None, None, None, None,
816    None, None, None, None,
817    None, None, None, None,
818    None, None, None, None,
819
820    // Malayalam
821    None, None, Post, Post,
822    None, None, None, None,
823    None, None, None, None,
824    None, None, None, None,
825
826    None, None, None, None,
827    None, None, None, None,
828    None, None, None, None,
829    None, None, None, None,
830
831    None, None, None, None,
832    None, None, None, None,
833    None, None, None, None,
834    None, None, None, Post,
835
836    Post, None, Below, None,
837    None, Post, None, None,
838    None, None, None, None,
839    None, None, Post, Post,
840
841    Post, Post, Post, Post,
842    None, None, Pre, Pre,
843    Pre, None, Split, Split,
844    Split, Halant, None, None,
845
846    None, None, None, None,
847    None, None, None, Post,
848    None, None, None, None,
849    None, None, None, None,
850
851    None, None, None, None,
852    None, None, None, None,
853    None, None, None, None,
854    None, None, None, None,
855
856    None, None, None, None,
857    None, None, None, None,
858    None, None, None, None,
859    None, None, None, None,
860
861    // Sinhala
862    None, None, Post, Post,
863    None, None, None, None,
864    None, None, None, None,
865    None, None, None, None,
866
867    None, None, None, None,
868    None, None, None, None,
869    None, None, None, None,
870    None, None, None, None,
871
872    None, None, None, None,
873    None, None, None, None,
874    None, None, None, None,
875    None, None, None, None,
876
877    None, None, None, None,
878    None, None, None, None,
879    None, None, None, None,
880    None, None, None, None,
881
882    None, None, None, None,
883    None, None, None, None,
884    None, None, None, None,
885    None, None, None, Post,
886
887    Post, Post, Above, Above,
888    Below, None, Below, None,
889    Post, Pre, Split, Pre,
890    Split, Split, Split, Post,
891
892    None, None, None, None,
893    None, None, None, None,
894    None, None, None, None,
895    None, None, None, None,
896
897    None, None, Post, Post,
898    None, None, None, None,
899    None, None, None, None,
900    None, None, None, None
901};
902
903static inline Form form(unsigned short uc) {
904    if (uc < 0x900 || uc > 0xdff) {
905        if (uc == 0x25cc)
906            return Consonant;
907        if (uc == 0x200c || uc == 0x200d)
908            return Control;
909        return Other;
910    }
911    return (Form)indicForms[uc-0x900];
912}
913
914static inline Position indic_position(unsigned short uc) {
915    if (uc < 0x900 || uc > 0xdff)
916        return None;
917    return (Position) indicPosition[uc-0x900];
918}
919
920
// Bit flags combined per script in scriptProperties.
enum IndicScriptProperties {
    HasReph  = 1 << 0,  // 0x01
    HasSplit = 1 << 1   // 0x02
};
925
926const hb_uint8 scriptProperties[10] = {
927    // Devanagari,
928    HasReph,
929    // Bengali,
930    HasReph|HasSplit,
931    // Gurmukhi,
932    0,
933    // Gujarati,
934    HasReph,
935    // Oriya,
936    HasReph|HasSplit,
937    // Tamil,
938    HasSplit,
939    // Telugu,
940    HasSplit,
941    // Kannada,
942    HasSplit|HasReph,
943    // Malayalam,
944    HasSplit,
945    // Sinhala,
946    HasSplit
947};
948
949struct IndicOrdering {
950    Form form;
951    Position position;
952};
953
954static const IndicOrdering devanagari_order [] = {
955    { Consonant, Below },
956    { Matra, Below },
957    { VowelMark, Below },
958    { StressMark, Below },
959    { Matra, Above },
960    { Matra, Post },
961    { Consonant, Reph },
962    { VowelMark, Above },
963    { StressMark, Above },
964    { VowelMark, Post },
965    { (Form)0, None }
966};
967
968static const IndicOrdering bengali_order [] = {
969    { Consonant, Below },
970    { Matra, Below },
971    { Matra, Above },
972    { Consonant, Reph },
973    { VowelMark, Above },
974    { Consonant, Post },
975    { Matra, Post },
976    { VowelMark, Post },
977    { (Form)0, None }
978};
979
980static const IndicOrdering gurmukhi_order [] = {
981    { Consonant, Below },
982    { Matra, Below },
983    { Matra, Above },
984    { Consonant, Post },
985    { Matra, Post },
986    { VowelMark, Above },
987    { (Form)0, None }
988};
989
990static const IndicOrdering tamil_order [] = {
991    { Matra, Above },
992    { Matra, Post },
993    { VowelMark, Post },
994    { (Form)0, None }
995};
996
997static const IndicOrdering telugu_order [] = {
998    { Matra, Above },
999    { Matra, Below },
1000    { Matra, Post },
1001    { Consonant, Below },
1002    { Consonant, Post },
1003    { VowelMark, Post },
1004    { (Form)0, None }
1005};
1006
1007static const IndicOrdering kannada_order [] = {
1008    { Matra, Above },
1009    { Matra, Post },
1010    { Consonant, Below },
1011    { Consonant, Post },
1012    { LengthMark, Post },
1013    { Consonant, Reph },
1014    { VowelMark, Post },
1015    { (Form)0, None }
1016};
1017
1018static const IndicOrdering malayalam_order [] = {
1019    { Consonant, Below },
1020    { Matra, Below },
1021    { Consonant, Reph },
1022    { Consonant, Post },
1023    { Matra, Post },
1024    { VowelMark, Post },
1025    { (Form)0, None }
1026};
1027
// Post-base reordering sequence used for Sinhala. Steps are applied top to
// bottom; the { (Form)0, None } entry terminates the list.
static const IndicOrdering sinhala_order [] = {
    { Matra, Below },
    { Matra, Above },
    { Matra, Post },
    { VowelMark, Post },
    { (Form)0, None }
};
1035
// Per-script reordering table, indexed by (script - HB_Script_Devanagari).
// Gujarati shares the Devanagari sequence and Oriya shares the Bengali one.
static const IndicOrdering * const indic_order[] = {
    devanagari_order, // Devanagari
    bengali_order, // Bengali
    gurmukhi_order, // Gurmukhi
    devanagari_order, // Gujarati
    bengali_order, // Oriya
    tamil_order, // Tamil
    telugu_order, // Telugu
    kannada_order, // Kannada
    malayalam_order, // Malayalam
    sinhala_order // Sinhala
};
1048
1049
1050
// Vowel matras that have to be split into two or three parts.
//
// Layout: rows of four code units { matra, part1, part2, part3 }. A part3 of
// 0x0 means the matra splits into two parts only. Rows are sorted by
// ascending matra code point and the table ends with a single 0xffff
// sentinel; splitMatra() relies on both properties for its linear search.
static const unsigned short split_matras[]  = {
    //  matra, split1, split2, split3

    // bengali
    0x9cb, 0x9c7, 0x9be, 0x0,
    0x9cc, 0x9c7, 0x9d7, 0x0,
    // oriya
    0xb48, 0xb47, 0xb56, 0x0,
    0xb4b, 0xb47, 0xb3e, 0x0,
    0xb4c, 0xb47, 0xb57, 0x0,
    // tamil
    0xbca, 0xbc6, 0xbbe, 0x0,
    0xbcb, 0xbc7, 0xbbe, 0x0,
    0xbcc, 0xbc6, 0xbd7, 0x0,
    // telugu
    0xc48, 0xc46, 0xc56, 0x0,
    // kannada
    0xcc0, 0xcbf, 0xcd5, 0x0,
    0xcc7, 0xcc6, 0xcd5, 0x0,
    0xcc8, 0xcc6, 0xcd6, 0x0,
    0xcca, 0xcc6, 0xcc2, 0x0,
    0xccb, 0xcc6, 0xcc2, 0xcd5,
    // malayalam
    0xd4a, 0xd46, 0xd3e, 0x0,
    0xd4b, 0xd47, 0xd3e, 0x0,
    0xd4c, 0xd46, 0xd57, 0x0,
    // sinhala
    0xdda, 0xdd9, 0xdca, 0x0,
    0xddc, 0xdd9, 0xdcf, 0x0,
    0xddd, 0xdd9, 0xdcf, 0xdca,
    0xdde, 0xdd9, 0xddf, 0x0,
    // end-of-table sentinel
    0xffff
};
1085
1086static inline void splitMatra(unsigned short *reordered, int matra, int &len)
1087{
1088    unsigned short matra_uc = reordered[matra];
1089    //qDebug("matra=%d, reordered[matra]=%x", matra, reordered[matra]);
1090
1091    const unsigned short *split = split_matras;
1092    while (split[0] < matra_uc)
1093        split += 4;
1094
1095    assert(*split == matra_uc);
1096    ++split;
1097
1098    int added_chars = split[2] == 0x0 ? 1 : 2;
1099
1100    memmove(reordered + matra + added_chars, reordered + matra, (len-matra)*sizeof(unsigned short));
1101    reordered[matra] = split[0];
1102    reordered[matra+1] = split[1];
1103    if(added_chars == 2)
1104        reordered[matra+2] = split[2];
1105    len += added_chars;
1106}
1107
#ifndef NO_OPENTYPE
// OpenType features used for Indic shaping. Each entry pairs a four-letter
// feature tag with the property bit that represents it in the per-glyph
// property masks built in indic_shape_syllable(). The list is terminated by
// a { 0, 0 } entry and is handed to HB_SelectScript() by HB_IndicShape().
static const HB_OpenTypeFeature indic_features[] = {
    { HB_MAKE_TAG('l', 'o', 'c', 'a'), LocaProperty },
    { HB_MAKE_TAG('c', 'c', 'm', 'p'), CcmpProperty },
    { HB_MAKE_TAG('i', 'n', 'i', 't'), InitProperty },
    { HB_MAKE_TAG('n', 'u', 'k', 't'), NuktaProperty },
    { HB_MAKE_TAG('a', 'k', 'h', 'n'), AkhantProperty },
    { HB_MAKE_TAG('r', 'p', 'h', 'f'), RephProperty },
    { HB_MAKE_TAG('b', 'l', 'w', 'f'), BelowFormProperty },
    { HB_MAKE_TAG('h', 'a', 'l', 'f'), HalfFormProperty },
    { HB_MAKE_TAG('p', 's', 't', 'f'), PostFormProperty },
    { HB_MAKE_TAG('c', 'j', 'c', 't'), ConjunctFormProperty },
    { HB_MAKE_TAG('v', 'a', 't', 'u'), VattuProperty },
    { HB_MAKE_TAG('p', 'r', 'e', 's'), PreSubstProperty },
    { HB_MAKE_TAG('b', 'l', 'w', 's'), BelowSubstProperty },
    { HB_MAKE_TAG('a', 'b', 'v', 's'), AboveSubstProperty },
    { HB_MAKE_TAG('p', 's', 't', 's'), PostSubstProperty },
    { HB_MAKE_TAG('h', 'a', 'l', 'n'), HalantProperty },
    { HB_MAKE_TAG('c', 'a', 'l', 't'), IndicCaltProperty },
    { 0, 0 } // terminator
};
#endif
1130
// Uncomment to get a trace of the Indic shaper on stderr.
// #define INDIC_DEBUG
#ifdef INDIC_DEBUG
#include <stdarg.h>

#define IDEBUG hb_debug

// printf-style trace helper: writes the formatted message followed by a
// newline to stderr.
static void hb_debug(const char *msg, ...)
{
    va_list args;

    va_start(args, msg);
    vfprintf(stderr, msg, args);
    va_end(args);

    fputs("\n", stderr);
}

#else
// Tracing disabled: the call is still compiled but never executed.
#define IDEBUG if(0) printf
#endif
1148
#if 0 //def INDIC_DEBUG
// Debug helper (currently compiled out): renders a per-glyph property mask
// as a space-separated list of feature names.
//
// The masks built in indic_shape_syllable() are stored inverted (a cleared
// bit selects a feature), so the value is complemented before testing bits.
static QString propertiesToString(int properties)
{
    QString res;
    properties = ~properties;
    if (properties & LocaProperty)
        res += "Loca ";
    if (properties & CcmpProperty)
        res += "Ccmp ";
    if (properties & InitProperty)
        res += "Init ";
    if (properties & NuktaProperty)
        res += "Nukta ";
    if (properties & AkhantProperty)
        res += "Akhant ";
    if (properties & RephProperty)
        res += "Reph ";
    if (properties & PreFormProperty)
        res += "PreForm ";
    if (properties & BelowFormProperty)
        res += "BelowForm ";
    if (properties & AboveFormProperty)
        res += "AboveForm ";
    if (properties & HalfFormProperty)
        res += "HalfForm ";
    if (properties & PostFormProperty)
        res += "PostForm ";
    // Previously printed "PostForm " as well, making the two features
    // indistinguishable in the trace.
    if (properties & ConjunctFormProperty)
        res += "ConjunctForm ";
    if (properties & VattuProperty)
        res += "Vattu ";
    if (properties & PreSubstProperty)
        res += "PreSubst ";
    if (properties & BelowSubstProperty)
        res += "BelowSubst ";
    if (properties & AboveSubstProperty)
        res += "AboveSubst ";
    if (properties & PostSubstProperty)
        res += "PostSubst ";
    if (properties & HalantProperty)
        res += "Halant ";
    if (properties & CligProperty)
        res += "Clig ";
    if (properties & IndicCaltProperty)
        res += "Calt ";
    return res;
}
#endif
1197
// Shapes a single syllable of an Indic script.
//
// The syllable's characters (item->string[item->item.pos ..] for
// item->item.length units) are copied into a scratch buffer, reordered
// according to rules 1-5 below, converted to glyphs and then either run
// through the OpenType engine (openType != 0) or positioned heuristically.
//
//   openType  whether OpenType shaping should be used for this item
//   item      shaper item describing the syllable; glyphs, attributes,
//             offsets, advances and num_glyphs are written
//   invalid   if true, a U+25CC (dotted circle) is prepended as a placeholder
//             base before shaping
//
// Returns true on success. Returns false if item->num_glyphs is smaller than
// length+4 (in which case num_glyphs is set to the required count), if the
// font fails to convert the string to glyph indices, or if OpenType
// positioning fails.
static bool indic_shape_syllable(HB_Bool openType, HB_ShaperItem *item, bool invalid)
{
    HB_Script script = item->item.script;
    assert(script >= HB_Script_Devanagari && script <= HB_Script_Sinhala);
    // Each supported script occupies a 0x80-wide block starting at U+0900, with
    // ra, halant and nukta at the same offsets inside every block.
    const unsigned short script_base = 0x0900 + 0x80*(script-HB_Script_Devanagari);
    const unsigned short ra = script_base + 0x30;
    const unsigned short halant = script_base + 0x4d;
    const unsigned short nukta = script_base + 0x3c;
    bool control = false;

    int len = (int)item->item.length;
    IDEBUG(">>>>> indic shape: from=%d, len=%d invalid=%d", item->item.pos, item->item.length, invalid);

    // Not enough room in the output arrays: report the required size and let
    // the caller retry. The +4 leaves slack for the characters added below
    // (dotted circle, split matra parts).
    if ((int)item->num_glyphs < len+4) {
        item->num_glyphs = len+4;
        return false;
    }

    HB_STACKARRAY(HB_UChar16, reordered, len + 4);
    HB_STACKARRAY(hb_uint8, position, len + 4);

    unsigned char properties = scriptProperties[script-HB_Script_Devanagari];

    if (invalid) {
        // prepend a dotted circle as placeholder base
        *reordered = 0x25cc;
        memcpy(reordered+1, item->string + item->item.pos, len*sizeof(HB_UChar16));
        len++;
    } else {
        memcpy(reordered, item->string + item->item.pos, len*sizeof(HB_UChar16));
    }
    if (reordered[len-1] == 0x200c) // zero width non joiner
        len--;

    int i;
    int base = 0;
    int reph = -1;

#ifdef INDIC_DEBUG
    IDEBUG("original:");
    for (i = 0; i < len; i++) {
        IDEBUG("    %d: %4x", i, reordered[i]);
    }
#endif

    if (len != 1) {
        HB_UChar16 *uc = reordered;
        bool beginsWithRa = false;

        // Rule 1: find base consonant
        //
        // The shaping engine finds the base consonant of the
        // syllable, using the following algorithm: starting from the
        // end of the syllable, move backwards until a consonant is
        // found that does not have a below-base or post-base form
        // (post-base forms have to follow below-base forms), or
        // arrive at the first consonant. The consonant stopped at
        // will be the base.
        //
        //  * If the syllable starts with Ra + H (in a script that has
        //    'Reph'), Ra is excluded from candidates for base
        //    consonants.
        //
        // * In Kannada and Telugu, the base consonant cannot be
        //   farther than 3 consonants from the end of the syllable.
        // #### replace the HasReph property by testing if the feature exists in the font!
        if (form(*uc) == Consonant || (script == HB_Script_Bengali && form(*uc) == IndependentVowel)) {
            // 0x9f0 is accepted as an alternative ra (it lies in the Bengali block)
            if ((properties & HasReph) && (len > 2) &&
                (*uc == ra || *uc == 0x9f0) && *(uc+1) == halant)
                beginsWithRa = true;

            // a control character right after ra+halant suppresses the reph
            if (beginsWithRa && form(*(uc+2)) == Control)
                beginsWithRa = false;

            base = (beginsWithRa ? 2 : 0);
            IDEBUG("    length = %d, beginsWithRa = %d, base=%d", len, beginsWithRa, base);

            int lastConsonant = 0;
            int matra = -1;
            // we remember:
            // * the last consonant since we need it for rule 2
            // * the matras position for rule 3 and 4

            // figure out possible base glyphs
            // (position[] temporarily holds form() values here; it is
            // overwritten with real positions in rule 5)
            memset(position, 0, len);
            if (script == HB_Script_Devanagari || script == HB_Script_Gujarati) {
                bool vattu = false;
                for (i = base; i < len; ++i) {
                    position[i] = form(uc[i]);
                    if (position[i] == Consonant) {
                        lastConsonant = i;
                        // a ra is treated as vattu unless the previous consonant was one
                        vattu = (!vattu && uc[i] == ra);
                        if (vattu) {
                            IDEBUG("excluding vattu glyph at %d from base candidates", i);
                            position[i] = Vattu;
                        }
                    } else if (position[i] == Matra) {
                        // remembers the last matra
                        matra = i;
                    }
                }
            } else {
                for (i = base; i < len; ++i) {
                    position[i] = form(uc[i]);
                    if (position[i] == Consonant)
                        lastConsonant = i;
                    // remembers the first matra only
                    else if (matra < 0 && position[i] == Matra)
                        matra = i;
                }
            }
            // walk backwards over the candidates to pick the base
            int skipped = 0;
            Position pos = Post;
            for (i = len-1; i >= base; i--) {
                if (position[i] != Consonant && (position[i] != Control || script == HB_Script_Kannada))
                    continue;

                // a consonant directly following a control character becomes the base
                if (i < len-1 && position[i] == Control && position[i+1] == Consonant) {
                    base = i+1;
                    break;
                }

                Position charPosition = indic_position(uc[i]);
                if (pos == Post && charPosition == Post) {
                    pos = Post;
                } else if ((pos == Post || pos == Below) && charPosition == Below) {
                    if (script == HB_Script_Devanagari || script == HB_Script_Gujarati)
                        base = i;
                    pos = Below;
                } else {
                    base = i;
                    break;
                }
                // Kannada/Telugu: stop once two candidates have been skipped
                if (skipped == 2 && (script == HB_Script_Kannada || script == HB_Script_Telugu)) {
                    base = i;
                    break;
                }
                ++skipped;
            }

            IDEBUG("    base consonant at %d skipped=%d, lastConsonant=%d", base, skipped, lastConsonant);

            // Rule 2:
            //
            // If the base consonant is not the last one, Uniscribe
            // moves the halant from the base consonant to the last
            // one.
            if (lastConsonant > base) {
                int halantPos = 0;
                if (uc[base+1] == halant)
                    halantPos = base + 1;
                else if (uc[base+1] == nukta && uc[base+2] == halant)
                    halantPos = base + 2;
                if (halantPos > 0) {
                    IDEBUG("    moving halant from %d to %d!", base+1, lastConsonant);
                    // shift everything between the halant and the last consonant down by one
                    for (i = halantPos; i < lastConsonant; i++)
                        uc[i] = uc[i+1];
                    uc[lastConsonant] = halant;
                }
            }

            // Rule 3:
            //
            // If the syllable starts with Ra + H, Uniscribe moves
            // this combination so that it follows either:

            // * the post-base 'matra' (if any) or the base consonant
            //   (in scripts that show similarity to Devanagari, i.e.,
            //   Devanagari, Gujarati, Bengali)
            // * the base consonant (other scripts)
            // * the end of the syllable (Kannada)

            Position matra_position = None;
            if (matra > 0)
                matra_position = indic_position(uc[matra]);
            IDEBUG("    matra at %d with form %d, base=%d", matra, matra_position, base);

            if (beginsWithRa && base != 0) {
                // default target: right after the base and its nukta/halant/ZWJ/ra+halant
                int toPos = base+1;
                if (toPos < len && uc[toPos] == nukta)
                    toPos++;
                if (toPos < len && uc[toPos] == halant)
                    toPos++;
                if (toPos < len && uc[toPos] == 0x200d)
                    toPos++;
                if (toPos < len-1 && uc[toPos] == ra && uc[toPos+1] == halant)
                    toPos += 2;
                if (script == HB_Script_Devanagari || script == HB_Script_Gujarati || script == HB_Script_Bengali) {
                    if (matra_position == Post || matra_position == Split) {
                        toPos = matra+1;
                        // the matra moves two slots towards the front together with the text
                        matra -= 2;
                    }
                } else if (script == HB_Script_Kannada) {
                    toPos = len;
                    matra -= 2;
                }

                IDEBUG("moving leading ra+halant to position %d", toPos);
                for (i = 2; i < toPos; i++)
                    uc[i-2] = uc[i];
                uc[toPos-2] = ra;
                uc[toPos-1] = halant;
                base -= 2;
                if (properties & HasReph)
                    reph = toPos-2;
            }

            // Rule 4:

            // Uniscribe splits two- or three-part matras into their
            // parts. This splitting is a character-to-character
            // operation.
            //
            //      Uniscribe describes some moving operations for these
            //      matras here. For shaping however all pre matras need
            //      to be at the beginning of the syllable, so we just move
            //      them there now.
            if (matra_position == Split) {
                splitMatra(uc, matra, len);
                // Handle three-part matras (0xccb in Kannada)
                matra_position = indic_position(uc[matra]);
            }

            if (matra_position == Pre) {
                // rotate the pre-base matra to the very front of the syllable
                unsigned short m = uc[matra];
                while (matra--)
                    uc[matra+1] = uc[matra];
                uc[0] = m;
                base++;
            }
        }

        // Rule 5:
        //
        // Uniscribe classifies consonants and 'matra' parts as
        // pre-base, above-base (Reph), below-base or post-base. This
        // classification exists on the character code level and is
        // language-dependent, not font-dependent.
        for (i = 0; i < base; ++i)
            position[i] = Pre;
        position[base] = Base;
        for (i = base+1; i < len; ++i) {
            position[i] = indic_position(uc[i]);
            // #### replace by adjusting table
            if (uc[i] == nukta || uc[i] == halant)
                position[i] = Inherit;
        }
        if (reph > 0) {
            // recalculate reph, it might have changed.
            // (picks the last ra found after the base)
            for (i = base+1; i < len; ++i)
                if (uc[i] == ra)
                    reph = i;
            position[reph] = Reph;
            position[reph+1] = Inherit;
        }

        // all reordering happens now to the chars after the base
        int fixed = base+1;
        if (fixed < len && uc[fixed] == nukta)
            fixed++;
        if (fixed < len && uc[fixed] == halant)
            fixed++;
        if (fixed < len && uc[fixed] == 0x200d)
            fixed++;

#ifdef INDIC_DEBUG
        for (i = fixed; i < len; ++i)
            IDEBUG("position[%d] = %d, form=%d uc=%x", i, position[i], form(uc[i]), uc[i]);
#endif
        // we continuously position the matras and vowel marks and increase the fixed
        // until we reached the end.
        const IndicOrdering *finalOrder = indic_order[script-HB_Script_Devanagari];

        IDEBUG("    reordering pass:");
        IDEBUG("        base=%d fixed=%d", base, fixed);
        int toMove = 0;
        // one pass over the tail per ordering step; stops at the table's
        // terminating entry or when at most one character is left unfixed
        while (finalOrder[toMove].form && fixed < len-1) {
            IDEBUG("        fixed = %d, toMove=%d, moving form %d with pos %d", fixed, toMove, finalOrder[toMove].form, finalOrder[toMove].position);
            for (i = fixed; i < len; i++) {
//                IDEBUG() << "           i=" << i << "uc=" << hex << uc[i] << "form=" << form(uc[i])
//                         << "position=" << position[i];
                if (form(uc[i]) == finalOrder[toMove].form &&
                     position[i] == finalOrder[toMove].position) {
                    // need to move this glyph
                    int to = fixed;
                    if (i < len-1 && position[i+1] == Inherit) {
                        // the character drags its following nukta/halant along
                        IDEBUG("         moving two chars from %d to %d", i, to);
                        unsigned short ch = uc[i];
                        unsigned short ch2 = uc[i+1];
                        unsigned char pos = position[i];
                        for (int j = i+1; j > to+1; j--) {
                            uc[j] = uc[j-2];
                            position[j] = position[j-2];
                        }
                        uc[to] = ch;
                        uc[to+1] = ch2;
                        position[to] = pos;
                        position[to+1] = pos;
                        fixed += 2;
                    } else {
                        IDEBUG("         moving one char from %d to %d", i, to);
                        unsigned short ch = uc[i];
                        unsigned char pos = position[i];
                        for (int j = i; j > to; j--) {
                            uc[j] = uc[j-1];
                            position[j] = position[j-1];
                        }
                        uc[to] = ch;
                        position[to] = pos;
                        fixed++;
                    }
                }
            }
            toMove++;
        }

    }

    if (reph > 0) {
        // recalculate reph, it might have changed.
        for (i = base+1; i < len; ++i)
            if (reordered[i] == ra)
                reph = i;
    }

#ifndef NO_OPENTYPE
    // remember the capacity before convertStringToGlyphIndices overwrites num_glyphs
    const int availableGlyphs = item->num_glyphs;
#endif
    if (!item->font->klass->convertStringToGlyphIndices(item->font,
                                                        reordered, len,
                                                        item->glyphs, &item->num_glyphs,
                                                        item->item.bidiLevel % 2))
        goto error;


    IDEBUG("  base=%d, reph=%d", base, reph);
    IDEBUG("reordered:");
    // reset the attributes of every reordered character
    for (i = 0; i < len; i++) {
        item->attributes[i].mark = false;
        item->attributes[i].clusterStart = false;
        item->attributes[i].justification = 0;
        item->attributes[i].zeroWidth = false;
        IDEBUG("    %d: %4x", i, reordered[i]);
    }

    // now we have the syllable in the right order, and can start running it through open type.

    for (i = 0; i < len; ++i)
        control |= (form(reordered[i]) == Control);

#ifndef NO_OPENTYPE
    if (openType) {

        // we need to keep track of where the base glyph is for some
        // scripts and use the cluster feature for this.  This
        // also means we have to correct the logCluster output from
        // the open type engine manually afterwards.  for indic this
        // is rather simple, as all chars just point to the first
        // glyph in the syllable.
        HB_STACKARRAY(unsigned short, clusters, len);
        // NOTE: this shadows the outer `properties` (script property flags)
        // for the rest of this block.
        HB_STACKARRAY(unsigned int, properties, len);

        for (i = 0; i < len; ++i)
            clusters[i] = i;

        // features we should always apply
        // (the mask is inverted: a cleared bit selects the feature)
        for (i = 0; i < len; ++i)
            properties[i] = ~(LocaProperty
                              | CcmpProperty
                              | NuktaProperty
                              | VattuProperty
                              | ConjunctFormProperty
                              | PreSubstProperty
                              | BelowSubstProperty
                              | AboveSubstProperty
                              | PostSubstProperty
                              | HalantProperty
                              | IndicCaltProperty
                              | PositioningProperties);

        // Loca always applies
        // Ccmp always applies
        // Init
        // (only when the syllable is not preceded by a letter or mark)
        if (item->item.pos == 0
            || !(isLetter(item->string[item->item.pos-1]) || isMark(item->string[item->item.pos-1])))
            properties[0] &= ~InitProperty;

        // Nukta always applies
        // Akhant
        for (i = 0; i <= base; ++i)
            properties[i] &= ~AkhantProperty;
        // Reph
        if (reph >= 0) {
            properties[reph] &= ~RephProperty;
            properties[reph+1] &= ~RephProperty;
        }
        // BelowForm
        for (i = base+1; i < len; ++i)
            properties[i] &= ~BelowFormProperty;

        if (script == HB_Script_Devanagari || script == HB_Script_Gujarati) {
            // vattu glyphs need this as well
            bool vattu = false;
            for (i = base-2; i > 1; --i) {
                if (form(reordered[i]) == Consonant) {
                    vattu = (!vattu && reordered[i] == ra);
                    if (vattu) {
                        IDEBUG("forming vattu ligature at %d", i);
                        properties[i] &= ~BelowFormProperty;
                        properties[i+1] &= ~BelowFormProperty;
                    }
                }
            }
        }
        // HalfFormProperty
        for (i = 0; i < base; ++i)
            properties[i] &= ~HalfFormProperty;
        if (control) {
            // both joiners enable the half form on the two preceding characters
            for (i = 2; i < len; ++i) {
                if (reordered[i] == 0x200d /* ZWJ */) {
                    properties[i-1] &= ~HalfFormProperty;
                    properties[i-2] &= ~HalfFormProperty;
                } else if (reordered[i] == 0x200c /* ZWNJ */) {
                    properties[i-1] &= ~HalfFormProperty;
                    properties[i-2] &= ~HalfFormProperty;
                }
            }
        }
        // PostFormProperty
        for (i = base+1; i < len; ++i)
            properties[i] &= ~PostFormProperty;
        // vattu always applies
        // pres always applies
        // blws always applies
        // abvs always applies
        // psts always applies
        // halant always applies
        // calt always applies

#ifdef INDIC_DEBUG
//        {
//            IDEBUG("OT properties:");
//            for (int i = 0; i < len; ++i)
//                qDebug("    i: %s", ::propertiesToString(properties[i]).toLatin1().data());
//        }
#endif

        // initialize
        // NOTE(review): item->log_clusters keeps pointing at `clusters` after
        // it is released below - confirm no caller reads it afterwards.
        item->log_clusters = clusters;
        HB_OpenTypeShape(item, properties);

        int newLen = item->face->buffer->in_length;
        HB_GlyphItem otl_glyphs = item->face->buffer->in_string;

        // move the left matra back to its correct position in malayalam and tamil
        if ((script == HB_Script_Malayalam || script == HB_Script_Tamil) && (form(reordered[0]) == Matra)) {
//             qDebug("reordering matra, len=%d", newLen);
            // need to find the base in the shaped string and move the matra there
            int basePos = 0;
            while (basePos < newLen && (int)otl_glyphs[basePos].cluster <= base)
                basePos++;
            --basePos;
            if (basePos < newLen && basePos > 1) {
//                 qDebug("moving prebase matra to position %d in syllable newlen=%d", basePos, newLen);
                HB_GlyphItemRec m = otl_glyphs[0];
                --basePos;
                for (i = 0; i < basePos; ++i)
                    otl_glyphs[i] = otl_glyphs[i+1];
                otl_glyphs[basePos] = m;
            }
        }

        HB_Bool positioned = HB_OpenTypePosition(item, availableGlyphs, false);

        HB_FREE_STACKARRAY(clusters);
        HB_FREE_STACKARRAY(properties);

        if (!positioned)
            goto error;

        if (control) {
            IDEBUG("found a control char in the syllable");
            // compact the output in place, dropping glyphs whose cluster maps
            // to a control character (at most one per iteration)
            hb_uint32 i = 0, j = 0;
            while (i < item->num_glyphs) {
                if (form(reordered[otl_glyphs[i].cluster]) == Control) {
                    ++i;
                    if (i >= item->num_glyphs)
                        break;
                }
                item->glyphs[j] = item->glyphs[i];
                item->attributes[j] = item->attributes[i];
                // BEGIN android-added
                item->offsets[j] = item->offsets[i];
                item->advances[j] = item->advances[i];
                // END android-added
                ++i;
                ++j;
            }
            item->num_glyphs = j;
        }

    } else {
        HB_HeuristicPosition(item);
    }
#endif // NO_OPENTYPE
    // the first glyph starts the syllable's cluster
    item->attributes[0].clusterStart = true;

    HB_FREE_STACKARRAY(reordered);
    HB_FREE_STACKARRAY(position);

    IDEBUG("<<<<<<");
    return true;

error:
    // common failure exit: release the scratch buffers
    HB_FREE_STACKARRAY(reordered);
    HB_FREE_STACKARRAY(position);
    return false;
}
1713
1714/* syllables are of the form:
1715
1716   (Consonant Nukta? Halant)* Consonant Matra? VowelMark? StressMark?
1717   (Consonant Nukta? Halant)* Consonant Halant
1718   IndependentVowel VowelMark? StressMark?
1719
1720   We return syllable boundaries on invalid combinations aswell
1721*/
1722static int indic_nextSyllableBoundary(HB_Script script, const HB_UChar16 *s, int start, int end, bool *invalid)
1723{
1724    *invalid = false;
1725    IDEBUG("indic_nextSyllableBoundary: start=%d, end=%d", start, end);
1726    const HB_UChar16 *uc = s+start;
1727
1728    int pos = 0;
1729    Form state = form(uc[pos]);
1730    IDEBUG("state[%d]=%d (uc=%4x)", pos, state, uc[pos]);
1731    pos++;
1732
1733    if (state != Consonant && state != IndependentVowel) {
1734        if (state != Other)
1735            *invalid = true;
1736        goto finish;
1737    }
1738
1739    while (pos < end - start) {
1740        Form newState = form(uc[pos]);
1741        IDEBUG("state[%d]=%d (uc=%4x)", pos, newState, uc[pos]);
1742        switch(newState) {
1743        case Control:
1744            newState = state;
1745 	    if (state == Halant && uc[pos] == 0x200d /* ZWJ */)
1746  		break;
1747            // the control character should be the last char in the item
1748            ++pos;
1749            goto finish;
1750        case Consonant:
1751	    if (state == Halant && (script != HB_Script_Sinhala || uc[pos-1] == 0x200d /* ZWJ */))
1752                break;
1753            goto finish;
1754        case Halant:
1755            if (state == Nukta || state == Consonant)
1756                break;
1757            // Bengali has a special exception allowing the combination Vowel_A/E + Halant + Ya
1758            if (script == HB_Script_Bengali && pos == 1 &&
1759                 (uc[0] == 0x0985 || uc[0] == 0x098f))
1760                break;
1761            // Sinhala uses the Halant as a component of certain matras. Allow these, but keep the state on Matra.
1762            if (script == HB_Script_Sinhala && state == Matra) {
1763                ++pos;
1764                continue;
1765            }
1766            if (script == HB_Script_Malayalam && state == Matra && uc[pos-1] == 0x0d41) {
1767                ++pos;
1768                continue;
1769            }
1770            goto finish;
1771        case Nukta:
1772            if (state == Consonant)
1773                break;
1774            goto finish;
1775        case StressMark:
1776            if (state == VowelMark)
1777                break;
1778            // fall through
1779        case VowelMark:
1780            if (state == Matra || state == LengthMark || state == IndependentVowel)
1781                break;
1782            // fall through
1783        case Matra:
1784            if (state == Consonant || state == Nukta)
1785                break;
1786            if (state == Matra) {
1787                // ### needs proper testing for correct two/three part matras
1788                break;
1789            }
1790            // ### not sure if this is correct. If it is, does it apply only to Bengali or should
1791            // it work for all Indic languages?
1792            // the combination Independent_A + Vowel Sign AA is allowed.
1793            if (script == HB_Script_Bengali && uc[pos] == 0x9be && uc[pos-1] == 0x985)
1794                break;
1795            if (script == HB_Script_Tamil && state == Matra) {
1796                if (uc[pos-1] == 0x0bc6 &&
1797                     (uc[pos] == 0xbbe || uc[pos] == 0xbd7))
1798                    break;
1799                if (uc[pos-1] == 0x0bc7 && uc[pos] == 0xbbe)
1800                    break;
1801            }
1802            goto finish;
1803
1804        case LengthMark:
1805            if (state == Matra) {
1806                // ### needs proper testing for correct two/three part matras
1807                break;
1808            }
1809        case IndependentVowel:
1810        case Invalid:
1811        case Other:
1812            goto finish;
1813        }
1814        state = newState;
1815        pos++;
1816    }
1817 finish:
1818    return pos+start;
1819}
1820
1821HB_Bool HB_IndicShape(HB_ShaperItem *item)
1822{
1823    assert(item->item.script >= HB_Script_Devanagari && item->item.script <= HB_Script_Sinhala);
1824
1825    HB_Bool openType = false;
1826#ifndef NO_OPENTYPE
1827    openType = HB_SelectScript(item, indic_features);
1828#endif
1829    unsigned short *logClusters = item->log_clusters;
1830
1831    HB_ShaperItem syllable = *item;
1832    int first_glyph = 0;
1833
1834    int sstart = item->item.pos;
1835    int end = sstart + item->item.length;
1836    IDEBUG("indic_shape: from %d length %d", item->item.pos, item->item.length);
1837    while (sstart < end) {
1838        bool invalid;
1839        int send = indic_nextSyllableBoundary(item->item.script, item->string, sstart, end, &invalid);
1840        IDEBUG("syllable from %d, length %d, invalid=%s", sstart, send-sstart,
1841               invalid ? "true" : "false");
1842        syllable.item.pos = sstart;
1843        syllable.item.length = send-sstart;
1844        syllable.glyphs = item->glyphs + first_glyph;
1845        syllable.attributes = item->attributes + first_glyph;
1846        syllable.offsets = item->offsets + first_glyph;
1847        syllable.advances = item->advances + first_glyph;
1848        syllable.num_glyphs = item->num_glyphs - first_glyph;
1849        if (!indic_shape_syllable(openType, &syllable, invalid)) {
1850            IDEBUG("syllable shaping failed, syllable requests %d glyphs", syllable.num_glyphs);
1851            item->num_glyphs += syllable.num_glyphs;
1852            return false;
1853        }
1854        // fix logcluster array
1855        IDEBUG("syllable:");
1856        hb_uint32 g;
1857        for (g = first_glyph; g < first_glyph + syllable.num_glyphs; ++g)
1858            IDEBUG("        %d -> glyph %x", g, item->glyphs[g]);
1859        IDEBUG("    logclusters:");
1860        int i;
1861        for (i = sstart; i < send; ++i) {
1862            IDEBUG("        %d -> glyph %d", i, first_glyph);
1863            logClusters[i-item->item.pos] = first_glyph;
1864        }
1865        sstart = send;
1866        first_glyph += syllable.num_glyphs;
1867    }
1868    item->num_glyphs = first_glyph;
1869    return true;
1870}
1871
1872void HB_IndicAttributes(HB_Script script, const HB_UChar16 *text, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes)
1873{
1874    int end = from + len;
1875    const HB_UChar16 *uc = text + from;
1876    attributes += from;
1877    hb_uint32 i = 0;
1878    while (i < len) {
1879        bool invalid;
1880        hb_uint32 boundary = indic_nextSyllableBoundary(script, text, from+i, end, &invalid) - from;
1881         attributes[i].charStop = true;
1882
1883        if (boundary > len-1) boundary = len;
1884        i++;
1885        while (i < boundary) {
1886            attributes[i].charStop = false;
1887            ++uc;
1888            ++i;
1889        }
1890        assert(i == boundary);
1891    }
1892
1893
1894}
1895
1896
1897