harfbuzz-indic.cpp revision 873b7b3e703e0f228f8d2d12896def00e281adf2
1/*
2 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
3 *
4 * This is part of HarfBuzz, an OpenType Layout engine library.
5 *
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
11 *
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16 * DAMAGE.
17 *
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23 */
24
25#include "harfbuzz-shaper.h"
26#include "harfbuzz-shaper-private.h"
27
28#include <assert.h>
29#include <stdio.h>
30
// FLAG(x) yields a mask with only bit number x set.
// The literal is unsigned so that a bit number reaching the sign bit of int
// (x == 31 on 32-bit int) is well defined instead of overflowing a signed shift.
#define FLAG(x) (1u << (x))
32
33static HB_Bool isLetter(HB_UChar16 ucs)
34{
35    const int test = FLAG(HB_Letter_Uppercase) |
36                     FLAG(HB_Letter_Lowercase) |
37                     FLAG(HB_Letter_Titlecase) |
38                     FLAG(HB_Letter_Modifier) |
39                     FLAG(HB_Letter_Other);
40    // BEGIN android-changed
41    // Check the value is zero or not instead of casting int to HB_Bool(unsigned char).
42    return (FLAG(HB_GetUnicodeCharCategory(ucs)) & test) != 0;
43    // END android-changed
44}
45
46static HB_Bool isMark(HB_UChar16 ucs)
47{
48    const int test = FLAG(HB_Mark_NonSpacing) |
49                     FLAG(HB_Mark_SpacingCombining) |
50                     FLAG(HB_Mark_Enclosing);
51    // BEGIN android-changed
52    // Check the value is zero or not instead of casting int to HB_Bool(unsigned char).
53    return (FLAG(HB_GetUnicodeCharCategory(ucs)) & test) != 0;
54    // END android-changed
55}
56
// Character classification stored in indicForms[] and returned by form().
// UnknownForm is an alias of Invalid; both have the value 0.
enum Form {
    Invalid          = 0,
    UnknownForm      = Invalid,
    Consonant        = 1,   // form() also reports this for U+25CC
    Nukta            = 2,
    Halant           = 3,
    Matra            = 4,
    VowelMark        = 5,
    StressMark       = 6,
    IndependentVowel = 7,
    LengthMark       = 8,
    Control          = 9,   // form() reports this for U+200C and U+200D
    Other            = 10   // form() fallback for any other code point outside U+0900..U+0DFF
};
71
// Form classification for every code point in U+0900..U+0DFF, one entry per
// code point, indexed by (code point - 0x900); form() performs the lookup.
// Each script section below holds 128 entries (8 groups of 16), so the section
// start addresses are 0x900, 0x980, 0xa00, ... 0xd80.
// Invalid and UnknownForm are the same enumerator value (0); both spellings
// produce identical table contents.
72static const unsigned char indicForms[0xe00-0x900] = {
73    // Devanagari (U+0900..U+097F)
74    Invalid, VowelMark, VowelMark, VowelMark,
75    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
76    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
77    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
78
79    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
80    IndependentVowel, Consonant, Consonant, Consonant,
81    Consonant, Consonant, Consonant, Consonant,
82    Consonant, Consonant, Consonant, Consonant,
83
84    Consonant, Consonant, Consonant, Consonant,
85    Consonant, Consonant, Consonant, Consonant,
86    Consonant, Consonant, Consonant, Consonant,
87    Consonant, Consonant, Consonant, Consonant,
88
89    Consonant, Consonant, Consonant, Consonant,
90    Consonant, Consonant, Consonant, Consonant,
91    Consonant, Consonant, UnknownForm, UnknownForm,
92    Nukta, Other, Matra, Matra,
93
94    Matra, Matra, Matra, Matra,
95    Matra, Matra, Matra, Matra,
96    Matra, Matra, Matra, Matra,
97    Matra, Halant, UnknownForm, UnknownForm,
98
99    Other, StressMark, StressMark, StressMark,
100    StressMark, UnknownForm, UnknownForm, UnknownForm,
101    Consonant, Consonant, Consonant, Consonant,
102    Consonant, Consonant, Consonant, Consonant,
103
104    IndependentVowel, IndependentVowel, VowelMark, VowelMark,
105    Other, Other, Other, Other,
106    Other, Other, Other, Other,
107    Other, Other, Other, Other,
108
109    Other, Other, Other, Other,
110    Other, Other, Other, Other,
111    Other, Other, Other, Consonant,
112    Consonant, Consonant /* ??? */, Consonant, Consonant,
113
114    // Bengali (U+0980..U+09FF)
115    Invalid, VowelMark, VowelMark, VowelMark,
116    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
117    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
118    IndependentVowel, Invalid, Invalid, IndependentVowel,
119
120    IndependentVowel, Invalid, Invalid, IndependentVowel,
121    IndependentVowel, Consonant, Consonant, Consonant,
122    Consonant, Consonant, Consonant, Consonant,
123    Consonant, Consonant, Consonant, Consonant,
124
125    Consonant, Consonant, Consonant, Consonant,
126    Consonant, Consonant, Consonant, Consonant,
127    Consonant, Invalid, Consonant, Consonant,
128    Consonant, Consonant, Consonant, Consonant,
129
130    Consonant, Invalid, Consonant, Invalid,
131    Invalid, Invalid, Consonant, Consonant,
132    Consonant, Consonant, UnknownForm, UnknownForm,
133    Nukta, Other, Matra, Matra,
134
135    Matra, Matra, Matra, Matra,
136    Matra, Invalid, Invalid, Matra,
137    Matra, Invalid, Invalid, Matra,
138    Matra, Halant, Consonant, UnknownForm,
139
140    Invalid, Invalid, Invalid, Invalid,
141    Invalid, Invalid, Invalid, VowelMark,
142    Invalid, Invalid, Invalid, Invalid,
143    Consonant, Consonant, Invalid, Consonant,
144
145    IndependentVowel, IndependentVowel, VowelMark, VowelMark,
146    Other, Other, Other, Other,
147    Other, Other, Other, Other,
148    Other, Other, Other, Other,
149
150    Consonant, Consonant, Other, Other,
151    Other, Other, Other, Other,
152    Other, Other, Other, Other,
153    Other, Other, Other, Other,
154
155    // Gurmukhi (U+0A00..U+0A7F)
156    Invalid, VowelMark, VowelMark, VowelMark,
157    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
158    IndependentVowel, IndependentVowel, IndependentVowel, Invalid,
159    Invalid, Invalid, Invalid, IndependentVowel,
160
161    IndependentVowel, Invalid, Invalid, IndependentVowel,
162    IndependentVowel, Consonant, Consonant, Consonant,
163    Consonant, Consonant, Consonant, Consonant,
164    Consonant, Consonant, Consonant, Consonant,
165
166    Consonant, Consonant, Consonant, Consonant,
167    Consonant, Consonant, Consonant, Consonant,
168    Consonant, Invalid, Consonant, Consonant,
169    Consonant, Consonant, Consonant, Consonant,
170
171    Consonant, Invalid, Consonant, Consonant,
172    Invalid, Consonant, Consonant, Invalid,
173    Consonant, Consonant, UnknownForm, UnknownForm,
174    Nukta, Other, Matra, Matra,
175
176    Matra, Matra, Matra, Invalid,
177    Invalid, Invalid, Invalid, Matra,
178    Matra, Invalid, Invalid, Matra,
179    Matra, Halant, UnknownForm, UnknownForm,
180
181    Invalid, Invalid, Invalid, Invalid,
182    Invalid, UnknownForm, UnknownForm, UnknownForm,
183    Invalid, Consonant, Consonant, Consonant,
184    Consonant, Invalid, Consonant, Invalid,
185
186    Other, Other, Invalid, Invalid,
187    Other, Other, Other, Other,
188    Other, Other, Other, Other,
189    Other, Other, Other, Other,
190
191    StressMark, StressMark, Consonant, Consonant,
192    Other, Other, Other, Other,
193    Other, Other, Other, Other,
194    Other, Other, Other, Other,
195
196    // Gujarati (U+0A80..U+0AFF)
197    Invalid, VowelMark, VowelMark, VowelMark,
198    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
199    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
200    IndependentVowel, IndependentVowel, Invalid, IndependentVowel,
201
202    IndependentVowel, IndependentVowel, Invalid, IndependentVowel,
203    IndependentVowel, Consonant, Consonant, Consonant,
204    Consonant, Consonant, Consonant, Consonant,
205    Consonant, Consonant, Consonant, Consonant,
206
207    Consonant, Consonant, Consonant, Consonant,
208    Consonant, Consonant, Consonant, Consonant,
209    Consonant, Invalid, Consonant, Consonant,
210    Consonant, Consonant, Consonant, Consonant,
211
212    Consonant, Invalid, Consonant, Consonant,
213    Invalid, Consonant, Consonant, Consonant,
214    Consonant, Consonant, UnknownForm, UnknownForm,
215    Nukta, Other, Matra, Matra,
216
217    Matra, Matra, Matra, Matra,
218    Matra, Matra, Invalid, Matra,
219    Matra, Matra, Invalid, Matra,
220    Matra, Halant, UnknownForm, UnknownForm,
221
222    Other, UnknownForm, UnknownForm, UnknownForm,
223    UnknownForm, UnknownForm, UnknownForm, UnknownForm,
224    UnknownForm, UnknownForm, UnknownForm, UnknownForm,
225    UnknownForm, UnknownForm, UnknownForm, UnknownForm,
226
227    IndependentVowel, IndependentVowel, VowelMark, VowelMark,
228    Other, Other, Other, Other,
229    Other, Other, Other, Other,
230    Other, Other, Other, Other,
231
232    Other, Other, Other, Other,
233    Other, Other, Other, Other,
234    Other, Other, Other, Other,
235    Other, Other, Other, Other,
236
237    // Oriya (U+0B00..U+0B7F)
238    Invalid, VowelMark, VowelMark, VowelMark,
239    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
240    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
241    IndependentVowel, Invalid, Invalid, IndependentVowel,
242
243    IndependentVowel, Invalid, Invalid, IndependentVowel,
244    IndependentVowel, Consonant, Consonant, Consonant,
245    Consonant, Consonant, Consonant, Consonant,
246    Consonant, Consonant, Consonant, Consonant,
247
248    Consonant, Consonant, Consonant, Consonant,
249    Consonant, Consonant, Consonant, Consonant,
250    Consonant, Invalid, Consonant, Consonant,
251    Consonant, Consonant, Consonant, Consonant,
252
253    Consonant, Invalid, Consonant, Consonant,
254    Invalid, Consonant, Consonant, Consonant,
255    Consonant, Consonant, UnknownForm, UnknownForm,
256    Nukta, Other, Matra, Matra,
257
258    Matra, Matra, Matra, Matra,
259    Invalid, Invalid, Invalid, Matra,
260    Matra, Invalid, Invalid, Matra,
261    Matra, Halant, UnknownForm, UnknownForm,
262
263    Other, Invalid, Invalid, Invalid,
264    Invalid, UnknownForm, LengthMark, LengthMark,
265    Invalid, Invalid, Invalid, Invalid,
266    Consonant, Consonant, Invalid, Consonant,
267
268    IndependentVowel, IndependentVowel, Invalid, Invalid,
269    Invalid, Invalid, Other, Other,
270    Other, Other, Other, Other,
271    Other, Other, Other, Other,
272
273    Other, Consonant, Other, Other,
274    Other, Other, Other, Other,
275    Other, Other, Other, Other,
276    Other, Other, Other, Other,
277
278    // Tamil (U+0B80..U+0BFF)
279    Invalid, Invalid, VowelMark, Other,
280    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
281    IndependentVowel, IndependentVowel, IndependentVowel, Invalid,
282    Invalid, Invalid, IndependentVowel, IndependentVowel,
283
284    IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
285    IndependentVowel, Consonant, Invalid, Invalid,
286    Invalid, Consonant, Consonant, Invalid,
287    Consonant, Invalid, Consonant, Consonant,
288
289    Invalid, Invalid, Invalid, Consonant,
290    Consonant, Invalid, Invalid, Invalid,
291    Consonant, Consonant, Consonant, Invalid,
292    Invalid, Invalid, Consonant, Consonant,
293
294    Consonant, Consonant, Consonant, Consonant,
295    Consonant, Consonant, Consonant, Consonant,
296    Consonant, Consonant, UnknownForm, UnknownForm,
297    Invalid, Invalid, Matra, Matra,
298
299    Matra, Matra, Matra, Invalid,
300    Invalid, Invalid, Matra, Matra,
301    Matra, Invalid, Matra, Matra,
302    Matra, Halant, Invalid, Invalid,
303
304    Invalid, Invalid, Invalid, Invalid,
305    Invalid, Invalid, Invalid, LengthMark,
306    Invalid, Invalid, Invalid, Invalid,
307    Invalid, Invalid, Invalid, Invalid,
308
309    Invalid, Invalid, Invalid, Invalid,
310    Invalid, Invalid, Other, Other,
311    Other, Other, Other, Other,
312    Other, Other, Other, Other,
313
314    Other, Other, Other, Other,
315    Other, Other, Other, Other,
316    Other, Other, Other, Other,
317    Other, Other, Other, Other,
318
319    // Telugu (U+0C00..U+0C7F)
320    Invalid, VowelMark, VowelMark, VowelMark,
321    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
322    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
323    IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
324
325    IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
326    IndependentVowel, Consonant, Consonant, Consonant,
327    Consonant, Consonant, Consonant, Consonant,
328    Consonant, Consonant, Consonant, Consonant,
329
330    Consonant, Consonant, Consonant, Consonant,
331    Consonant, Consonant, Consonant, Consonant,
332    Consonant, Invalid, Consonant, Consonant,
333    Consonant, Consonant, Consonant, Consonant,
334
335    Consonant, Consonant, Consonant, Consonant,
336    Invalid, Consonant, Consonant, Consonant,
337    Consonant, Consonant, UnknownForm, UnknownForm,
338    Invalid, Invalid, Matra, Matra,
339
340    Matra, Matra, Matra, Matra,
341    Matra, Invalid, Matra, Matra,
342    Matra, Invalid, Matra, Matra,
343    Matra, Halant, Invalid, Invalid,
344
345    Invalid, Invalid, Invalid, Invalid,
346    Invalid, LengthMark, Matra, Invalid,
347    Invalid, Invalid, Invalid, Invalid,
348    Invalid, Invalid, Invalid, Invalid,
349
350    IndependentVowel, IndependentVowel, Invalid, Invalid,
351    Invalid, Invalid, Other, Other,
352    Other, Other, Other, Other,
353    Other, Other, Other, Other,
354
355    Other, Other, Other, Other,
356    Other, Other, Other, Other,
357    Other, Other, Other, Other,
358    Other, Other, Other, Other,
359
360    // Kannada (U+0C80..U+0CFF)
361    Invalid, Invalid, VowelMark, VowelMark,
362    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
363    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
364    IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
365
366    IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
367    IndependentVowel, Consonant, Consonant, Consonant,
368    Consonant, Consonant, Consonant, Consonant,
369    Consonant, Consonant, Consonant, Consonant,
370
371    Consonant, Consonant, Consonant, Consonant,
372    Consonant, Consonant, Consonant, Consonant,
373    Consonant, Invalid, Consonant, Consonant,
374    Consonant, Consonant, Consonant, Consonant,
375
376    Consonant, Consonant, Consonant, Consonant,
377    Invalid, Consonant, Consonant, Consonant,
378    Consonant, Consonant, UnknownForm, UnknownForm,
379    Nukta, Other, Matra, Matra,
380
381    Matra, Matra, Matra, Matra,
382    Matra, Invalid, Matra, Matra,
383    Matra, Invalid, Matra, Matra,
384    Matra, Halant, Invalid, Invalid,
385
386    Invalid, Invalid, Invalid, Invalid,
387    Invalid, LengthMark, LengthMark, Invalid,
388    Invalid, Invalid, Invalid, Invalid,
389    Invalid, Invalid, Consonant, Invalid,
390
391    IndependentVowel, IndependentVowel, VowelMark, VowelMark,
392    Invalid, Invalid, Other, Other,
393    Other, Other, Other, Other,
394    Other, Other, Other, Other,
395
396    Other, Other, Other, Other,
397    Other, Other, Other, Other,
398    Other, Other, Other, Other,
399    Other, Other, Other, Other,
400
401    // Malayalam (U+0D00..U+0D7F)
402    Invalid, Invalid, VowelMark, VowelMark,
403    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
404    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
405    IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
406
407    IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
408    IndependentVowel, Consonant, Consonant, Consonant,
409    Consonant, Consonant, Consonant, Consonant,
410    Consonant, Consonant, Consonant, Consonant,
411
412    Consonant, Consonant, Consonant, Consonant,
413    Consonant, Consonant, Consonant, Consonant,
414    Consonant, Invalid, Consonant, Consonant,
415    Consonant, Consonant, Consonant, Consonant,
416
417    Consonant, Consonant, Consonant, Consonant,
418    Consonant, Consonant, Consonant, Consonant,
419    Consonant, Consonant, UnknownForm, UnknownForm,
420    Invalid, Invalid, Matra, Matra,
421
422    Matra, Matra, Matra, Matra,
423    Invalid, Invalid, Matra, Matra,
424    Matra, Invalid, Matra, Matra,
425    Matra, Halant, Invalid, Invalid,
426
427    Invalid, Invalid, Invalid, Invalid,
428    Invalid, Invalid, Invalid, Matra,
429    Invalid, Invalid, Invalid, Invalid,
430    Invalid, Invalid, Invalid, Invalid,
431
432    IndependentVowel, IndependentVowel, Invalid, Invalid,
433    Invalid, Invalid, Other, Other,
434    Other, Other, Other, Other,
435    Other, Other, Other, Other,
436
437    Other, Other, Other, Other,
438    Other, Other, Other, Other,
439    Other, Other, Other, Other,
440    Other, Other, Other, Other,
441
442    // Sinhala (U+0D80..U+0DFF)
443    Invalid, Invalid, VowelMark, VowelMark,
444    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
445    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
446    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
447
448    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
449    IndependentVowel, IndependentVowel, IndependentVowel, Invalid,
450    Invalid, Invalid, Consonant, Consonant,
451    Consonant, Consonant, Consonant, Consonant,
452
453    Consonant, Consonant, Consonant, Consonant,
454    Consonant, Consonant, Consonant, Consonant,
455    Consonant, Consonant, Consonant, Consonant,
456    Consonant, Consonant, Consonant, Consonant,
457
458    Consonant, Consonant, Invalid, Consonant,
459    Consonant, Consonant, Consonant, Consonant,
460    Consonant, Consonant, Consonant, Consonant,
461    Invalid, Consonant, Invalid, Invalid,
462
463    Consonant, Consonant, Consonant, Consonant,
464    Consonant, Consonant, Consonant, Invalid,
465    Invalid, Invalid, Halant, Invalid,
466    Invalid, Invalid, Invalid, Matra,
467
468    Matra, Matra, Matra, Matra,
469    Matra, Invalid, Matra, Invalid,
470    Matra, Matra, Matra, Matra,
471    Matra, Matra, Matra, Matra,
472
473    Invalid, Invalid, Invalid, Invalid,
474    Invalid, Invalid, Invalid, Invalid,
475    Invalid, Invalid, Invalid, Invalid,
476    Invalid, Invalid, Invalid, Invalid,
477
478    Invalid, Invalid, Matra, Matra,
479    Other, Other, Other, Other,
480    Other, Other, Other, Other,
481    Other, Other, Other, Other,
482};
483
// Position class stored in indicPosition[] and returned by indic_position().
// None (0) is also what indic_position() reports for code points outside U+0900..U+0DFF.
enum Position {
    None    = 0,
    Pre     = 1,
    Above   = 2,
    Below   = 3,
    Post    = 4,
    Split   = 5,
    Base    = 6,
    Reph    = 7,
    Vattu   = 8,
    Inherit = 9
};
496
// Position class for every code point in U+0900..U+0DFF, one entry per code
// point, indexed by (code point - 0x900); indic_position() performs the lookup.
// Each script section below holds 128 entries (8 groups of 16), laid out in
// the same order as indicForms.
// NOTE(review): four rows (Tamil, Telugu, Kannada, Malayalam) store `Halant`,
// which is an enumerator of Form, not Position. Its value is 3, the same as
// Below, so those entries read back as Below — confirm that is the intent.
497static const unsigned char indicPosition[0xe00-0x900] = {
498    // Devanagari (U+0900..U+097F)
499    None, Above, Above, Post,
500    None, None, None, None,
501    None, None, None, None,
502    None, None, None, None,
503
504    None, None, None, None,
505    None, None, None, None,
506    None, None, None, None,
507    None, None, None, None,
508
509    None, None, None, None,
510    None, None, None, None,
511    None, None, None, None,
512    None, None, None, None,
513
514    Below, None, None, None,
515    None, None, None, None,
516    None, None, None, None,
517    None, None, Post, Pre,
518
519    Post, Below, Below, Below,
520    Below, Above, Above, Above,
521    Above, Post, Post, Post,
522    Post, None, None, None,
523
524    None, Above, Below, Above,
525    Above, None, None, None,
526    None, None, None, None,
527    None, None, None, None,
528
529    None, None, Below, Below,
530    None, None, None, None,
531    None, None, None, None,
532    None, None, None, None,
533
534    None, None, None, None,
535    None, None, None, None,
536    None, None, None, None,
537    None, None, None, None,
538
539    // Bengali (U+0980..U+09FF)
540    None, Above, Post, Post,
541    None, None, None, None,
542    None, None, None, None,
543    None, None, None, None,
544
545    None, None, None, None,
546    None, None, None, None,
547    None, None, None, None,
548    None, None, None, None,
549
550    None, None, None, None,
551    None, None, None, None,
552    None, None, None, None,
553    Below, None, None, Post,
554
555    Below, None, None, None,
556    None, None, None, None,
557    None, None, None, None,
558    Below, None, Post, Pre,
559
560    Post, Below, Below, Below,
561    Below, None, None, Pre,
562    Pre, None, None, Split,
563    Split, Below, None, None,
564
565    None, None, None, None,
566    None, None, None, Post,
567    None, None, None, None,
568    None, None, None, None,
569
570    None, None, Below, Below,
571    None, None, None, None,
572    None, None, None, None,
573    None, None, None, None,
574
575    Below, None, None, None,
576    None, None, None, None,
577    None, None, None, None,
578    None, None, None, None,
579
580    // Gurmukhi (U+0A00..U+0A7F)
581    None, Above, Above, Post,
582    None, None, None, None,
583    None, None, None, None,
584    None, None, None, None,
585
586    None, None, None, None,
587    None, None, None, None,
588    None, None, None, None,
589    None, None, None, None,
590
591    None, None, None, None,
592    None, None, None, None,
593    None, None, None, None,
594    None, None, None, Post,
595
596    Below, None, None, None,
597    None, Below, None, None,
598    None, Below, None, None,
599    Below, None, Post, Pre,
600
601    Post, Below, Below, None,
602    None, None, None, Above,
603    Above, None, None, Above,
604    Above, None, None, None,
605
606    None, None, None, None,
607    None, None, None, None,
608    None, None, None, None,
609    None, None, None, None,
610
611    None, None, None, None,
612    None, None, None, None,
613    None, None, None, None,
614    None, None, None, None,
615
616    Above, Above, None, None,
617    None, None, None, None,
618    None, None, None, None,
619    None, None, None, None,
620
621    // Gujarati (U+0A80..U+0AFF)
622    None, Above, Above, Post,
623    None, None, None, None,
624    None, None, None, None,
625    None, None, None, None,
626
627    None, None, None, None,
628    None, None, None, None,
629    None, None, None, None,
630    None, None, None, None,
631
632    None, None, None, None,
633    None, None, None, None,
634    None, None, None, None,
635    None, None, None, None,
636
637    Below, None, None, None,
638    None, None, None, None,
639    None, None, None, None,
640    None, None, Post, Pre,
641
642    Post, Below, Below, Below,
643    Below, Above, None, Above,
644    Above, Post, None, Post,
645    Post, None, None, None,
646
647    None, None, None, None,
648    None, None, None, None,
649    None, None, None, None,
650    None, None, None, None,
651
652    None, None, Below, Below,
653    None, None, None, None,
654    None, None, None, None,
655    None, None, None, None,
656
657    None, None, None, None,
658    None, None, None, None,
659    None, None, None, None,
660    None, None, None, None,
661
662    // Oriya (U+0B00..U+0B7F)
663    None, Above, Post, Post,
664    None, None, None, None,
665    None, None, None, None,
666    None, None, None, None,
667
668    None, None, None, None,
669    None, None, None, None,
670    None, None, None, None,
671    None, None, None, None,
672
673    None, None, None, None,
674    Below, None, None, None,
675    Below, None, None, None,
676    Below, Below, Below, Post,
677
678    Below, None, Below, Below,
679    None, None, None, None,
680    None, None, None, None,
681    None, None, Post, Above,
682
683    Post, Below, Below, Below,
684    None, None, None, Pre,
685    Split, None, None, Split,
686    Split, None, None, None,
687
688    None, None, None, None,
689    None, None, Above, Post,
690    None, None, None, None,
691    None, None, None, Post,
692
693    None, None, None, None,
694    None, None, None, None,
695    None, None, None, None,
696    None, None, None, None,
697
698    None, Below, None, None,
699    None, None, None, None,
700    None, None, None, None,
701    None, None, None, None,
702
703    // Tamil (U+0B80..U+0BFF)
704    None, None, Above, None,
705    None, None, None, None,
706    None, None, None, None,
707    None, None, None, None,
708
709    None, None, None, None,
710    None, None, None, None,
711    None, None, None, None,
712    None, None, None, None,
713
714    None, None, None, None,
715    None, None, None, None,
716    None, None, None, None,
717    None, None, None, None,
718
719    None, None, None, None,
720    None, None, None, None,
721    None, None, None, None,
722    None, None, Post, Post,
723
724    Above, Below, Below, None,
725    None, None, Pre, Pre,
726    Pre, None, Split, Split,
727    Split, Halant, None, None, // Halant is a Form enumerator; value 3, same as Below
728
729    None, None, None, None,
730    None, None, None, Post,
731    None, None, None, None,
732    None, None, None, None,
733
734    None, None, None, None,
735    None, None, None, None,
736    None, None, None, None,
737    None, None, None, None,
738
739    None, None, None, None,
740    None, None, None, None,
741    None, None, None, None,
742    None, None, None, None,
743
744    // Telugu (U+0C00..U+0C7F)
745    None, Post, Post, Post,
746    None, None, None, None,
747    None, None, None, None,
748    None, None, None, None,
749
750    None, None, None, None,
751    None, Below, Below, Below,
752    Below, Below, Below, Below,
753    Below, Below, Below, Below,
754
755    Below, Below, Below, Below,
756    Below, Below, Below, Below,
757    Below, None, Below, Below,
758    Below, Below, Below, Below,
759
760    Below, None, Below, Below,
761    None, Below, Below, Below,
762    Below, Below, None, None,
763    None, None, Post, Above,
764
765    Above, Post, Post, Post,
766    Post, None, Above, Above,
767    Split, None, Post, Above,
768    Above, Halant, None, None, // Halant is a Form enumerator; value 3, same as Below
769
770    None, None, None, None,
771    None, Above, Below, None,
772    None, None, None, None,
773    None, None, None, None,
774
775    None, None, None, None,
776    None, None, None, None,
777    None, None, None, None,
778    None, None, None, None,
779
780    None, None, None, None,
781    None, None, None, None,
782    None, None, None, None,
783    None, None, None, None,
784
785    // Kannada (U+0C80..U+0CFF)
786    None, None, Post, Post,
787    None, None, None, None,
788    None, None, None, None,
789    None, None, None, None,
790
791    None, None, None, None,
792    None, Below, Below, Below,
793    Below, Below, Below, Below,
794    Below, Below, Below, Below,
795
796    Below, Below, Below, Below,
797    Below, Below, Below, Below,
798    Below, Below, Below, Below,
799    Below, Below, Below, Below,
800
801    Below, None, Below, Below,
802    None, Below, Below, Below,
803    Below, Below, None, None,
804    None, None, Post, Above,
805
806    Split, Post, Post, Post,
807    Post, None, Above, Split,
808    Split, None, Split, Split,
809    Above, Halant, None, None, // Halant is a Form enumerator; value 3, same as Below
810
811    None, None, None, None,
812    None, Post, Post, None,
813    None, None, None, None,
814    None, None, Below, None,
815
816    None, None, Below, Below,
817    None, None, None, None,
818    None, None, None, None,
819    None, None, None, None,
820
821    None, None, None, None,
822    None, None, None, None,
823    None, None, None, None,
824    None, None, None, None,
825
826    // Malayalam (U+0D00..U+0D7F)
827    None, None, Post, Post,
828    None, None, None, None,
829    None, None, None, None,
830    None, None, None, None,
831
832    None, None, None, None,
833    None, None, None, None,
834    None, None, None, None,
835    None, None, None, None,
836
837    None, None, None, None,
838    None, None, None, None,
839    None, None, None, None,
840    None, None, None, Post,
841
842    Post, None, Below, None,
843    None, Post, None, None,
844    None, None, None, None,
845    None, None, Post, Post,
846
847    Post, Post, Post, Post,
848    None, None, Pre, Pre,
849    Pre, None, Split, Split,
850    Split, Halant, None, None, // Halant is a Form enumerator; value 3, same as Below
851
852    None, None, None, None,
853    None, None, None, Post,
854    None, None, None, None,
855    None, None, None, None,
856
857    None, None, None, None,
858    None, None, None, None,
859    None, None, None, None,
860    None, None, None, None,
861
862    None, None, None, None,
863    None, None, None, None,
864    None, None, None, None,
865    None, None, None, None,
866
867    // Sinhala (U+0D80..U+0DFF)
868    None, None, Post, Post,
869    None, None, None, None,
870    None, None, None, None,
871    None, None, None, None,
872
873    None, None, None, None,
874    None, None, None, None,
875    None, None, None, None,
876    None, None, None, None,
877
878    None, None, None, None,
879    None, None, None, None,
880    None, None, None, None,
881    None, None, None, None,
882
883    None, None, None, None,
884    None, None, None, None,
885    None, None, None, None,
886    None, None, None, None,
887
888    None, None, None, None,
889    None, None, None, None,
890    None, None, None, None,
891    None, None, None, Post,
892
893    Post, Post, Above, Above,
894    Below, None, Below, None,
895    Post, Pre, Split, Pre,
896    Split, Split, Split, Post,
897
898    None, None, None, None,
899    None, None, None, None,
900    None, None, None, None,
901    None, None, None, None,
902
903    None, None, Post, Post,
904    None, None, None, None,
905    None, None, None, None,
906    None, None, None, None
907};
908
909static inline Form form(unsigned short uc) {
910    if (uc < 0x900 || uc > 0xdff) {
911        if (uc == 0x25cc)
912            return Consonant;
913        if (uc == 0x200c || uc == 0x200d)
914            return Control;
915        return Other;
916    }
917    return (Form)indicForms[uc-0x900];
918}
919
920static inline Position indic_position(unsigned short uc) {
921    if (uc < 0x900 || uc > 0xdff)
922        return None;
923    return (Position) indicPosition[uc-0x900];
924}
925
926
// Bit flags combined in scriptProperties[]; each flag occupies its own bit.
enum IndicScriptProperties {
    HasReph  = 1 << 0,
    HasSplit = 1 << 1
};
931
932const hb_uint8 scriptProperties[10] = {
933    // Devanagari,
934    HasReph,
935    // Bengali,
936    HasReph|HasSplit,
937    // Gurmukhi,
938    0,
939    // Gujarati,
940    HasReph,
941    // Oriya,
942    HasReph|HasSplit,
943    // Tamil,
944    HasSplit,
945    // Telugu,
946    HasSplit,
947    // Kannada,
948    HasSplit|HasReph,
949    // Malayalam,
950    HasSplit,
951    // Sinhala,
952    HasSplit
953};
954
// One (form, position) pair. The per-script *_order arrays that follow are
// built from these, and indic_order[] collects pointers to those arrays.
955struct IndicOrdering {
956    Form form;         // character class, see enum Form
957    Position position; // position class, see enum Position
958};
959
960static const IndicOrdering devanagari_order [] = {
961    { Consonant, Below },
962    { Matra, Below },
963    { VowelMark, Below },
964    { StressMark, Below },
965    { Matra, Above },
966    { Matra, Post },
967    { Consonant, Reph },
968    { VowelMark, Above },
969    { StressMark, Above },
970    { VowelMark, Post },
971    { (Form)0, None }
972};
973
974static const IndicOrdering bengali_order [] = {
975    { Consonant, Below },
976    { Matra, Below },
977    { Matra, Above },
978    { Consonant, Reph },
979    { VowelMark, Above },
980    { Consonant, Post },
981    { Matra, Post },
982    { VowelMark, Post },
983    { (Form)0, None }
984};
985
986static const IndicOrdering gurmukhi_order [] = {
987    { Consonant, Below },
988    { Matra, Below },
989    { Matra, Above },
990    { Consonant, Post },
991    { Matra, Post },
992    { VowelMark, Above },
993    { (Form)0, None }
994};
995
996static const IndicOrdering tamil_order [] = {
997    { Matra, Above },
998    { Matra, Post },
999    { VowelMark, Post },
1000    { (Form)0, None }
1001};
1002
1003static const IndicOrdering telugu_order [] = {
1004    { Matra, Above },
1005    { Matra, Below },
1006    { Matra, Post },
1007    { Consonant, Below },
1008    { Consonant, Post },
1009    { VowelMark, Post },
1010    { (Form)0, None }
1011};
1012
1013static const IndicOrdering kannada_order [] = {
1014    { Matra, Above },
1015    { Matra, Post },
1016    { Consonant, Below },
1017    { Consonant, Post },
1018    { LengthMark, Post },
1019    { Consonant, Reph },
1020    { VowelMark, Post },
1021    { (Form)0, None }
1022};
1023
1024static const IndicOrdering malayalam_order [] = {
1025    { Consonant, Below },
1026    { Matra, Below },
1027    { Consonant, Reph },
1028    { Consonant, Post },
1029    { Matra, Post },
1030    { VowelMark, Post },
1031    { (Form)0, None }
1032};
1033
1034static const IndicOrdering sinhala_order [] = {
1035    { Matra, Below },
1036    { Matra, Above },
1037    { Matra, Post },
1038    { VowelMark, Post },
1039    { (Form)0, None }
1040};
1041
1042static const IndicOrdering * const indic_order[] = {
1043    devanagari_order, // Devanagari
1044    bengali_order, // Bengali
1045    gurmukhi_order, // Gurmukhi
1046    devanagari_order, // Gujarati
1047    bengali_order, // Oriya
1048    tamil_order, // Tamil
1049    telugu_order, // Telugu
1050    kannada_order, // Kannada
1051    malayalam_order, // Malayalam
1052    sinhala_order // Sinhala
1053};
1054
1055
1056
// Vowel matras that have to be split into two or three parts.
//
// Every record consists of four code units: the composite matra followed by
// up to three parts, with unused trailing parts set to 0x0.  The records are
// sorted by ascending matra value (splitMatra() depends on this for its
// lookup) and the table is closed by the 0xffff sentinel.
static const unsigned short split_matras[]  = {
    //  matra, split1, split2, split3

    // bengalis
    0x9cb, 0x9c7, 0x9be, 0x0,
    0x9cc, 0x9c7, 0x9d7, 0x0,
    // oriya
    0xb48, 0xb47, 0xb56, 0x0,
    0xb4b, 0xb47, 0xb3e, 0x0,
    0xb4c, 0xb47, 0xb57, 0x0,
    // tamil
    0xbca, 0xbc6, 0xbbe, 0x0,
    0xbcb, 0xbc7, 0xbbe, 0x0,
    0xbcc, 0xbc6, 0xbd7, 0x0,
    // telugu
    0xc48, 0xc46, 0xc56, 0x0,
    // kannada
    0xcc0, 0xcbf, 0xcd5, 0x0,
    0xcc7, 0xcc6, 0xcd5, 0x0,
    0xcc8, 0xcc6, 0xcd6, 0x0,
    0xcca, 0xcc6, 0xcc2, 0x0,
    0xccb, 0xcc6, 0xcc2, 0xcd5,
    // malayalam
    0xd4a, 0xd46, 0xd3e, 0x0,
    0xd4b, 0xd47, 0xd3e, 0x0,
    0xd4c, 0xd46, 0xd57, 0x0,
    // sinhala
    0xdda, 0xdd9, 0xdca, 0x0,
    0xddc, 0xdd9, 0xdcf, 0x0,
    0xddd, 0xdd9, 0xdcf, 0xdca,
    0xdde, 0xdd9, 0xddf, 0x0,
    0xffff
};

// Replaces the composite matra at reordered[matra] by its two or three parts.
//
//   reordered  syllable text; must have room for up to two more code units
//              behind the current len
//   matra      index of the composite matra inside reordered
//   len        number of code units in use; increased by the number of code
//              units that were inserted (1 or 2)
//
// A character that has no record in split_matras trips the assertion; when
// assertions are compiled out the text and len are left unchanged.
static inline void splitMatra(unsigned short *reordered, int matra, int &len)
{
    const unsigned short matra_uc = reordered[matra];
    //qDebug("matra=%d, reordered[matra]=%x", matra, reordered[matra]);

    // The records are sorted, so the first one that is not smaller is the
    // only candidate; the 0xffff sentinel bounds the scan.
    const unsigned short *split = split_matras;
    while (split[0] < matra_uc)
        split += 4;

    assert(*split == matra_uc);
    // Without assertions an unknown character would otherwise be replaced by
    // the parts of a neighbouring record, or the parts would be read from
    // behind the sentinel.  The sentinel itself is not a record either.
    if (*split != matra_uc || matra_uc == 0xffff)
        return;
    ++split;    // step over the composite; split[0..2] are now the parts

    const int added_chars = split[2] == 0x0 ? 1 : 2;

    // Shift everything behind the matra to make room for the extra parts.
    memmove(reordered + matra + 1 + added_chars, reordered + matra + 1,
            (len - matra - 1) * sizeof(unsigned short));
    reordered[matra] = split[0];
    reordered[matra+1] = split[1];
    if (added_chars == 2)
        reordered[matra+2] = split[2];
    len += added_chars;
}
1113
1114#ifndef NO_OPENTYPE
1115static const HB_OpenTypeFeature indic_features[] = {
1116    { HB_MAKE_TAG('l', 'o', 'c', 'a'), LocaProperty },
1117    { HB_MAKE_TAG('c', 'c', 'm', 'p'), CcmpProperty },
1118    { HB_MAKE_TAG('i', 'n', 'i', 't'), InitProperty },
1119    { HB_MAKE_TAG('n', 'u', 'k', 't'), NuktaProperty },
1120    { HB_MAKE_TAG('a', 'k', 'h', 'n'), AkhantProperty },
1121    { HB_MAKE_TAG('r', 'p', 'h', 'f'), RephProperty },
1122    { HB_MAKE_TAG('b', 'l', 'w', 'f'), BelowFormProperty },
1123    { HB_MAKE_TAG('h', 'a', 'l', 'f'), HalfFormProperty },
1124    { HB_MAKE_TAG('p', 's', 't', 'f'), PostFormProperty },
1125    { HB_MAKE_TAG('c', 'j', 'c', 't'), ConjunctFormProperty },
1126    { HB_MAKE_TAG('v', 'a', 't', 'u'), VattuProperty },
1127    { HB_MAKE_TAG('p', 'r', 'e', 's'), PreSubstProperty },
1128    { HB_MAKE_TAG('b', 'l', 'w', 's'), BelowSubstProperty },
1129    { HB_MAKE_TAG('a', 'b', 'v', 's'), AboveSubstProperty },
1130    { HB_MAKE_TAG('p', 's', 't', 's'), PostSubstProperty },
1131    { HB_MAKE_TAG('h', 'a', 'l', 'n'), HalantProperty },
1132    { HB_MAKE_TAG('c', 'a', 'l', 't'), IndicCaltProperty },
1133    { 0, 0 }
1134};
1135#endif
1136
// Uncomment to get a trace of the shaping steps on stderr.
// #define INDIC_DEBUG
#ifdef INDIC_DEBUG
#define IDEBUG hb_debug
#include <stdarg.h>

// printf-style trace output: writes the formatted message followed by a
// newline to stderr.
static void hb_debug(const char *msg, ...)
{
    va_list ap;
    va_start(ap, msg); // use variable arg list
    vfprintf(stderr, msg, ap);
    va_end(ap);
    fprintf(stderr, "\n");
}

#else
// Tracing disabled: the call is still compiled but never executed.  The
// "if (1) {} else" form is used instead of "if (0)" so that the expansion
// already carries its own else; an else that follows an unbraced
// "if (x) IDEBUG(...);" therefore still belongs to the caller's if.
#define IDEBUG if (1) {} else printf
#endif
1154
#if 0 //def INDIC_DEBUG
// Debug helper (currently compiled out): returns the names of all features
// that are enabled in an OpenType property mask, separated by blanks.  The
// mask is inverted first because a cleared bit means "feature enabled".
static QString propertiesToString(int properties)
{
    QString res;
    properties = ~properties;
    if (properties & LocaProperty)
        res += "Loca ";
    if (properties & CcmpProperty)
        res += "Ccmp ";
    if (properties & InitProperty)
        res += "Init ";
    if (properties & NuktaProperty)
        res += "Nukta ";
    if (properties & AkhantProperty)
        res += "Akhant ";
    if (properties & RephProperty)
        res += "Reph ";
    if (properties & PreFormProperty)
        res += "PreForm ";
    if (properties & BelowFormProperty)
        res += "BelowForm ";
    if (properties & AboveFormProperty)
        res += "AboveForm ";
    if (properties & HalfFormProperty)
        res += "HalfForm ";
    if (properties & PostFormProperty)
        res += "PostForm ";
    // used to print "PostForm " a second time, which made the conjunct
    // feature indistinguishable from the post-base form feature
    if (properties & ConjunctFormProperty)
        res += "ConjunctForm ";
    if (properties & VattuProperty)
        res += "Vattu ";
    if (properties & PreSubstProperty)
        res += "PreSubst ";
    if (properties & BelowSubstProperty)
        res += "BelowSubst ";
    if (properties & AboveSubstProperty)
        res += "AboveSubst ";
    if (properties & PostSubstProperty)
        res += "PostSubst ";
    if (properties & HalantProperty)
        res += "Halant ";
    if (properties & CligProperty)
        res += "Clig ";
    if (properties & IndicCaltProperty)
        res += "Calt ";
    return res;
}
#endif
1203
1204static bool indic_shape_syllable(HB_Bool openType, HB_ShaperItem *item, bool invalid)
1205{
1206    HB_Script script = item->item.script;
1207    assert(script >= HB_Script_Devanagari && script <= HB_Script_Sinhala);
1208    const unsigned short script_base = 0x0900 + 0x80*(script-HB_Script_Devanagari);
1209    const unsigned short ra = script_base + 0x30;
1210    const unsigned short halant = script_base + 0x4d;
1211    const unsigned short nukta = script_base + 0x3c;
1212    bool control = false;
1213
1214    int len = (int)item->item.length;
1215    IDEBUG(">>>>> indic shape: from=%d, len=%d invalid=%d", item->item.pos, item->item.length, invalid);
1216
1217    if ((int)item->num_glyphs < len+4) {
1218        item->num_glyphs = len+4;
1219        return false;
1220    }
1221
1222    HB_STACKARRAY(HB_UChar16, reordered, len + 4);
1223    HB_STACKARRAY(hb_uint8, position, len + 4);
1224
1225    unsigned char properties = scriptProperties[script-HB_Script_Devanagari];
1226
1227    if (invalid) {
1228        *reordered = 0x25cc;
1229        memcpy(reordered+1, item->string + item->item.pos, len*sizeof(HB_UChar16));
1230        len++;
1231    } else {
1232        memcpy(reordered, item->string + item->item.pos, len*sizeof(HB_UChar16));
1233    }
1234    if (reordered[len-1] == 0x200c) // zero width non joiner
1235        len--;
1236
1237    int i;
1238    int base = 0;
1239    int reph = -1;
1240
1241#ifdef INDIC_DEBUG
1242    IDEBUG("original:");
1243    for (i = 0; i < len; i++) {
1244        IDEBUG("    %d: %4x", i, reordered[i]);
1245    }
1246#endif
1247
1248    if (len != 1) {
1249        HB_UChar16 *uc = reordered;
1250        bool beginsWithRa = false;
1251
1252        // Rule 1: find base consonant
1253        //
1254        // The shaping engine finds the base consonant of the
1255        // syllable, using the following algorithm: starting from the
1256        // end of the syllable, move backwards until a consonant is
1257        // found that does not have a below-base or post-base form
1258        // (post-base forms have to follow below-base forms), or
1259        // arrive at the first consonant. The consonant stopped at
1260        // will be the base.
1261        //
1262        //  * If the syllable starts with Ra + H (in a script that has
1263        //    'Reph'), Ra is excluded from candidates for base
1264        //    consonants.
1265        //
1266        // * In Kannada and Telugu, the base consonant cannot be
1267        //   farther than 3 consonants from the end of the syllable.
1268        // #### replace the HasReph property by testing if the feature exists in the font!
1269        if (form(*uc) == Consonant || (script == HB_Script_Bengali && form(*uc) == IndependentVowel)) {
1270            if ((properties & HasReph) && (len > 2) &&
1271                (*uc == ra || *uc == 0x9f0) && *(uc+1) == halant)
1272                beginsWithRa = true;
1273
1274            if (beginsWithRa && form(*(uc+2)) == Control)
1275                beginsWithRa = false;
1276
1277            base = (beginsWithRa ? 2 : 0);
1278            IDEBUG("    length = %d, beginsWithRa = %d, base=%d", len, beginsWithRa, base);
1279
1280            int lastConsonant = 0;
1281            int matra = -1;
1282            // we remember:
1283            // * the last consonant since we need it for rule 2
1284            // * the matras position for rule 3 and 4
1285
1286            // figure out possible base glyphs
1287            memset(position, 0, len);
1288            if (script == HB_Script_Devanagari || script == HB_Script_Gujarati) {
1289                bool vattu = false;
1290                for (i = base; i < len; ++i) {
1291                    position[i] = form(uc[i]);
1292                    if (position[i] == Consonant) {
1293                        lastConsonant = i;
1294                        vattu = (!vattu && uc[i] == ra);
1295                        if (vattu) {
1296                            IDEBUG("excluding vattu glyph at %d from base candidates", i);
1297                            position[i] = Vattu;
1298                        }
1299                    } else if (position[i] == Matra) {
1300                        matra = i;
1301                    }
1302                }
1303            } else {
1304                for (i = base; i < len; ++i) {
1305                    position[i] = form(uc[i]);
1306                    if (position[i] == Consonant)
1307                        lastConsonant = i;
1308                    else if (matra < 0 && position[i] == Matra)
1309                        matra = i;
1310                }
1311            }
1312            int skipped = 0;
1313            Position pos = Post;
1314            for (i = len-1; i >= base; i--) {
1315                if (position[i] != Consonant && (position[i] != Control || script == HB_Script_Kannada))
1316                    continue;
1317
1318                if (i < len-1 && position[i] == Control && position[i+1] == Consonant) {
1319                    base = i+1;
1320                    break;
1321                }
1322
1323                Position charPosition = indic_position(uc[i]);
1324                if (pos == Post && charPosition == Post) {
1325                    pos = Post;
1326                } else if ((pos == Post || pos == Below) && charPosition == Below) {
1327                    if (script == HB_Script_Devanagari || script == HB_Script_Gujarati)
1328                        base = i;
1329                    pos = Below;
1330                } else {
1331                    base = i;
1332                    break;
1333                }
1334                if (skipped == 2 && (script == HB_Script_Kannada || script == HB_Script_Telugu)) {
1335                    base = i;
1336                    break;
1337                }
1338                ++skipped;
1339            }
1340
1341            IDEBUG("    base consonant at %d skipped=%d, lastConsonant=%d", base, skipped, lastConsonant);
1342
1343            // Rule 2:
1344            //
1345            // If the base consonant is not the last one, Uniscribe
1346            // moves the halant from the base consonant to the last
1347            // one.
1348            if (lastConsonant > base) {
1349                int halantPos = 0;
1350                if (uc[base+1] == halant)
1351                    halantPos = base + 1;
1352                else if (uc[base+1] == nukta && uc[base+2] == halant)
1353                    halantPos = base + 2;
1354                if (halantPos > 0) {
1355                    IDEBUG("    moving halant from %d to %d!", base+1, lastConsonant);
1356                    for (i = halantPos; i < lastConsonant; i++)
1357                        uc[i] = uc[i+1];
1358                    uc[lastConsonant] = halant;
1359                }
1360            }
1361
1362            // Rule 3:
1363            //
1364            // If the syllable starts with Ra + H, Uniscribe moves
1365            // this combination so that it follows either:
1366
1367            // * the post-base 'matra' (if any) or the base consonant
1368            //   (in scripts that show similarity to Devanagari, i.e.,
1369            //   Devanagari, Gujarati, Bengali)
1370            // * the base consonant (other scripts)
1371            // * the end of the syllable (Kannada)
1372
1373            Position matra_position = None;
1374            if (matra > 0)
1375                matra_position = indic_position(uc[matra]);
1376            IDEBUG("    matra at %d with form %d, base=%d", matra, matra_position, base);
1377
1378            if (beginsWithRa && base != 0) {
1379                int toPos = base+1;
1380                if (toPos < len && uc[toPos] == nukta)
1381                    toPos++;
1382                if (toPos < len && uc[toPos] == halant)
1383                    toPos++;
1384                if (toPos < len && uc[toPos] == 0x200d)
1385                    toPos++;
1386                if (toPos < len-1 && uc[toPos] == ra && uc[toPos+1] == halant)
1387                    toPos += 2;
1388                if (script == HB_Script_Devanagari || script == HB_Script_Gujarati || script == HB_Script_Bengali) {
1389                    if (matra_position == Post || matra_position == Split) {
1390                        toPos = matra+1;
1391                        matra -= 2;
1392                    }
1393                } else if (script == HB_Script_Kannada) {
1394                    toPos = len;
1395                    matra -= 2;
1396                }
1397
1398                IDEBUG("moving leading ra+halant to position %d", toPos);
1399                for (i = 2; i < toPos; i++)
1400                    uc[i-2] = uc[i];
1401                uc[toPos-2] = ra;
1402                uc[toPos-1] = halant;
1403                base -= 2;
1404                if (properties & HasReph)
1405                    reph = toPos-2;
1406            }
1407
1408            // Rule 4:
1409
1410            // Uniscribe splits two- or three-part matras into their
1411            // parts. This splitting is a character-to-character
1412            // operation).
1413            //
1414            //      Uniscribe describes some moving operations for these
1415            //      matras here. For shaping however all pre matras need
1416            //      to be at the beginning of the syllable, so we just move
1417            //      them there now.
1418            if (matra_position == Split) {
1419                splitMatra(uc, matra, len);
1420                // Handle three-part matras (0xccb in Kannada)
1421                matra_position = indic_position(uc[matra]);
1422            }
1423
1424            if (matra_position == Pre) {
1425                unsigned short m = uc[matra];
1426                while (matra--)
1427                    uc[matra+1] = uc[matra];
1428                uc[0] = m;
1429                base++;
1430            }
1431        }
1432
1433        // Rule 5:
1434        //
1435        // Uniscribe classifies consonants and 'matra' parts as
1436        // pre-base, above-base (Reph), below-base or post-base. This
1437        // classification exists on the character code level and is
1438        // language-dependent, not font-dependent.
1439        for (i = 0; i < base; ++i)
1440            position[i] = Pre;
1441        position[base] = Base;
1442        for (i = base+1; i < len; ++i) {
1443            position[i] = indic_position(uc[i]);
1444            // #### replace by adjusting table
1445            if (uc[i] == nukta || uc[i] == halant)
1446                position[i] = Inherit;
1447        }
1448        if (reph > 0) {
1449            // recalculate reph, it might have changed.
1450            for (i = base+1; i < len; ++i)
1451                if (uc[i] == ra)
1452                    reph = i;
1453            position[reph] = Reph;
1454            position[reph+1] = Inherit;
1455        }
1456
1457        // all reordering happens now to the chars after the base
1458        int fixed = base+1;
1459        if (fixed < len && uc[fixed] == nukta)
1460            fixed++;
1461        if (fixed < len && uc[fixed] == halant)
1462            fixed++;
1463        if (fixed < len && uc[fixed] == 0x200d)
1464            fixed++;
1465
1466#ifdef INDIC_DEBUG
1467        for (i = fixed; i < len; ++i)
1468            IDEBUG("position[%d] = %d, form=%d uc=%x", i, position[i], form(uc[i]), uc[i]);
1469#endif
1470        // we continuosly position the matras and vowel marks and increase the fixed
1471        // until we reached the end.
1472        const IndicOrdering *finalOrder = indic_order[script-HB_Script_Devanagari];
1473
1474        IDEBUG("    reordering pass:");
1475        IDEBUG("        base=%d fixed=%d", base, fixed);
1476        int toMove = 0;
1477        while (finalOrder[toMove].form && fixed < len-1) {
1478            IDEBUG("        fixed = %d, toMove=%d, moving form %d with pos %d", fixed, toMove, finalOrder[toMove].form, finalOrder[toMove].position);
1479            for (i = fixed; i < len; i++) {
1480//                IDEBUG() << "           i=" << i << "uc=" << hex << uc[i] << "form=" << form(uc[i])
1481//                         << "position=" << position[i];
1482                if (form(uc[i]) == finalOrder[toMove].form &&
1483                     position[i] == finalOrder[toMove].position) {
1484                    // need to move this glyph
1485                    int to = fixed;
1486                    if (i < len-1 && position[i+1] == Inherit) {
1487                        IDEBUG("         moving two chars from %d to %d", i, to);
1488                        unsigned short ch = uc[i];
1489                        unsigned short ch2 = uc[i+1];
1490                        unsigned char pos = position[i];
1491                        for (int j = i+1; j > to+1; j--) {
1492                            uc[j] = uc[j-2];
1493                            position[j] = position[j-2];
1494                        }
1495                        uc[to] = ch;
1496                        uc[to+1] = ch2;
1497                        position[to] = pos;
1498                        position[to+1] = pos;
1499                        fixed += 2;
1500                    } else {
1501                        IDEBUG("         moving one char from %d to %d", i, to);
1502                        unsigned short ch = uc[i];
1503                        unsigned char pos = position[i];
1504                        for (int j = i; j > to; j--) {
1505                            uc[j] = uc[j-1];
1506                            position[j] = position[j-1];
1507                        }
1508                        uc[to] = ch;
1509                        position[to] = pos;
1510                        fixed++;
1511                    }
1512                }
1513            }
1514            toMove++;
1515        }
1516
1517    }
1518
1519    if (reph > 0) {
1520        // recalculate reph, it might have changed.
1521        for (i = base+1; i < len; ++i)
1522            if (reordered[i] == ra)
1523                reph = i;
1524    }
1525
1526#ifndef NO_OPENTYPE
1527    const int availableGlyphs = item->num_glyphs;
1528#endif
1529    if (!item->font->klass->convertStringToGlyphIndices(item->font,
1530                                                        reordered, len,
1531                                                        item->glyphs, &item->num_glyphs,
1532                                                        item->item.bidiLevel % 2))
1533        goto error;
1534
1535
1536    IDEBUG("  base=%d, reph=%d", base, reph);
1537    IDEBUG("reordered:");
1538    for (i = 0; i < len; i++) {
1539        item->attributes[i].mark = false;
1540        item->attributes[i].clusterStart = false;
1541        item->attributes[i].justification = 0;
1542        item->attributes[i].zeroWidth = false;
1543        IDEBUG("    %d: %4x", i, reordered[i]);
1544    }
1545
1546    // now we have the syllable in the right order, and can start running it through open type.
1547
1548    for (i = 0; i < len; ++i)
1549        control |= (form(reordered[i]) == Control);
1550
1551#ifndef NO_OPENTYPE
1552    if (openType) {
1553
1554        // we need to keep track of where the base glyph is for some
1555        // scripts and use the cluster feature for this.  This
1556        // also means we have to correct the logCluster output from
1557        // the open type engine manually afterwards.  for indic this
1558        // is rather simple, as all chars just point to the first
1559        // glyph in the syllable.
1560        HB_STACKARRAY(unsigned short, clusters, len);
1561        HB_STACKARRAY(unsigned int, properties, len);
1562
1563        for (i = 0; i < len; ++i)
1564            clusters[i] = i;
1565
1566        // features we should always apply
1567        for (i = 0; i < len; ++i)
1568            properties[i] = ~(LocaProperty
1569                              | CcmpProperty
1570                              | NuktaProperty
1571                              | VattuProperty
1572                              | ConjunctFormProperty
1573                              | PreSubstProperty
1574                              | BelowSubstProperty
1575                              | AboveSubstProperty
1576                              | PostSubstProperty
1577                              | HalantProperty
1578                              | IndicCaltProperty
1579                              | PositioningProperties);
1580
1581        // Loca always applies
1582        // Ccmp always applies
1583        // Init
1584        if (item->item.pos == 0
1585            || !(isLetter(item->string[item->item.pos-1]) || isMark(item->string[item->item.pos-1])))
1586            properties[0] &= ~InitProperty;
1587
1588        // Nukta always applies
1589        // Akhant
1590        for (i = 0; i <= base; ++i)
1591            properties[i] &= ~AkhantProperty;
1592        // Reph
1593        if (reph >= 0) {
1594            properties[reph] &= ~RephProperty;
1595            properties[reph+1] &= ~RephProperty;
1596        }
1597        // BelowForm
1598        for (i = base+1; i < len; ++i)
1599            properties[i] &= ~BelowFormProperty;
1600
1601        if (script == HB_Script_Devanagari || script == HB_Script_Gujarati) {
1602            // vattu glyphs need this aswell
1603            bool vattu = false;
1604            for (i = base-2; i > 1; --i) {
1605                if (form(reordered[i]) == Consonant) {
1606                    vattu = (!vattu && reordered[i] == ra);
1607                    if (vattu) {
1608                        IDEBUG("forming vattu ligature at %d", i);
1609                        properties[i] &= ~BelowFormProperty;
1610                        properties[i+1] &= ~BelowFormProperty;
1611                    }
1612                }
1613            }
1614        }
1615        // HalfFormProperty
1616        for (i = 0; i < base; ++i)
1617            properties[i] &= ~HalfFormProperty;
1618        if (control) {
1619            for (i = 2; i < len; ++i) {
1620                if (reordered[i] == 0x200d /* ZWJ */) {
1621                    properties[i-1] &= ~HalfFormProperty;
1622                    properties[i-2] &= ~HalfFormProperty;
1623                } else if (reordered[i] == 0x200c /* ZWNJ */) {
1624                    properties[i-1] &= ~HalfFormProperty;
1625                    properties[i-2] &= ~HalfFormProperty;
1626                }
1627            }
1628        }
1629        // PostFormProperty
1630        for (i = base+1; i < len; ++i)
1631            properties[i] &= ~PostFormProperty;
1632        // vattu always applies
1633        // pres always applies
1634        // blws always applies
1635        // abvs always applies
1636        // psts always applies
1637        // halant always applies
1638        // calt always applies
1639
1640#ifdef INDIC_DEBUG
1641//        {
1642//            IDEBUG("OT properties:");
1643//            for (int i = 0; i < len; ++i)
1644//                qDebug("    i: %s", ::propertiesToString(properties[i]).toLatin1().data());
1645//        }
1646#endif
1647
1648        // initialize
1649        item->log_clusters = clusters;
1650        HB_OpenTypeShape(item, properties);
1651
1652        int newLen = item->face->buffer->in_length;
1653        HB_GlyphItem otl_glyphs = item->face->buffer->in_string;
1654
1655        // move the left matra back to its correct position in malayalam and tamil
1656        if ((script == HB_Script_Malayalam || script == HB_Script_Tamil) && (form(reordered[0]) == Matra)) {
1657//             qDebug("reordering matra, len=%d", newLen);
1658            // need to find the base in the shaped string and move the matra there
1659            int basePos = 0;
1660            while (basePos < newLen && (int)otl_glyphs[basePos].cluster <= base)
1661                basePos++;
1662            --basePos;
1663            if (basePos < newLen && basePos > 1) {
1664//                 qDebug("moving prebase matra to position %d in syllable newlen=%d", basePos, newLen);
1665                HB_GlyphItemRec m = otl_glyphs[0];
1666                --basePos;
1667                for (i = 0; i < basePos; ++i)
1668                    otl_glyphs[i] = otl_glyphs[i+1];
1669                otl_glyphs[basePos] = m;
1670            }
1671        }
1672
1673        HB_Bool positioned = HB_OpenTypePosition(item, availableGlyphs, false);
1674
1675        HB_FREE_STACKARRAY(clusters);
1676        HB_FREE_STACKARRAY(properties);
1677
1678        if (!positioned)
1679            goto error;
1680
1681        if (control) {
1682            IDEBUG("found a control char in the syllable");
1683            hb_uint32 i = 0, j = 0;
1684            while (i < item->num_glyphs) {
1685                if (form(reordered[otl_glyphs[i].cluster]) == Control) {
1686                    ++i;
1687                    if (i >= item->num_glyphs)
1688                        break;
1689                }
1690                item->glyphs[j] = item->glyphs[i];
1691                item->attributes[j] = item->attributes[i];
1692                ++i;
1693                ++j;
1694            }
1695            item->num_glyphs = j;
1696        }
1697
1698    } else {
1699        HB_HeuristicPosition(item);
1700    }
1701#endif // NO_OPENTYPE
1702    item->attributes[0].clusterStart = true;
1703
1704    HB_FREE_STACKARRAY(reordered);
1705    HB_FREE_STACKARRAY(position);
1706
1707    IDEBUG("<<<<<<");
1708    return true;
1709
1710error:
1711    HB_FREE_STACKARRAY(reordered);
1712    HB_FREE_STACKARRAY(position);
1713    return false;
1714}
1715
1716/* syllables are of the form:
1717
1718   (Consonant Nukta? Halant)* Consonant Matra? VowelMark? StressMark?
1719   (Consonant Nukta? Halant)* Consonant Halant
1720   IndependentVowel VowelMark? StressMark?
1721
1722   We return syllable boundaries on invalid combinations aswell
1723*/
1724static int indic_nextSyllableBoundary(HB_Script script, const HB_UChar16 *s, int start, int end, bool *invalid)
1725{
1726    *invalid = false;
1727    IDEBUG("indic_nextSyllableBoundary: start=%d, end=%d", start, end);
1728    const HB_UChar16 *uc = s+start;
1729
1730    int pos = 0;
1731    Form state = form(uc[pos]);
1732    IDEBUG("state[%d]=%d (uc=%4x)", pos, state, uc[pos]);
1733    pos++;
1734
1735    if (state != Consonant && state != IndependentVowel) {
1736        if (state != Other)
1737            *invalid = true;
1738        goto finish;
1739    }
1740
1741    while (pos < end - start) {
1742        Form newState = form(uc[pos]);
1743        IDEBUG("state[%d]=%d (uc=%4x)", pos, newState, uc[pos]);
1744        switch(newState) {
1745        case Control:
1746            newState = state;
1747 	    if (state == Halant && uc[pos] == 0x200d /* ZWJ */)
1748  		break;
1749            // the control character should be the last char in the item
1750            ++pos;
1751            goto finish;
1752        case Consonant:
1753	    if (state == Halant && (script != HB_Script_Sinhala || uc[pos-1] == 0x200d /* ZWJ */))
1754                break;
1755            goto finish;
1756        case Halant:
1757            if (state == Nukta || state == Consonant)
1758                break;
1759            // Bengali has a special exception allowing the combination Vowel_A/E + Halant + Ya
1760            if (script == HB_Script_Bengali && pos == 1 &&
1761                 (uc[0] == 0x0985 || uc[0] == 0x098f))
1762                break;
1763            // Sinhala uses the Halant as a component of certain matras. Allow these, but keep the state on Matra.
1764            if (script == HB_Script_Sinhala && state == Matra) {
1765                ++pos;
1766                continue;
1767            }
1768            if (script == HB_Script_Malayalam && state == Matra && uc[pos-1] == 0x0d41) {
1769                ++pos;
1770                continue;
1771            }
1772            goto finish;
1773        case Nukta:
1774            if (state == Consonant)
1775                break;
1776            goto finish;
1777        case StressMark:
1778            if (state == VowelMark)
1779                break;
1780            // fall through
1781        case VowelMark:
1782            if (state == Matra || state == LengthMark || state == IndependentVowel)
1783                break;
1784            // fall through
1785        case Matra:
1786            if (state == Consonant || state == Nukta)
1787                break;
1788            if (state == Matra) {
1789                // ### needs proper testing for correct two/three part matras
1790                break;
1791            }
1792            // ### not sure if this is correct. If it is, does it apply only to Bengali or should
1793            // it work for all Indic languages?
1794            // the combination Independent_A + Vowel Sign AA is allowed.
1795            if (script == HB_Script_Bengali && uc[pos] == 0x9be && uc[pos-1] == 0x985)
1796                break;
1797            if (script == HB_Script_Tamil && state == Matra) {
1798                if (uc[pos-1] == 0x0bc6 &&
1799                     (uc[pos] == 0xbbe || uc[pos] == 0xbd7))
1800                    break;
1801                if (uc[pos-1] == 0x0bc7 && uc[pos] == 0xbbe)
1802                    break;
1803            }
1804            goto finish;
1805
1806        case LengthMark:
1807            if (state == Matra) {
1808                // ### needs proper testing for correct two/three part matras
1809                break;
1810            }
1811        case IndependentVowel:
1812        case Invalid:
1813        case Other:
1814            goto finish;
1815        }
1816        state = newState;
1817        pos++;
1818    }
1819 finish:
1820    return pos+start;
1821}
1822
1823HB_Bool HB_IndicShape(HB_ShaperItem *item)
1824{
1825    assert(item->item.script >= HB_Script_Devanagari && item->item.script <= HB_Script_Sinhala);
1826
1827    HB_Bool openType = false;
1828#ifndef NO_OPENTYPE
1829    openType = HB_SelectScript(item, indic_features);
1830#endif
1831    unsigned short *logClusters = item->log_clusters;
1832
1833    HB_ShaperItem syllable = *item;
1834    int first_glyph = 0;
1835
1836    int sstart = item->item.pos;
1837    int end = sstart + item->item.length;
1838    IDEBUG("indic_shape: from %d length %d", item->item.pos, item->item.length);
1839    while (sstart < end) {
1840        bool invalid;
1841        int send = indic_nextSyllableBoundary(item->item.script, item->string, sstart, end, &invalid);
1842        IDEBUG("syllable from %d, length %d, invalid=%s", sstart, send-sstart,
1843               invalid ? "true" : "false");
1844        syllable.item.pos = sstart;
1845        syllable.item.length = send-sstart;
1846        syllable.glyphs = item->glyphs + first_glyph;
1847        syllable.attributes = item->attributes + first_glyph;
1848        syllable.offsets = item->offsets + first_glyph;
1849        syllable.advances = item->advances + first_glyph;
1850        syllable.num_glyphs = item->num_glyphs - first_glyph;
1851        if (!indic_shape_syllable(openType, &syllable, invalid)) {
1852            IDEBUG("syllable shaping failed, syllable requests %d glyphs", syllable.num_glyphs);
1853            item->num_glyphs += syllable.num_glyphs;
1854            return false;
1855        }
1856        // fix logcluster array
1857        IDEBUG("syllable:");
1858        hb_uint32 g;
1859        for (g = first_glyph; g < first_glyph + syllable.num_glyphs; ++g)
1860            IDEBUG("        %d -> glyph %x", g, item->glyphs[g]);
1861        IDEBUG("    logclusters:");
1862        int i;
1863        for (i = sstart; i < send; ++i) {
1864            IDEBUG("        %d -> glyph %d", i, first_glyph);
1865            logClusters[i-item->item.pos] = first_glyph;
1866        }
1867        sstart = send;
1868        first_glyph += syllable.num_glyphs;
1869    }
1870    item->num_glyphs = first_glyph;
1871    return true;
1872}
1873
1874void HB_IndicAttributes(HB_Script script, const HB_UChar16 *text, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes)
1875{
1876    int end = from + len;
1877    const HB_UChar16 *uc = text + from;
1878    attributes += from;
1879    hb_uint32 i = 0;
1880    while (i < len) {
1881        bool invalid;
1882        hb_uint32 boundary = indic_nextSyllableBoundary(script, text, from+i, end, &invalid) - from;
1883         attributes[i].charStop = true;
1884
1885        if (boundary > len-1) boundary = len;
1886        i++;
1887        while (i < boundary) {
1888            attributes[i].charStop = false;
1889            ++uc;
1890            ++i;
1891        }
1892        assert(i == boundary);
1893    }
1894
1895
1896}
1897
1898
1899