/* hb-ot-shape-complex-indic.cc revision 2c372b80f6befad69e216e3f218b38640b8cc044 */
1/* 2 * Copyright © 2011,2012 Google, Inc. 3 * 4 * This is part of HarfBuzz, a text shaping library. 5 * 6 * Permission is hereby granted, without written agreement and without 7 * license or royalty fees, to use, copy, modify, and distribute this 8 * software and its documentation for any purpose, provided that the 9 * above copyright notice and the following two paragraphs appear in 10 * all copies of this software. 11 * 12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 16 * DAMAGE. 17 * 18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 
23 * 24 * Google Author(s): Behdad Esfahbod 25 */ 26 27#include "hb-ot-shape-complex-indic-private.hh" 28#include "hb-ot-shape-private.hh" 29 30#define OLD_INDIC_TAG(script) (((hb_tag_t) script) | 0x20000000) 31#define IS_OLD_INDIC_TAG(tag) ( \ 32 (tag) == OLD_INDIC_TAG (HB_SCRIPT_BENGALI) || \ 33 (tag) == OLD_INDIC_TAG (HB_SCRIPT_DEVANAGARI) || \ 34 (tag) == OLD_INDIC_TAG (HB_SCRIPT_GUJARATI) || \ 35 (tag) == OLD_INDIC_TAG (HB_SCRIPT_GURMUKHI) || \ 36 (tag) == OLD_INDIC_TAG (HB_SCRIPT_KANNADA) || \ 37 (tag) == OLD_INDIC_TAG (HB_SCRIPT_MALAYALAM) || \ 38 (tag) == OLD_INDIC_TAG (HB_SCRIPT_ORIYA) || \ 39 (tag) == OLD_INDIC_TAG (HB_SCRIPT_TAMIL) || \ 40 (tag) == OLD_INDIC_TAG (HB_SCRIPT_TELUGU) \ 41 ) 42struct indic_options_t 43{ 44 int initialized : 1; 45 int uniscribe_bug_compatible : 1; 46}; 47 48union indic_options_union_t { 49 int i; 50 indic_options_t opts; 51}; 52ASSERT_STATIC (sizeof (int) == sizeof (indic_options_union_t)); 53 54static indic_options_union_t 55indic_options_init (void) 56{ 57 indic_options_union_t u; 58 u.i = 0; 59 u.opts.initialized = 1; 60 61 char *c = getenv ("HB_OT_INDIC_OPTIONS"); 62 u.opts.uniscribe_bug_compatible = c && strstr (c, "uniscribe-bug-compatible"); 63 64 return u; 65} 66 67inline indic_options_t 68indic_options (void) 69{ 70 static indic_options_union_t options; 71 72 if (unlikely (!options.i)) { 73 /* This is idempotent and threadsafe. */ 74 options = indic_options_init (); 75 } 76 77 return options.opts; 78} 79 80 81static int 82compare_codepoint (const void *pa, const void *pb) 83{ 84 hb_codepoint_t a = * (hb_codepoint_t *) pa; 85 hb_codepoint_t b = * (hb_codepoint_t *) pb; 86 87 return a < b ? -1 : a == b ? 
0 : +1; 88} 89 90static bool 91would_substitute (hb_codepoint_t *glyphs, unsigned int glyphs_count, 92 hb_tag_t feature_tag, hb_ot_map_t *map, hb_face_t *face) 93{ 94 unsigned int lookup_indices[32]; 95 unsigned int offset, len; 96 97 offset = 0; 98 do { 99 len = ARRAY_LENGTH (lookup_indices); 100 hb_ot_layout_feature_get_lookup_indexes (face, HB_OT_TAG_GSUB, 101 map->get_feature_index (0/*GSUB*/, feature_tag), 102 offset, 103 &len, 104 lookup_indices); 105 106 for (unsigned int i = 0; i < len; i++) 107 if (hb_ot_layout_would_substitute_lookup (face, glyphs, glyphs_count, lookup_indices[i])) 108 return true; 109 110 offset += len; 111 } while (len == ARRAY_LENGTH (lookup_indices)); 112 113 return false; 114} 115 116static indic_position_t 117consonant_position (hb_codepoint_t u, hb_ot_map_t *map, hb_font_t *font) 118{ 119 if ((u & ~0x007F) == 0x1780) 120 return POS_BELOW_C; /* In Khmer coeng model, all are subjoining. */ 121 122 hb_codepoint_t virama = (u & ~0x007F) | 0x004D; 123 if ((u & ~0x007F) == 0x0D80) virama = 0x0DCA; /* Sinahla */ 124 hb_codepoint_t glyphs[2]; 125 126 unsigned int virama_pos = IS_OLD_INDIC_TAG (map->get_chosen_script (0)) ? 1 : 0; 127 hb_font_get_glyph (font, virama, 0, &glyphs[virama_pos]); 128 hb_font_get_glyph (font, u, 0, &glyphs[1-virama_pos]); 129 130 hb_face_t *face = hb_font_get_face (font); 131 if (would_substitute (glyphs, ARRAY_LENGTH (glyphs), HB_TAG('p','r','e','f'), map, face)) return POS_BELOW_C; 132 if (would_substitute (glyphs, ARRAY_LENGTH (glyphs), HB_TAG('b','l','w','f'), map, face)) return POS_BELOW_C; 133 if (would_substitute (glyphs, ARRAY_LENGTH (glyphs), HB_TAG('p','s','t','f'), map, face)) return POS_POST_C; 134 return POS_BASE_C; 135} 136 137#define MATRA_POS_LEFT(u) POS_PRE_M 138#define MATRA_POS_RIGHT(u) ( \ 139 IS_DEVA(u) ? POS_AFTER_SUB : \ 140 IS_BENG(u) ? POS_AFTER_POST : \ 141 IS_GURM(u) ? POS_AFTER_POST : \ 142 IS_GUJA(u) ? POS_AFTER_POST : \ 143 IS_ORYA(u) ? POS_AFTER_POST : \ 144 IS_TAML(u) ? 
POS_AFTER_POST : \ 145 IS_TELU(u) ? (u <= 0x0C42 ? POS_BEFORE_SUB : POS_AFTER_SUB) : \ 146 IS_KNDA(u) ? (u < 0x0CC3 || u > 0xCD6 ? POS_BEFORE_SUB : POS_AFTER_SUB) : \ 147 IS_MLYM(u) ? POS_AFTER_POST : \ 148 IS_SINH(u) ? POS_AFTER_SUB : \ 149 /*default*/ POS_AFTER_SUB \ 150 ) 151#define MATRA_POS_TOP(u) ( /* BENG and MLYM don't have top matras. */ \ 152 IS_DEVA(u) ? POS_AFTER_SUB : \ 153 IS_GURM(u) ? POS_AFTER_SUB : \ 154 IS_GUJA(u) ? POS_AFTER_SUB : \ 155 IS_ORYA(u) ? POS_AFTER_MAIN : \ 156 IS_TAML(u) ? POS_AFTER_SUB : \ 157 IS_TELU(u) ? POS_BEFORE_SUB : \ 158 IS_KNDA(u) ? POS_BEFORE_SUB : \ 159 IS_SINH(u) ? POS_AFTER_SUB : \ 160 /*default*/ POS_AFTER_SUB \ 161 ) 162#define MATRA_POS_BOTTOM(u) ( \ 163 IS_DEVA(u) ? POS_AFTER_SUB : \ 164 IS_BENG(u) ? POS_AFTER_SUB : \ 165 IS_GURM(u) ? POS_AFTER_POST : \ 166 IS_GUJA(u) ? POS_AFTER_POST : \ 167 IS_ORYA(u) ? POS_AFTER_SUB : \ 168 IS_TAML(u) ? POS_AFTER_POST : \ 169 IS_TELU(u) ? POS_BEFORE_SUB : \ 170 IS_KNDA(u) ? POS_BEFORE_SUB : \ 171 IS_MLYM(u) ? POS_AFTER_POST : \ 172 IS_SINH(u) ? 
POS_AFTER_SUB : \ 173 /*default*/ POS_AFTER_SUB \ 174 ) 175 176 177static indic_position_t 178matra_position (hb_codepoint_t u, indic_position_t side) 179{ 180 switch ((int) side) 181 { 182 case POS_PRE_C: return MATRA_POS_LEFT (u); 183 case POS_POST_C: return MATRA_POS_RIGHT (u); 184 case POS_ABOVE_C: return MATRA_POS_TOP (u); 185 case POS_BELOW_C: return MATRA_POS_BOTTOM (u); 186 }; 187 abort (); 188} 189 190static bool 191is_ra (hb_codepoint_t u) 192{ 193 return !!bsearch (&u, ra_chars, 194 ARRAY_LENGTH (ra_chars), 195 sizeof (ra_chars[0]), 196 compare_codepoint); 197} 198 199#define JOINER_FLAGS (FLAG (OT_ZWJ) | FLAG (OT_ZWNJ)) 200static bool 201is_joiner (const hb_glyph_info_t &info) 202{ 203 return !!(FLAG (info.indic_category()) & JOINER_FLAGS); 204} 205 206#define CONSONANT_FLAGS (FLAG (OT_C) | FLAG (OT_Ra) | FLAG (OT_V) | FLAG (OT_NBSP) | FLAG (OT_DOTTEDCIRCLE)) 207static bool 208is_consonant (const hb_glyph_info_t &info) 209{ 210 /* Note: 211 * 212 * We treat Vowels and placeholders as if they were consonants. This is safe because Vowels 213 * cannot happen in a consonant syllable. The plus side however is, we can call the 214 * consonant syllable logic from the vowel syllable function and get it all right! */ 215 return !!(FLAG (info.indic_category()) & CONSONANT_FLAGS); 216} 217 218#define HALANT_OR_COENG_FLAGS (FLAG (OT_H) | FLAG (OT_Coeng)) 219static bool 220is_halant_or_coeng (const hb_glyph_info_t &info) 221{ 222 return !!(FLAG (info.indic_category()) & HALANT_OR_COENG_FLAGS); 223} 224 225static inline void 226set_indic_properties (hb_glyph_info_t &info, hb_ot_map_t *map, hb_font_t *font) 227{ 228 hb_codepoint_t u = info.codepoint; 229 unsigned int type = get_indic_categories (u); 230 231 232 /* 233 * Assign category 234 */ 235 236 indic_category_t cat = (indic_category_t) (type & 0x0F); 237 238 /* The spec says U+0952 is OT_A. However, testing shows that Uniscribe 239 * treats U+0951..U+0952 all as OT_VD. 
240 * TESTS: 241 * U+092E,U+0947,U+0952 242 * U+092E,U+0952,U+0947 243 * U+092E,U+0947,U+0951 244 * U+092E,U+0951,U+0947 245 * */ 246 if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x0951, 0x0954))) 247 cat = OT_VD; 248 249 if (cat == OT_X && 250 unlikely (hb_in_range<hb_codepoint_t> (u, 0x17CB, 0x17D2))) /* Khmer Various signs */ 251 cat = OT_N; 252 253 if (unlikely (u == 0x17D2)) cat = OT_Coeng; /* Khmer coeng */ 254 else if (unlikely (u == 0x200C)) cat = OT_ZWNJ; 255 else if (unlikely (u == 0x200D)) cat = OT_ZWJ; 256 else if (unlikely (u == 0x25CC)) cat = OT_DOTTEDCIRCLE; 257 else if (unlikely (u == 0x0A71)) cat = OT_SM; /* GURMUKHI ADDAK. More like consonant medial. like 0A75. */ 258 259 if (cat == OT_Repha) { 260 /* There are two kinds of characters marked as Repha: 261 * - The ones that are GenCat=Mn are already positioned visually, ie. after base. (eg. Khmer) 262 * - The ones that are GenCat=Lo is encoded logically, ie. beginning of syllable. (eg. Malayalam) 263 * 264 * We recategorize the first kind to look like a Nukta and attached to the base directly. 265 */ 266 if (_hb_glyph_info_get_general_category (&info) == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) 267 cat = OT_N; 268 } 269 270 271 272 273 /* 274 * Assign position. 275 */ 276 277 indic_position_t pos = (indic_position_t) (type >> 4); 278 279 if ((FLAG (cat) & CONSONANT_FLAGS)) 280 { 281 pos = consonant_position (u, map, font); 282 if (is_ra (u)) 283 cat = OT_Ra; 284 } 285 else if (cat == OT_M) 286 { 287 pos = matra_position (u, pos); 288 } 289 else if (cat == OT_SM || cat == OT_VD) 290 { 291 pos = POS_SMVD; 292 } 293 294 295 296 info.indic_category() = cat; 297 info.indic_position() = pos; 298} 299 300 301 302 303 304 305 306struct feature_list_t { 307 hb_tag_t tag; 308 hb_bool_t is_global; 309}; 310 311/* These features are applied one at a time, given the order in this table. 
*/ 312static const feature_list_t 313indic_basic_features[] = 314{ 315 {HB_TAG('n','u','k','t'), true}, 316 {HB_TAG('a','k','h','n'), true}, 317 {HB_TAG('r','p','h','f'), false}, 318 {HB_TAG('r','k','r','f'), true}, 319 {HB_TAG('p','r','e','f'), false}, 320 {HB_TAG('b','l','w','f'), false}, 321 {HB_TAG('h','a','l','f'), false}, 322 {HB_TAG('a','b','v','f'), false}, 323 {HB_TAG('p','s','t','f'), false}, 324 {HB_TAG('c','f','a','r'), false}, 325 {HB_TAG('c','j','c','t'), true}, 326 {HB_TAG('v','a','t','u'), true}, 327}; 328 329/* Same order as the indic_basic_features array */ 330enum { 331 _NUKT, 332 _AKHN, 333 RPHF, 334 _RKRF, 335 PREF, 336 BLWF, 337 HALF, 338 ABVF, 339 PSTF, 340 CFAR, 341 _CJCT, 342 VATU 343}; 344 345/* These features are applied all at once. */ 346static const feature_list_t 347indic_other_features[] = 348{ 349 {HB_TAG('i','n','i','t'), false}, 350 {HB_TAG('p','r','e','s'), true}, 351 {HB_TAG('a','b','v','s'), true}, 352 {HB_TAG('b','l','w','s'), true}, 353 {HB_TAG('p','s','t','s'), true}, 354 {HB_TAG('h','a','l','n'), true}, 355 356 {HB_TAG('d','i','s','t'), true}, 357 {HB_TAG('a','b','v','m'), true}, 358 {HB_TAG('b','l','w','m'), true}, 359}; 360 361 362static void 363initial_reordering (const hb_ot_map_t *map, 364 hb_face_t *face, 365 hb_buffer_t *buffer, 366 void *user_data HB_UNUSED); 367static void 368final_reordering (const hb_ot_map_t *map, 369 hb_face_t *face, 370 hb_buffer_t *buffer, 371 void *user_data HB_UNUSED); 372 373void 374_hb_ot_shape_complex_collect_features_indic (hb_ot_map_builder_t *map, 375 const hb_segment_properties_t *props HB_UNUSED) 376{ 377 map->add_bool_feature (HB_TAG('l','o','c','l')); 378 /* The Indic specs do not require ccmp, but we apply it here since if 379 * there is a use of it, it's typically at the beginning. 
*/ 380 map->add_bool_feature (HB_TAG('c','c','m','p')); 381 382 map->add_gsub_pause (initial_reordering, NULL); 383 384 for (unsigned int i = 0; i < ARRAY_LENGTH (indic_basic_features); i++) { 385 map->add_bool_feature (indic_basic_features[i].tag, indic_basic_features[i].is_global); 386 map->add_gsub_pause (NULL, NULL); 387 } 388 389 map->add_gsub_pause (final_reordering, NULL); 390 391 for (unsigned int i = 0; i < ARRAY_LENGTH (indic_other_features); i++) 392 map->add_bool_feature (indic_other_features[i].tag, indic_other_features[i].is_global); 393} 394 395void 396_hb_ot_shape_complex_override_features_indic (hb_ot_map_builder_t *map, 397 const hb_segment_properties_t *props HB_UNUSED) 398{ 399 /* Uniscribe does not apply 'kern'. */ 400 if (indic_options ().uniscribe_bug_compatible) 401 map->add_feature (HB_TAG('k','e','r','n'), 0, true); 402} 403 404 405hb_ot_shape_normalization_mode_t 406_hb_ot_shape_complex_normalization_preference_indic (void) 407{ 408 /* We want split matras decomposed by the common shaping logic. */ 409 return HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED; 410} 411 412 413void 414_hb_ot_shape_complex_setup_masks_indic (hb_ot_map_t *map, 415 hb_buffer_t *buffer, 416 hb_font_t *font) 417{ 418 HB_BUFFER_ALLOCATE_VAR (buffer, indic_category); 419 HB_BUFFER_ALLOCATE_VAR (buffer, indic_position); 420 421 /* We cannot setup masks here. We save information about characters 422 * and setup masks later on in a pause-callback. */ 423 424 unsigned int count = buffer->len; 425 for (unsigned int i = 0; i < count; i++) 426 set_indic_properties (buffer->info[i], map, font); 427} 428 429static int 430compare_indic_order (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb) 431{ 432 int a = pa->indic_position(); 433 int b = pb->indic_position(); 434 435 return a < b ? -1 : a == b ? 
0 : +1; 436} 437 438/* Rules from: 439 * https://www.microsoft.com/typography/otfntdev/devanot/shaping.aspx */ 440 441static void 442initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buffer, hb_mask_t *basic_mask_array, 443 unsigned int start, unsigned int end) 444{ 445 hb_glyph_info_t *info = buffer->info; 446 447 448 /* 1. Find base consonant: 449 * 450 * The shaping engine finds the base consonant of the syllable, using the 451 * following algorithm: starting from the end of the syllable, move backwards 452 * until a consonant is found that does not have a below-base or post-base 453 * form (post-base forms have to follow below-base forms), or that is not a 454 * pre-base reordering Ra, or arrive at the first consonant. The consonant 455 * stopped at will be the base. 456 * 457 * o If the syllable starts with Ra + Halant (in a script that has Reph) 458 * and has more than one consonant, Ra is excluded from candidates for 459 * base consonants. 460 */ 461 462 unsigned int base = end; 463 bool has_reph = false; 464 465 { 466 /* -> If the syllable starts with Ra + Halant (in a script that has Reph) 467 * and has more than one consonant, Ra is excluded from candidates for 468 * base consonants. */ 469 unsigned int limit = start; 470 if (basic_mask_array[RPHF] && 471 start + 3 <= end && 472 info[start].indic_category() == OT_Ra && 473 info[start + 1].indic_category() == OT_H && 474 (unlikely (buffer->props.script == HB_SCRIPT_SINHALA) ? 
475 info[start + 2].indic_category() == OT_ZWJ /* In Sinhala, form Reph only if ZWJ is present */: 476 !is_joiner (info[start + 2] /* In other scripts, any joiner blocks Reph formation */ ) 477 )) 478 { 479 limit += 2; 480 while (limit < end && is_joiner (info[limit])) 481 limit++; 482 base = start; 483 has_reph = true; 484 }; 485 486 enum base_position_t { 487 BASE_FIRST, 488 BASE_LAST 489 } base_pos; 490 491 switch ((hb_tag_t) buffer->props.script) 492 { 493 case HB_SCRIPT_KHMER: 494 base_pos = BASE_FIRST; 495 break; 496 497 default: 498 base_pos = BASE_LAST; 499 break; 500 } 501 502 if (base_pos == BASE_LAST) 503 { 504 /* -> starting from the end of the syllable, move backwards */ 505 unsigned int i = end; 506 do { 507 i--; 508 /* -> until a consonant is found */ 509 if (is_consonant (info[i])) 510 { 511 /* -> that does not have a below-base or post-base form 512 * (post-base forms have to follow below-base forms), */ 513 if (info[i].indic_position() != POS_BELOW_C && 514 info[i].indic_position() != POS_POST_C) 515 { 516 base = i; 517 break; 518 } 519 520 /* -> or that is not a pre-base reordering Ra, 521 * 522 * IMPLEMENTATION NOTES: 523 * 524 * Our pre-base reordering Ra's are marked POS_BELOW, so will be skipped 525 * by the logic above already. 526 */ 527 528 /* -> or arrive at the first consonant. The consonant stopped at will 529 * be the base. */ 530 base = i; 531 } 532 else 533 { 534 /* A ZWJ at the end of syllable, or any ZWJ/ZWNJ in other places, stop the base 535 * search (to request explicit half or halant forms. */ 536 if (is_joiner (info[i]) && (i + 1 < end || info[i].indic_category() == OT_ZWJ)) 537 break; 538 } 539 } while (i > limit); 540 } 541 else 542 { 543 /* In scripts without half forms (eg. Khmer), the first consonant is always the base. */ 544 545 if (!has_reph) 546 base = limit; 547 } 548 549 if (base < start) 550 base = start; /* Just in case... 
*/ 551 552 553 /* -> If the syllable starts with Ra + Halant (in a script that has Reph) 554 * and has more than one consonant, Ra is excluded from candidates for 555 * base consonants. */ 556 if (has_reph && base == start) { 557 /* Have no other consonant, so Reph is not formed and Ra becomes base. */ 558 has_reph = false; 559 } 560 } 561 562 563 /* 2. Decompose and reorder Matras: 564 * 565 * Each matra and any syllable modifier sign in the cluster are moved to the 566 * appropriate position relative to the consonant(s) in the cluster. The 567 * shaping engine decomposes two- or three-part matras into their constituent 568 * parts before any repositioning. Matra characters are classified by which 569 * consonant in a conjunct they have affinity for and are reordered to the 570 * following positions: 571 * 572 * o Before first half form in the syllable 573 * o After subjoined consonants 574 * o After post-form consonant 575 * o After main consonant (for above marks) 576 * 577 * IMPLEMENTATION NOTES: 578 * 579 * The normalize() routine has already decomposed matras for us, so we don't 580 * need to worry about that. 581 */ 582 583 584 /* 3. Reorder marks to canonical order: 585 * 586 * Adjacent nukta and halant or nukta and vedic sign are always repositioned 587 * if necessary, so that the nukta is first. 588 * 589 * IMPLEMENTATION NOTES: 590 * 591 * We don't need to do this: the normalize() routine already did this for us. 592 */ 593 594 595 /* Reorder characters */ 596 597 for (unsigned int i = start; i < base; i++) 598 info[i].indic_position() = MIN (POS_PRE_C, (indic_position_t) info[i].indic_position()); 599 600 if (base < end) 601 info[base].indic_position() = POS_BASE_C; 602 603 /* Mark final consonants. A final consonant is one appearing after a matra, 604 * like in Khmer. 
*/ 605 for (unsigned int i = base + 1; i < end; i++) 606 if (info[i].indic_category() == OT_M) { 607 for (unsigned int j = i + 1; j < end; j++) 608 if (is_consonant (info[j])) { 609 info[j].indic_position() = POS_FINAL_C; 610 break; 611 } 612 break; 613 } 614 615 /* Handle beginning Ra */ 616 if (has_reph) 617 info[start].indic_position() = POS_RA_TO_BECOME_REPH; 618 619 /* For old-style Indic script tags, move the first post-base Halant after 620 * last consonant. */ 621 if (IS_OLD_INDIC_TAG (map->get_chosen_script (0))) { 622 for (unsigned int i = base + 1; i < end; i++) 623 if (info[i].indic_category() == OT_H) { 624 unsigned int j; 625 for (j = end - 1; j > i; j--) 626 if (is_consonant (info[j])) 627 break; 628 if (j > i) { 629 /* Move Halant to after last consonant. */ 630 hb_glyph_info_t t = info[i]; 631 memmove (&info[i], &info[i + 1], (j - i) * sizeof (info[0])); 632 info[j] = t; 633 } 634 break; 635 } 636 } 637 638 /* Attach ZWJ, ZWNJ, nukta, and halant to previous char to move with them. */ 639 if (!indic_options ().uniscribe_bug_compatible) 640 { 641 /* Please update the Uniscribe branch when touching this! */ 642 for (unsigned int i = start + 1; i < end; i++) 643 if ((FLAG (info[i].indic_category()) & (FLAG (OT_ZWNJ) | FLAG (OT_ZWJ) | FLAG (OT_N) | FLAG (OT_RS) | FLAG (OT_H)))) 644 info[i].indic_position() = info[i - 1].indic_position(); 645 } else { 646 /* 647 * Uniscribe doesn't move the Halant with Left Matra. 648 * TEST: U+092B,U+093F,U+094DE 649 */ 650 /* Please update the non-Uniscribe branch when touching this! 
*/ 651 for (unsigned int i = start + 1; i < end; i++) 652 if ((FLAG (info[i].indic_category()) & (FLAG (OT_ZWNJ) | FLAG (OT_ZWJ) | FLAG (OT_N) | FLAG (OT_RS) | FLAG (OT_H)))) { 653 info[i].indic_position() = info[i - 1].indic_position(); 654 if (info[i].indic_category() == OT_H && info[i].indic_position() == POS_PRE_M) 655 for (unsigned int j = i; j > start; j--) 656 if (info[j - 1].indic_position() != POS_PRE_M) { 657 info[i].indic_position() = info[j - 1].indic_position(); 658 break; 659 } 660 } 661 } 662 /* Re-attach ZWJ, ZWNJ, and halant to next char, for after-base consonants. */ 663 { 664 unsigned int last_halant = end; 665 for (unsigned int i = base + 1; i < end; i++) 666 if (is_halant_or_coeng (info[i])) 667 last_halant = i; 668 else if (is_consonant (info[i])) { 669 for (unsigned int j = last_halant; j < i; j++) 670 info[j].indic_position() = info[i].indic_position(); 671 } 672 } 673 674 /* We do bubble-sort, skip malicious clusters attempts */ 675 if (end - start < 64) 676 { 677 /* Sit tight, rock 'n roll! 
*/ 678 hb_bubble_sort (info + start, end - start, compare_indic_order); 679 /* Find base again */ 680 base = end; 681 for (unsigned int i = start; i < end; i++) 682 if (info[i].indic_position() == POS_BASE_C) { 683 base = i; 684 break; 685 } 686 } 687 688 /* Setup masks now */ 689 690 { 691 hb_mask_t mask; 692 693 /* Reph */ 694 for (unsigned int i = start; i < end && info[i].indic_position() == POS_RA_TO_BECOME_REPH; i++) 695 info[i].mask |= basic_mask_array[RPHF]; 696 697 /* Pre-base */ 698 mask = basic_mask_array[HALF]; 699 for (unsigned int i = start; i < base; i++) 700 info[i].mask |= mask; 701 /* Base */ 702 mask = 0; 703 if (base < end) 704 info[base].mask |= mask; 705 /* Post-base */ 706 mask = basic_mask_array[BLWF] | basic_mask_array[ABVF] | basic_mask_array[PSTF]; 707 for (unsigned int i = base + 1; i < end; i++) 708 info[i].mask |= mask; 709 } 710 711 /* XXX This will not match for old-Indic spec since the Halant-Ra order is reversed already. */ 712 if (basic_mask_array[PREF] && base + 3 <= end) 713 { 714 /* Find a Halant,Ra sequence and mark it fore pre-base reordering processing. */ 715 for (unsigned int i = base + 1; i + 1 < end; i++) 716 if (is_halant_or_coeng (info[i]) && 717 info[i + 1].indic_category() == OT_Ra) 718 { 719 info[i++].mask |= basic_mask_array[PREF]; 720 info[i++].mask |= basic_mask_array[PREF]; 721 722 /* Mark the subsequent stuff with 'cfar'. Used in Khmer. 723 * Read the feature spec. 
724 * This allows distinguishing the following cases with MS Khmer fonts: 725 * U+1784,U+17D2,U+179A,U+17D2,U+1782 726 * U+1784,U+17D2,U+1782,U+17D2,U+179A 727 */ 728 for (; i < end; i++) 729 info[i].mask |= basic_mask_array[CFAR]; 730 731 break; 732 } 733 } 734 735 /* Apply ZWJ/ZWNJ effects */ 736 for (unsigned int i = start + 1; i < end; i++) 737 if (is_joiner (info[i])) { 738 bool non_joiner = info[i].indic_category() == OT_ZWNJ; 739 unsigned int j = i; 740 741 do { 742 j--; 743 744 /* A ZWJ disables CJCT, however, it's mere presence is enough 745 * to disable ligation. No explicit action needed. */ 746 747 /* A ZWNJ disables HALF. */ 748 if (non_joiner) 749 info[j].mask &= ~basic_mask_array[HALF]; 750 751 } while (j > start && !is_consonant (info[j])); 752 } 753} 754 755 756static void 757initial_reordering_vowel_syllable (const hb_ot_map_t *map, 758 hb_buffer_t *buffer, 759 hb_mask_t *basic_mask_array, 760 unsigned int start, unsigned int end) 761{ 762 /* We made the vowels look like consonants. So let's call the consonant logic! */ 763 initial_reordering_consonant_syllable (map, buffer, basic_mask_array, start, end); 764} 765 766static void 767initial_reordering_standalone_cluster (const hb_ot_map_t *map, 768 hb_buffer_t *buffer, 769 hb_mask_t *basic_mask_array, 770 unsigned int start, unsigned int end) 771{ 772 /* We treat NBSP/dotted-circle as if they are consonants, so we should just chain. 773 * Only if not in compatibility mode that is... */ 774 775 if (indic_options ().uniscribe_bug_compatible) 776 { 777 /* For dotted-circle, this is what Uniscribe does: 778 * If dotted-circle is the last glyph, it just does nothing. 779 * Ie. It doesn't form Reph. 
*/ 780 if (buffer->info[end - 1].indic_category() == OT_DOTTEDCIRCLE) 781 return; 782 } 783 784 initial_reordering_consonant_syllable (map, buffer, basic_mask_array, start, end); 785} 786 787static void 788initial_reordering_non_indic (const hb_ot_map_t *map HB_UNUSED, 789 hb_buffer_t *buffer HB_UNUSED, 790 hb_mask_t *basic_mask_array HB_UNUSED, 791 unsigned int start HB_UNUSED, unsigned int end HB_UNUSED) 792{ 793 /* Nothing to do right now. If we ever switch to using the output 794 * buffer in the reordering process, we'd need to next_glyph() here. */ 795} 796 797#include "hb-ot-shape-complex-indic-machine.hh" 798 799static void 800initial_reordering (const hb_ot_map_t *map, 801 hb_face_t *face HB_UNUSED, 802 hb_buffer_t *buffer, 803 void *user_data HB_UNUSED) 804{ 805 hb_mask_t basic_mask_array[ARRAY_LENGTH (indic_basic_features)] = {0}; 806 unsigned int num_masks = ARRAY_LENGTH (indic_basic_features); 807 for (unsigned int i = 0; i < num_masks; i++) 808 basic_mask_array[i] = map->get_1_mask (indic_basic_features[i].tag); 809 810 find_syllables (map, buffer, basic_mask_array); 811} 812 813static void 814final_reordering_syllable (hb_buffer_t *buffer, 815 hb_mask_t init_mask, hb_mask_t pref_mask, 816 unsigned int start, unsigned int end) 817{ 818 hb_glyph_info_t *info = buffer->info; 819 820 /* 4. Final reordering: 821 * 822 * After the localized forms and basic shaping forms GSUB features have been 823 * applied (see below), the shaping engine performs some final glyph 824 * reordering before applying all the remaining font features to the entire 825 * cluster. 
826 */ 827 828 /* Find base again */ 829 unsigned int base = end; 830 while (start < base && info[base - 1].indic_position() >= POS_BASE_C) 831 base--; 832 833 unsigned int start_of_last_cluster = base; 834 835 /* o Reorder matras: 836 * 837 * If a pre-base matra character had been reordered before applying basic 838 * features, the glyph can be moved closer to the main consonant based on 839 * whether half-forms had been formed. Actual position for the matra is 840 * defined as “after last standalone halant glyph, after initial matra 841 * position and before the main consonant”. If ZWJ or ZWNJ follow this 842 * halant, position is moved after it. 843 */ 844 845 if (start < base) /* Otherwise there can't be any pre-base matra characters. */ 846 { 847 unsigned int new_pos = base - 1; 848 while (new_pos > start && 849 !(FLAG (info[new_pos].indic_category()) & (FLAG (OT_M) | FLAG (OT_H) | FLAG (OT_Coeng)))) 850 new_pos--; 851 /* If we found no Halant we are done. Otherwise only proceed if the Halant does 852 * not belong to the Matra itself! */ 853 if (is_halant_or_coeng (info[new_pos]) && 854 info[new_pos].indic_position() != POS_PRE_M) { 855 /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */ 856 if (new_pos + 1 < end && is_joiner (info[new_pos + 1])) 857 new_pos++; 858 859 /* Now go see if there's actually any matras... */ 860 for (unsigned int i = new_pos; i > start; i--) 861 if (info[i - 1].indic_position () == POS_PRE_M) 862 { 863 unsigned int old_pos = i - 1; 864 hb_glyph_info_t tmp = info[old_pos]; 865 memmove (&info[old_pos], &info[old_pos + 1], (new_pos - old_pos) * sizeof (info[0])); 866 info[new_pos] = tmp; 867 start_of_last_cluster = MIN (new_pos, start_of_last_cluster); 868 new_pos--; 869 } 870 } 871 } 872 873 874 /* o Reorder reph: 875 * 876 * Reph’s original position is always at the beginning of the syllable, 877 * (i.e. it is not reordered at the character reordering stage). 
However, 878 * it will be reordered according to the basic-forms shaping results. 879 * Possible positions for reph, depending on the script, are; after main, 880 * before post-base consonant forms, and after post-base consonant forms. 881 */ 882 883 /* If there's anything after the Ra that has the REPH pos, it ought to be halant. 884 * Which means that the font has failed to ligate the Reph. In which case, we 885 * shouldn't move. */ 886 if (start + 1 < end && 887 info[start].indic_position() == POS_RA_TO_BECOME_REPH && 888 info[start + 1].indic_position() != POS_RA_TO_BECOME_REPH) 889 { 890 unsigned int new_reph_pos; 891 892 enum reph_position_t { 893 REPH_AFTER_MAIN, 894 REPH_BEFORE_SUBSCRIPT, 895 REPH_AFTER_SUBSCRIPT, 896 REPH_BEFORE_POSTSCRIPT, 897 REPH_AFTER_POSTSCRIPT 898 } reph_pos; 899 900 /* XXX Figure out old behavior too */ 901 switch ((hb_tag_t) buffer->props.script) 902 { 903 case HB_SCRIPT_MALAYALAM: 904 case HB_SCRIPT_ORIYA: 905 reph_pos = REPH_AFTER_MAIN; 906 break; 907 908 case HB_SCRIPT_GURMUKHI: 909 reph_pos = REPH_BEFORE_SUBSCRIPT; 910 break; 911 912 case HB_SCRIPT_BENGALI: 913 reph_pos = REPH_AFTER_SUBSCRIPT; 914 break; 915 916 default: 917 case HB_SCRIPT_DEVANAGARI: 918 case HB_SCRIPT_GUJARATI: 919 case HB_SCRIPT_SINHALA: 920 reph_pos = REPH_BEFORE_POSTSCRIPT; 921 break; 922 923 case HB_SCRIPT_KANNADA: 924 case HB_SCRIPT_TAMIL: 925 case HB_SCRIPT_TELUGU: 926 reph_pos = REPH_AFTER_POSTSCRIPT; 927 break; 928 } 929 930 /* 1. If reph should be positioned after post-base consonant forms, 931 * proceed to step 5. 932 */ 933 if (reph_pos == REPH_AFTER_POSTSCRIPT) 934 { 935 goto reph_step_5; 936 } 937 938 /* 2. If the reph repositioning class is not after post-base: target 939 * position is after the first explicit halant glyph between the 940 * first post-reph consonant and last main consonant. If ZWJ or ZWNJ 941 * are following this halant, position is moved after it. If such 942 * position is found, this is the target position. 
Otherwise, 943 * proceed to the next step. 944 * 945 * Note: in old-implementation fonts, where classifications were 946 * fixed in shaping engine, there was no case where reph position 947 * will be found on this step. 948 */ 949 { 950 new_reph_pos = start + 1; 951 while (new_reph_pos < base && !is_halant_or_coeng (info[new_reph_pos])) 952 new_reph_pos++; 953 954 if (new_reph_pos < base && is_halant_or_coeng (info[new_reph_pos])) { 955 /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */ 956 if (new_reph_pos + 1 < base && is_joiner (info[new_reph_pos + 1])) 957 new_reph_pos++; 958 goto reph_move; 959 } 960 } 961 962 /* 3. If reph should be repositioned after the main consonant: find the 963 * first consonant not ligated with main, or find the first 964 * consonant that is not a potential pre-base reordering Ra. 965 */ 966 if (reph_pos == REPH_AFTER_MAIN) 967 { 968 new_reph_pos = base; 969 /* XXX Skip potential pre-base reordering Ra. */ 970 while (new_reph_pos < end && 971 !( FLAG (info[new_reph_pos + 1].indic_position()) & (FLAG (POS_BELOW_C) | FLAG (POS_POST_C) | FLAG (POS_AFTER_POST) | FLAG (POS_SMVD)))) 972 new_reph_pos++; 973 if (new_reph_pos < end) 974 goto reph_move; 975 } 976 977 /* 4. If reph should be positioned before post-base consonant, find 978 * first post-base classified consonant not ligated with main. If no 979 * consonant is found, the target position should be before the 980 * first matra, syllable modifier sign or vedic sign. 981 */ 982 /* This is our take on what step 4 is trying to say (and failing, BADLY). */ 983 if (reph_pos == REPH_AFTER_SUBSCRIPT) 984 { 985 new_reph_pos = base; 986 while (new_reph_pos < end && 987 !( FLAG (info[new_reph_pos + 1].indic_position()) & (FLAG (POS_POST_C) | FLAG (POS_AFTER_POST) | FLAG (POS_SMVD)))) 988 new_reph_pos++; 989 if (new_reph_pos < end) 990 goto reph_move; 991 } 992 993 /* 5. 
If no consonant is found in steps 3 or 4, move reph to a position 994 * immediately before the first post-base matra, syllable modifier 995 * sign or vedic sign that has a reordering class after the intended 996 * reph position. For example, if the reordering position for reph 997 * is post-main, it will skip above-base matras that also have a 998 * post-main position. 999 */ 1000 reph_step_5: 1001 { 1002 /* Copied from step 2. */ 1003 new_reph_pos = start + 1; 1004 while (new_reph_pos < base && !is_halant_or_coeng (info[new_reph_pos])) 1005 new_reph_pos++; 1006 1007 if (new_reph_pos < base && is_halant_or_coeng (info[new_reph_pos])) { 1008 /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */ 1009 if (new_reph_pos + 1 < base && is_joiner (info[new_reph_pos + 1])) 1010 new_reph_pos++; 1011 goto reph_move; 1012 } 1013 } 1014 1015 /* 6. Otherwise, reorder reph to the end of the syllable. 1016 */ 1017 { 1018 new_reph_pos = end - 1; 1019 while (new_reph_pos > start && info[new_reph_pos].indic_position() == POS_SMVD) 1020 new_reph_pos--; 1021 1022 /* 1023 * If the Reph is to be ending up after a Matra,Halant sequence, 1024 * position it before that Halant so it can interact with the Matra. 1025 * However, if it's a plain Consonant,Halant we shouldn't do that. 1026 * Uniscribe doesn't do this. 1027 * TEST: U+0930,U+094D,U+0915,U+094B,U+094D 1028 */ 1029 if (!indic_options ().uniscribe_bug_compatible && 1030 unlikely (is_halant_or_coeng (info[new_reph_pos]))) { 1031 for (unsigned int i = base + 1; i < new_reph_pos; i++) 1032 if (info[i].indic_category() == OT_M) { 1033 /* Ok, got it. */ 1034 new_reph_pos--; 1035 } 1036 } 1037 goto reph_move; 1038 } 1039 1040 reph_move: 1041 { 1042 /* Move */ 1043 hb_glyph_info_t reph = info[start]; 1044 memmove (&info[start], &info[start + 1], (new_reph_pos - start) * sizeof (info[0])); 1045 info[new_reph_pos] = reph; 1046 start_of_last_cluster = start; /* Yay, one big cluster! 
*/ 1047 } 1048 } 1049 1050 1051 /* o Reorder pre-base reordering consonants: 1052 * 1053 * If a pre-base reordering consonant is found, reorder it according to 1054 * the following rules: 1055 */ 1056 1057 if (pref_mask && base + 1 < end) /* Otherwise there can't be any pre-base reordering Ra. */ 1058 { 1059 for (unsigned int i = base + 1; i < end; i++) 1060 if ((info[i].mask & pref_mask) != 0) 1061 { 1062 /* 1. Only reorder a glyph produced by substitution during application 1063 * of the <pref> feature. (Note that a font may shape a Ra consonant with 1064 * the feature generally but block it in certain contexts.) 1065 */ 1066 if (i + 1 == end || (info[i + 1].mask & pref_mask) == 0) 1067 { 1068 /* 1069 * 2. Try to find a target position the same way as for pre-base matra. 1070 * If it is found, reorder pre-base consonant glyph. 1071 * 1072 * 3. If position is not found, reorder immediately before main 1073 * consonant. 1074 */ 1075 1076 unsigned int new_pos = base; 1077 while (new_pos > start + 1 && 1078 !(FLAG (info[new_pos - 1].indic_category()) & (FLAG (OT_M) | FLAG (OT_H) | FLAG (OT_Coeng)))) 1079 new_pos--; 1080 1081 if (new_pos > start && is_halant_or_coeng (info[new_pos - 1])) 1082 /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */ 1083 if (new_pos < end && is_joiner (info[new_pos])) 1084 new_pos++; 1085 1086 { 1087 unsigned int old_pos = i; 1088 hb_glyph_info_t tmp = info[old_pos]; 1089 memmove (&info[new_pos + 1], &info[new_pos], (old_pos - new_pos) * sizeof (info[0])); 1090 info[new_pos] = tmp; 1091 start_of_last_cluster = MIN (new_pos, start_of_last_cluster); 1092 } 1093 } 1094 1095 break; 1096 } 1097 } 1098 1099 1100 /* Apply 'init' to the Left Matra if it's a word start. 
*/ 1101 if (info[start].indic_position () == POS_PRE_M && 1102 (!start || 1103 !(FLAG (_hb_glyph_info_get_general_category (&info[start - 1])) & 1104 FLAG_RANGE (HB_UNICODE_GENERAL_CATEGORY_FORMAT, HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)))) 1105 info[start].mask |= init_mask; 1106 1107 1108 /* 1109 * Finish off the clusters and go home! 1110 */ 1111 1112 if (!indic_options ().uniscribe_bug_compatible) 1113 { 1114 /* This is what Uniscribe does. Ie. add cluster boundaries after Halant,ZWNJ. 1115 * This means, half forms are submerged into the main consonants cluster. 1116 * This is unnecessary, and makes cursor positioning harder, but that's what 1117 * Uniscribe does. */ 1118 unsigned int cluster_start = start; 1119 for (unsigned int i = start + 1; i < start_of_last_cluster; i++) 1120 if (is_halant_or_coeng (info[i - 1]) && info[i].indic_category() == OT_ZWNJ) { 1121 i++; 1122 buffer->merge_clusters (cluster_start, i); 1123 cluster_start = i; 1124 } 1125 start_of_last_cluster = cluster_start; 1126 } 1127 1128 buffer->merge_clusters (start_of_last_cluster, end); 1129} 1130 1131 1132static void 1133final_reordering (const hb_ot_map_t *map, 1134 hb_face_t *face HB_UNUSED, 1135 hb_buffer_t *buffer, 1136 void *user_data HB_UNUSED) 1137{ 1138 unsigned int count = buffer->len; 1139 if (!count) return; 1140 1141 hb_mask_t init_mask = map->get_1_mask (HB_TAG('i','n','i','t')); 1142 hb_mask_t pref_mask = map->get_1_mask (HB_TAG('p','r','e','f')); 1143 1144 hb_glyph_info_t *info = buffer->info; 1145 unsigned int last = 0; 1146 unsigned int last_syllable = info[0].syllable(); 1147 for (unsigned int i = 1; i < count; i++) 1148 if (last_syllable != info[i].syllable()) { 1149 final_reordering_syllable (buffer, init_mask, pref_mask, last, i); 1150 last = i; 1151 last_syllable = info[last].syllable(); 1152 } 1153 final_reordering_syllable (buffer, init_mask, pref_mask, last, count); 1154 1155 HB_BUFFER_DEALLOCATE_VAR (buffer, indic_category); 1156 HB_BUFFER_DEALLOCATE_VAR 
(buffer, indic_position); 1157} 1158 1159 1160 1161