hb-ot-shape-complex-indic.cc revision 5d32690a3428fa86eb26fe5fcec943a10aa95881
1/* 2 * Copyright © 2011,2012 Google, Inc. 3 * 4 * This is part of HarfBuzz, a text shaping library. 5 * 6 * Permission is hereby granted, without written agreement and without 7 * license or royalty fees, to use, copy, modify, and distribute this 8 * software and its documentation for any purpose, provided that the 9 * above copyright notice and the following two paragraphs appear in 10 * all copies of this software. 11 * 12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 16 * DAMAGE. 17 * 18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 23 * 24 * Google Author(s): Behdad Esfahbod 25 */ 26 27#include "hb-ot-shape-complex-indic-private.hh" 28#include "hb-ot-shape-private.hh" 29 30#define OLD_INDIC_TAG(script) (((hb_tag_t) script) | 0x20000000) 31#define IS_OLD_INDIC_TAG(tag) ( \ 32 (tag) == OLD_INDIC_TAG (HB_SCRIPT_BENGALI) || \ 33 (tag) == OLD_INDIC_TAG (HB_SCRIPT_DEVANAGARI) || \ 34 (tag) == OLD_INDIC_TAG (HB_SCRIPT_GUJARATI) || \ 35 (tag) == OLD_INDIC_TAG (HB_SCRIPT_GURMUKHI) || \ 36 (tag) == OLD_INDIC_TAG (HB_SCRIPT_KANNADA) || \ 37 (tag) == OLD_INDIC_TAG (HB_SCRIPT_MALAYALAM) || \ 38 (tag) == OLD_INDIC_TAG (HB_SCRIPT_ORIYA) || \ 39 (tag) == OLD_INDIC_TAG (HB_SCRIPT_TAMIL) || \ 40 (tag) == OLD_INDIC_TAG (HB_SCRIPT_TELUGU) \ 41 ) 42struct indic_options_t 43{ 44 int initialized : 1; 45 int uniscribe_bug_compatible : 1; 46}; 47 48union indic_options_union_t { 49 int i; 50 indic_options_t opts; 51}; 52ASSERT_STATIC (sizeof (int) == sizeof (indic_options_union_t)); 53 54static indic_options_union_t 55indic_options_init (void) 56{ 57 indic_options_union_t u; 58 u.i = 0; 59 u.opts.initialized = 1; 60 61 char *c = getenv ("HB_OT_INDIC_OPTIONS"); 62 u.opts.uniscribe_bug_compatible = c && strstr (c, "uniscribe-bug-compatible"); 63 64 return u; 65} 66 67inline indic_options_t 68indic_options (void) 69{ 70 static indic_options_union_t options; 71 72 if (unlikely (!options.i)) { 73 /* This is idempotent and threadsafe. */ 74 options = indic_options_init (); 75 } 76 77 return options.opts; 78} 79 80 81static int 82compare_codepoint (const void *pa, const void *pb) 83{ 84 hb_codepoint_t a = * (hb_codepoint_t *) pa; 85 hb_codepoint_t b = * (hb_codepoint_t *) pb; 86 87 return a < b ? -1 : a == b ? 0 : +1; 88} 89 90static indic_position_t 91consonant_position (hb_codepoint_t u) 92{ 93 consonant_position_t *record; 94 95 /* Khmer does not have pre-base half forms. */ 96 if (0x1780 <= u && u <= 0x17FF) 97 return POS_BELOW_C; 98 99 record = (consonant_position_t *) bsearch (&u, consonant_positions, 100 ARRAY_LENGTH (consonant_positions), 101 sizeof (consonant_positions[0]), 102 compare_codepoint); 103 104 return record ? record->position : POS_BASE_C; 105} 106 107static bool 108is_ra (hb_codepoint_t u) 109{ 110 return !!bsearch (&u, ra_chars, 111 ARRAY_LENGTH (ra_chars), 112 sizeof (ra_chars[0]), 113 compare_codepoint); 114} 115 116static bool 117is_joiner (const hb_glyph_info_t &info) 118{ 119 return !!(FLAG (info.indic_category()) & (FLAG (OT_ZWJ) | FLAG (OT_ZWNJ))); 120} 121 122static bool 123is_consonant (const hb_glyph_info_t &info) 124{ 125 /* Note: 126 * 127 * We treat Vowels and placeholders as if they were consonants. This is safe because Vowels 128 * cannot happen in a consonant syllable. The plus side however is, we can call the 129 * consonant syllable logic from the vowel syllable function and get it all right! */ 130 return !!(FLAG (info.indic_category()) & (FLAG (OT_C) | FLAG (OT_Ra) | FLAG (OT_V) | FLAG (OT_NBSP) | FLAG (OT_DOTTEDCIRCLE))); 131} 132 133static bool 134is_halant_or_coeng (const hb_glyph_info_t &info) 135{ 136 return !!(FLAG (info.indic_category()) & (FLAG (OT_H) | FLAG (OT_Coeng))); 137} 138 139struct feature_list_t { 140 hb_tag_t tag; 141 hb_bool_t is_global; 142}; 143 144/* These features are applied one at a time, given the order in this table. */ 145static const feature_list_t 146indic_basic_features[] = 147{ 148 {HB_TAG('n','u','k','t'), true}, 149 {HB_TAG('a','k','h','n'), false}, 150 {HB_TAG('r','p','h','f'), false}, 151 {HB_TAG('r','k','r','f'), true}, 152 {HB_TAG('p','r','e','f'), false}, 153 {HB_TAG('b','l','w','f'), false}, 154 {HB_TAG('h','a','l','f'), false}, 155 {HB_TAG('a','b','v','f'), false}, 156 {HB_TAG('p','s','t','f'), false}, 157 {HB_TAG('c','f','a','r'), false}, 158 {HB_TAG('c','j','c','t'), false}, 159 {HB_TAG('v','a','t','u'), true}, 160}; 161 162/* Same order as the indic_basic_features array */ 163enum { 164 _NUKT, 165 AKHN, 166 RPHF, 167 _RKRF, 168 PREF, 169 BLWF, 170 HALF, 171 ABVF, 172 PSTF, 173 CFAR, 174 CJCT, 175 VATU 176}; 177 178/* These features are applied all at once. */ 179static const feature_list_t 180indic_other_features[] = 181{ 182 {HB_TAG('i','n','i','t'), false}, 183 {HB_TAG('p','r','e','s'), true}, 184 {HB_TAG('a','b','v','s'), true}, 185 {HB_TAG('b','l','w','s'), true}, 186 {HB_TAG('p','s','t','s'), true}, 187 {HB_TAG('h','a','l','n'), true}, 188 189 {HB_TAG('d','i','s','t'), true}, 190 {HB_TAG('a','b','v','m'), true}, 191 {HB_TAG('b','l','w','m'), true}, 192}; 193 194 195static void 196initial_reordering (const hb_ot_map_t *map, 197 hb_face_t *face, 198 hb_buffer_t *buffer, 199 void *user_data HB_UNUSED); 200static void 201final_reordering (const hb_ot_map_t *map, 202 hb_face_t *face, 203 hb_buffer_t *buffer, 204 void *user_data HB_UNUSED); 205 206void 207_hb_ot_shape_complex_collect_features_indic (hb_ot_map_builder_t *map, 208 const hb_segment_properties_t *props HB_UNUSED) 209{ 210 map->add_bool_feature (HB_TAG('l','o','c','l')); 211 /* The Indic specs do not require ccmp, but we apply it here since if 212 * there is a use of it, it's typically at the beginning. */ 213 map->add_bool_feature (HB_TAG('c','c','m','p')); 214 215 map->add_gsub_pause (initial_reordering, NULL); 216 217 for (unsigned int i = 0; i < ARRAY_LENGTH (indic_basic_features); i++) { 218 map->add_bool_feature (indic_basic_features[i].tag, indic_basic_features[i].is_global); 219 map->add_gsub_pause (NULL, NULL); 220 } 221 222 map->add_gsub_pause (final_reordering, NULL); 223 224 for (unsigned int i = 0; i < ARRAY_LENGTH (indic_other_features); i++) 225 map->add_bool_feature (indic_other_features[i].tag, indic_other_features[i].is_global); 226} 227 228void 229_hb_ot_shape_complex_override_features_indic (hb_ot_map_builder_t *map, 230 const hb_segment_properties_t *props HB_UNUSED) 231{ 232 /* Uniscribe does not apply 'kern'. */ 233 if (indic_options ().uniscribe_bug_compatible) 234 map->add_feature (HB_TAG('k','e','r','n'), 0, true); 235} 236 237 238hb_ot_shape_normalization_mode_t 239_hb_ot_shape_complex_normalization_preference_indic (void) 240{ 241 /* We want split matras decomposed by the common shaping logic. */ 242 return HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED; 243} 244 245 246void 247_hb_ot_shape_complex_setup_masks_indic (hb_ot_map_t *map HB_UNUSED, 248 hb_buffer_t *buffer, 249 hb_font_t *font HB_UNUSED) 250{ 251 HB_BUFFER_ALLOCATE_VAR (buffer, indic_category); 252 HB_BUFFER_ALLOCATE_VAR (buffer, indic_position); 253 254 /* We cannot setup masks here. We save information about characters 255 * and setup masks later on in a pause-callback. */ 256 257 unsigned int count = buffer->len; 258 for (unsigned int i = 0; i < count; i++) 259 { 260 hb_glyph_info_t &info = buffer->info[i]; 261 unsigned int type = get_indic_categories (info.codepoint); 262 263 info.indic_category() = type & 0x0F; 264 info.indic_position() = type >> 4; 265 266 /* The spec says U+0952 is OT_A. However, testing shows that Uniscribe 267 * treats U+0951..U+0952 all as OT_VD. 268 * TESTS: 269 * U+092E,U+0947,U+0952 270 * U+092E,U+0952,U+0947 271 * U+092E,U+0947,U+0951 272 * U+092E,U+0951,U+0947 273 * */ 274 if (unlikely (hb_in_range<hb_codepoint_t> (info.codepoint, 0x0951, 0x0954))) 275 info.indic_category() = OT_VD; 276 277 if (info.indic_category() == OT_X && 278 unlikely (hb_in_range<hb_codepoint_t> (info.codepoint, 0x17CB, 0x17D0))) 279 info.indic_category() = OT_RS; 280 281 /* Khmer Virama is different since it can be used to form a final consonant. */ 282 if (unlikely (info.codepoint == 0x17D2)) 283 info.indic_category() = OT_Coeng; 284 285 if (is_consonant (info)) { 286 info.indic_position() = consonant_position (info.codepoint); 287 if (is_ra (info.codepoint)) 288 info.indic_category() = OT_Ra; 289 } else if (info.indic_category() == OT_RS) { 290 info.indic_position() = POS_ABOVE_M; 291 } else if (info.indic_category() == OT_SM || 292 info.indic_category() == OT_VD) { 293 info.indic_position() = POS_SMVD; 294 } else if (unlikely (info.codepoint == 0x200C)) 295 info.indic_category() = OT_ZWNJ; 296 else if (unlikely (info.codepoint == 0x200D)) 297 info.indic_category() = OT_ZWJ; 298 else if (unlikely (info.codepoint == 0x25CC)) 299 info.indic_category() = OT_DOTTEDCIRCLE; 300 } 301} 302 303static int 304compare_indic_order (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb) 305{ 306 int a = pa->indic_position(); 307 int b = pb->indic_position(); 308 309 return a < b ? -1 : a == b ? 0 : +1; 310} 311 312/* Rules from: 313 * https://www.microsoft.com/typography/otfntdev/devanot/shaping.aspx */ 314 315static void 316initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buffer, hb_mask_t *basic_mask_array, 317 unsigned int start, unsigned int end) 318{ 319 hb_glyph_info_t *info = buffer->info; 320 321 322 /* 1. Find base consonant: 323 * 324 * The shaping engine finds the base consonant of the syllable, using the 325 * following algorithm: starting from the end of the syllable, move backwards 326 * until a consonant is found that does not have a below-base or post-base 327 * form (post-base forms have to follow below-base forms), or that is not a 328 * pre-base reordering Ra, or arrive at the first consonant. The consonant 329 * stopped at will be the base. 330 * 331 * o If the syllable starts with Ra + Halant (in a script that has Reph) 332 * and has more than one consonant, Ra is excluded from candidates for 333 * base consonants. 334 */ 335 336 unsigned int base = end; 337 bool has_reph = false; 338 339 { 340 /* -> If the syllable starts with Ra + Halant (in a script that has Reph) 341 * and has more than one consonant, Ra is excluded from candidates for 342 * base consonants. */ 343 unsigned int limit = start; 344 if (basic_mask_array[RPHF] && 345 start + 3 <= end && 346 info[start].indic_category() == OT_Ra && 347 info[start + 1].indic_category() == OT_H && 348 !is_joiner (info[start + 2])) 349 { 350 limit += 2; 351 base = start; 352 has_reph = true; 353 }; 354 355 if (basic_mask_array[HALF]) 356 { 357 /* -> starting from the end of the syllable, move backwards */ 358 unsigned int i = end; 359 do { 360 i--; 361 /* -> until a consonant is found */ 362 if (is_consonant (info[i])) 363 { 364 /* -> that does not have a below-base or post-base form 365 * (post-base forms have to follow below-base forms), */ 366 if (info[i].indic_position() != POS_BELOW_C && 367 info[i].indic_position() != POS_POST_C) 368 { 369 base = i; 370 break; 371 } 372 373 /* -> or that is not a pre-base reordering Ra, 374 * 375 * TODO 376 */ 377 378 /* -> or arrive at the first consonant. The consonant stopped at will 379 * be the base. */ 380 base = i; 381 } 382 else 383 if (is_joiner (info[i])) 384 break; 385 } while (i > limit); 386 } 387 else 388 { 389 /* In scripts without half forms (eg. Khmer), the first consonant is always the base. */ 390 391 if (!has_reph) 392 base = limit; 393 } 394 395 if (base < start) 396 base = start; /* Just in case... */ 397 398 399 /* -> If the syllable starts with Ra + Halant (in a script that has Reph) 400 * and has more than one consonant, Ra is excluded from candidates for 401 * base consonants. */ 402 if (has_reph && base == start) { 403 /* Have no other consonant, so Reph is not formed and Ra becomes base. */ 404 has_reph = false; 405 } 406 } 407 408 409 /* 2. Decompose and reorder Matras: 410 * 411 * Each matra and any syllable modifier sign in the cluster are moved to the 412 * appropriate position relative to the consonant(s) in the cluster. The 413 * shaping engine decomposes two- or three-part matras into their constituent 414 * parts before any repositioning. Matra characters are classified by which 415 * consonant in a conjunct they have affinity for and are reordered to the 416 * following positions: 417 * 418 * o Before first half form in the syllable 419 * o After subjoined consonants 420 * o After post-form consonant 421 * o After main consonant (for above marks) 422 * 423 * IMPLEMENTATION NOTES: 424 * 425 * The normalize() routine has already decomposed matras for us, so we don't 426 * need to worry about that. 427 */ 428 429 430 /* 3. Reorder marks to canonical order: 431 * 432 * Adjacent nukta and halant or nukta and vedic sign are always repositioned 433 * if necessary, so that the nukta is first. 434 * 435 * IMPLEMENTATION NOTES: 436 * 437 * We don't need to do this: the normalize() routine already did this for us. 438 */ 439 440 441 /* Reorder characters */ 442 443 for (unsigned int i = start; i < base; i++) 444 info[i].indic_position() = POS_PRE_C; 445 446 info[base].indic_position() = POS_BASE_C; 447 448 /* Mark final consonants. A final consonant is one appearing after a matra, 449 * like in Khmer. */ 450 for (unsigned int i = base + 1; i < end; i++) 451 if (info[i].indic_category() == OT_M) { 452 for (unsigned int j = i + 1; j < end; j++) 453 if (is_consonant (info[j])) { 454 info[j].indic_position() = POS_FINAL_C; 455 break; 456 } 457 break; 458 } 459 460 /* Handle beginning Ra */ 461 if (has_reph) 462 info[start].indic_position() = POS_RA_TO_BECOME_REPH; 463 464 /* For old-style Indic script tags, move the first post-base Halant after 465 * last consonant. */ 466 if (IS_OLD_INDIC_TAG (map->get_chosen_script (0))) { 467 for (unsigned int i = base + 1; i < end; i++) 468 if (info[i].indic_category() == OT_H) { 469 unsigned int j; 470 for (j = end - 1; j > i; j--) 471 if (is_consonant (info[j])) 472 break; 473 if (j > i) { 474 /* Move Halant to after last consonant. */ 475 hb_glyph_info_t t = info[i]; 476 memmove (&info[i], &info[i + 1], (j - i) * sizeof (info[0])); 477 info[j] = t; 478 } 479 break; 480 } 481 } 482 483 /* Attach ZWJ, ZWNJ, nukta, and halant to previous char to move with them. */ 484 if (!indic_options ().uniscribe_bug_compatible) 485 { 486 /* Please update the Uniscribe branch when touching this! */ 487 for (unsigned int i = start + 1; i < end; i++) 488 if ((FLAG (info[i].indic_category()) & (FLAG (OT_ZWNJ) | FLAG (OT_ZWJ) | FLAG (OT_N) | FLAG (OT_H)))) 489 info[i].indic_position() = info[i - 1].indic_position(); 490 } else { 491 /* 492 * Uniscribe doesn't move the Halant with Left Matra. 493 * TEST: U+092B,U+093F,U+094DE 494 */ 495 /* Please update the non-Uniscribe branch when touching this! */ 496 for (unsigned int i = start + 1; i < end; i++) 497 if ((FLAG (info[i].indic_category()) & (FLAG (OT_ZWNJ) | FLAG (OT_ZWJ) | FLAG (OT_N) | FLAG (OT_H)))) { 498 info[i].indic_position() = info[i - 1].indic_position(); 499 if (info[i].indic_category() == OT_H && info[i].indic_position() == POS_PRE_M) 500 for (unsigned int j = i; j > start; j--) 501 if (info[j - 1].indic_position() != POS_PRE_M) { 502 info[i].indic_position() = info[j - 1].indic_position(); 503 break; 504 } 505 } 506 } 507 /* Re-attach ZWJ, ZWNJ, and halant to next char, for after-base consonants. */ 508 { 509 unsigned int last_halant = end; 510 for (unsigned int i = base + 1; i < end; i++) 511 if (is_halant_or_coeng (info[i])) 512 last_halant = i; 513 else if (is_consonant (info[i])) { 514 for (unsigned int j = last_halant; j < i; j++) 515 info[j].indic_position() = info[i].indic_position(); 516 } 517 } 518 519 /* We do bubble-sort, skip malicious clusters attempts */ 520 if (end - start < 64) 521 { 522 /* Sit tight, rock 'n roll! */ 523 hb_bubble_sort (info + start, end - start, compare_indic_order); 524 /* Find base again */ 525 base = end; 526 for (unsigned int i = start; i < end; i++) 527 if (info[i].indic_position() == POS_BASE_C) { 528 base = i; 529 break; 530 } 531 } 532 533 /* Setup masks now */ 534 535 { 536 hb_mask_t mask; 537 538 /* Reph */ 539 for (unsigned int i = start; i < end && info[i].indic_position() == POS_RA_TO_BECOME_REPH; i++) 540 info[i].mask |= basic_mask_array[RPHF]; 541 542 /* Pre-base */ 543 mask = basic_mask_array[HALF] | basic_mask_array[AKHN] | basic_mask_array[CJCT]; 544 for (unsigned int i = start; i < base; i++) 545 info[i].mask |= mask; 546 /* Base */ 547 mask = basic_mask_array[AKHN] | basic_mask_array[CJCT]; 548 info[base].mask |= mask; 549 /* Post-base */ 550 mask = basic_mask_array[BLWF] | basic_mask_array[ABVF] | basic_mask_array[PSTF] | basic_mask_array[CJCT]; 551 for (unsigned int i = base + 1; i < end; i++) 552 info[i].mask |= mask; 553 } 554 555 /* XXX This will not match for old-Indic spec since the Halant-Ra order is reversed already. */ 556 if (basic_mask_array[PREF] && base + 3 <= end) 557 { 558 /* Find a Halant,Ra sequence and mark it fore pre-base reordering processing. */ 559 for (unsigned int i = base + 1; i + 1 < end; i++) 560 if (is_halant_or_coeng (info[i]) && 561 info[i + 1].indic_category() == OT_Ra) 562 { 563 info[i++].mask |= basic_mask_array[PREF]; 564 info[i++].mask |= basic_mask_array[PREF]; 565 566 /* Mark the subsequent stuff with 'cfar'. Used in Khmer. 567 * Read the feature spec. 568 * This allows distinguishing the following cases with MS Khmer fonts: 569 * U+1784,U+17D2,U+179A,U+17D2,U+1782 570 * U+1784,U+17D2,U+1782,U+17D2,U+179A 571 */ 572 for (; i < end; i++) 573 info[i].mask |= basic_mask_array[CFAR]; 574 575 break; 576 } 577 } 578 579 /* Apply ZWJ/ZWNJ effects */ 580 for (unsigned int i = start + 1; i < end; i++) 581 if (is_joiner (info[i])) { 582 bool non_joiner = info[i].indic_category() == OT_ZWNJ; 583 unsigned int j = i; 584 585 do { 586 j--; 587 588 info[j].mask &= ~basic_mask_array[CJCT]; 589 if (non_joiner) 590 info[j].mask &= ~basic_mask_array[HALF]; 591 592 } while (j > start && !is_consonant (info[j])); 593 } 594} 595 596 597static void 598initial_reordering_vowel_syllable (const hb_ot_map_t *map, 599 hb_buffer_t *buffer, 600 hb_mask_t *basic_mask_array, 601 unsigned int start, unsigned int end) 602{ 603 /* We made the vowels look like consonants. So let's call the consonant logic! */ 604 initial_reordering_consonant_syllable (map, buffer, basic_mask_array, start, end); 605} 606 607static void 608initial_reordering_standalone_cluster (const hb_ot_map_t *map, 609 hb_buffer_t *buffer, 610 hb_mask_t *basic_mask_array, 611 unsigned int start, unsigned int end) 612{ 613 /* We treat NBSP/dotted-circle as if they are consonants, so we should just chain. 614 * Only if not in compatibility mode that is... */ 615 616 if (indic_options ().uniscribe_bug_compatible) 617 { 618 /* For dotted-circle, this is what Uniscribe does: 619 * If dotted-circle is the last glyph, it just does nothing. 620 * Ie. It doesn't form Reph. */ 621 if (buffer->info[end - 1].indic_category() == OT_DOTTEDCIRCLE) 622 return; 623 } 624 625 initial_reordering_consonant_syllable (map, buffer, basic_mask_array, start, end); 626} 627 628static void 629initial_reordering_non_indic (const hb_ot_map_t *map HB_UNUSED, 630 hb_buffer_t *buffer HB_UNUSED, 631 hb_mask_t *basic_mask_array HB_UNUSED, 632 unsigned int start HB_UNUSED, unsigned int end HB_UNUSED) 633{ 634 /* Nothing to do right now. If we ever switch to using the output 635 * buffer in the reordering process, we'd need to next_glyph() here. */ 636} 637 638#include "hb-ot-shape-complex-indic-machine.hh" 639 640static void 641initial_reordering (const hb_ot_map_t *map, 642 hb_face_t *face HB_UNUSED, 643 hb_buffer_t *buffer, 644 void *user_data HB_UNUSED) 645{ 646 hb_mask_t basic_mask_array[ARRAY_LENGTH (indic_basic_features)] = {0}; 647 unsigned int num_masks = ARRAY_LENGTH (indic_basic_features); 648 for (unsigned int i = 0; i < num_masks; i++) 649 basic_mask_array[i] = map->get_1_mask (indic_basic_features[i].tag); 650 651 find_syllables (map, buffer, basic_mask_array); 652} 653 654static void 655final_reordering_syllable (hb_buffer_t *buffer, 656 hb_mask_t init_mask, hb_mask_t pref_mask, 657 unsigned int start, unsigned int end) 658{ 659 hb_glyph_info_t *info = buffer->info; 660 661 /* 4. Final reordering: 662 * 663 * After the localized forms and basic shaping forms GSUB features have been 664 * applied (see below), the shaping engine performs some final glyph 665 * reordering before applying all the remaining font features to the entire 666 * cluster. 667 */ 668 669 /* Find base again */ 670 unsigned int base = end; 671 for (unsigned int i = start; i < end; i++) 672 if (info[i].indic_position() == POS_BASE_C) { 673 base = i; 674 break; 675 } 676 677 unsigned int start_of_last_cluster = base; 678 679 /* o Reorder matras: 680 * 681 * If a pre-base matra character had been reordered before applying basic 682 * features, the glyph can be moved closer to the main consonant based on 683 * whether half-forms had been formed. Actual position for the matra is 684 * defined as “after last standalone halant glyph, after initial matra 685 * position and before the main consonant”. If ZWJ or ZWNJ follow this 686 * halant, position is moved after it. 687 */ 688 689 if (start < base) /* Otherwise there can't be any pre-base matra characters. */ 690 { 691 unsigned int new_pos = base - 1; 692 while (new_pos > start && 693 !(FLAG (info[new_pos].indic_category()) & (FLAG (OT_M) | FLAG (OT_H) | FLAG (OT_Coeng)))) 694 new_pos--; 695 /* If we found no Halant we are done. Otherwise only proceed if the Halant does 696 * not belong to the Matra itself! */ 697 if (is_halant_or_coeng (info[new_pos]) && 698 info[new_pos].indic_position() != POS_PRE_M) { 699 /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */ 700 if (new_pos + 1 < end && is_joiner (info[new_pos + 1])) 701 new_pos++; 702 703 /* Now go see if there's actually any matras... */ 704 for (unsigned int i = new_pos; i > start; i--) 705 if (info[i - 1].indic_position () == POS_PRE_M) 706 { 707 unsigned int old_pos = i - 1; 708 hb_glyph_info_t tmp = info[old_pos]; 709 memmove (&info[old_pos], &info[old_pos + 1], (new_pos - old_pos) * sizeof (info[0])); 710 info[new_pos] = tmp; 711 start_of_last_cluster = MIN (new_pos, start_of_last_cluster); 712 new_pos--; 713 } 714 } 715 } 716 717 718 /* o Reorder reph: 719 * 720 * Reph’s original position is always at the beginning of the syllable, 721 * (i.e. it is not reordered at the character reordering stage). However, 722 * it will be reordered according to the basic-forms shaping results. 723 * Possible positions for reph, depending on the script, are; after main, 724 * before post-base consonant forms, and after post-base consonant forms. 725 */ 726 727 /* If there's anything after the Ra that has the REPH pos, it ought to be halant. 728 * Which means that the font has failed to ligate the Reph. In which case, we 729 * shouldn't move. */ 730 if (start + 1 < end && 731 info[start].indic_position() == POS_RA_TO_BECOME_REPH && 732 info[start + 1].indic_position() != POS_RA_TO_BECOME_REPH) 733 { 734 unsigned int new_reph_pos; 735 736 enum reph_position_t { 737 REPH_AFTER_MAIN, 738 REPH_BEFORE_SUBSCRIPT, 739 REPH_AFTER_SUBSCRIPT, 740 REPH_BEFORE_POSTSCRIPT, 741 REPH_AFTER_POSTSCRIPT 742 } reph_pos; 743 744 /* XXX Figure out old behavior too */ 745 switch ((hb_tag_t) buffer->props.script) 746 { 747 case HB_SCRIPT_MALAYALAM: 748 case HB_SCRIPT_ORIYA: 749 reph_pos = REPH_AFTER_MAIN; 750 break; 751 752 case HB_SCRIPT_GURMUKHI: 753 reph_pos = REPH_BEFORE_SUBSCRIPT; 754 break; 755 756 case HB_SCRIPT_BENGALI: 757 reph_pos = REPH_AFTER_SUBSCRIPT; 758 break; 759 760 default: 761 case HB_SCRIPT_DEVANAGARI: 762 case HB_SCRIPT_GUJARATI: 763 reph_pos = REPH_BEFORE_POSTSCRIPT; 764 break; 765 766 case HB_SCRIPT_KANNADA: 767 case HB_SCRIPT_TAMIL: 768 case HB_SCRIPT_TELUGU: 769 reph_pos = REPH_AFTER_POSTSCRIPT; 770 break; 771 } 772 773 /* 1. If reph should be positioned after post-base consonant forms, 774 * proceed to step 5. 775 */ 776 if (reph_pos == REPH_AFTER_POSTSCRIPT) 777 { 778 goto reph_step_5; 779 } 780 781 /* 2. If the reph repositioning class is not after post-base: target 782 * position is after the first explicit halant glyph between the 783 * first post-reph consonant and last main consonant. If ZWJ or ZWNJ 784 * are following this halant, position is moved after it. If such 785 * position is found, this is the target position. Otherwise, 786 * proceed to the next step. 787 * 788 * Note: in old-implementation fonts, where classifications were 789 * fixed in shaping engine, there was no case where reph position 790 * will be found on this step. 791 */ 792 { 793 new_reph_pos = start + 1; 794 while (new_reph_pos < base && !is_halant_or_coeng (info[new_reph_pos])) 795 new_reph_pos++; 796 797 if (new_reph_pos < base && is_halant_or_coeng (info[new_reph_pos])) { 798 /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */ 799 if (new_reph_pos + 1 < base && is_joiner (info[new_reph_pos + 1])) 800 new_reph_pos++; 801 goto reph_move; 802 } 803 } 804 805 /* 3. If reph should be repositioned after the main consonant: find the 806 * first consonant not ligated with main, or find the first 807 * consonant that is not a potential pre-base reordering Ra. 808 */ 809 if (reph_pos == REPH_AFTER_MAIN) 810 { 811 new_reph_pos = base; 812 /* XXX Skip potential pre-base reordering Ra. */ 813 while (new_reph_pos < end && 814 !( FLAG (info[new_reph_pos + 1].indic_position()) & (FLAG (POS_BELOW_C) | FLAG (POS_POST_C) | FLAG (POS_POST_M) | FLAG (POS_SMVD)))) 815 new_reph_pos++; 816 if (new_reph_pos < end) 817 goto reph_move; 818 } 819 820 /* 4. If reph should be positioned before post-base consonant, find 821 * first post-base classified consonant not ligated with main. If no 822 * consonant is found, the target position should be before the 823 * first matra, syllable modifier sign or vedic sign. 824 */ 825 /* This is our take on what step 4 is trying to say (and failing, BADLY). */ 826 if (reph_pos == REPH_AFTER_SUBSCRIPT) 827 { 828 new_reph_pos = base; 829 while (new_reph_pos < end && 830 !( FLAG (info[new_reph_pos + 1].indic_position()) & (FLAG (POS_POST_C) | FLAG (POS_POST_M) | FLAG (POS_SMVD)))) 831 new_reph_pos++; 832 if (new_reph_pos < end) 833 goto reph_move; 834 } 835 836 /* 5. If no consonant is found in steps 3 or 4, move reph to a position 837 * immediately before the first post-base matra, syllable modifier 838 * sign or vedic sign that has a reordering class after the intended 839 * reph position. For example, if the reordering position for reph 840 * is post-main, it will skip above-base matras that also have a 841 * post-main position. 842 */ 843 reph_step_5: 844 { 845 /* XXX */ 846 } 847 848 /* 6. Otherwise, reorder reph to the end of the syllable. 849 */ 850 { 851 new_reph_pos = end - 1; 852 while (new_reph_pos > start && info[new_reph_pos].indic_position() == POS_SMVD) 853 new_reph_pos--; 854 855 /* 856 * If the Reph is to be ending up after a Matra,Halant sequence, 857 * position it before that Halant so it can interact with the Matra. 858 * However, if it's a plain Consonant,Halant we shouldn't do that. 859 * Uniscribe doesn't do this. 860 * TEST: U+0930,U+094D,U+0915,U+094B,U+094D 861 */ 862 if (!indic_options ().uniscribe_bug_compatible && 863 unlikely (is_halant_or_coeng (info[new_reph_pos]))) { 864 for (unsigned int i = base + 1; i < new_reph_pos; i++) 865 if (info[i].indic_category() == OT_M) { 866 /* Ok, got it. */ 867 new_reph_pos--; 868 } 869 } 870 goto reph_move; 871 } 872 873 reph_move: 874 { 875 /* Move */ 876 hb_glyph_info_t reph = info[start]; 877 memmove (&info[start], &info[start + 1], (new_reph_pos - start) * sizeof (info[0])); 878 info[new_reph_pos] = reph; 879 start_of_last_cluster = start; /* Yay, one big cluster! */ 880 } 881 } 882 883 884 /* o Reorder pre-base reordering consonants: 885 * 886 * If a pre-base reordering consonant is found, reorder it according to 887 * the following rules: 888 */ 889 890 if (pref_mask && base + 1 < end) /* Otherwise there can't be any pre-base reordering Ra. */ 891 { 892 for (unsigned int i = base + 1; i < end; i++) 893 if ((info[i].mask & pref_mask) != 0) 894 { 895 /* 1. Only reorder a glyph produced by substitution during application 896 * of the <pref> feature. (Note that a font may shape a Ra consonant with 897 * the feature generally but block it in certain contexts.) 898 */ 899 if (i + 1 == end || (info[i + 1].mask & pref_mask) == 0) 900 { 901 /* 902 * 2. Try to find a target position the same way as for pre-base matra. 903 * If it is found, reorder pre-base consonant glyph. 904 * 905 * 3. If position is not found, reorder immediately before main 906 * consonant. 907 */ 908 909 unsigned int new_pos = base; 910 while (new_pos > start + 1 && 911 !(FLAG (info[new_pos - 1].indic_category()) & (FLAG (OT_M) | FLAG (OT_H) | FLAG (OT_Coeng)))) 912 new_pos--; 913 914 if (new_pos > start && is_halant_or_coeng (info[new_pos - 1])) 915 /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */ 916 if (new_pos < end && is_joiner (info[new_pos])) 917 new_pos++; 918 919 { 920 unsigned int old_pos = i; 921 hb_glyph_info_t tmp = info[old_pos]; 922 memmove (&info[new_pos + 1], &info[new_pos], (old_pos - new_pos) * sizeof (info[0])); 923 info[new_pos] = tmp; 924 start_of_last_cluster = MIN (new_pos, start_of_last_cluster); 925 } 926 } 927 928 break; 929 } 930 } 931 932 933 /* Apply 'init' to the Left Matra if it's a word start. */ 934 if (info[start].indic_position () == POS_PRE_M && 935 (!start || 936 !(FLAG (_hb_glyph_info_get_general_category (&info[start - 1])) & 937 (FLAG (HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER) | 938 FLAG (HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER) | 939 FLAG (HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER) | 940 FLAG (HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER) | 941 FLAG (HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER) | 942 FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) | 943 FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) | 944 FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK))))) 945 info[start].mask |= init_mask; 946 947 948 949 /* Finish off the clusters and go home! */ 950 951 if (!indic_options ().uniscribe_bug_compatible) 952 { 953 /* This is what Uniscribe does. Ie. add cluster boundaries after Halant,ZWNJ. 954 * This means, half forms are submerged into the main consonants cluster. 955 * This is unnecessary, and makes cursor positioning harder, but that's what 956 * Uniscribe does. */ 957 unsigned int cluster_start = start; 958 for (unsigned int i = start + 1; i < start_of_last_cluster; i++) 959 if (is_halant_or_coeng (info[i - 1]) && info[i].indic_category() == OT_ZWNJ) { 960 i++; 961 buffer->merge_clusters (cluster_start, i); 962 cluster_start = i; 963 } 964 start_of_last_cluster = cluster_start; 965 } 966 967 buffer->merge_clusters (start_of_last_cluster, end); 968} 969 970 971static void 972final_reordering (const hb_ot_map_t *map, 973 hb_face_t *face HB_UNUSED, 974 hb_buffer_t *buffer, 975 void *user_data HB_UNUSED) 976{ 977 unsigned int count = buffer->len; 978 if (!count) return; 979 980 hb_mask_t init_mask = map->get_1_mask (HB_TAG('i','n','i','t')); 981 hb_mask_t pref_mask = map->get_1_mask (HB_TAG('p','r','e','f')); 982 983 hb_glyph_info_t *info = buffer->info; 984 unsigned int last = 0; 985 unsigned int last_syllable = info[0].syllable(); 986 for (unsigned int i = 1; i < count; i++) 987 if (last_syllable != info[i].syllable()) { 988 final_reordering_syllable (buffer, init_mask, pref_mask, last, i); 989 last = i; 990 last_syllable = info[last].syllable(); 991 } 992 final_reordering_syllable (buffer, init_mask, pref_mask, last, count); 993 994 HB_BUFFER_DEALLOCATE_VAR (buffer, indic_category); 995 HB_BUFFER_DEALLOCATE_VAR (buffer, indic_position); 996} 997 998 999 1000