harfbuzz-indic.cpp revision 873b7b3e703e0f228f8d2d12896def00e281adf2
1/* 2 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies) 3 * 4 * This is part of HarfBuzz, an OpenType Layout engine library. 5 * 6 * Permission is hereby granted, without written agreement and without 7 * license or royalty fees, to use, copy, modify, and distribute this 8 * software and its documentation for any purpose, provided that the 9 * above copyright notice and the following two paragraphs appear in 10 * all copies of this software. 11 * 12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 16 * DAMAGE. 17 * 18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 23 */ 24 25#include "harfbuzz-shaper.h" 26#include "harfbuzz-shaper-private.h" 27 28#include <assert.h> 29#include <stdio.h> 30 31#define FLAG(x) (1 << (x)) 32 33static HB_Bool isLetter(HB_UChar16 ucs) 34{ 35 const int test = FLAG(HB_Letter_Uppercase) | 36 FLAG(HB_Letter_Lowercase) | 37 FLAG(HB_Letter_Titlecase) | 38 FLAG(HB_Letter_Modifier) | 39 FLAG(HB_Letter_Other); 40 // BEGIN android-changed 41 // Check the value is zero or not instead of casting int to HB_Bool(unsigned char). 42 return (FLAG(HB_GetUnicodeCharCategory(ucs)) & test) != 0; 43 // END android-changed 44} 45 46static HB_Bool isMark(HB_UChar16 ucs) 47{ 48 const int test = FLAG(HB_Mark_NonSpacing) | 49 FLAG(HB_Mark_SpacingCombining) | 50 FLAG(HB_Mark_Enclosing); 51 // BEGIN android-changed 52 // Check the value is zero or not instead of casting int to HB_Bool(unsigned char). 
53 return (FLAG(HB_GetUnicodeCharCategory(ucs)) & test) != 0; 54 // END android-changed 55} 56 57enum Form { 58 Invalid = 0x0, 59 UnknownForm = Invalid, 60 Consonant, 61 Nukta, 62 Halant, 63 Matra, 64 VowelMark, 65 StressMark, 66 IndependentVowel, 67 LengthMark, 68 Control, 69 Other 70}; 71 72static const unsigned char indicForms[0xe00-0x900] = { 73 // Devangari 74 Invalid, VowelMark, VowelMark, VowelMark, 75 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, 76 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, 77 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, 78 79 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, 80 IndependentVowel, Consonant, Consonant, Consonant, 81 Consonant, Consonant, Consonant, Consonant, 82 Consonant, Consonant, Consonant, Consonant, 83 84 Consonant, Consonant, Consonant, Consonant, 85 Consonant, Consonant, Consonant, Consonant, 86 Consonant, Consonant, Consonant, Consonant, 87 Consonant, Consonant, Consonant, Consonant, 88 89 Consonant, Consonant, Consonant, Consonant, 90 Consonant, Consonant, Consonant, Consonant, 91 Consonant, Consonant, UnknownForm, UnknownForm, 92 Nukta, Other, Matra, Matra, 93 94 Matra, Matra, Matra, Matra, 95 Matra, Matra, Matra, Matra, 96 Matra, Matra, Matra, Matra, 97 Matra, Halant, UnknownForm, UnknownForm, 98 99 Other, StressMark, StressMark, StressMark, 100 StressMark, UnknownForm, UnknownForm, UnknownForm, 101 Consonant, Consonant, Consonant, Consonant, 102 Consonant, Consonant, Consonant, Consonant, 103 104 IndependentVowel, IndependentVowel, VowelMark, VowelMark, 105 Other, Other, Other, Other, 106 Other, Other, Other, Other, 107 Other, Other, Other, Other, 108 109 Other, Other, Other, Other, 110 Other, Other, Other, Other, 111 Other, Other, Other, Consonant, 112 Consonant, Consonant /* ??? 
*/, Consonant, Consonant, 113 114 // Bengali 115 Invalid, VowelMark, VowelMark, VowelMark, 116 Invalid, IndependentVowel, IndependentVowel, IndependentVowel, 117 IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, 118 IndependentVowel, Invalid, Invalid, IndependentVowel, 119 120 IndependentVowel, Invalid, Invalid, IndependentVowel, 121 IndependentVowel, Consonant, Consonant, Consonant, 122 Consonant, Consonant, Consonant, Consonant, 123 Consonant, Consonant, Consonant, Consonant, 124 125 Consonant, Consonant, Consonant, Consonant, 126 Consonant, Consonant, Consonant, Consonant, 127 Consonant, Invalid, Consonant, Consonant, 128 Consonant, Consonant, Consonant, Consonant, 129 130 Consonant, Invalid, Consonant, Invalid, 131 Invalid, Invalid, Consonant, Consonant, 132 Consonant, Consonant, UnknownForm, UnknownForm, 133 Nukta, Other, Matra, Matra, 134 135 Matra, Matra, Matra, Matra, 136 Matra, Invalid, Invalid, Matra, 137 Matra, Invalid, Invalid, Matra, 138 Matra, Halant, Consonant, UnknownForm, 139 140 Invalid, Invalid, Invalid, Invalid, 141 Invalid, Invalid, Invalid, VowelMark, 142 Invalid, Invalid, Invalid, Invalid, 143 Consonant, Consonant, Invalid, Consonant, 144 145 IndependentVowel, IndependentVowel, VowelMark, VowelMark, 146 Other, Other, Other, Other, 147 Other, Other, Other, Other, 148 Other, Other, Other, Other, 149 150 Consonant, Consonant, Other, Other, 151 Other, Other, Other, Other, 152 Other, Other, Other, Other, 153 Other, Other, Other, Other, 154 155 // Gurmukhi 156 Invalid, VowelMark, VowelMark, VowelMark, 157 Invalid, IndependentVowel, IndependentVowel, IndependentVowel, 158 IndependentVowel, IndependentVowel, IndependentVowel, Invalid, 159 Invalid, Invalid, Invalid, IndependentVowel, 160 161 IndependentVowel, Invalid, Invalid, IndependentVowel, 162 IndependentVowel, Consonant, Consonant, Consonant, 163 Consonant, Consonant, Consonant, Consonant, 164 Consonant, Consonant, Consonant, Consonant, 165 166 Consonant, Consonant, 
    // indicForms, Gurmukhi section continued: the entries below start at
    // U+0A22. Rows hold four code points; a blank line separates each run
    // of sixteen.
    Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Invalid, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,

    Consonant, Invalid, Consonant, Consonant,
    Invalid, Consonant, Consonant, Invalid,
    Consonant, Consonant, UnknownForm, UnknownForm,
    Nukta, Other, Matra, Matra,

    Matra, Matra, Matra, Invalid,
    Invalid, Invalid, Invalid, Matra,
    Matra, Invalid, Invalid, Matra,
    Matra, Halant, UnknownForm, UnknownForm,

    Invalid, Invalid, Invalid, Invalid,
    Invalid, UnknownForm, UnknownForm, UnknownForm,
    Invalid, Consonant, Consonant, Consonant,
    Consonant, Invalid, Consonant, Invalid,

    Other, Other, Invalid, Invalid,
    Other, Other, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,

    StressMark, StressMark, Consonant, Consonant,
    Other, Other, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,

    // Gujarati (U+0A80..U+0AFF)
    Invalid, VowelMark, VowelMark, VowelMark,
    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
    IndependentVowel, IndependentVowel, Invalid, IndependentVowel,

    IndependentVowel, IndependentVowel, Invalid, IndependentVowel,
    IndependentVowel, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,

    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Invalid, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,

    Consonant, Invalid, Consonant, Consonant,
    Invalid, Consonant, Consonant, Consonant,
    Consonant, Consonant, UnknownForm, UnknownForm,
    Nukta, Other, Matra, Matra,

    Matra, Matra, Matra, Matra,
    Matra, Matra, Invalid, Matra,
    Matra, Matra, Invalid, Matra,
    Matra, Halant, UnknownForm, UnknownForm,

    Other, UnknownForm, UnknownForm, UnknownForm,
    UnknownForm, UnknownForm, UnknownForm, UnknownForm,
    UnknownForm, UnknownForm, UnknownForm, UnknownForm,
    UnknownForm, UnknownForm, UnknownForm, UnknownForm,

    IndependentVowel, IndependentVowel, VowelMark, VowelMark,
    Other, Other, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,

    Other, Other, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,

    // Oriya (U+0B00..U+0B7F)
    Invalid, VowelMark, VowelMark, VowelMark,
    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
    IndependentVowel, Invalid, Invalid, IndependentVowel,

    IndependentVowel, Invalid, Invalid, IndependentVowel,
    IndependentVowel, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,

    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Invalid, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,

    Consonant, Invalid, Consonant, Consonant,
    Invalid, Consonant, Consonant, Consonant,
    Consonant, Consonant, UnknownForm, UnknownForm,
    Nukta, Other, Matra, Matra,

    Matra, Matra, Matra, Matra,
    Invalid, Invalid, Invalid, Matra,
    Matra, Invalid, Invalid, Matra,
    Matra, Halant, UnknownForm, UnknownForm,

    Other, Invalid, Invalid, Invalid,
    Invalid, UnknownForm, LengthMark, LengthMark,
    Invalid, Invalid, Invalid, Invalid,
    Consonant, Consonant, Invalid, Consonant,

    IndependentVowel, IndependentVowel, Invalid, Invalid,
    Invalid, Invalid, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,

    Other, Consonant, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,

    // Tamil (U+0B80..U+0BFF)
    Invalid, Invalid, VowelMark, Other,
    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
    IndependentVowel, IndependentVowel, IndependentVowel, Invalid,
    Invalid, Invalid, IndependentVowel, IndependentVowel,

    IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
    IndependentVowel, Consonant, Invalid, Invalid,
    Invalid, Consonant, Consonant, Invalid,
    Consonant, Invalid, Consonant, Consonant,

    Invalid, Invalid, Invalid, Consonant,
    Consonant, Invalid, Invalid, Invalid,
    Consonant, Consonant, Consonant, Invalid,
    Invalid, Invalid, Consonant, Consonant,

    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, UnknownForm, UnknownForm,
    Invalid, Invalid, Matra, Matra,

    Matra, Matra, Matra, Invalid,
    Invalid, Invalid, Matra, Matra,
    Matra, Invalid, Matra, Matra,
    Matra, Halant, Invalid, Invalid,

    Invalid, Invalid, Invalid, Invalid,
    Invalid, Invalid, Invalid, LengthMark,
    Invalid, Invalid, Invalid, Invalid,
    Invalid, Invalid, Invalid, Invalid,

    Invalid, Invalid, Invalid, Invalid,
    Invalid, Invalid, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,

    Other, Other, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,

    // Telugu (U+0C00..U+0C7F)
    Invalid, VowelMark, VowelMark, VowelMark,
    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
    IndependentVowel, Invalid, IndependentVowel, IndependentVowel,

    IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
    IndependentVowel, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,

    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Invalid, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,

    Consonant, Consonant, Consonant, Consonant,
    Invalid, Consonant, Consonant, Consonant,
    Consonant, Consonant, UnknownForm, UnknownForm,
    Invalid, Invalid, Matra, Matra,

    Matra, Matra, Matra, Matra,
    Matra, Invalid, Matra, Matra,
    Matra, Invalid, Matra, Matra,
    Matra, Halant, Invalid, Invalid,

    Invalid, Invalid, Invalid, Invalid,
    Invalid, LengthMark, Matra, Invalid,
    Invalid, Invalid, Invalid, Invalid,
    Invalid, Invalid, Invalid, Invalid,

    IndependentVowel, IndependentVowel, Invalid, Invalid,
    Invalid, Invalid, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,

    Other, Other, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,

    // Kannada (U+0C80..U+0CFF)
    Invalid, Invalid, VowelMark, VowelMark,
    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
    IndependentVowel, Invalid, IndependentVowel, IndependentVowel,

    IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
    IndependentVowel, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,

    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Invalid, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,

    Consonant, Consonant, Consonant, Consonant,
    Invalid, Consonant, Consonant, Consonant,
    Consonant, Consonant, UnknownForm, UnknownForm,
    Nukta, Other, Matra, Matra,

    Matra, Matra, Matra, Matra,
    Matra, Invalid, Matra, Matra,
    Matra, Invalid, Matra, Matra,
    Matra, Halant, Invalid, Invalid,

    Invalid, Invalid, Invalid, Invalid,
    Invalid, LengthMark, LengthMark, Invalid,
    Invalid, Invalid, Invalid, Invalid,
    Invalid, Invalid, Consonant, Invalid,

    IndependentVowel, IndependentVowel, VowelMark, VowelMark,
    Invalid, Invalid, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,

    Other, Other, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,

    // Malayalam (U+0D00..U+0D7F)
    Invalid, Invalid, VowelMark, VowelMark,
    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
    IndependentVowel, Invalid, IndependentVowel, IndependentVowel,

    IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
    IndependentVowel, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,

    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Invalid, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,

    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, UnknownForm, UnknownForm,
    Invalid, Invalid, Matra, Matra,

    Matra, Matra, Matra, Matra,
    Invalid, Invalid, Matra, Matra,
    Matra, Invalid, Matra, Matra,
    Matra, Halant, Invalid, Invalid,

    Invalid, Invalid, Invalid, Invalid,
    Invalid, Invalid, Invalid, Matra,
    Invalid, Invalid, Invalid, Invalid,
    Invalid, Invalid, Invalid, Invalid,

    IndependentVowel, IndependentVowel, Invalid, Invalid,
    Invalid, Invalid, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,

    Other, Other, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,

    // Sinhala (U+0D80..U+0DFF)
    Invalid, Invalid, VowelMark, VowelMark,
    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
    // indicForms, Sinhala section continued: the entries below start at
    // U+0D88 and run to the end of the table (U+0DFF).
    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,

    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
    IndependentVowel, IndependentVowel, IndependentVowel, Invalid,
    Invalid, Invalid, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,

    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,

    Consonant, Consonant, Invalid, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Invalid, Consonant, Invalid, Invalid,

    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Invalid,
    Invalid, Invalid, Halant, Invalid,
    Invalid, Invalid, Invalid, Matra,

    Matra, Matra, Matra, Matra,
    Matra, Invalid, Matra, Invalid,
    Matra, Matra, Matra, Matra,
    Matra, Matra, Matra, Matra,

    Invalid, Invalid, Invalid, Invalid,
    Invalid, Invalid, Invalid, Invalid,
    Invalid, Invalid, Invalid, Invalid,
    Invalid, Invalid, Invalid, Invalid,

    Invalid, Invalid, Matra, Matra,
    Other, Other, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,
};

// Per-character placement class stored in indicPosition. None is 0 and the
// remaining enumerators count up from it (Pre = 1, Above = 2, Below = 3, ...).
enum Position {
    None,
    Pre,
    Above,
    Below,
    Post,
    Split,
    Base,
    Reph,
    Vattu,
    Inherit
};

// Position of every code point in U+0900..U+0DFF, indexed by (uc - 0x900).
// Same layout as indicForms: ten 128-entry script sections, four code points
// per row, blank line after every sixteen.
//
// NOTE(review): a few rows below (Tamil, Telugu, Kannada, Malayalam) contain
// `Halant`, which is an enumerator of Form rather than Position. Its value (3)
// is the same as `Below`, so the stored byte is identical to writing `Below`;
// whether that was intended is not visible here -- confirm before changing.
static const unsigned char indicPosition[0xe00-0x900] = {
    // Devanagari (U+0900..U+097F)
    None, Above, Above, Post,
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,

    None, None, None, None,
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,

    None, None, None, None,
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,

    Below, None, None, None,
    None, None, None, None,
    None, None, None, None,
    None, None, Post, Pre,

    Post, Below, Below, Below,
    Below, Above, Above, Above,
    Above, Post, Post, Post,
    Post, None, None, None,

    None, Above, Below, Above,
    Above, None, None, None,
    None, None, None, None,
    None, None, None, None,

    None, None, Below, Below,
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,

    None, None, None, None,
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,

    // Bengali (U+0980..U+09FF)
    None, Above, Post, Post,
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,

    None, None, None, None,
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,

    None, None, None, None,
    None, None, None, None,
    None, None, None, None,
    Below, None, None, Post,

    Below, None, None, None,
    None, None, None, None,
    None, None, None, None,
    Below, None, Post, Pre,

    Post, Below, Below, Below,
    Below, None, None, Pre,
    Pre, None, None, Split,
    Split, Below, None, None,

    None, None, None, None,
    None, None, None, Post,
    None, None, None, None,
    None, None, None, None,

    None, None, Below, Below,
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,

    Below, None, None, None,
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,

    // Gurmukhi (U+0A00..U+0A7F)
    None, Above, Above, Post,
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,

    None, None, None, None,
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,

    None, None, None, None,
    None, None, None, None,
    None, None, None, None,
    None, None, None, Post,

    Below, None, None, None,
    None, Below, None, None,
    None, Below, None, None,
    Below, None, Post, Pre,

    Post, Below, Below, None,
    None, None, None, Above,
    Above, None, None, Above,
    Above, None, None, None,

    None, None, None, None,
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,

    None, None, None, None,
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,

    Above, Above, None, None,
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,

    // Gujarati (U+0A80..U+0AFF)
    None, Above, Above, Post,
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,

    None, None, None, None,
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,

    None, None, None, None,
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,

    Below, None, None, None,
    None, None, None, None,
    None, None, None, None,
    None, None, Post, Pre,

    Post, Below, Below, Below,
    Below, Above, None, Above,
    Above, Post, None, Post,
    Post, None, None, None,

    None, None, None, None,
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,

    None, None, Below, Below,
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,

    None, None, None, None,
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,

    // Oriya (U+0B00..U+0B7F)
    None, Above, Post, Post,
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,

    None, None, None, None,
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,

    None, None, None, None,
    Below, None, None, None,
    Below, None, None, None,
    Below, Below, Below, Post,

    Below, None, Below, Below,
    None, None, None, None,
    None, None, None, None,
    None, None, Post, Above,

    Post, Below, Below, Below,
    None, None, None, Pre,
    Split, None, None, Split,
    Split, None, None, None,

    None, None, None, None,
    None, None, Above, Post,
    None, None, None, None,
    None, None, None, Post,

    None, None, None, None,
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,

    None, Below, None, None,
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,

    // Tamil (U+0B80..U+0BFF)
    None, None, Above, None,
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,

    None, None, None, None,
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,

    None, None, None, None,
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,

    None, None, None, None,
    None, None, None, None,
    None, None, None, None,
    None, None, Post, Post,

    Above, Below, Below, None,
    None, None, Pre, Pre,
    Pre, None, Split, Split,
    Split, Halant, None, None,

    None, None, None, None,
    None, None, None, Post,
    None, None, None, None,
    None, None, None, None,

    None, None, None, None,
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,

    None, None, None, None,
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,

    // Telugu (U+0C00..U+0C7F)
    None, Post, Post, Post,
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,

    None, None, None, None,
    None, Below, Below, Below,
    Below, Below, Below, Below,
    Below, Below, Below, Below,

    Below, Below, Below, Below,
    Below, Below, Below, Below,
    Below, None, Below, Below,
    Below, Below, Below, Below,

    Below, None, Below, Below,
    None, Below, Below, Below,
    Below, Below, None, None,
    None, None, Post, Above,

    Above, Post, Post, Post,
    Post, None, Above, Above,
    Split, None, Post, Above,
    Above, Halant, None, None,

    None, None, None, None,
    None, Above, Below, None,
    None, None, None, None,
    None, None, None, None,

    None, None, None, None,
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,

    None, None, None, None,
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,

    // Kannada (U+0C80..U+0CFF)
    None, None, Post, Post,
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,

    None, None, None, None,
    None, Below, Below, Below,
    Below, Below, Below, Below,
    Below, Below, Below, Below,

    Below, Below, Below, Below,
    Below, Below, Below, Below,
    Below, Below, Below, Below,
    Below, Below, Below, Below,

    Below, None, Below, Below,
    None, Below, Below, Below,
    Below, Below, None, None,
    None, None, Post, Above,

    Split, Post, Post, Post,
    Post, None, Above, Split,
    Split, None, Split, Split,
    Above, Halant, None, None,

    None, None, None, None,
    None, Post, Post, None,
    None, None, None, None,
    None, None, Below, None,

    None, None, Below, Below,
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,

    None, None, None, None,
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,

    // Malayalam (U+0D00..U+0D7F)
    None, None, Post, Post,
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,

    None, None, None, None,
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,

    None, None, None, None,
    None, None, None, None,
    None, None, None, None,
    None, None, None, Post,

    Post, None, Below, None,
    None, Post, None, None,
    None, None, None, None,
    None, None, Post, Post,

    Post, Post, Post, Post,
    None, None, Pre, Pre,
    Pre, None, Split, Split,
    Split, Halant, None, None,

    None, None, None, None,
    None, None, None, Post,
    None, None, None, None,
None, None, None, None, 856 857 None, None, None, None, 858 None, None, None, None, 859 None, None, None, None, 860 None, None, None, None, 861 862 None, None, None, None, 863 None, None, None, None, 864 None, None, None, None, 865 None, None, None, None, 866 867 // Sinhala 868 None, None, Post, Post, 869 None, None, None, None, 870 None, None, None, None, 871 None, None, None, None, 872 873 None, None, None, None, 874 None, None, None, None, 875 None, None, None, None, 876 None, None, None, None, 877 878 None, None, None, None, 879 None, None, None, None, 880 None, None, None, None, 881 None, None, None, None, 882 883 None, None, None, None, 884 None, None, None, None, 885 None, None, None, None, 886 None, None, None, None, 887 888 None, None, None, None, 889 None, None, None, None, 890 None, None, None, None, 891 None, None, None, Post, 892 893 Post, Post, Above, Above, 894 Below, None, Below, None, 895 Post, Pre, Split, Pre, 896 Split, Split, Split, Post, 897 898 None, None, None, None, 899 None, None, None, None, 900 None, None, None, None, 901 None, None, None, None, 902 903 None, None, Post, Post, 904 None, None, None, None, 905 None, None, None, None, 906 None, None, None, None 907}; 908 909static inline Form form(unsigned short uc) { 910 if (uc < 0x900 || uc > 0xdff) { 911 if (uc == 0x25cc) 912 return Consonant; 913 if (uc == 0x200c || uc == 0x200d) 914 return Control; 915 return Other; 916 } 917 return (Form)indicForms[uc-0x900]; 918} 919 920static inline Position indic_position(unsigned short uc) { 921 if (uc < 0x900 || uc > 0xdff) 922 return None; 923 return (Position) indicPosition[uc-0x900]; 924} 925 926 927enum IndicScriptProperties { 928 HasReph = 0x01, 929 HasSplit = 0x02 930}; 931 932const hb_uint8 scriptProperties[10] = { 933 // Devanagari, 934 HasReph, 935 // Bengali, 936 HasReph|HasSplit, 937 // Gurmukhi, 938 0, 939 // Gujarati, 940 HasReph, 941 // Oriya, 942 HasReph|HasSplit, 943 // Tamil, 944 HasSplit, 945 // Telugu, 946 HasSplit, 947 // 
Kannada, 948 HasSplit|HasReph, 949 // Malayalam, 950 HasSplit, 951 // Sinhala, 952 HasSplit 953}; 954 955struct IndicOrdering { 956 Form form; 957 Position position; 958}; 959 960static const IndicOrdering devanagari_order [] = { 961 { Consonant, Below }, 962 { Matra, Below }, 963 { VowelMark, Below }, 964 { StressMark, Below }, 965 { Matra, Above }, 966 { Matra, Post }, 967 { Consonant, Reph }, 968 { VowelMark, Above }, 969 { StressMark, Above }, 970 { VowelMark, Post }, 971 { (Form)0, None } 972}; 973 974static const IndicOrdering bengali_order [] = { 975 { Consonant, Below }, 976 { Matra, Below }, 977 { Matra, Above }, 978 { Consonant, Reph }, 979 { VowelMark, Above }, 980 { Consonant, Post }, 981 { Matra, Post }, 982 { VowelMark, Post }, 983 { (Form)0, None } 984}; 985 986static const IndicOrdering gurmukhi_order [] = { 987 { Consonant, Below }, 988 { Matra, Below }, 989 { Matra, Above }, 990 { Consonant, Post }, 991 { Matra, Post }, 992 { VowelMark, Above }, 993 { (Form)0, None } 994}; 995 996static const IndicOrdering tamil_order [] = { 997 { Matra, Above }, 998 { Matra, Post }, 999 { VowelMark, Post }, 1000 { (Form)0, None } 1001}; 1002 1003static const IndicOrdering telugu_order [] = { 1004 { Matra, Above }, 1005 { Matra, Below }, 1006 { Matra, Post }, 1007 { Consonant, Below }, 1008 { Consonant, Post }, 1009 { VowelMark, Post }, 1010 { (Form)0, None } 1011}; 1012 1013static const IndicOrdering kannada_order [] = { 1014 { Matra, Above }, 1015 { Matra, Post }, 1016 { Consonant, Below }, 1017 { Consonant, Post }, 1018 { LengthMark, Post }, 1019 { Consonant, Reph }, 1020 { VowelMark, Post }, 1021 { (Form)0, None } 1022}; 1023 1024static const IndicOrdering malayalam_order [] = { 1025 { Consonant, Below }, 1026 { Matra, Below }, 1027 { Consonant, Reph }, 1028 { Consonant, Post }, 1029 { Matra, Post }, 1030 { VowelMark, Post }, 1031 { (Form)0, None } 1032}; 1033 1034static const IndicOrdering sinhala_order [] = { 1035 { Matra, Below }, 1036 { Matra, Above }, 
1037 { Matra, Post }, 1038 { VowelMark, Post }, 1039 { (Form)0, None } 1040}; 1041 1042static const IndicOrdering * const indic_order[] = { 1043 devanagari_order, // Devanagari 1044 bengali_order, // Bengali 1045 gurmukhi_order, // Gurmukhi 1046 devanagari_order, // Gujarati 1047 bengali_order, // Oriya 1048 tamil_order, // Tamil 1049 telugu_order, // Telugu 1050 kannada_order, // Kannada 1051 malayalam_order, // Malayalam 1052 sinhala_order // Sinhala 1053}; 1054 1055 1056 1057// vowel matras that have to be split into two parts. 1058static const unsigned short split_matras[] = { 1059 // matra, split1, split2, split3 1060 1061 // bengalis 1062 0x9cb, 0x9c7, 0x9be, 0x0, 1063 0x9cc, 0x9c7, 0x9d7, 0x0, 1064 // oriya 1065 0xb48, 0xb47, 0xb56, 0x0, 1066 0xb4b, 0xb47, 0xb3e, 0x0, 1067 0xb4c, 0xb47, 0xb57, 0x0, 1068 // tamil 1069 0xbca, 0xbc6, 0xbbe, 0x0, 1070 0xbcb, 0xbc7, 0xbbe, 0x0, 1071 0xbcc, 0xbc6, 0xbd7, 0x0, 1072 // telugu 1073 0xc48, 0xc46, 0xc56, 0x0, 1074 // kannada 1075 0xcc0, 0xcbf, 0xcd5, 0x0, 1076 0xcc7, 0xcc6, 0xcd5, 0x0, 1077 0xcc8, 0xcc6, 0xcd6, 0x0, 1078 0xcca, 0xcc6, 0xcc2, 0x0, 1079 0xccb, 0xcc6, 0xcc2, 0xcd5, 1080 // malayalam 1081 0xd4a, 0xd46, 0xd3e, 0x0, 1082 0xd4b, 0xd47, 0xd3e, 0x0, 1083 0xd4c, 0xd46, 0xd57, 0x0, 1084 // sinhala 1085 0xdda, 0xdd9, 0xdca, 0x0, 1086 0xddc, 0xdd9, 0xdcf, 0x0, 1087 0xddd, 0xdd9, 0xdcf, 0xdca, 1088 0xdde, 0xdd9, 0xddf, 0x0, 1089 0xffff 1090}; 1091 1092static inline void splitMatra(unsigned short *reordered, int matra, int &len) 1093{ 1094 unsigned short matra_uc = reordered[matra]; 1095 //qDebug("matra=%d, reordered[matra]=%x", matra, reordered[matra]); 1096 1097 const unsigned short *split = split_matras; 1098 while (split[0] < matra_uc) 1099 split += 4; 1100 1101 assert(*split == matra_uc); 1102 ++split; 1103 1104 int added_chars = split[2] == 0x0 ? 
1 : 2; 1105 1106 memmove(reordered + matra + added_chars, reordered + matra, (len-matra)*sizeof(unsigned short)); 1107 reordered[matra] = split[0]; 1108 reordered[matra+1] = split[1]; 1109 if(added_chars == 2) 1110 reordered[matra+2] = split[2]; 1111 len += added_chars; 1112} 1113 1114#ifndef NO_OPENTYPE 1115static const HB_OpenTypeFeature indic_features[] = { 1116 { HB_MAKE_TAG('l', 'o', 'c', 'a'), LocaProperty }, 1117 { HB_MAKE_TAG('c', 'c', 'm', 'p'), CcmpProperty }, 1118 { HB_MAKE_TAG('i', 'n', 'i', 't'), InitProperty }, 1119 { HB_MAKE_TAG('n', 'u', 'k', 't'), NuktaProperty }, 1120 { HB_MAKE_TAG('a', 'k', 'h', 'n'), AkhantProperty }, 1121 { HB_MAKE_TAG('r', 'p', 'h', 'f'), RephProperty }, 1122 { HB_MAKE_TAG('b', 'l', 'w', 'f'), BelowFormProperty }, 1123 { HB_MAKE_TAG('h', 'a', 'l', 'f'), HalfFormProperty }, 1124 { HB_MAKE_TAG('p', 's', 't', 'f'), PostFormProperty }, 1125 { HB_MAKE_TAG('c', 'j', 'c', 't'), ConjunctFormProperty }, 1126 { HB_MAKE_TAG('v', 'a', 't', 'u'), VattuProperty }, 1127 { HB_MAKE_TAG('p', 'r', 'e', 's'), PreSubstProperty }, 1128 { HB_MAKE_TAG('b', 'l', 'w', 's'), BelowSubstProperty }, 1129 { HB_MAKE_TAG('a', 'b', 'v', 's'), AboveSubstProperty }, 1130 { HB_MAKE_TAG('p', 's', 't', 's'), PostSubstProperty }, 1131 { HB_MAKE_TAG('h', 'a', 'l', 'n'), HalantProperty }, 1132 { HB_MAKE_TAG('c', 'a', 'l', 't'), IndicCaltProperty }, 1133 { 0, 0 } 1134}; 1135#endif 1136 1137// #define INDIC_DEBUG 1138#ifdef INDIC_DEBUG 1139#define IDEBUG hb_debug 1140#include <stdarg.h> 1141 1142static void hb_debug(const char *msg, ...) 
1143{ 1144 va_list ap; 1145 va_start(ap, msg); // use variable arg list 1146 vfprintf(stderr, msg, ap); 1147 va_end(ap); 1148 fprintf(stderr, "\n"); 1149} 1150 1151#else 1152#define IDEBUG if(0) printf 1153#endif 1154 1155#if 0 //def INDIC_DEBUG 1156static QString propertiesToString(int properties) 1157{ 1158 QString res; 1159 properties = ~properties; 1160 if (properties & LocaProperty) 1161 res += "Loca "; 1162 if (properties & CcmpProperty) 1163 res += "Ccmp "; 1164 if (properties & InitProperty) 1165 res += "Init "; 1166 if (properties & NuktaProperty) 1167 res += "Nukta "; 1168 if (properties & AkhantProperty) 1169 res += "Akhant "; 1170 if (properties & RephProperty) 1171 res += "Reph "; 1172 if (properties & PreFormProperty) 1173 res += "PreForm "; 1174 if (properties & BelowFormProperty) 1175 res += "BelowForm "; 1176 if (properties & AboveFormProperty) 1177 res += "AboveForm "; 1178 if (properties & HalfFormProperty) 1179 res += "HalfForm "; 1180 if (properties & PostFormProperty) 1181 res += "PostForm "; 1182 if (properties & ConjunctFormProperty) 1183 res += "PostForm "; 1184 if (properties & VattuProperty) 1185 res += "Vattu "; 1186 if (properties & PreSubstProperty) 1187 res += "PreSubst "; 1188 if (properties & BelowSubstProperty) 1189 res += "BelowSubst "; 1190 if (properties & AboveSubstProperty) 1191 res += "AboveSubst "; 1192 if (properties & PostSubstProperty) 1193 res += "PostSubst "; 1194 if (properties & HalantProperty) 1195 res += "Halant "; 1196 if (properties & CligProperty) 1197 res += "Clig "; 1198 if (properties & IndicCaltProperty) 1199 res += "Calt "; 1200 return res; 1201} 1202#endif 1203 1204static bool indic_shape_syllable(HB_Bool openType, HB_ShaperItem *item, bool invalid) 1205{ 1206 HB_Script script = item->item.script; 1207 assert(script >= HB_Script_Devanagari && script <= HB_Script_Sinhala); 1208 const unsigned short script_base = 0x0900 + 0x80*(script-HB_Script_Devanagari); 1209 const unsigned short ra = script_base + 0x30; 
1210 const unsigned short halant = script_base + 0x4d; 1211 const unsigned short nukta = script_base + 0x3c; 1212 bool control = false; 1213 1214 int len = (int)item->item.length; 1215 IDEBUG(">>>>> indic shape: from=%d, len=%d invalid=%d", item->item.pos, item->item.length, invalid); 1216 1217 if ((int)item->num_glyphs < len+4) { 1218 item->num_glyphs = len+4; 1219 return false; 1220 } 1221 1222 HB_STACKARRAY(HB_UChar16, reordered, len + 4); 1223 HB_STACKARRAY(hb_uint8, position, len + 4); 1224 1225 unsigned char properties = scriptProperties[script-HB_Script_Devanagari]; 1226 1227 if (invalid) { 1228 *reordered = 0x25cc; 1229 memcpy(reordered+1, item->string + item->item.pos, len*sizeof(HB_UChar16)); 1230 len++; 1231 } else { 1232 memcpy(reordered, item->string + item->item.pos, len*sizeof(HB_UChar16)); 1233 } 1234 if (reordered[len-1] == 0x200c) // zero width non joiner 1235 len--; 1236 1237 int i; 1238 int base = 0; 1239 int reph = -1; 1240 1241#ifdef INDIC_DEBUG 1242 IDEBUG("original:"); 1243 for (i = 0; i < len; i++) { 1244 IDEBUG(" %d: %4x", i, reordered[i]); 1245 } 1246#endif 1247 1248 if (len != 1) { 1249 HB_UChar16 *uc = reordered; 1250 bool beginsWithRa = false; 1251 1252 // Rule 1: find base consonant 1253 // 1254 // The shaping engine finds the base consonant of the 1255 // syllable, using the following algorithm: starting from the 1256 // end of the syllable, move backwards until a consonant is 1257 // found that does not have a below-base or post-base form 1258 // (post-base forms have to follow below-base forms), or 1259 // arrive at the first consonant. The consonant stopped at 1260 // will be the base. 1261 // 1262 // * If the syllable starts with Ra + H (in a script that has 1263 // 'Reph'), Ra is excluded from candidates for base 1264 // consonants. 1265 // 1266 // * In Kannada and Telugu, the base consonant cannot be 1267 // farther than 3 consonants from the end of the syllable. 
1268 // #### replace the HasReph property by testing if the feature exists in the font! 1269 if (form(*uc) == Consonant || (script == HB_Script_Bengali && form(*uc) == IndependentVowel)) { 1270 if ((properties & HasReph) && (len > 2) && 1271 (*uc == ra || *uc == 0x9f0) && *(uc+1) == halant) 1272 beginsWithRa = true; 1273 1274 if (beginsWithRa && form(*(uc+2)) == Control) 1275 beginsWithRa = false; 1276 1277 base = (beginsWithRa ? 2 : 0); 1278 IDEBUG(" length = %d, beginsWithRa = %d, base=%d", len, beginsWithRa, base); 1279 1280 int lastConsonant = 0; 1281 int matra = -1; 1282 // we remember: 1283 // * the last consonant since we need it for rule 2 1284 // * the matras position for rule 3 and 4 1285 1286 // figure out possible base glyphs 1287 memset(position, 0, len); 1288 if (script == HB_Script_Devanagari || script == HB_Script_Gujarati) { 1289 bool vattu = false; 1290 for (i = base; i < len; ++i) { 1291 position[i] = form(uc[i]); 1292 if (position[i] == Consonant) { 1293 lastConsonant = i; 1294 vattu = (!vattu && uc[i] == ra); 1295 if (vattu) { 1296 IDEBUG("excluding vattu glyph at %d from base candidates", i); 1297 position[i] = Vattu; 1298 } 1299 } else if (position[i] == Matra) { 1300 matra = i; 1301 } 1302 } 1303 } else { 1304 for (i = base; i < len; ++i) { 1305 position[i] = form(uc[i]); 1306 if (position[i] == Consonant) 1307 lastConsonant = i; 1308 else if (matra < 0 && position[i] == Matra) 1309 matra = i; 1310 } 1311 } 1312 int skipped = 0; 1313 Position pos = Post; 1314 for (i = len-1; i >= base; i--) { 1315 if (position[i] != Consonant && (position[i] != Control || script == HB_Script_Kannada)) 1316 continue; 1317 1318 if (i < len-1 && position[i] == Control && position[i+1] == Consonant) { 1319 base = i+1; 1320 break; 1321 } 1322 1323 Position charPosition = indic_position(uc[i]); 1324 if (pos == Post && charPosition == Post) { 1325 pos = Post; 1326 } else if ((pos == Post || pos == Below) && charPosition == Below) { 1327 if (script == 
HB_Script_Devanagari || script == HB_Script_Gujarati) 1328 base = i; 1329 pos = Below; 1330 } else { 1331 base = i; 1332 break; 1333 } 1334 if (skipped == 2 && (script == HB_Script_Kannada || script == HB_Script_Telugu)) { 1335 base = i; 1336 break; 1337 } 1338 ++skipped; 1339 } 1340 1341 IDEBUG(" base consonant at %d skipped=%d, lastConsonant=%d", base, skipped, lastConsonant); 1342 1343 // Rule 2: 1344 // 1345 // If the base consonant is not the last one, Uniscribe 1346 // moves the halant from the base consonant to the last 1347 // one. 1348 if (lastConsonant > base) { 1349 int halantPos = 0; 1350 if (uc[base+1] == halant) 1351 halantPos = base + 1; 1352 else if (uc[base+1] == nukta && uc[base+2] == halant) 1353 halantPos = base + 2; 1354 if (halantPos > 0) { 1355 IDEBUG(" moving halant from %d to %d!", base+1, lastConsonant); 1356 for (i = halantPos; i < lastConsonant; i++) 1357 uc[i] = uc[i+1]; 1358 uc[lastConsonant] = halant; 1359 } 1360 } 1361 1362 // Rule 3: 1363 // 1364 // If the syllable starts with Ra + H, Uniscribe moves 1365 // this combination so that it follows either: 1366 1367 // * the post-base 'matra' (if any) or the base consonant 1368 // (in scripts that show similarity to Devanagari, i.e., 1369 // Devanagari, Gujarati, Bengali) 1370 // * the base consonant (other scripts) 1371 // * the end of the syllable (Kannada) 1372 1373 Position matra_position = None; 1374 if (matra > 0) 1375 matra_position = indic_position(uc[matra]); 1376 IDEBUG(" matra at %d with form %d, base=%d", matra, matra_position, base); 1377 1378 if (beginsWithRa && base != 0) { 1379 int toPos = base+1; 1380 if (toPos < len && uc[toPos] == nukta) 1381 toPos++; 1382 if (toPos < len && uc[toPos] == halant) 1383 toPos++; 1384 if (toPos < len && uc[toPos] == 0x200d) 1385 toPos++; 1386 if (toPos < len-1 && uc[toPos] == ra && uc[toPos+1] == halant) 1387 toPos += 2; 1388 if (script == HB_Script_Devanagari || script == HB_Script_Gujarati || script == HB_Script_Bengali) { 1389 if 
(matra_position == Post || matra_position == Split) { 1390 toPos = matra+1; 1391 matra -= 2; 1392 } 1393 } else if (script == HB_Script_Kannada) { 1394 toPos = len; 1395 matra -= 2; 1396 } 1397 1398 IDEBUG("moving leading ra+halant to position %d", toPos); 1399 for (i = 2; i < toPos; i++) 1400 uc[i-2] = uc[i]; 1401 uc[toPos-2] = ra; 1402 uc[toPos-1] = halant; 1403 base -= 2; 1404 if (properties & HasReph) 1405 reph = toPos-2; 1406 } 1407 1408 // Rule 4: 1409 1410 // Uniscribe splits two- or three-part matras into their 1411 // parts. This splitting is a character-to-character 1412 // operation). 1413 // 1414 // Uniscribe describes some moving operations for these 1415 // matras here. For shaping however all pre matras need 1416 // to be at the beginning of the syllable, so we just move 1417 // them there now. 1418 if (matra_position == Split) { 1419 splitMatra(uc, matra, len); 1420 // Handle three-part matras (0xccb in Kannada) 1421 matra_position = indic_position(uc[matra]); 1422 } 1423 1424 if (matra_position == Pre) { 1425 unsigned short m = uc[matra]; 1426 while (matra--) 1427 uc[matra+1] = uc[matra]; 1428 uc[0] = m; 1429 base++; 1430 } 1431 } 1432 1433 // Rule 5: 1434 // 1435 // Uniscribe classifies consonants and 'matra' parts as 1436 // pre-base, above-base (Reph), below-base or post-base. This 1437 // classification exists on the character code level and is 1438 // language-dependent, not font-dependent. 1439 for (i = 0; i < base; ++i) 1440 position[i] = Pre; 1441 position[base] = Base; 1442 for (i = base+1; i < len; ++i) { 1443 position[i] = indic_position(uc[i]); 1444 // #### replace by adjusting table 1445 if (uc[i] == nukta || uc[i] == halant) 1446 position[i] = Inherit; 1447 } 1448 if (reph > 0) { 1449 // recalculate reph, it might have changed. 
1450 for (i = base+1; i < len; ++i) 1451 if (uc[i] == ra) 1452 reph = i; 1453 position[reph] = Reph; 1454 position[reph+1] = Inherit; 1455 } 1456 1457 // all reordering happens now to the chars after the base 1458 int fixed = base+1; 1459 if (fixed < len && uc[fixed] == nukta) 1460 fixed++; 1461 if (fixed < len && uc[fixed] == halant) 1462 fixed++; 1463 if (fixed < len && uc[fixed] == 0x200d) 1464 fixed++; 1465 1466#ifdef INDIC_DEBUG 1467 for (i = fixed; i < len; ++i) 1468 IDEBUG("position[%d] = %d, form=%d uc=%x", i, position[i], form(uc[i]), uc[i]); 1469#endif 1470 // we continuosly position the matras and vowel marks and increase the fixed 1471 // until we reached the end. 1472 const IndicOrdering *finalOrder = indic_order[script-HB_Script_Devanagari]; 1473 1474 IDEBUG(" reordering pass:"); 1475 IDEBUG(" base=%d fixed=%d", base, fixed); 1476 int toMove = 0; 1477 while (finalOrder[toMove].form && fixed < len-1) { 1478 IDEBUG(" fixed = %d, toMove=%d, moving form %d with pos %d", fixed, toMove, finalOrder[toMove].form, finalOrder[toMove].position); 1479 for (i = fixed; i < len; i++) { 1480// IDEBUG() << " i=" << i << "uc=" << hex << uc[i] << "form=" << form(uc[i]) 1481// << "position=" << position[i]; 1482 if (form(uc[i]) == finalOrder[toMove].form && 1483 position[i] == finalOrder[toMove].position) { 1484 // need to move this glyph 1485 int to = fixed; 1486 if (i < len-1 && position[i+1] == Inherit) { 1487 IDEBUG(" moving two chars from %d to %d", i, to); 1488 unsigned short ch = uc[i]; 1489 unsigned short ch2 = uc[i+1]; 1490 unsigned char pos = position[i]; 1491 for (int j = i+1; j > to+1; j--) { 1492 uc[j] = uc[j-2]; 1493 position[j] = position[j-2]; 1494 } 1495 uc[to] = ch; 1496 uc[to+1] = ch2; 1497 position[to] = pos; 1498 position[to+1] = pos; 1499 fixed += 2; 1500 } else { 1501 IDEBUG(" moving one char from %d to %d", i, to); 1502 unsigned short ch = uc[i]; 1503 unsigned char pos = position[i]; 1504 for (int j = i; j > to; j--) { 1505 uc[j] = uc[j-1]; 1506 
position[j] = position[j-1]; 1507 } 1508 uc[to] = ch; 1509 position[to] = pos; 1510 fixed++; 1511 } 1512 } 1513 } 1514 toMove++; 1515 } 1516 1517 } 1518 1519 if (reph > 0) { 1520 // recalculate reph, it might have changed. 1521 for (i = base+1; i < len; ++i) 1522 if (reordered[i] == ra) 1523 reph = i; 1524 } 1525 1526#ifndef NO_OPENTYPE 1527 const int availableGlyphs = item->num_glyphs; 1528#endif 1529 if (!item->font->klass->convertStringToGlyphIndices(item->font, 1530 reordered, len, 1531 item->glyphs, &item->num_glyphs, 1532 item->item.bidiLevel % 2)) 1533 goto error; 1534 1535 1536 IDEBUG(" base=%d, reph=%d", base, reph); 1537 IDEBUG("reordered:"); 1538 for (i = 0; i < len; i++) { 1539 item->attributes[i].mark = false; 1540 item->attributes[i].clusterStart = false; 1541 item->attributes[i].justification = 0; 1542 item->attributes[i].zeroWidth = false; 1543 IDEBUG(" %d: %4x", i, reordered[i]); 1544 } 1545 1546 // now we have the syllable in the right order, and can start running it through open type. 1547 1548 for (i = 0; i < len; ++i) 1549 control |= (form(reordered[i]) == Control); 1550 1551#ifndef NO_OPENTYPE 1552 if (openType) { 1553 1554 // we need to keep track of where the base glyph is for some 1555 // scripts and use the cluster feature for this. This 1556 // also means we have to correct the logCluster output from 1557 // the open type engine manually afterwards. for indic this 1558 // is rather simple, as all chars just point to the first 1559 // glyph in the syllable. 
1560 HB_STACKARRAY(unsigned short, clusters, len); 1561 HB_STACKARRAY(unsigned int, properties, len); 1562 1563 for (i = 0; i < len; ++i) 1564 clusters[i] = i; 1565 1566 // features we should always apply 1567 for (i = 0; i < len; ++i) 1568 properties[i] = ~(LocaProperty 1569 | CcmpProperty 1570 | NuktaProperty 1571 | VattuProperty 1572 | ConjunctFormProperty 1573 | PreSubstProperty 1574 | BelowSubstProperty 1575 | AboveSubstProperty 1576 | PostSubstProperty 1577 | HalantProperty 1578 | IndicCaltProperty 1579 | PositioningProperties); 1580 1581 // Loca always applies 1582 // Ccmp always applies 1583 // Init 1584 if (item->item.pos == 0 1585 || !(isLetter(item->string[item->item.pos-1]) || isMark(item->string[item->item.pos-1]))) 1586 properties[0] &= ~InitProperty; 1587 1588 // Nukta always applies 1589 // Akhant 1590 for (i = 0; i <= base; ++i) 1591 properties[i] &= ~AkhantProperty; 1592 // Reph 1593 if (reph >= 0) { 1594 properties[reph] &= ~RephProperty; 1595 properties[reph+1] &= ~RephProperty; 1596 } 1597 // BelowForm 1598 for (i = base+1; i < len; ++i) 1599 properties[i] &= ~BelowFormProperty; 1600 1601 if (script == HB_Script_Devanagari || script == HB_Script_Gujarati) { 1602 // vattu glyphs need this aswell 1603 bool vattu = false; 1604 for (i = base-2; i > 1; --i) { 1605 if (form(reordered[i]) == Consonant) { 1606 vattu = (!vattu && reordered[i] == ra); 1607 if (vattu) { 1608 IDEBUG("forming vattu ligature at %d", i); 1609 properties[i] &= ~BelowFormProperty; 1610 properties[i+1] &= ~BelowFormProperty; 1611 } 1612 } 1613 } 1614 } 1615 // HalfFormProperty 1616 for (i = 0; i < base; ++i) 1617 properties[i] &= ~HalfFormProperty; 1618 if (control) { 1619 for (i = 2; i < len; ++i) { 1620 if (reordered[i] == 0x200d /* ZWJ */) { 1621 properties[i-1] &= ~HalfFormProperty; 1622 properties[i-2] &= ~HalfFormProperty; 1623 } else if (reordered[i] == 0x200c /* ZWNJ */) { 1624 properties[i-1] &= ~HalfFormProperty; 1625 properties[i-2] &= ~HalfFormProperty; 1626 } 1627 } 
1628 } 1629 // PostFormProperty 1630 for (i = base+1; i < len; ++i) 1631 properties[i] &= ~PostFormProperty; 1632 // vattu always applies 1633 // pres always applies 1634 // blws always applies 1635 // abvs always applies 1636 // psts always applies 1637 // halant always applies 1638 // calt always applies 1639 1640#ifdef INDIC_DEBUG 1641// { 1642// IDEBUG("OT properties:"); 1643// for (int i = 0; i < len; ++i) 1644// qDebug(" i: %s", ::propertiesToString(properties[i]).toLatin1().data()); 1645// } 1646#endif 1647 1648 // initialize 1649 item->log_clusters = clusters; 1650 HB_OpenTypeShape(item, properties); 1651 1652 int newLen = item->face->buffer->in_length; 1653 HB_GlyphItem otl_glyphs = item->face->buffer->in_string; 1654 1655 // move the left matra back to its correct position in malayalam and tamil 1656 if ((script == HB_Script_Malayalam || script == HB_Script_Tamil) && (form(reordered[0]) == Matra)) { 1657// qDebug("reordering matra, len=%d", newLen); 1658 // need to find the base in the shaped string and move the matra there 1659 int basePos = 0; 1660 while (basePos < newLen && (int)otl_glyphs[basePos].cluster <= base) 1661 basePos++; 1662 --basePos; 1663 if (basePos < newLen && basePos > 1) { 1664// qDebug("moving prebase matra to position %d in syllable newlen=%d", basePos, newLen); 1665 HB_GlyphItemRec m = otl_glyphs[0]; 1666 --basePos; 1667 for (i = 0; i < basePos; ++i) 1668 otl_glyphs[i] = otl_glyphs[i+1]; 1669 otl_glyphs[basePos] = m; 1670 } 1671 } 1672 1673 HB_Bool positioned = HB_OpenTypePosition(item, availableGlyphs, false); 1674 1675 HB_FREE_STACKARRAY(clusters); 1676 HB_FREE_STACKARRAY(properties); 1677 1678 if (!positioned) 1679 goto error; 1680 1681 if (control) { 1682 IDEBUG("found a control char in the syllable"); 1683 hb_uint32 i = 0, j = 0; 1684 while (i < item->num_glyphs) { 1685 if (form(reordered[otl_glyphs[i].cluster]) == Control) { 1686 ++i; 1687 if (i >= item->num_glyphs) 1688 break; 1689 } 1690 item->glyphs[j] = item->glyphs[i]; 
1691 item->attributes[j] = item->attributes[i]; 1692 ++i; 1693 ++j; 1694 } 1695 item->num_glyphs = j; 1696 } 1697 1698 } else { 1699 HB_HeuristicPosition(item); 1700 } 1701#endif // NO_OPENTYPE 1702 item->attributes[0].clusterStart = true; 1703 1704 HB_FREE_STACKARRAY(reordered); 1705 HB_FREE_STACKARRAY(position); 1706 1707 IDEBUG("<<<<<<"); 1708 return true; 1709 1710error: 1711 HB_FREE_STACKARRAY(reordered); 1712 HB_FREE_STACKARRAY(position); 1713 return false; 1714} 1715 1716/* syllables are of the form: 1717 1718 (Consonant Nukta? Halant)* Consonant Matra? VowelMark? StressMark? 1719 (Consonant Nukta? Halant)* Consonant Halant 1720 IndependentVowel VowelMark? StressMark? 1721 1722 We return syllable boundaries on invalid combinations aswell 1723*/ 1724static int indic_nextSyllableBoundary(HB_Script script, const HB_UChar16 *s, int start, int end, bool *invalid) 1725{ 1726 *invalid = false; 1727 IDEBUG("indic_nextSyllableBoundary: start=%d, end=%d", start, end); 1728 const HB_UChar16 *uc = s+start; 1729 1730 int pos = 0; 1731 Form state = form(uc[pos]); 1732 IDEBUG("state[%d]=%d (uc=%4x)", pos, state, uc[pos]); 1733 pos++; 1734 1735 if (state != Consonant && state != IndependentVowel) { 1736 if (state != Other) 1737 *invalid = true; 1738 goto finish; 1739 } 1740 1741 while (pos < end - start) { 1742 Form newState = form(uc[pos]); 1743 IDEBUG("state[%d]=%d (uc=%4x)", pos, newState, uc[pos]); 1744 switch(newState) { 1745 case Control: 1746 newState = state; 1747 if (state == Halant && uc[pos] == 0x200d /* ZWJ */) 1748 break; 1749 // the control character should be the last char in the item 1750 ++pos; 1751 goto finish; 1752 case Consonant: 1753 if (state == Halant && (script != HB_Script_Sinhala || uc[pos-1] == 0x200d /* ZWJ */)) 1754 break; 1755 goto finish; 1756 case Halant: 1757 if (state == Nukta || state == Consonant) 1758 break; 1759 // Bengali has a special exception allowing the combination Vowel_A/E + Halant + Ya 1760 if (script == HB_Script_Bengali && 
pos == 1 && 1761 (uc[0] == 0x0985 || uc[0] == 0x098f)) 1762 break; 1763 // Sinhala uses the Halant as a component of certain matras. Allow these, but keep the state on Matra. 1764 if (script == HB_Script_Sinhala && state == Matra) { 1765 ++pos; 1766 continue; 1767 } 1768 if (script == HB_Script_Malayalam && state == Matra && uc[pos-1] == 0x0d41) { 1769 ++pos; 1770 continue; 1771 } 1772 goto finish; 1773 case Nukta: 1774 if (state == Consonant) 1775 break; 1776 goto finish; 1777 case StressMark: 1778 if (state == VowelMark) 1779 break; 1780 // fall through 1781 case VowelMark: 1782 if (state == Matra || state == LengthMark || state == IndependentVowel) 1783 break; 1784 // fall through 1785 case Matra: 1786 if (state == Consonant || state == Nukta) 1787 break; 1788 if (state == Matra) { 1789 // ### needs proper testing for correct two/three part matras 1790 break; 1791 } 1792 // ### not sure if this is correct. If it is, does it apply only to Bengali or should 1793 // it work for all Indic languages? 1794 // the combination Independent_A + Vowel Sign AA is allowed. 
1795 if (script == HB_Script_Bengali && uc[pos] == 0x9be && uc[pos-1] == 0x985) 1796 break; 1797 if (script == HB_Script_Tamil && state == Matra) { 1798 if (uc[pos-1] == 0x0bc6 && 1799 (uc[pos] == 0xbbe || uc[pos] == 0xbd7)) 1800 break; 1801 if (uc[pos-1] == 0x0bc7 && uc[pos] == 0xbbe) 1802 break; 1803 } 1804 goto finish; 1805 1806 case LengthMark: 1807 if (state == Matra) { 1808 // ### needs proper testing for correct two/three part matras 1809 break; 1810 } 1811 case IndependentVowel: 1812 case Invalid: 1813 case Other: 1814 goto finish; 1815 } 1816 state = newState; 1817 pos++; 1818 } 1819 finish: 1820 return pos+start; 1821} 1822 1823HB_Bool HB_IndicShape(HB_ShaperItem *item) 1824{ 1825 assert(item->item.script >= HB_Script_Devanagari && item->item.script <= HB_Script_Sinhala); 1826 1827 HB_Bool openType = false; 1828#ifndef NO_OPENTYPE 1829 openType = HB_SelectScript(item, indic_features); 1830#endif 1831 unsigned short *logClusters = item->log_clusters; 1832 1833 HB_ShaperItem syllable = *item; 1834 int first_glyph = 0; 1835 1836 int sstart = item->item.pos; 1837 int end = sstart + item->item.length; 1838 IDEBUG("indic_shape: from %d length %d", item->item.pos, item->item.length); 1839 while (sstart < end) { 1840 bool invalid; 1841 int send = indic_nextSyllableBoundary(item->item.script, item->string, sstart, end, &invalid); 1842 IDEBUG("syllable from %d, length %d, invalid=%s", sstart, send-sstart, 1843 invalid ? 
"true" : "false"); 1844 syllable.item.pos = sstart; 1845 syllable.item.length = send-sstart; 1846 syllable.glyphs = item->glyphs + first_glyph; 1847 syllable.attributes = item->attributes + first_glyph; 1848 syllable.offsets = item->offsets + first_glyph; 1849 syllable.advances = item->advances + first_glyph; 1850 syllable.num_glyphs = item->num_glyphs - first_glyph; 1851 if (!indic_shape_syllable(openType, &syllable, invalid)) { 1852 IDEBUG("syllable shaping failed, syllable requests %d glyphs", syllable.num_glyphs); 1853 item->num_glyphs += syllable.num_glyphs; 1854 return false; 1855 } 1856 // fix logcluster array 1857 IDEBUG("syllable:"); 1858 hb_uint32 g; 1859 for (g = first_glyph; g < first_glyph + syllable.num_glyphs; ++g) 1860 IDEBUG(" %d -> glyph %x", g, item->glyphs[g]); 1861 IDEBUG(" logclusters:"); 1862 int i; 1863 for (i = sstart; i < send; ++i) { 1864 IDEBUG(" %d -> glyph %d", i, first_glyph); 1865 logClusters[i-item->item.pos] = first_glyph; 1866 } 1867 sstart = send; 1868 first_glyph += syllable.num_glyphs; 1869 } 1870 item->num_glyphs = first_glyph; 1871 return true; 1872} 1873 1874void HB_IndicAttributes(HB_Script script, const HB_UChar16 *text, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes) 1875{ 1876 int end = from + len; 1877 const HB_UChar16 *uc = text + from; 1878 attributes += from; 1879 hb_uint32 i = 0; 1880 while (i < len) { 1881 bool invalid; 1882 hb_uint32 boundary = indic_nextSyllableBoundary(script, text, from+i, end, &invalid) - from; 1883 attributes[i].charStop = true; 1884 1885 if (boundary > len-1) boundary = len; 1886 i++; 1887 while (i < boundary) { 1888 attributes[i].charStop = false; 1889 ++uc; 1890 ++i; 1891 } 1892 assert(i == boundary); 1893 } 1894 1895 1896} 1897 1898 1899