hb-buffer.cc revision c605bbbb6d4b2a98b1f40ca818760088d991f7d1
1/* 2 * Copyright © 1998-2004 David Turner and Werner Lemberg 3 * Copyright © 2004,2007,2009,2010 Red Hat, Inc. 4 * Copyright © 2011 Google, Inc. 5 * 6 * This is part of HarfBuzz, a text shaping library. 7 * 8 * Permission is hereby granted, without written agreement and without 9 * license or royalty fees, to use, copy, modify, and distribute this 10 * software and its documentation for any purpose, provided that the 11 * above copyright notice and the following two paragraphs appear in 12 * all copies of this software. 13 * 14 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 15 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 16 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 17 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 18 * DAMAGE. 19 * 20 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 21 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 22 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 23 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 24 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 25 * 26 * Red Hat Author(s): Owen Taylor, Behdad Esfahbod 27 * Google Author(s): Behdad Esfahbod 28 */ 29 30#include "hb-buffer-private.hh" 31 32#include <string.h> 33 34 35 36#ifndef HB_DEBUG_BUFFER 37#define HB_DEBUG_BUFFER (HB_DEBUG+0) 38#endif 39 40 41static hb_buffer_t _hb_buffer_nil = { 42 HB_OBJECT_HEADER_STATIC, 43 44 &_hb_unicode_funcs_default, 45 { 46 HB_DIRECTION_INVALID, 47 HB_SCRIPT_INVALID, 48 NULL, 49 }, 50 51 TRUE, /* in_error */ 52 TRUE, /* have_output */ 53 TRUE /* have_positions */ 54}; 55 56/* Here is how the buffer works internally: 57 * 58 * There are two info pointers: info and out_info. They always have 59 * the same allocated size, but different lengths. 60 * 61 * As an optimization, both info and out_info may point to the 62 * same piece of memory, which is owned by info. This remains the 63 * case as long as out_len doesn't exceed i at any time. 64 * In that case, swap_buffers() is no-op and the glyph operations operate 65 * mostly in-place. 66 * 67 * As soon as out_info gets longer than info, out_info is moved over 68 * to an alternate buffer (which we reuse the pos buffer for!), and its 69 * current contents (out_len entries) are copied to the new place. 70 * This should all remain transparent to the user. swap_buffers() then 71 * switches info and out_info. 72 */ 73 74 75 76/* Internal API */ 77 78bool 79hb_buffer_t::enlarge (unsigned int size) 80{ 81 if (unlikely (in_error)) 82 return FALSE; 83 84 unsigned int new_allocated = allocated; 85 hb_glyph_position_t *new_pos = NULL; 86 hb_glyph_info_t *new_info = NULL; 87 bool separate_out = out_info != info; 88 89 if (unlikely (_hb_unsigned_int_mul_overflows (size, sizeof (info[0])))) 90 goto done; 91 92 while (size > new_allocated) 93 new_allocated += (new_allocated >> 1) + 32; 94 95 ASSERT_STATIC (sizeof (info[0]) == sizeof (pos[0])); 96 if (unlikely (_hb_unsigned_int_mul_overflows (new_allocated, sizeof (info[0])))) 97 goto done; 98 99 new_pos = (hb_glyph_position_t *) realloc (pos, new_allocated * sizeof (pos[0])); 100 new_info = (hb_glyph_info_t *) realloc (info, new_allocated * sizeof (info[0])); 101 102done: 103 if (unlikely (!new_pos || !new_info)) 104 in_error = TRUE; 105 106 if (likely (new_pos)) 107 pos = new_pos; 108 109 if (likely (new_info)) 110 info = new_info; 111 112 out_info = separate_out ? (hb_glyph_info_t *) pos : info; 113 if (likely (!in_error)) 114 allocated = new_allocated; 115 116 return likely (!in_error); 117} 118 119bool 120hb_buffer_t::make_room_for (unsigned int num_in, 121 unsigned int num_out) 122{ 123 if (unlikely (!ensure (out_len + num_out))) return FALSE; 124 125 if (out_info == info && 126 out_len + num_out > idx + num_in) 127 { 128 assert (have_output); 129 130 out_info = (hb_glyph_info_t *) pos; 131 memcpy (out_info, info, out_len * sizeof (out_info[0])); 132 } 133 134 return TRUE; 135} 136 137void * 138hb_buffer_t::get_scratch_buffer (unsigned int *size) 139{ 140 have_output = FALSE; 141 have_positions = FALSE; 142 out_len = 0; 143 *size = allocated * sizeof (pos[0]); 144 return pos; 145} 146 147 148/* HarfBuzz-Internal API */ 149 150void 151hb_buffer_t::reset (void) 152{ 153 if (unlikely (hb_object_is_inert (this))) 154 return; 155 156 hb_unicode_funcs_destroy (unicode); 157 unicode = _hb_buffer_nil.unicode; 158 159 props = _hb_buffer_nil.props; 160 161 in_error = FALSE; 162 have_output = FALSE; 163 have_positions = FALSE; 164 165 idx = 0; 166 len = 0; 167 out_len = 0; 168 169 serial = 0; 170 memset (allocated_var_bytes, 0, sizeof allocated_var_bytes); 171 memset (allocated_var_owner, 0, sizeof allocated_var_owner); 172 173 out_info = info; 174} 175 176void 177hb_buffer_t::add (hb_codepoint_t codepoint, 178 hb_mask_t mask, 179 unsigned int cluster) 180{ 181 hb_glyph_info_t *glyph; 182 183 if (unlikely (!ensure (len + 1))) return; 184 185 glyph = &info[len]; 186 187 memset (glyph, 0, sizeof (*glyph)); 188 glyph->codepoint = codepoint; 189 glyph->mask = mask; 190 glyph->cluster = cluster; 191 192 len++; 193} 194 195void 196hb_buffer_t::clear_output (void) 197{ 198 if (unlikely (hb_object_is_inert (this))) 199 return; 200 201 have_output = TRUE; 202 have_positions = FALSE; 203 204 out_len = 0; 205 out_info = info; 206} 207 208void 209hb_buffer_t::clear_positions (void) 210{ 211 if (unlikely (hb_object_is_inert (this))) 212 return; 213 214 have_output = FALSE; 215 have_positions = TRUE; 216 217 memset (pos, 0, sizeof (pos[0]) * len); 218} 219 220void 221hb_buffer_t::swap_buffers (void) 222{ 223 if (unlikely (in_error)) return; 224 225 assert (have_output); 226 227 if (out_info != info) 228 { 229 hb_glyph_info_t *tmp_string; 230 tmp_string = info; 231 info = out_info; 232 out_info = tmp_string; 233 pos = (hb_glyph_position_t *) out_info; 234 } 235 236 unsigned int tmp; 237 tmp = len; 238 len = out_len; 239 out_len = tmp; 240 241 idx = 0; 242} 243 244void 245hb_buffer_t::replace_glyphs_be16 (unsigned int num_in, 246 unsigned int num_out, 247 const uint16_t *glyph_data_be) 248{ 249 if (!make_room_for (num_in, num_out)) return; 250 251 hb_glyph_info_t orig_info = info[idx]; 252 for (unsigned int i = 1; i < num_in; i++) 253 { 254 hb_glyph_info_t *inf = &info[idx + i]; 255 orig_info.cluster = MIN (orig_info.cluster, inf->cluster); 256 } 257 258 hb_glyph_info_t *pinfo = &out_info[out_len]; 259 for (unsigned int i = 0; i < num_out; i++) 260 { 261 *pinfo = orig_info; 262 pinfo->codepoint = hb_be_uint16 (glyph_data_be[i]); 263 pinfo++; 264 } 265 266 idx += num_in; 267 out_len += num_out; 268} 269 270void 271hb_buffer_t::output_glyph (hb_codepoint_t glyph_index) 272{ 273 if (!make_room_for (0, 1)) return; 274 275 out_info[out_len] = info[idx]; 276 out_info[out_len].codepoint = glyph_index; 277 278 out_len++; 279} 280 281void 282hb_buffer_t::copy_glyph (void) 283{ 284 if (!make_room_for (0, 1)) return; 285 286 out_info[out_len] = info[idx]; 287 288 out_len++; 289} 290 291void 292hb_buffer_t::replace_glyph (hb_codepoint_t glyph_index) 293{ 294 out_info[out_len] = info[idx]; 295 out_info[out_len].codepoint = glyph_index; 296 297 idx++; 298 out_len++; 299} 300 301void 302hb_buffer_t::next_glyph (void) 303{ 304 if (have_output) 305 { 306 if (out_info != info) 307 { 308 if (unlikely (!ensure (out_len + 1))) return; 309 out_info[out_len] = info[idx]; 310 } 311 else if (out_len != idx) 312 out_info[out_len] = info[idx]; 313 314 out_len++; 315 } 316 317 idx++; 318} 319 320void 321hb_buffer_t::set_masks (hb_mask_t value, 322 hb_mask_t mask, 323 unsigned int cluster_start, 324 unsigned int cluster_end) 325{ 326 hb_mask_t not_mask = ~mask; 327 value &= mask; 328 329 if (!mask) 330 return; 331 332 if (cluster_start == 0 && cluster_end == (unsigned int)-1) { 333 unsigned int count = len; 334 for (unsigned int i = 0; i < count; i++) 335 info[i].mask = (info[i].mask & not_mask) | value; 336 return; 337 } 338 339 unsigned int count = len; 340 for (unsigned int i = 0; i < count; i++) 341 if (cluster_start <= info[i].cluster && info[i].cluster < cluster_end) 342 info[i].mask = (info[i].mask & not_mask) | value; 343} 344 345void 346hb_buffer_t::reverse_range (unsigned int start, 347 unsigned int end) 348{ 349 unsigned int i, j; 350 351 if (start == end - 1) 352 return; 353 354 for (i = start, j = end - 1; i < j; i++, j--) { 355 hb_glyph_info_t t; 356 357 t = info[i]; 358 info[i] = info[j]; 359 info[j] = t; 360 } 361 362 if (pos) { 363 for (i = start, j = end - 1; i < j; i++, j--) { 364 hb_glyph_position_t t; 365 366 t = pos[i]; 367 pos[i] = pos[j]; 368 pos[j] = t; 369 } 370 } 371} 372 373void 374hb_buffer_t::reverse (void) 375{ 376 if (unlikely (!len)) 377 return; 378 379 reverse_range (0, len); 380} 381 382void 383hb_buffer_t::reverse_clusters (void) 384{ 385 unsigned int i, start, count, last_cluster; 386 387 if (unlikely (!len)) 388 return; 389 390 reverse (); 391 392 count = len; 393 start = 0; 394 last_cluster = info[0].cluster; 395 for (i = 1; i < count; i++) { 396 if (last_cluster != info[i].cluster) { 397 reverse_range (start, i); 398 start = i; 399 last_cluster = info[i].cluster; 400 } 401 } 402 reverse_range (start, i); 403} 404 405static inline void 406dump_var_allocation (const hb_buffer_t *buffer) 407{ 408 char buf[80]; 409 for (unsigned int i = 0; i < 8; i++) 410 buf[i] = '0' + buffer->allocated_var_bytes[7 - i]; 411 buf[8] = '\0'; 412 DEBUG_MSG (BUFFER, buffer, 413 "Current var allocation: %s", 414 buf); 415} 416 417void hb_buffer_t::allocate_var (unsigned int byte_i, unsigned int count, const char *owner) 418{ 419 assert (byte_i < 8 && byte_i + count <= 8); 420 421 if (DEBUG (BUFFER)) 422 dump_var_allocation (this); 423 DEBUG_MSG (BUFFER, this, 424 "Allocating var bytes %d..%d for %s", 425 byte_i, byte_i + count - 1, owner); 426 427 for (unsigned int i = byte_i; i < byte_i + count; i++) { 428 assert (!allocated_var_bytes[i]); 429 allocated_var_bytes[i]++; 430 allocated_var_owner[i] = owner; 431 } 432} 433 434void hb_buffer_t::deallocate_var (unsigned int byte_i, unsigned int count, const char *owner) 435{ 436 if (DEBUG (BUFFER)) 437 dump_var_allocation (this); 438 439 DEBUG_MSG (BUFFER, this, 440 "Deallocating var bytes %d..%d for %s", 441 byte_i, byte_i + count - 1, owner); 442 443 assert (byte_i < 8 && byte_i + count <= 8); 444 for (unsigned int i = byte_i; i < byte_i + count; i++) { 445 assert (allocated_var_bytes[i]); 446 assert (0 == strcmp (allocated_var_owner[i], owner)); 447 allocated_var_bytes[i]--; 448 } 449} 450 451void hb_buffer_t::deallocate_var_all (void) 452{ 453 memset (allocated_var_bytes, 0, sizeof (allocated_var_bytes)); 454 memset (allocated_var_owner, 0, sizeof (allocated_var_owner)); 455} 456 457/* Public API */ 458 459hb_buffer_t * 460hb_buffer_create (unsigned int pre_alloc_size) 461{ 462 hb_buffer_t *buffer; 463 464 if (!(buffer = hb_object_create<hb_buffer_t> ())) 465 return &_hb_buffer_nil; 466 467 buffer->reset (); 468 469 if (pre_alloc_size && !buffer->ensure (pre_alloc_size)) { 470 hb_buffer_destroy (buffer); 471 return &_hb_buffer_nil; 472 } 473 474 return buffer; 475} 476 477hb_buffer_t * 478hb_buffer_get_empty (void) 479{ 480 return &_hb_buffer_nil; 481} 482 483hb_buffer_t * 484hb_buffer_reference (hb_buffer_t *buffer) 485{ 486 return hb_object_reference (buffer); 487} 488 489void 490hb_buffer_destroy (hb_buffer_t *buffer) 491{ 492 if (!hb_object_destroy (buffer)) return; 493 494 hb_unicode_funcs_destroy (buffer->unicode); 495 496 free (buffer->info); 497 free (buffer->pos); 498 499 free (buffer); 500} 501 502hb_bool_t 503hb_buffer_set_user_data (hb_buffer_t *buffer, 504 hb_user_data_key_t *key, 505 void * data, 506 hb_destroy_func_t destroy) 507{ 508 return hb_object_set_user_data (buffer, key, data, destroy); 509} 510 511void * 512hb_buffer_get_user_data (hb_buffer_t *buffer, 513 hb_user_data_key_t *key) 514{ 515 return hb_object_get_user_data (buffer, key); 516} 517 518 519void 520hb_buffer_set_unicode_funcs (hb_buffer_t *buffer, 521 hb_unicode_funcs_t *unicode) 522{ 523 if (unlikely (hb_object_is_inert (buffer))) 524 return; 525 526 if (!unicode) 527 unicode = _hb_buffer_nil.unicode; 528 529 hb_unicode_funcs_reference (unicode); 530 hb_unicode_funcs_destroy (buffer->unicode); 531 buffer->unicode = unicode; 532} 533 534hb_unicode_funcs_t * 535hb_buffer_get_unicode_funcs (hb_buffer_t *buffer) 536{ 537 return buffer->unicode; 538} 539 540void 541hb_buffer_set_direction (hb_buffer_t *buffer, 542 hb_direction_t direction) 543 544{ 545 if (unlikely (hb_object_is_inert (buffer))) 546 return; 547 548 buffer->props.direction = direction; 549} 550 551hb_direction_t 552hb_buffer_get_direction (hb_buffer_t *buffer) 553{ 554 return buffer->props.direction; 555} 556 557void 558hb_buffer_set_script (hb_buffer_t *buffer, 559 hb_script_t script) 560{ 561 if (unlikely (hb_object_is_inert (buffer))) 562 return; 563 564 buffer->props.script = script; 565} 566 567hb_script_t 568hb_buffer_get_script (hb_buffer_t *buffer) 569{ 570 return buffer->props.script; 571} 572 573void 574hb_buffer_set_language (hb_buffer_t *buffer, 575 hb_language_t language) 576{ 577 if (unlikely (hb_object_is_inert (buffer))) 578 return; 579 580 buffer->props.language = language; 581} 582 583hb_language_t 584hb_buffer_get_language (hb_buffer_t *buffer) 585{ 586 return buffer->props.language; 587} 588 589 590void 591hb_buffer_reset (hb_buffer_t *buffer) 592{ 593 buffer->reset (); 594} 595 596hb_bool_t 597hb_buffer_pre_allocate (hb_buffer_t *buffer, unsigned int size) 598{ 599 return buffer->ensure (size); 600} 601 602hb_bool_t 603hb_buffer_allocation_successful (hb_buffer_t *buffer) 604{ 605 return !buffer->in_error; 606} 607 608void 609hb_buffer_add (hb_buffer_t *buffer, 610 hb_codepoint_t codepoint, 611 hb_mask_t mask, 612 unsigned int cluster) 613{ 614 buffer->add (codepoint, mask, cluster); 615} 616 617hb_bool_t 618hb_buffer_set_length (hb_buffer_t *buffer, 619 unsigned int length) 620{ 621 if (!buffer->ensure (length)) 622 return FALSE; 623 624 /* Wipe the new space */ 625 if (length > buffer->len) { 626 memset (buffer->info + buffer->len, 0, sizeof (buffer->info[0]) * (length - buffer->len)); 627 if (buffer->have_positions) 628 memset (buffer->pos + buffer->len, 0, sizeof (buffer->pos[0]) * (length - buffer->len)); 629 } 630 631 buffer->len = length; 632 return TRUE; 633} 634 635unsigned int 636hb_buffer_get_length (hb_buffer_t *buffer) 637{ 638 return buffer->len; 639} 640 641/* Return value valid as long as buffer not modified */ 642hb_glyph_info_t * 643hb_buffer_get_glyph_infos (hb_buffer_t *buffer, 644 unsigned int *length) 645{ 646 if (length) 647 *length = buffer->len; 648 649 return (hb_glyph_info_t *) buffer->info; 650} 651 652/* Return value valid as long as buffer not modified */ 653hb_glyph_position_t * 654hb_buffer_get_glyph_positions (hb_buffer_t *buffer, 655 unsigned int *length) 656{ 657 if (!buffer->have_positions) 658 buffer->clear_positions (); 659 660 if (length) 661 *length = buffer->len; 662 663 return (hb_glyph_position_t *) buffer->pos; 664} 665 666void 667hb_buffer_reverse (hb_buffer_t *buffer) 668{ 669 buffer->reverse (); 670} 671 672void 673hb_buffer_reverse_clusters (hb_buffer_t *buffer) 674{ 675 buffer->reverse_clusters (); 676} 677 678#define ADD_UTF(T) \ 679 HB_STMT_START { \ 680 const T *next = (const T *) text + item_offset; \ 681 const T *end = next + item_length; \ 682 while (next < end) { \ 683 hb_codepoint_t u; \ 684 const T *old_next = next; \ 685 next = UTF_NEXT (next, end, u); \ 686 hb_buffer_add (buffer, u, 1, old_next - (const T *) text); \ 687 } \ 688 } HB_STMT_END 689 690 691#define UTF8_COMPUTE(Char, Mask, Len) \ 692 if (Char < 128) { Len = 1; Mask = 0x7f; } \ 693 else if ((Char & 0xe0) == 0xc0) { Len = 2; Mask = 0x1f; } \ 694 else if ((Char & 0xf0) == 0xe0) { Len = 3; Mask = 0x0f; } \ 695 else if ((Char & 0xf8) == 0xf0) { Len = 4; Mask = 0x07; } \ 696 else Len = 0; 697 698static inline const uint8_t * 699hb_utf8_next (const uint8_t *text, 700 const uint8_t *end, 701 hb_codepoint_t *unicode) 702{ 703 uint8_t c = *text; 704 unsigned int mask, len; 705 706 /* TODO check for overlong sequences? */ 707 708 UTF8_COMPUTE (c, mask, len); 709 if (unlikely (!len || (unsigned int) (end - text) < len)) { 710 *unicode = -1; 711 return text + 1; 712 } else { 713 hb_codepoint_t result; 714 unsigned int i; 715 result = c & mask; 716 for (i = 1; i < len; i++) 717 { 718 if (unlikely ((text[i] & 0xc0) != 0x80)) 719 { 720 *unicode = -1; 721 return text + 1; 722 } 723 result <<= 6; 724 result |= (text[i] & 0x3f); 725 } 726 *unicode = result; 727 return text + len; 728 } 729} 730 731void 732hb_buffer_add_utf8 (hb_buffer_t *buffer, 733 const char *text, 734 unsigned int text_length HB_UNUSED, 735 unsigned int item_offset, 736 unsigned int item_length) 737{ 738#define UTF_NEXT(S, E, U) hb_utf8_next (S, E, &(U)) 739 ADD_UTF (uint8_t); 740#undef UTF_NEXT 741} 742 743static inline const uint16_t * 744hb_utf16_next (const uint16_t *text, 745 const uint16_t *end, 746 hb_codepoint_t *unicode) 747{ 748 uint16_t c = *text++; 749 750 if (unlikely (c >= 0xd800 && c < 0xdc00)) { 751 /* high surrogate */ 752 uint16_t l; 753 if (text < end && ((l = *text), likely (l >= 0xdc00 && l < 0xe000))) { 754 /* low surrogate */ 755 *unicode = ((hb_codepoint_t) ((c) - 0xd800) * 0x400 + (l) - 0xdc00 + 0x10000); 756 text++; 757 } else 758 *unicode = -1; 759 } else 760 *unicode = c; 761 762 return text; 763} 764 765void 766hb_buffer_add_utf16 (hb_buffer_t *buffer, 767 const uint16_t *text, 768 unsigned int text_length HB_UNUSED, 769 unsigned int item_offset, 770 unsigned int item_length) 771{ 772#define UTF_NEXT(S, E, U) hb_utf16_next (S, E, &(U)) 773 ADD_UTF (uint16_t); 774#undef UTF_NEXT 775} 776 777void 778hb_buffer_add_utf32 (hb_buffer_t *buffer, 779 const uint32_t *text, 780 unsigned int text_length HB_UNUSED, 781 unsigned int item_offset, 782 unsigned int item_length) 783{ 784#define UTF_NEXT(S, E, U) ((U) = *(S), (S)+1) 785 ADD_UTF (uint32_t); 786#undef UTF_NEXT 787} 788 789 790