translate_sse.c revision 4a4e29a9ab96d44fca9bb25064e12715aac85cbd
1/* 2 * Copyright 2003 Tungsten Graphics, inc. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * on the rights to use, copy, modify, merge, publish, distribute, sub 9 * license, and/or sell copies of the Software, and to permit persons to whom 10 * the Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19 * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22 * USE OR OTHER DEALINGS IN THE SOFTWARE. 23 * 24 * Authors: 25 * Keith Whitwell <keithw@tungstengraphics.com> 26 */ 27 28 29#include "pipe/p_config.h" 30#include "pipe/p_compiler.h" 31#include "util/u_memory.h" 32#include "util/u_math.h" 33 34#include "translate.h" 35 36 37#if defined(PIPE_ARCH_X86) 38 39#include "rtasm/rtasm_cpu.h" 40#include "rtasm/rtasm_x86sse.h" 41 42 43#define X 0 44#define Y 1 45#define Z 2 46#define W 3 47 48 49struct translate_buffer { 50 const void *base_ptr; 51 unsigned stride; 52 unsigned max_index; 53}; 54 55struct translate_buffer_varient { 56 unsigned buffer_index; 57 unsigned instance_divisor; 58 void *ptr; /* updated either per vertex or per instance */ 59}; 60 61 62#define ELEMENT_BUFFER_INSTANCE_ID 1001 63 64 65struct translate_sse { 66 struct translate translate; 67 68 struct x86_function linear_func; 69 struct x86_function elt_func; 70 struct x86_function elt16_func; 71 struct x86_function elt8_func; 72 struct x86_function *func; 73 74 boolean loaded_identity; 75 boolean loaded_255; 76 boolean loaded_inv_255; 77 78 float identity[4]; 79 float float_255[4]; 80 float inv_255[4]; 81 82 struct translate_buffer buffer[PIPE_MAX_ATTRIBS]; 83 unsigned nr_buffers; 84 85 /* Multiple buffer varients can map to a single buffer. */ 86 struct translate_buffer_varient buffer_varient[PIPE_MAX_ATTRIBS]; 87 unsigned nr_buffer_varients; 88 89 /* Multiple elements can map to a single buffer varient. */ 90 unsigned element_to_buffer_varient[PIPE_MAX_ATTRIBS]; 91 92 boolean use_instancing; 93 unsigned instance_id; 94 95 /* these are actually known values, but putting them in a struct 96 * like this is helpful to keep them in sync across the file. 97 */ 98 struct x86_reg tmp_EAX; 99 struct x86_reg idx_EBX; /* either start+i or &elt[i] */ 100 struct x86_reg outbuf_ECX; 101 struct x86_reg machine_EDX; 102 struct x86_reg count_ESI; /* decrements to zero */ 103}; 104 105static int get_offset( const void *a, const void *b ) 106{ 107 return (const char *)b - (const char *)a; 108} 109 110 111 112static struct x86_reg get_identity( struct translate_sse *p ) 113{ 114 struct x86_reg reg = x86_make_reg(file_XMM, 6); 115 116 if (!p->loaded_identity) { 117 p->loaded_identity = TRUE; 118 p->identity[0] = 0; 119 p->identity[1] = 0; 120 p->identity[2] = 0; 121 p->identity[3] = 1; 122 123 sse_movups(p->func, reg, 124 x86_make_disp(p->machine_EDX, 125 get_offset(p, &p->identity[0]))); 126 } 127 128 return reg; 129} 130 131static struct x86_reg get_255( struct translate_sse *p ) 132{ 133 struct x86_reg reg = x86_make_reg(file_XMM, 7); 134 135 if (!p->loaded_255) { 136 p->loaded_255 = TRUE; 137 p->float_255[0] = 138 p->float_255[1] = 139 p->float_255[2] = 140 p->float_255[3] = 255.0f; 141 142 sse_movups(p->func, reg, 143 x86_make_disp(p->machine_EDX, 144 get_offset(p, &p->float_255[0]))); 145 } 146 147 return reg; 148} 149 150static struct x86_reg get_inv_255( struct translate_sse *p ) 151{ 152 struct x86_reg reg = x86_make_reg(file_XMM, 5); 153 154 if (!p->loaded_inv_255) { 155 p->loaded_inv_255 = TRUE; 156 p->inv_255[0] = 157 p->inv_255[1] = 158 p->inv_255[2] = 159 p->inv_255[3] = 1.0f / 255.0f; 160 161 sse_movups(p->func, reg, 162 x86_make_disp(p->machine_EDX, 163 get_offset(p, &p->inv_255[0]))); 164 } 165 166 return reg; 167} 168 169 170static void emit_load_R32G32B32A32( struct translate_sse *p, 171 struct x86_reg data, 172 struct x86_reg arg0 ) 173{ 174 sse_movups(p->func, data, arg0); 175} 176 177static void emit_load_R32G32B32( struct translate_sse *p, 178 struct x86_reg data, 179 struct x86_reg arg0 ) 180{ 181 /* Have to jump through some hoops: 182 * 183 * c 0 0 0 184 * c 0 0 1 185 * 0 0 c 1 186 * a b c 1 187 */ 188 sse_movss(p->func, data, x86_make_disp(arg0, 8)); 189 sse_shufps(p->func, data, get_identity(p), SHUF(X,Y,Z,W) ); 190 sse_shufps(p->func, data, data, SHUF(Y,Z,X,W) ); 191 sse_movlps(p->func, data, arg0); 192} 193 194static void emit_load_R32G32( struct translate_sse *p, 195 struct x86_reg data, 196 struct x86_reg arg0 ) 197{ 198 /* 0 0 0 1 199 * a b 0 1 200 */ 201 sse_movups(p->func, data, get_identity(p) ); 202 sse_movlps(p->func, data, arg0); 203} 204 205 206static void emit_load_R32( struct translate_sse *p, 207 struct x86_reg data, 208 struct x86_reg arg0 ) 209{ 210 /* a 0 0 0 211 * a 0 0 1 212 */ 213 sse_movss(p->func, data, arg0); 214 sse_orps(p->func, data, get_identity(p) ); 215} 216 217 218static void emit_load_R8G8B8A8_UNORM( struct translate_sse *p, 219 struct x86_reg data, 220 struct x86_reg src ) 221{ 222 223 /* Load and unpack twice: 224 */ 225 sse_movss(p->func, data, src); 226 sse2_punpcklbw(p->func, data, get_identity(p)); 227 sse2_punpcklbw(p->func, data, get_identity(p)); 228 229 /* Convert to float: 230 */ 231 sse2_cvtdq2ps(p->func, data, data); 232 233 234 /* Scale by 1/255.0 235 */ 236 sse_mulps(p->func, data, get_inv_255(p)); 237} 238 239 240 241 242static void emit_store_R32G32B32A32( struct translate_sse *p, 243 struct x86_reg dest, 244 struct x86_reg dataXMM ) 245{ 246 sse_movups(p->func, dest, dataXMM); 247} 248 249static void emit_store_R32G32B32( struct translate_sse *p, 250 struct x86_reg dest, 251 struct x86_reg dataXMM ) 252{ 253 /* Emit two, shuffle, emit one. 254 */ 255 sse_movlps(p->func, dest, dataXMM); 256 sse_shufps(p->func, dataXMM, dataXMM, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */ 257 sse_movss(p->func, x86_make_disp(dest,8), dataXMM); 258} 259 260static void emit_store_R32G32( struct translate_sse *p, 261 struct x86_reg dest, 262 struct x86_reg dataXMM ) 263{ 264 sse_movlps(p->func, dest, dataXMM); 265} 266 267static void emit_store_R32( struct translate_sse *p, 268 struct x86_reg dest, 269 struct x86_reg dataXMM ) 270{ 271 sse_movss(p->func, dest, dataXMM); 272} 273 274 275 276static void emit_store_R8G8B8A8_UNORM( struct translate_sse *p, 277 struct x86_reg dest, 278 struct x86_reg dataXMM ) 279{ 280 /* Scale by 255.0 281 */ 282 sse_mulps(p->func, dataXMM, get_255(p)); 283 284 /* Pack and emit: 285 */ 286 sse2_cvtps2dq(p->func, dataXMM, dataXMM); 287 sse2_packssdw(p->func, dataXMM, dataXMM); 288 sse2_packuswb(p->func, dataXMM, dataXMM); 289 sse_movss(p->func, dest, dataXMM); 290} 291 292 293 294 295 296/* Extended swizzles? Maybe later. 297 */ 298static void emit_swizzle( struct translate_sse *p, 299 struct x86_reg dest, 300 struct x86_reg src, 301 unsigned char shuffle ) 302{ 303 sse_shufps(p->func, dest, src, shuffle); 304} 305 306 307static boolean translate_attr( struct translate_sse *p, 308 const struct translate_element *a, 309 struct x86_reg srcECX, 310 struct x86_reg dstEAX) 311{ 312 struct x86_reg dataXMM = x86_make_reg(file_XMM, 0); 313 314 switch (a->input_format) { 315 case PIPE_FORMAT_R32_FLOAT: 316 emit_load_R32(p, dataXMM, srcECX); 317 break; 318 case PIPE_FORMAT_R32G32_FLOAT: 319 emit_load_R32G32(p, dataXMM, srcECX); 320 break; 321 case PIPE_FORMAT_R32G32B32_FLOAT: 322 emit_load_R32G32B32(p, dataXMM, srcECX); 323 break; 324 case PIPE_FORMAT_R32G32B32A32_FLOAT: 325 emit_load_R32G32B32A32(p, dataXMM, srcECX); 326 break; 327 case PIPE_FORMAT_B8G8R8A8_UNORM: 328 emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX); 329 emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W)); 330 break; 331 case PIPE_FORMAT_R8G8B8A8_UNORM: 332 emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX); 333 break; 334 default: 335 return FALSE; 336 } 337 338 switch (a->output_format) { 339 case PIPE_FORMAT_R32_FLOAT: 340 emit_store_R32(p, dstEAX, dataXMM); 341 break; 342 case PIPE_FORMAT_R32G32_FLOAT: 343 emit_store_R32G32(p, dstEAX, dataXMM); 344 break; 345 case PIPE_FORMAT_R32G32B32_FLOAT: 346 emit_store_R32G32B32(p, dstEAX, dataXMM); 347 break; 348 case PIPE_FORMAT_R32G32B32A32_FLOAT: 349 emit_store_R32G32B32A32(p, dstEAX, dataXMM); 350 break; 351 case PIPE_FORMAT_B8G8R8A8_UNORM: 352 emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W)); 353 emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM); 354 break; 355 case PIPE_FORMAT_R8G8B8A8_UNORM: 356 emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM); 357 break; 358 default: 359 return FALSE; 360 } 361 362 return TRUE; 363} 364 365 366static boolean init_inputs( struct translate_sse *p, 367 unsigned index_size ) 368{ 369 unsigned i; 370 struct x86_reg instance_id = x86_make_disp(p->machine_EDX, 371 get_offset(p, &p->instance_id)); 372 373 for (i = 0; i < p->nr_buffer_varients; i++) { 374 struct translate_buffer_varient *varient = &p->buffer_varient[i]; 375 struct translate_buffer *buffer = &p->buffer[varient->buffer_index]; 376 377 if (!index_size || varient->instance_divisor) { 378 struct x86_reg buf_stride = x86_make_disp(p->machine_EDX, 379 get_offset(p, &buffer->stride)); 380 struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX, 381 get_offset(p, &varient->ptr)); 382 struct x86_reg buf_base_ptr = x86_make_disp(p->machine_EDX, 383 get_offset(p, &buffer->base_ptr)); 384 struct x86_reg elt = p->idx_EBX; 385 struct x86_reg tmp_EAX = p->tmp_EAX; 386 387 /* Calculate pointer to first attrib: 388 * base_ptr + stride * index, where index depends on instance divisor 389 */ 390 if (varient->instance_divisor) { 391 /* Our index is instance ID divided by instance divisor. 392 */ 393 x86_mov(p->func, tmp_EAX, instance_id); 394 395 if (varient->instance_divisor != 1) { 396 struct x86_reg tmp_EDX = p->machine_EDX; 397 struct x86_reg tmp_ECX = p->outbuf_ECX; 398 399 /* TODO: Add x86_shr() to rtasm and use it whenever 400 * instance divisor is power of two. 401 */ 402 403 x86_push(p->func, tmp_EDX); 404 x86_push(p->func, tmp_ECX); 405 x86_xor(p->func, tmp_EDX, tmp_EDX); 406 x86_mov_reg_imm(p->func, tmp_ECX, varient->instance_divisor); 407 x86_div(p->func, tmp_ECX); /* EAX = EDX:EAX / ECX */ 408 x86_pop(p->func, tmp_ECX); 409 x86_pop(p->func, tmp_EDX); 410 } 411 } else { 412 x86_mov(p->func, tmp_EAX, elt); 413 } 414 415 /* 416 * TODO: Respect translate_buffer::max_index. 417 */ 418 419 x86_imul(p->func, tmp_EAX, buf_stride); 420 x86_add(p->func, tmp_EAX, buf_base_ptr); 421 422 423 /* In the linear case, keep the buffer pointer instead of the 424 * index number. 425 */ 426 if (!index_size && p->nr_buffer_varients == 1) 427 x86_mov(p->func, elt, tmp_EAX); 428 else 429 x86_mov(p->func, buf_ptr, tmp_EAX); 430 } 431 } 432 433 return TRUE; 434} 435 436 437static struct x86_reg get_buffer_ptr( struct translate_sse *p, 438 unsigned index_size, 439 unsigned var_idx, 440 struct x86_reg elt ) 441{ 442 if (var_idx == ELEMENT_BUFFER_INSTANCE_ID) { 443 return x86_make_disp(p->machine_EDX, 444 get_offset(p, &p->instance_id)); 445 } 446 if (!index_size && p->nr_buffer_varients == 1) { 447 return p->idx_EBX; 448 } 449 else if (!index_size || p->buffer_varient[var_idx].instance_divisor) { 450 struct x86_reg ptr = p->tmp_EAX; 451 struct x86_reg buf_ptr = 452 x86_make_disp(p->machine_EDX, 453 get_offset(p, &p->buffer_varient[var_idx].ptr)); 454 455 x86_mov(p->func, ptr, buf_ptr); 456 return ptr; 457 } 458 else { 459 struct x86_reg ptr = p->tmp_EAX; 460 const struct translate_buffer_varient *varient = &p->buffer_varient[var_idx]; 461 462 struct x86_reg buf_stride = 463 x86_make_disp(p->machine_EDX, 464 get_offset(p, &p->buffer[varient->buffer_index].stride)); 465 466 struct x86_reg buf_base_ptr = 467 x86_make_disp(p->machine_EDX, 468 get_offset(p, &p->buffer[varient->buffer_index].base_ptr)); 469 470 471 472 /* Calculate pointer to current attrib: 473 */ 474 switch(index_size) 475 { 476 case 1: 477 x86_movzx8(p->func, ptr, elt); 478 break; 479 case 2: 480 x86_movzx16(p->func, ptr, elt); 481 break; 482 case 4: 483 x86_mov(p->func, ptr, elt); 484 break; 485 } 486 x86_imul(p->func, ptr, buf_stride); 487 x86_add(p->func, ptr, buf_base_ptr); 488 return ptr; 489 } 490} 491 492 493 494static boolean incr_inputs( struct translate_sse *p, 495 unsigned index_size ) 496{ 497 if (!index_size && p->nr_buffer_varients == 1) { 498 struct x86_reg stride = x86_make_disp(p->machine_EDX, 499 get_offset(p, &p->buffer[0].stride)); 500 501 if (p->buffer_varient[0].instance_divisor == 0) { 502 x86_add(p->func, p->idx_EBX, stride); 503 sse_prefetchnta(p->func, x86_make_disp(p->idx_EBX, 192)); 504 } 505 } 506 else if (!index_size) { 507 unsigned i; 508 509 /* Is this worthwhile?? 510 */ 511 for (i = 0; i < p->nr_buffer_varients; i++) { 512 struct translate_buffer_varient *varient = &p->buffer_varient[i]; 513 struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX, 514 get_offset(p, &varient->ptr)); 515 struct x86_reg buf_stride = x86_make_disp(p->machine_EDX, 516 get_offset(p, &p->buffer[varient->buffer_index].stride)); 517 518 if (varient->instance_divisor == 0) { 519 x86_mov(p->func, p->tmp_EAX, buf_ptr); 520 x86_add(p->func, p->tmp_EAX, buf_stride); 521 if (i == 0) sse_prefetchnta(p->func, x86_make_disp(p->tmp_EAX, 192)); 522 x86_mov(p->func, buf_ptr, p->tmp_EAX); 523 } 524 } 525 } 526 else { 527 x86_lea(p->func, p->idx_EBX, x86_make_disp(p->idx_EBX, index_size)); 528 } 529 530 return TRUE; 531} 532 533 534/* Build run( struct translate *machine, 535 * unsigned start, 536 * unsigned count, 537 * void *output_buffer ) 538 * or 539 * run_elts( struct translate *machine, 540 * unsigned *elts, 541 * unsigned count, 542 * void *output_buffer ) 543 * 544 * Lots of hardcoding 545 * 546 * EAX -- pointer to current output vertex 547 * ECX -- pointer to current attribute 548 * 549 */ 550static boolean build_vertex_emit( struct translate_sse *p, 551 struct x86_function *func, 552 unsigned index_size ) 553{ 554 int fixup, label; 555 unsigned j; 556 557 p->tmp_EAX = x86_make_reg(file_REG32, reg_AX); 558 p->idx_EBX = x86_make_reg(file_REG32, reg_BX); 559 p->outbuf_ECX = x86_make_reg(file_REG32, reg_CX); 560 p->machine_EDX = x86_make_reg(file_REG32, reg_DX); 561 p->count_ESI = x86_make_reg(file_REG32, reg_SI); 562 563 p->func = func; 564 p->loaded_inv_255 = FALSE; 565 p->loaded_255 = FALSE; 566 p->loaded_identity = FALSE; 567 568 x86_init_func(p->func); 569 570 /* Push a few regs? 571 */ 572 x86_push(p->func, p->idx_EBX); 573 x86_push(p->func, p->count_ESI); 574 575 /* Load arguments into regs: 576 */ 577 x86_mov(p->func, p->machine_EDX, x86_fn_arg(p->func, 1)); 578 x86_mov(p->func, p->idx_EBX, x86_fn_arg(p->func, 2)); 579 x86_mov(p->func, p->count_ESI, x86_fn_arg(p->func, 3)); 580 x86_mov(p->func, p->outbuf_ECX, x86_fn_arg(p->func, 5)); 581 582 /* Load instance ID. 583 */ 584 if (p->use_instancing) { 585 x86_mov(p->func, 586 p->tmp_EAX, 587 x86_fn_arg(p->func, 4)); 588 x86_mov(p->func, 589 x86_make_disp(p->machine_EDX, get_offset(p, &p->instance_id)), 590 p->tmp_EAX); 591 } 592 593 /* Get vertex count, compare to zero 594 */ 595 x86_xor(p->func, p->tmp_EAX, p->tmp_EAX); 596 x86_cmp(p->func, p->count_ESI, p->tmp_EAX); 597 fixup = x86_jcc_forward(p->func, cc_E); 598 599 /* always load, needed or not: 600 */ 601 init_inputs(p, index_size); 602 603 /* Note address for loop jump 604 */ 605 label = x86_get_label(p->func); 606 { 607 struct x86_reg elt = !index_size ? p->idx_EBX : x86_deref(p->idx_EBX); 608 int last_varient = -1; 609 struct x86_reg vb; 610 611 for (j = 0; j < p->translate.key.nr_elements; j++) { 612 const struct translate_element *a = &p->translate.key.element[j]; 613 unsigned varient = p->element_to_buffer_varient[j]; 614 615 /* Figure out source pointer address: 616 */ 617 if (varient != last_varient) { 618 last_varient = varient; 619 vb = get_buffer_ptr(p, index_size, varient, elt); 620 } 621 622 if (!translate_attr( p, a, 623 x86_make_disp(vb, a->input_offset), 624 x86_make_disp(p->outbuf_ECX, a->output_offset))) 625 return FALSE; 626 } 627 628 /* Next output vertex: 629 */ 630 x86_lea(p->func, 631 p->outbuf_ECX, 632 x86_make_disp(p->outbuf_ECX, 633 p->translate.key.output_stride)); 634 635 /* Incr index 636 */ 637 incr_inputs( p, index_size ); 638 } 639 640 /* decr count, loop if not zero 641 */ 642 x86_dec(p->func, p->count_ESI); 643 x86_jcc(p->func, cc_NZ, label); 644 645 /* Exit mmx state? 646 */ 647 if (p->func->need_emms) 648 mmx_emms(p->func); 649 650 /* Land forward jump here: 651 */ 652 x86_fixup_fwd_jump(p->func, fixup); 653 654 /* Pop regs and return 655 */ 656 657 x86_pop(p->func, p->count_ESI); 658 x86_pop(p->func, p->idx_EBX); 659 x86_ret(p->func); 660 661 return TRUE; 662} 663 664 665 666 667 668 669 670static void translate_sse_set_buffer( struct translate *translate, 671 unsigned buf, 672 const void *ptr, 673 unsigned stride, 674 unsigned max_index ) 675{ 676 struct translate_sse *p = (struct translate_sse *)translate; 677 678 if (buf < p->nr_buffers) { 679 p->buffer[buf].base_ptr = (char *)ptr; 680 p->buffer[buf].stride = stride; 681 p->buffer[buf].max_index = max_index; 682 } 683 684 if (0) debug_printf("%s %d/%d: %p %d\n", 685 __FUNCTION__, buf, 686 p->nr_buffers, 687 ptr, stride); 688} 689 690 691static void translate_sse_release( struct translate *translate ) 692{ 693 struct translate_sse *p = (struct translate_sse *)translate; 694 695 x86_release_func( &p->linear_func ); 696 x86_release_func( &p->elt_func ); 697 698 FREE(p); 699} 700 701 702struct translate *translate_sse2_create( const struct translate_key *key ) 703{ 704 struct translate_sse *p = NULL; 705 unsigned i; 706 707 if (!rtasm_cpu_has_sse() || !rtasm_cpu_has_sse2()) 708 goto fail; 709 710 p = CALLOC_STRUCT( translate_sse ); 711 if (p == NULL) 712 goto fail; 713 714 p->translate.key = *key; 715 p->translate.release = translate_sse_release; 716 p->translate.set_buffer = translate_sse_set_buffer; 717 718 for (i = 0; i < key->nr_elements; i++) { 719 if (key->element[i].type == TRANSLATE_ELEMENT_NORMAL) { 720 unsigned j; 721 722 p->nr_buffers = MAX2(p->nr_buffers, key->element[i].input_buffer + 1); 723 724 if (key->element[i].instance_divisor) { 725 p->use_instancing = TRUE; 726 } 727 728 /* 729 * Map vertex element to vertex buffer varient. 730 */ 731 for (j = 0; j < p->nr_buffer_varients; j++) { 732 if (p->buffer_varient[j].buffer_index == key->element[i].input_buffer && 733 p->buffer_varient[j].instance_divisor == key->element[i].instance_divisor) { 734 break; 735 } 736 } 737 if (j == p->nr_buffer_varients) { 738 p->buffer_varient[j].buffer_index = key->element[i].input_buffer; 739 p->buffer_varient[j].instance_divisor = key->element[i].instance_divisor; 740 p->nr_buffer_varients++; 741 } 742 p->element_to_buffer_varient[i] = j; 743 } else { 744 assert(key->element[i].type == TRANSLATE_ELEMENT_INSTANCE_ID); 745 746 p->element_to_buffer_varient[i] = ELEMENT_BUFFER_INSTANCE_ID; 747 } 748 } 749 750 if (0) debug_printf("nr_buffers: %d\n", p->nr_buffers); 751 752 if (!build_vertex_emit(p, &p->linear_func, 0)) 753 goto fail; 754 755 if (!build_vertex_emit(p, &p->elt_func, 4)) 756 goto fail; 757 758 if (!build_vertex_emit(p, &p->elt16_func, 2)) 759 goto fail; 760 761 if (!build_vertex_emit(p, &p->elt8_func, 1)) 762 goto fail; 763 764 p->translate.run = (void*)x86_get_func(&p->linear_func); 765 if (p->translate.run == NULL) 766 goto fail; 767 768 p->translate.run_elts = (void*)x86_get_func(&p->elt_func); 769 if (p->translate.run_elts == NULL) 770 goto fail; 771 772 p->translate.run_elts16 = (void*)x86_get_func(&p->elt16_func); 773 if (p->translate.run_elts16 == NULL) 774 goto fail; 775 776 p->translate.run_elts8 = (void*)x86_get_func(&p->elt8_func); 777 if (p->translate.run_elts8 == NULL) 778 goto fail; 779 780 return &p->translate; 781 782 fail: 783 if (p) 784 translate_sse_release( &p->translate ); 785 786 return NULL; 787} 788 789 790 791#else 792 793struct translate *translate_sse2_create( const struct translate_key *key ) 794{ 795 return NULL; 796} 797 798#endif 799