translate_sse.c revision 68e74f1b0110348a44f589739c6edf3fe8e2b368
1/* 2 * Copyright 2003 Tungsten Graphics, inc. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * on the rights to use, copy, modify, merge, publish, distribute, sub 9 * license, and/or sell copies of the Software, and to permit persons to whom 10 * the Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19 * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22 * USE OR OTHER DEALINGS IN THE SOFTWARE. 23 * 24 * Authors: 25 * Keith Whitwell <keithw@tungstengraphics.com> 26 */ 27 28 29#include "pipe/p_config.h" 30#include "pipe/p_compiler.h" 31#include "util/u_memory.h" 32#include "util/u_math.h" 33 34#include "translate.h" 35 36 37#if defined(PIPE_ARCH_X86) 38 39#include "rtasm/rtasm_cpu.h" 40#include "rtasm/rtasm_x86sse.h" 41 42 43#define X 0 44#define Y 1 45#define Z 2 46#define W 3 47 48 49struct translate_buffer { 50 const void *base_ptr; 51 unsigned stride; 52 unsigned max_index; 53}; 54 55struct translate_buffer_varient { 56 unsigned buffer_index; 57 unsigned instance_divisor; 58 void *ptr; /* updated either per vertex or per instance */ 59}; 60 61 62#define ELEMENT_BUFFER_INSTANCE_ID 1001 63 64 65struct translate_sse { 66 struct translate translate; 67 68 struct x86_function linear_func; 69 struct x86_function elt_func; 70 struct x86_function *func; 71 72 boolean loaded_identity; 73 boolean loaded_255; 74 boolean loaded_inv_255; 75 76 float identity[4]; 77 float float_255[4]; 78 float inv_255[4]; 79 80 struct translate_buffer buffer[PIPE_MAX_ATTRIBS]; 81 unsigned nr_buffers; 82 83 /* Multiple buffer varients can map to a single buffer. */ 84 struct translate_buffer_varient buffer_varient[PIPE_MAX_ATTRIBS]; 85 unsigned nr_buffer_varients; 86 87 /* Multiple elements can map to a single buffer varient. */ 88 unsigned element_to_buffer_varient[PIPE_MAX_ATTRIBS]; 89 90 boolean use_instancing; 91 unsigned instance_id; 92 93 /* these are actually known values, but putting them in a struct 94 * like this is helpful to keep them in sync across the file. 95 */ 96 struct x86_reg tmp_EAX; 97 struct x86_reg idx_EBX; /* either start+i or &elt[i] */ 98 struct x86_reg outbuf_ECX; 99 struct x86_reg machine_EDX; 100 struct x86_reg count_ESI; /* decrements to zero */ 101}; 102 103static int get_offset( const void *a, const void *b ) 104{ 105 return (const char *)b - (const char *)a; 106} 107 108 109 110static struct x86_reg get_identity( struct translate_sse *p ) 111{ 112 struct x86_reg reg = x86_make_reg(file_XMM, 6); 113 114 if (!p->loaded_identity) { 115 p->loaded_identity = TRUE; 116 p->identity[0] = 0; 117 p->identity[1] = 0; 118 p->identity[2] = 0; 119 p->identity[3] = 1; 120 121 sse_movups(p->func, reg, 122 x86_make_disp(p->machine_EDX, 123 get_offset(p, &p->identity[0]))); 124 } 125 126 return reg; 127} 128 129static struct x86_reg get_255( struct translate_sse *p ) 130{ 131 struct x86_reg reg = x86_make_reg(file_XMM, 7); 132 133 if (!p->loaded_255) { 134 p->loaded_255 = TRUE; 135 p->float_255[0] = 136 p->float_255[1] = 137 p->float_255[2] = 138 p->float_255[3] = 255.0f; 139 140 sse_movups(p->func, reg, 141 x86_make_disp(p->machine_EDX, 142 get_offset(p, &p->float_255[0]))); 143 } 144 145 return reg; 146} 147 148static struct x86_reg get_inv_255( struct translate_sse *p ) 149{ 150 struct x86_reg reg = x86_make_reg(file_XMM, 5); 151 152 if (!p->loaded_inv_255) { 153 p->loaded_inv_255 = TRUE; 154 p->inv_255[0] = 155 p->inv_255[1] = 156 p->inv_255[2] = 157 p->inv_255[3] = 1.0f / 255.0f; 158 159 sse_movups(p->func, reg, 160 x86_make_disp(p->machine_EDX, 161 get_offset(p, &p->inv_255[0]))); 162 } 163 164 return reg; 165} 166 167 168static void emit_load_R32G32B32A32( struct translate_sse *p, 169 struct x86_reg data, 170 struct x86_reg arg0 ) 171{ 172 sse_movups(p->func, data, arg0); 173} 174 175static void emit_load_R32G32B32( struct translate_sse *p, 176 struct x86_reg data, 177 struct x86_reg arg0 ) 178{ 179 /* Have to jump through some hoops: 180 * 181 * c 0 0 0 182 * c 0 0 1 183 * 0 0 c 1 184 * a b c 1 185 */ 186 sse_movss(p->func, data, x86_make_disp(arg0, 8)); 187 sse_shufps(p->func, data, get_identity(p), SHUF(X,Y,Z,W) ); 188 sse_shufps(p->func, data, data, SHUF(Y,Z,X,W) ); 189 sse_movlps(p->func, data, arg0); 190} 191 192static void emit_load_R32G32( struct translate_sse *p, 193 struct x86_reg data, 194 struct x86_reg arg0 ) 195{ 196 /* 0 0 0 1 197 * a b 0 1 198 */ 199 sse_movups(p->func, data, get_identity(p) ); 200 sse_movlps(p->func, data, arg0); 201} 202 203 204static void emit_load_R32( struct translate_sse *p, 205 struct x86_reg data, 206 struct x86_reg arg0 ) 207{ 208 /* a 0 0 0 209 * a 0 0 1 210 */ 211 sse_movss(p->func, data, arg0); 212 sse_orps(p->func, data, get_identity(p) ); 213} 214 215 216static void emit_load_R8G8B8A8_UNORM( struct translate_sse *p, 217 struct x86_reg data, 218 struct x86_reg src ) 219{ 220 221 /* Load and unpack twice: 222 */ 223 sse_movss(p->func, data, src); 224 sse2_punpcklbw(p->func, data, get_identity(p)); 225 sse2_punpcklbw(p->func, data, get_identity(p)); 226 227 /* Convert to float: 228 */ 229 sse2_cvtdq2ps(p->func, data, data); 230 231 232 /* Scale by 1/255.0 233 */ 234 sse_mulps(p->func, data, get_inv_255(p)); 235} 236 237 238 239 240static void emit_store_R32G32B32A32( struct translate_sse *p, 241 struct x86_reg dest, 242 struct x86_reg dataXMM ) 243{ 244 sse_movups(p->func, dest, dataXMM); 245} 246 247static void emit_store_R32G32B32( struct translate_sse *p, 248 struct x86_reg dest, 249 struct x86_reg dataXMM ) 250{ 251 /* Emit two, shuffle, emit one. 252 */ 253 sse_movlps(p->func, dest, dataXMM); 254 sse_shufps(p->func, dataXMM, dataXMM, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */ 255 sse_movss(p->func, x86_make_disp(dest,8), dataXMM); 256} 257 258static void emit_store_R32G32( struct translate_sse *p, 259 struct x86_reg dest, 260 struct x86_reg dataXMM ) 261{ 262 sse_movlps(p->func, dest, dataXMM); 263} 264 265static void emit_store_R32( struct translate_sse *p, 266 struct x86_reg dest, 267 struct x86_reg dataXMM ) 268{ 269 sse_movss(p->func, dest, dataXMM); 270} 271 272 273 274static void emit_store_R8G8B8A8_UNORM( struct translate_sse *p, 275 struct x86_reg dest, 276 struct x86_reg dataXMM ) 277{ 278 /* Scale by 255.0 279 */ 280 sse_mulps(p->func, dataXMM, get_255(p)); 281 282 /* Pack and emit: 283 */ 284 sse2_cvtps2dq(p->func, dataXMM, dataXMM); 285 sse2_packssdw(p->func, dataXMM, dataXMM); 286 sse2_packuswb(p->func, dataXMM, dataXMM); 287 sse_movss(p->func, dest, dataXMM); 288} 289 290 291 292 293 294/* Extended swizzles? Maybe later. 295 */ 296static void emit_swizzle( struct translate_sse *p, 297 struct x86_reg dest, 298 struct x86_reg src, 299 unsigned char shuffle ) 300{ 301 sse_shufps(p->func, dest, src, shuffle); 302} 303 304 305static boolean translate_attr( struct translate_sse *p, 306 const struct translate_element *a, 307 struct x86_reg srcECX, 308 struct x86_reg dstEAX) 309{ 310 struct x86_reg dataXMM = x86_make_reg(file_XMM, 0); 311 312 switch (a->input_format) { 313 case PIPE_FORMAT_R32_FLOAT: 314 emit_load_R32(p, dataXMM, srcECX); 315 break; 316 case PIPE_FORMAT_R32G32_FLOAT: 317 emit_load_R32G32(p, dataXMM, srcECX); 318 break; 319 case PIPE_FORMAT_R32G32B32_FLOAT: 320 emit_load_R32G32B32(p, dataXMM, srcECX); 321 break; 322 case PIPE_FORMAT_R32G32B32A32_FLOAT: 323 emit_load_R32G32B32A32(p, dataXMM, srcECX); 324 break; 325 case PIPE_FORMAT_B8G8R8A8_UNORM: 326 emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX); 327 emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W)); 328 break; 329 case PIPE_FORMAT_R8G8B8A8_UNORM: 330 emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX); 331 break; 332 default: 333 return FALSE; 334 } 335 336 switch (a->output_format) { 337 case PIPE_FORMAT_R32_FLOAT: 338 emit_store_R32(p, dstEAX, dataXMM); 339 break; 340 case PIPE_FORMAT_R32G32_FLOAT: 341 emit_store_R32G32(p, dstEAX, dataXMM); 342 break; 343 case PIPE_FORMAT_R32G32B32_FLOAT: 344 emit_store_R32G32B32(p, dstEAX, dataXMM); 345 break; 346 case PIPE_FORMAT_R32G32B32A32_FLOAT: 347 emit_store_R32G32B32A32(p, dstEAX, dataXMM); 348 break; 349 case PIPE_FORMAT_B8G8R8A8_UNORM: 350 emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W)); 351 emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM); 352 break; 353 case PIPE_FORMAT_R8G8B8A8_UNORM: 354 emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM); 355 break; 356 default: 357 return FALSE; 358 } 359 360 return TRUE; 361} 362 363 364static boolean init_inputs( struct translate_sse *p, 365 boolean linear ) 366{ 367 unsigned i; 368 struct x86_reg instance_id = x86_make_disp(p->machine_EDX, 369 get_offset(p, &p->instance_id)); 370 371 for (i = 0; i < p->nr_buffer_varients; i++) { 372 struct translate_buffer_varient *varient = &p->buffer_varient[i]; 373 struct translate_buffer *buffer = &p->buffer[varient->buffer_index]; 374 375 if (linear || varient->instance_divisor) { 376 struct x86_reg buf_stride = x86_make_disp(p->machine_EDX, 377 get_offset(p, &buffer->stride)); 378 struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX, 379 get_offset(p, &varient->ptr)); 380 struct x86_reg buf_base_ptr = x86_make_disp(p->machine_EDX, 381 get_offset(p, &buffer->base_ptr)); 382 struct x86_reg elt = p->idx_EBX; 383 struct x86_reg tmp_EAX = p->tmp_EAX; 384 385 /* Calculate pointer to first attrib: 386 * base_ptr + stride * index, where index depends on instance divisor 387 */ 388 if (varient->instance_divisor) { 389 /* Our index is instance ID divided by instance divisor. 390 */ 391 x86_mov(p->func, tmp_EAX, instance_id); 392 393 if (varient->instance_divisor != 1) { 394 struct x86_reg tmp_EDX = p->machine_EDX; 395 struct x86_reg tmp_ECX = p->outbuf_ECX; 396 397 /* TODO: Add x86_shr() to rtasm and use it whenever 398 * instance divisor is power of two. 399 */ 400 401 x86_push(p->func, tmp_EDX); 402 x86_push(p->func, tmp_ECX); 403 x86_xor(p->func, tmp_EDX, tmp_EDX); 404 x86_mov_reg_imm(p->func, tmp_ECX, varient->instance_divisor); 405 x86_div(p->func, tmp_ECX); /* EAX = EDX:EAX / ECX */ 406 x86_pop(p->func, tmp_ECX); 407 x86_pop(p->func, tmp_EDX); 408 } 409 } else { 410 x86_mov(p->func, tmp_EAX, elt); 411 } 412 413 /* 414 * TODO: Respect translate_buffer::max_index. 415 */ 416 417 x86_imul(p->func, tmp_EAX, buf_stride); 418 x86_add(p->func, tmp_EAX, buf_base_ptr); 419 420 421 /* In the linear case, keep the buffer pointer instead of the 422 * index number. 423 */ 424 if (linear && p->nr_buffer_varients == 1) 425 x86_mov(p->func, elt, tmp_EAX); 426 else 427 x86_mov(p->func, buf_ptr, tmp_EAX); 428 } 429 } 430 431 return TRUE; 432} 433 434 435static struct x86_reg get_buffer_ptr( struct translate_sse *p, 436 boolean linear, 437 unsigned var_idx, 438 struct x86_reg elt ) 439{ 440 if (var_idx == ELEMENT_BUFFER_INSTANCE_ID) { 441 return x86_make_disp(p->machine_EDX, 442 get_offset(p, &p->instance_id)); 443 } 444 if (linear && p->nr_buffer_varients == 1) { 445 return p->idx_EBX; 446 } 447 else if (linear || p->buffer_varient[var_idx].instance_divisor) { 448 struct x86_reg ptr = p->tmp_EAX; 449 struct x86_reg buf_ptr = 450 x86_make_disp(p->machine_EDX, 451 get_offset(p, &p->buffer_varient[var_idx].ptr)); 452 453 x86_mov(p->func, ptr, buf_ptr); 454 return ptr; 455 } 456 else { 457 struct x86_reg ptr = p->tmp_EAX; 458 const struct translate_buffer_varient *varient = &p->buffer_varient[var_idx]; 459 460 struct x86_reg buf_stride = 461 x86_make_disp(p->machine_EDX, 462 get_offset(p, &p->buffer[varient->buffer_index].stride)); 463 464 struct x86_reg buf_base_ptr = 465 x86_make_disp(p->machine_EDX, 466 get_offset(p, &p->buffer[varient->buffer_index].base_ptr)); 467 468 469 470 /* Calculate pointer to current attrib: 471 */ 472 x86_mov(p->func, ptr, buf_stride); 473 x86_imul(p->func, ptr, elt); 474 x86_add(p->func, ptr, buf_base_ptr); 475 return ptr; 476 } 477} 478 479 480 481static boolean incr_inputs( struct translate_sse *p, 482 boolean linear ) 483{ 484 if (linear && p->nr_buffer_varients == 1) { 485 struct x86_reg stride = x86_make_disp(p->machine_EDX, 486 get_offset(p, &p->buffer[0].stride)); 487 488 if (p->buffer_varient[0].instance_divisor == 0) { 489 x86_add(p->func, p->idx_EBX, stride); 490 sse_prefetchnta(p->func, x86_make_disp(p->idx_EBX, 192)); 491 } 492 } 493 else if (linear) { 494 unsigned i; 495 496 /* Is this worthwhile?? 497 */ 498 for (i = 0; i < p->nr_buffer_varients; i++) { 499 struct translate_buffer_varient *varient = &p->buffer_varient[i]; 500 struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX, 501 get_offset(p, &varient->ptr)); 502 struct x86_reg buf_stride = x86_make_disp(p->machine_EDX, 503 get_offset(p, &p->buffer[varient->buffer_index].stride)); 504 505 if (varient->instance_divisor == 0) { 506 x86_mov(p->func, p->tmp_EAX, buf_ptr); 507 x86_add(p->func, p->tmp_EAX, buf_stride); 508 if (i == 0) sse_prefetchnta(p->func, x86_make_disp(p->tmp_EAX, 192)); 509 x86_mov(p->func, buf_ptr, p->tmp_EAX); 510 } 511 } 512 } 513 else { 514 x86_lea(p->func, p->idx_EBX, x86_make_disp(p->idx_EBX, 4)); 515 } 516 517 return TRUE; 518} 519 520 521/* Build run( struct translate *machine, 522 * unsigned start, 523 * unsigned count, 524 * void *output_buffer ) 525 * or 526 * run_elts( struct translate *machine, 527 * unsigned *elts, 528 * unsigned count, 529 * void *output_buffer ) 530 * 531 * Lots of hardcoding 532 * 533 * EAX -- pointer to current output vertex 534 * ECX -- pointer to current attribute 535 * 536 */ 537static boolean build_vertex_emit( struct translate_sse *p, 538 struct x86_function *func, 539 boolean linear ) 540{ 541 int fixup, label; 542 unsigned j; 543 544 p->tmp_EAX = x86_make_reg(file_REG32, reg_AX); 545 p->idx_EBX = x86_make_reg(file_REG32, reg_BX); 546 p->outbuf_ECX = x86_make_reg(file_REG32, reg_CX); 547 p->machine_EDX = x86_make_reg(file_REG32, reg_DX); 548 p->count_ESI = x86_make_reg(file_REG32, reg_SI); 549 550 p->func = func; 551 p->loaded_inv_255 = FALSE; 552 p->loaded_255 = FALSE; 553 p->loaded_identity = FALSE; 554 555 x86_init_func(p->func); 556 557 /* Push a few regs? 558 */ 559 x86_push(p->func, p->idx_EBX); 560 x86_push(p->func, p->count_ESI); 561 562 /* Load arguments into regs: 563 */ 564 x86_mov(p->func, p->machine_EDX, x86_fn_arg(p->func, 1)); 565 x86_mov(p->func, p->idx_EBX, x86_fn_arg(p->func, 2)); 566 x86_mov(p->func, p->count_ESI, x86_fn_arg(p->func, 3)); 567 x86_mov(p->func, p->outbuf_ECX, x86_fn_arg(p->func, 5)); 568 569 /* Load instance ID. 570 */ 571 if (p->use_instancing) { 572 x86_mov(p->func, 573 p->tmp_EAX, 574 x86_fn_arg(p->func, 4)); 575 x86_mov(p->func, 576 x86_make_disp(p->machine_EDX, get_offset(p, &p->instance_id)), 577 p->tmp_EAX); 578 } 579 580 /* Get vertex count, compare to zero 581 */ 582 x86_xor(p->func, p->tmp_EAX, p->tmp_EAX); 583 x86_cmp(p->func, p->count_ESI, p->tmp_EAX); 584 fixup = x86_jcc_forward(p->func, cc_E); 585 586 /* always load, needed or not: 587 */ 588 init_inputs(p, linear); 589 590 /* Note address for loop jump 591 */ 592 label = x86_get_label(p->func); 593 { 594 struct x86_reg elt = linear ? p->idx_EBX : x86_deref(p->idx_EBX); 595 int last_varient = -1; 596 struct x86_reg vb; 597 598 for (j = 0; j < p->translate.key.nr_elements; j++) { 599 const struct translate_element *a = &p->translate.key.element[j]; 600 unsigned varient = p->element_to_buffer_varient[j]; 601 602 /* Figure out source pointer address: 603 */ 604 if (varient != last_varient) { 605 last_varient = varient; 606 vb = get_buffer_ptr(p, linear, varient, elt); 607 } 608 609 if (!translate_attr( p, a, 610 x86_make_disp(vb, a->input_offset), 611 x86_make_disp(p->outbuf_ECX, a->output_offset))) 612 return FALSE; 613 } 614 615 /* Next output vertex: 616 */ 617 x86_lea(p->func, 618 p->outbuf_ECX, 619 x86_make_disp(p->outbuf_ECX, 620 p->translate.key.output_stride)); 621 622 /* Incr index 623 */ 624 incr_inputs( p, linear ); 625 } 626 627 /* decr count, loop if not zero 628 */ 629 x86_dec(p->func, p->count_ESI); 630 x86_jcc(p->func, cc_NZ, label); 631 632 /* Exit mmx state? 633 */ 634 if (p->func->need_emms) 635 mmx_emms(p->func); 636 637 /* Land forward jump here: 638 */ 639 x86_fixup_fwd_jump(p->func, fixup); 640 641 /* Pop regs and return 642 */ 643 644 x86_pop(p->func, p->count_ESI); 645 x86_pop(p->func, p->idx_EBX); 646 x86_ret(p->func); 647 648 return TRUE; 649} 650 651 652 653 654 655 656 657static void translate_sse_set_buffer( struct translate *translate, 658 unsigned buf, 659 const void *ptr, 660 unsigned stride, 661 unsigned max_index ) 662{ 663 struct translate_sse *p = (struct translate_sse *)translate; 664 665 if (buf < p->nr_buffers) { 666 p->buffer[buf].base_ptr = (char *)ptr; 667 p->buffer[buf].stride = stride; 668 p->buffer[buf].max_index = max_index; 669 } 670 671 if (0) debug_printf("%s %d/%d: %p %d\n", 672 __FUNCTION__, buf, 673 p->nr_buffers, 674 ptr, stride); 675} 676 677 678static void translate_sse_release( struct translate *translate ) 679{ 680 struct translate_sse *p = (struct translate_sse *)translate; 681 682 x86_release_func( &p->linear_func ); 683 x86_release_func( &p->elt_func ); 684 685 FREE(p); 686} 687 688 689struct translate *translate_sse2_create( const struct translate_key *key ) 690{ 691 struct translate_sse *p = NULL; 692 unsigned i; 693 694 if (!rtasm_cpu_has_sse() || !rtasm_cpu_has_sse2()) 695 goto fail; 696 697 p = CALLOC_STRUCT( translate_sse ); 698 if (p == NULL) 699 goto fail; 700 701 p->translate.key = *key; 702 p->translate.release = translate_sse_release; 703 p->translate.set_buffer = translate_sse_set_buffer; 704 705 for (i = 0; i < key->nr_elements; i++) { 706 if (key->element[i].type == TRANSLATE_ELEMENT_NORMAL) { 707 unsigned j; 708 709 p->nr_buffers = MAX2(p->nr_buffers, key->element[i].input_buffer + 1); 710 711 if (key->element[i].instance_divisor) { 712 p->use_instancing = TRUE; 713 } 714 715 /* 716 * Map vertex element to vertex buffer varient. 717 */ 718 for (j = 0; j < p->nr_buffer_varients; j++) { 719 if (p->buffer_varient[j].buffer_index == key->element[i].input_buffer && 720 p->buffer_varient[j].instance_divisor == key->element[i].instance_divisor) { 721 break; 722 } 723 } 724 if (j == p->nr_buffer_varients) { 725 p->buffer_varient[j].buffer_index = key->element[i].input_buffer; 726 p->buffer_varient[j].instance_divisor = key->element[i].instance_divisor; 727 p->nr_buffer_varients++; 728 } 729 p->element_to_buffer_varient[i] = j; 730 } else { 731 assert(key->element[i].type == TRANSLATE_ELEMENT_INSTANCE_ID); 732 733 p->element_to_buffer_varient[i] = ELEMENT_BUFFER_INSTANCE_ID; 734 } 735 } 736 737 if (0) debug_printf("nr_buffers: %d\n", p->nr_buffers); 738 739 if (!build_vertex_emit(p, &p->linear_func, TRUE)) 740 goto fail; 741 742 if (!build_vertex_emit(p, &p->elt_func, FALSE)) 743 goto fail; 744 745 p->translate.run = (void*)x86_get_func(&p->linear_func); 746 if (p->translate.run == NULL) 747 goto fail; 748 749 p->translate.run_elts = (void*)x86_get_func(&p->elt_func); 750 if (p->translate.run_elts == NULL) 751 goto fail; 752 753 return &p->translate; 754 755 fail: 756 if (p) 757 translate_sse_release( &p->translate ); 758 759 return NULL; 760} 761 762 763 764#else 765 766struct translate *translate_sse2_create( const struct translate_key *key ) 767{ 768 return NULL; 769} 770 771#endif 772