/* draw_llvm.c revision 154d91cad907ba5643fb3e39717a8f7c5a76049a */
1/************************************************************************** 2 * 3 * Copyright 2010 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
25 * 26 **************************************************************************/ 27 28#include "draw_llvm.h" 29 30#include "draw_context.h" 31#include "draw_vs.h" 32 33#include "gallivm/lp_bld_arit.h" 34#include "gallivm/lp_bld_logic.h" 35#include "gallivm/lp_bld_const.h" 36#include "gallivm/lp_bld_swizzle.h" 37#include "gallivm/lp_bld_struct.h" 38#include "gallivm/lp_bld_type.h" 39#include "gallivm/lp_bld_flow.h" 40#include "gallivm/lp_bld_debug.h" 41#include "gallivm/lp_bld_tgsi.h" 42#include "gallivm/lp_bld_printf.h" 43#include "gallivm/lp_bld_intr.h" 44#include "gallivm/lp_bld_init.h" 45 46#include "tgsi/tgsi_exec.h" 47#include "tgsi/tgsi_dump.h" 48 49#include "util/u_math.h" 50#include "util/u_pointer.h" 51#include "util/u_string.h" 52 53#include <llvm-c/Transforms/Scalar.h> 54 55#define DEBUG_STORE 0 56 57/* generates the draw jit function */ 58static void 59draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var); 60static void 61draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *var); 62 63static void 64init_globals(struct draw_llvm *llvm) 65{ 66 LLVMTypeRef texture_type; 67 68 /* struct draw_jit_texture */ 69 { 70 LLVMTypeRef elem_types[DRAW_JIT_TEXTURE_NUM_FIELDS]; 71 72 elem_types[DRAW_JIT_TEXTURE_WIDTH] = LLVMInt32Type(); 73 elem_types[DRAW_JIT_TEXTURE_HEIGHT] = LLVMInt32Type(); 74 elem_types[DRAW_JIT_TEXTURE_DEPTH] = LLVMInt32Type(); 75 elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32Type(); 76 elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] = 77 LLVMArrayType(LLVMInt32Type(), PIPE_MAX_TEXTURE_LEVELS); 78 elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] = 79 LLVMArrayType(LLVMInt32Type(), PIPE_MAX_TEXTURE_LEVELS); 80 elem_types[DRAW_JIT_TEXTURE_DATA] = 81 LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0), 82 PIPE_MAX_TEXTURE_LEVELS); 83 elem_types[DRAW_JIT_TEXTURE_MIN_LOD] = LLVMFloatType(); 84 elem_types[DRAW_JIT_TEXTURE_MAX_LOD] = LLVMFloatType(); 85 elem_types[DRAW_JIT_TEXTURE_LOD_BIAS] = LLVMFloatType(); 86 
elem_types[DRAW_JIT_TEXTURE_BORDER_COLOR] = 87 LLVMArrayType(LLVMFloatType(), 4); 88 89 texture_type = LLVMStructType(elem_types, Elements(elem_types), 0); 90 91 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width, 92 llvm->target, texture_type, 93 DRAW_JIT_TEXTURE_WIDTH); 94 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height, 95 llvm->target, texture_type, 96 DRAW_JIT_TEXTURE_HEIGHT); 97 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, depth, 98 llvm->target, texture_type, 99 DRAW_JIT_TEXTURE_DEPTH); 100 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, last_level, 101 llvm->target, texture_type, 102 DRAW_JIT_TEXTURE_LAST_LEVEL); 103 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, row_stride, 104 llvm->target, texture_type, 105 DRAW_JIT_TEXTURE_ROW_STRIDE); 106 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, img_stride, 107 llvm->target, texture_type, 108 DRAW_JIT_TEXTURE_IMG_STRIDE); 109 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, data, 110 llvm->target, texture_type, 111 DRAW_JIT_TEXTURE_DATA); 112 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, min_lod, 113 llvm->target, texture_type, 114 DRAW_JIT_TEXTURE_MIN_LOD); 115 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, max_lod, 116 llvm->target, texture_type, 117 DRAW_JIT_TEXTURE_MAX_LOD); 118 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, lod_bias, 119 llvm->target, texture_type, 120 DRAW_JIT_TEXTURE_LOD_BIAS); 121 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, border_color, 122 llvm->target, texture_type, 123 DRAW_JIT_TEXTURE_BORDER_COLOR); 124 LP_CHECK_STRUCT_SIZE(struct draw_jit_texture, 125 llvm->target, texture_type); 126 127 LLVMAddTypeName(llvm->module, "texture", texture_type); 128 } 129 130 131 /* struct draw_jit_context */ 132 { 133 LLVMTypeRef elem_types[5]; 134 LLVMTypeRef context_type; 135 136 elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */ 137 elem_types[1] = LLVMPointerType(LLVMFloatType(), 0); /* gs_constants */ 138 elem_types[2] = 
LLVMPointerType(LLVMArrayType(LLVMArrayType(LLVMFloatType(), 4), 12), 0); /* planes */ 139 elem_types[3] = LLVMPointerType(LLVMFloatType(), 0); /* viewport */ 140 elem_types[4] = LLVMArrayType(texture_type, 141 PIPE_MAX_VERTEX_SAMPLERS); /* textures */ 142 143 context_type = LLVMStructType(elem_types, Elements(elem_types), 0); 144 145 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants, 146 llvm->target, context_type, 0); 147 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, gs_constants, 148 llvm->target, context_type, 1); 149 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, planes, 150 llvm->target, context_type, 2); 151 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures, 152 llvm->target, context_type, 153 DRAW_JIT_CTX_TEXTURES); 154 LP_CHECK_STRUCT_SIZE(struct draw_jit_context, 155 llvm->target, context_type); 156 157 LLVMAddTypeName(llvm->module, "draw_jit_context", context_type); 158 159 llvm->context_ptr_type = LLVMPointerType(context_type, 0); 160 } 161 { 162 LLVMTypeRef buffer_ptr = LLVMPointerType(LLVMIntType(8), 0); 163 llvm->buffer_ptr_type = LLVMPointerType(buffer_ptr, 0); 164 } 165 /* struct pipe_vertex_buffer */ 166 { 167 LLVMTypeRef elem_types[4]; 168 LLVMTypeRef vb_type; 169 170 elem_types[0] = LLVMInt32Type(); 171 elem_types[1] = LLVMInt32Type(); 172 elem_types[2] = LLVMInt32Type(); 173 elem_types[3] = LLVMPointerType(LLVMOpaqueType(), 0); /* vs_constants */ 174 175 vb_type = LLVMStructType(elem_types, Elements(elem_types), 0); 176 177 LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride, 178 llvm->target, vb_type, 0); 179 LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset, 180 llvm->target, vb_type, 2); 181 LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer, 182 llvm->target, vb_type); 183 184 LLVMAddTypeName(llvm->module, "pipe_vertex_buffer", vb_type); 185 186 llvm->vb_ptr_type = LLVMPointerType(vb_type, 0); 187 } 188} 189 190static LLVMTypeRef 191create_vertex_header(struct draw_llvm *llvm, int data_elems) 192{ 193 /* 
struct vertex_header */ 194 LLVMTypeRef elem_types[3]; 195 LLVMTypeRef vertex_header; 196 char struct_name[24]; 197 198 util_snprintf(struct_name, 23, "vertex_header%d", data_elems); 199 200 elem_types[0] = LLVMIntType(32); 201 elem_types[1] = LLVMArrayType(LLVMFloatType(), 4); 202 elem_types[2] = LLVMArrayType(elem_types[1], data_elems); 203 204 vertex_header = LLVMStructType(elem_types, Elements(elem_types), 0); 205 206 /* these are bit-fields and we can't take address of them 207 LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask, 208 llvm->target, vertex_header, 209 DRAW_JIT_VERTEX_CLIPMASK); 210 LP_CHECK_MEMBER_OFFSET(struct vertex_header, edgeflag, 211 llvm->target, vertex_header, 212 DRAW_JIT_VERTEX_EDGEFLAG); 213 LP_CHECK_MEMBER_OFFSET(struct vertex_header, pad, 214 llvm->target, vertex_header, 215 DRAW_JIT_VERTEX_PAD); 216 LP_CHECK_MEMBER_OFFSET(struct vertex_header, vertex_id, 217 llvm->target, vertex_header, 218 DRAW_JIT_VERTEX_VERTEX_ID); 219 */ 220 LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip, 221 llvm->target, vertex_header, 222 DRAW_JIT_VERTEX_CLIP); 223 LP_CHECK_MEMBER_OFFSET(struct vertex_header, data, 224 llvm->target, vertex_header, 225 DRAW_JIT_VERTEX_DATA); 226 227 LLVMAddTypeName(llvm->module, struct_name, vertex_header); 228 229 return LLVMPointerType(vertex_header, 0); 230} 231 232struct draw_llvm * 233draw_llvm_create(struct draw_context *draw) 234{ 235 struct draw_llvm *llvm; 236 237 llvm = CALLOC_STRUCT( draw_llvm ); 238 if (!llvm) 239 return NULL; 240 241 llvm->draw = draw; 242 llvm->engine = draw->engine; 243 244 debug_assert(llvm->engine); 245 246 llvm->module = LLVMModuleCreateWithName("draw_llvm"); 247 llvm->provider = LLVMCreateModuleProviderForExistingModule(llvm->module); 248 249 LLVMAddModuleProvider(llvm->engine, llvm->provider); 250 251 llvm->target = LLVMGetExecutionEngineTargetData(llvm->engine); 252 253 llvm->pass = LLVMCreateFunctionPassManager(llvm->provider); 254 LLVMAddTargetData(llvm->target, llvm->pass); 255 
256 if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) { 257 /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, 258 * but there are more on SVN. */ 259 /* TODO: Add more passes */ 260 261 LLVMAddCFGSimplificationPass(llvm->pass); 262 263 if (HAVE_LLVM >= 0x207 && sizeof(void*) == 4) { 264 /* For LLVM >= 2.7 and 32-bit build, use this order of passes to 265 * avoid generating bad code. 266 * Test with piglit glsl-vs-sqrt-zero test. 267 */ 268 LLVMAddConstantPropagationPass(llvm->pass); 269 LLVMAddPromoteMemoryToRegisterPass(llvm->pass); 270 } 271 else { 272 LLVMAddPromoteMemoryToRegisterPass(llvm->pass); 273 LLVMAddConstantPropagationPass(llvm->pass); 274 } 275 276 LLVMAddInstructionCombiningPass(llvm->pass); 277 LLVMAddGVNPass(llvm->pass); 278 } else { 279 /* We need at least this pass to prevent the backends to fail in 280 * unexpected ways. 281 */ 282 LLVMAddPromoteMemoryToRegisterPass(llvm->pass); 283 } 284 285 init_globals(llvm); 286 287 if (gallivm_debug & GALLIVM_DEBUG_IR) { 288 LLVMDumpModule(llvm->module); 289 } 290 291 llvm->nr_variants = 0; 292 make_empty_list(&llvm->vs_variants_list); 293 294 return llvm; 295} 296 297void 298draw_llvm_destroy(struct draw_llvm *llvm) 299{ 300 LLVMDisposePassManager(llvm->pass); 301 302 FREE(llvm); 303} 304 305struct draw_llvm_variant * 306draw_llvm_create_variant(struct draw_llvm *llvm, 307 unsigned num_inputs, 308 const struct draw_llvm_variant_key *key) 309{ 310 struct draw_llvm_variant *variant; 311 struct llvm_vertex_shader *shader = 312 llvm_vertex_shader(llvm->draw->vs.vertex_shader); 313 314 variant = MALLOC(sizeof *variant + 315 shader->variant_key_size - 316 sizeof variant->key); 317 if (variant == NULL) 318 return NULL; 319 320 variant->llvm = llvm; 321 322 memcpy(&variant->key, key, shader->variant_key_size); 323 324 llvm->vertex_header_ptr_type = create_vertex_header(llvm, num_inputs); 325 326 draw_llvm_generate(llvm, variant); 327 draw_llvm_generate_elts(llvm, variant); 328 329 
variant->shader = shader; 330 variant->list_item_global.base = variant; 331 variant->list_item_local.base = variant; 332 /*variant->no = */shader->variants_created++; 333 variant->list_item_global.base = variant; 334 335 return variant; 336} 337 338static void 339generate_vs(struct draw_llvm *llvm, 340 LLVMBuilderRef builder, 341 LLVMValueRef (*outputs)[NUM_CHANNELS], 342 const LLVMValueRef (*inputs)[NUM_CHANNELS], 343 LLVMValueRef context_ptr, 344 struct lp_build_sampler_soa *draw_sampler) 345{ 346 const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens; 347 struct lp_type vs_type; 348 LLVMValueRef consts_ptr = draw_jit_context_vs_constants(builder, context_ptr); 349 struct lp_build_sampler_soa *sampler = 0; 350 351 memset(&vs_type, 0, sizeof vs_type); 352 vs_type.floating = TRUE; /* floating point values */ 353 vs_type.sign = TRUE; /* values are signed */ 354 vs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ 355 vs_type.width = 32; /* 32-bit float */ 356 vs_type.length = 4; /* 4 elements per vector */ 357#if 0 358 num_vs = 4; /* number of vertices per block */ 359#endif 360 361 if (gallivm_debug & GALLIVM_DEBUG_IR) { 362 tgsi_dump(tokens, 0); 363 } 364 365 if (llvm->draw->num_sampler_views && 366 llvm->draw->num_samplers) 367 sampler = draw_sampler; 368 369 lp_build_tgsi_soa(builder, 370 tokens, 371 vs_type, 372 NULL /*struct lp_build_mask_context *mask*/, 373 consts_ptr, 374 NULL /*pos*/, 375 inputs, 376 outputs, 377 sampler, 378 &llvm->draw->vs.vertex_shader->info); 379} 380 381#if DEBUG_STORE 382static void print_vectorf(LLVMBuilderRef builder, 383 LLVMValueRef vec) 384{ 385 LLVMValueRef val[4]; 386 val[0] = LLVMBuildExtractElement(builder, vec, 387 LLVMConstInt(LLVMInt32Type(), 0, 0), ""); 388 val[1] = LLVMBuildExtractElement(builder, vec, 389 LLVMConstInt(LLVMInt32Type(), 1, 0), ""); 390 val[2] = LLVMBuildExtractElement(builder, vec, 391 LLVMConstInt(LLVMInt32Type(), 2, 0), ""); 392 val[3] = 
LLVMBuildExtractElement(builder, vec, 393 LLVMConstInt(LLVMInt32Type(), 3, 0), ""); 394 lp_build_printf(builder, "vector = [%f, %f, %f, %f]\n", 395 val[0], val[1], val[2], val[3]); 396} 397#endif 398 399static void 400generate_fetch(LLVMBuilderRef builder, 401 LLVMValueRef vbuffers_ptr, 402 LLVMValueRef *res, 403 struct pipe_vertex_element *velem, 404 LLVMValueRef vbuf, 405 LLVMValueRef index, 406 LLVMValueRef instance_id) 407{ 408 LLVMValueRef indices = LLVMConstInt(LLVMInt64Type(), velem->vertex_buffer_index, 0); 409 LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr, 410 &indices, 1, ""); 411 LLVMValueRef vb_stride = draw_jit_vbuffer_stride(builder, vbuf); 412 LLVMValueRef vb_max_index = draw_jit_vbuffer_max_index(builder, vbuf); 413 LLVMValueRef vb_buffer_offset = draw_jit_vbuffer_offset(builder, vbuf); 414 LLVMValueRef cond; 415 LLVMValueRef stride; 416 417 if (velem->instance_divisor) { 418 /* array index = instance_id / instance_divisor */ 419 index = LLVMBuildUDiv(builder, instance_id, 420 LLVMConstInt(LLVMInt32Type(), velem->instance_divisor, 0), 421 "instance_divisor"); 422 } 423 424 /* limit index to min(index, vb_max_index) */ 425 cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, ""); 426 index = LLVMBuildSelect(builder, cond, index, vb_max_index, ""); 427 428 stride = LLVMBuildMul(builder, vb_stride, index, ""); 429 430 vbuffer_ptr = LLVMBuildLoad(builder, vbuffer_ptr, "vbuffer"); 431 432 stride = LLVMBuildAdd(builder, stride, 433 vb_buffer_offset, 434 ""); 435 stride = LLVMBuildAdd(builder, stride, 436 LLVMConstInt(LLVMInt32Type(), velem->src_offset, 0), 437 ""); 438 439 /*lp_build_printf(builder, "vbuf index = %d, stride is %d\n", indices, stride);*/ 440 vbuffer_ptr = LLVMBuildGEP(builder, vbuffer_ptr, &stride, 1, ""); 441 442 *res = draw_llvm_translate_from(builder, vbuffer_ptr, velem->src_format); 443} 444 445static LLVMValueRef 446aos_to_soa(LLVMBuilderRef builder, 447 LLVMValueRef val0, 448 LLVMValueRef val1, 449 
LLVMValueRef val2, 450 LLVMValueRef val3, 451 LLVMValueRef channel) 452{ 453 LLVMValueRef ex, res; 454 455 ex = LLVMBuildExtractElement(builder, val0, 456 channel, ""); 457 res = LLVMBuildInsertElement(builder, 458 LLVMConstNull(LLVMTypeOf(val0)), 459 ex, 460 LLVMConstInt(LLVMInt32Type(), 0, 0), 461 ""); 462 463 ex = LLVMBuildExtractElement(builder, val1, 464 channel, ""); 465 res = LLVMBuildInsertElement(builder, 466 res, ex, 467 LLVMConstInt(LLVMInt32Type(), 1, 0), 468 ""); 469 470 ex = LLVMBuildExtractElement(builder, val2, 471 channel, ""); 472 res = LLVMBuildInsertElement(builder, 473 res, ex, 474 LLVMConstInt(LLVMInt32Type(), 2, 0), 475 ""); 476 477 ex = LLVMBuildExtractElement(builder, val3, 478 channel, ""); 479 res = LLVMBuildInsertElement(builder, 480 res, ex, 481 LLVMConstInt(LLVMInt32Type(), 3, 0), 482 ""); 483 484 return res; 485} 486 487static void 488soa_to_aos(LLVMBuilderRef builder, 489 LLVMValueRef soa[NUM_CHANNELS], 490 LLVMValueRef aos[NUM_CHANNELS]) 491{ 492 LLVMValueRef comp; 493 int i = 0; 494 495 debug_assert(NUM_CHANNELS == 4); 496 497 aos[0] = LLVMConstNull(LLVMTypeOf(soa[0])); 498 aos[1] = aos[2] = aos[3] = aos[0]; 499 500 for (i = 0; i < NUM_CHANNELS; ++i) { 501 LLVMValueRef channel = LLVMConstInt(LLVMInt32Type(), i, 0); 502 503 comp = LLVMBuildExtractElement(builder, soa[i], 504 LLVMConstInt(LLVMInt32Type(), 0, 0), ""); 505 aos[0] = LLVMBuildInsertElement(builder, aos[0], comp, channel, ""); 506 507 comp = LLVMBuildExtractElement(builder, soa[i], 508 LLVMConstInt(LLVMInt32Type(), 1, 0), ""); 509 aos[1] = LLVMBuildInsertElement(builder, aos[1], comp, channel, ""); 510 511 comp = LLVMBuildExtractElement(builder, soa[i], 512 LLVMConstInt(LLVMInt32Type(), 2, 0), ""); 513 aos[2] = LLVMBuildInsertElement(builder, aos[2], comp, channel, ""); 514 515 comp = LLVMBuildExtractElement(builder, soa[i], 516 LLVMConstInt(LLVMInt32Type(), 3, 0), ""); 517 aos[3] = LLVMBuildInsertElement(builder, aos[3], comp, channel, ""); 518 519 } 520} 521 522static 
void 523convert_to_soa(LLVMBuilderRef builder, 524 LLVMValueRef (*aos)[NUM_CHANNELS], 525 LLVMValueRef (*soa)[NUM_CHANNELS], 526 int num_attribs) 527{ 528 int i; 529 530 debug_assert(NUM_CHANNELS == 4); 531 532 for (i = 0; i < num_attribs; ++i) { 533 LLVMValueRef val0 = aos[i][0]; 534 LLVMValueRef val1 = aos[i][1]; 535 LLVMValueRef val2 = aos[i][2]; 536 LLVMValueRef val3 = aos[i][3]; 537 538 soa[i][0] = aos_to_soa(builder, val0, val1, val2, val3, 539 LLVMConstInt(LLVMInt32Type(), 0, 0)); 540 soa[i][1] = aos_to_soa(builder, val0, val1, val2, val3, 541 LLVMConstInt(LLVMInt32Type(), 1, 0)); 542 soa[i][2] = aos_to_soa(builder, val0, val1, val2, val3, 543 LLVMConstInt(LLVMInt32Type(), 2, 0)); 544 soa[i][3] = aos_to_soa(builder, val0, val1, val2, val3, 545 LLVMConstInt(LLVMInt32Type(), 3, 0)); 546 } 547} 548 549static void 550store_aos(LLVMBuilderRef builder, 551 LLVMValueRef io_ptr, 552 LLVMValueRef index, 553 LLVMValueRef value, 554 LLVMValueRef clipmask) 555{ 556 LLVMValueRef id_ptr = draw_jit_header_id(builder, io_ptr); 557 LLVMValueRef data_ptr = draw_jit_header_data(builder, io_ptr); 558 LLVMValueRef indices[3]; 559 LLVMValueRef val, shift; 560 561 indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); 562 indices[1] = index; 563 indices[2] = LLVMConstInt(LLVMInt32Type(), 0, 0); 564 565 /* initialize vertex id:16 = 0xffff, pad:3 = 0, edgeflag:1 = 1 */ 566 val = LLVMConstInt(LLVMInt32Type(), 0xffff1, 0); 567 shift = LLVMConstInt(LLVMInt32Type(), 12, 0); 568 val = LLVMBuildShl(builder, val, shift, ""); 569 /* add clipmask:12 */ 570 val = LLVMBuildOr(builder, val, clipmask, ""); 571 572 /* store vertex header */ 573 LLVMBuildStore(builder, val, id_ptr); 574 575 576#if DEBUG_STORE 577 lp_build_printf(builder, " ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr); 578#endif 579#if 0 580 /*lp_build_printf(builder, " ---- %p storing at %d (%p) ", io_ptr, index, data_ptr); 581 print_vectorf(builder, value);*/ 582 data_ptr = LLVMBuildBitCast(builder, data_ptr, 
583 LLVMPointerType(LLVMArrayType(LLVMVectorType(LLVMFloatType(), 4), 0), 0), 584 "datavec"); 585 data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 2, ""); 586 587 LLVMBuildStore(builder, value, data_ptr); 588#else 589 { 590 LLVMValueRef x, y, z, w; 591 LLVMValueRef idx0, idx1, idx2, idx3; 592 LLVMValueRef gep0, gep1, gep2, gep3; 593 data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 3, ""); 594 595 idx0 = LLVMConstInt(LLVMInt32Type(), 0, 0); 596 idx1 = LLVMConstInt(LLVMInt32Type(), 1, 0); 597 idx2 = LLVMConstInt(LLVMInt32Type(), 2, 0); 598 idx3 = LLVMConstInt(LLVMInt32Type(), 3, 0); 599 600 x = LLVMBuildExtractElement(builder, value, 601 idx0, ""); 602 y = LLVMBuildExtractElement(builder, value, 603 idx1, ""); 604 z = LLVMBuildExtractElement(builder, value, 605 idx2, ""); 606 w = LLVMBuildExtractElement(builder, value, 607 idx3, ""); 608 609 gep0 = LLVMBuildGEP(builder, data_ptr, &idx0, 1, ""); 610 gep1 = LLVMBuildGEP(builder, data_ptr, &idx1, 1, ""); 611 gep2 = LLVMBuildGEP(builder, data_ptr, &idx2, 1, ""); 612 gep3 = LLVMBuildGEP(builder, data_ptr, &idx3, 1, ""); 613 614 /*lp_build_printf(builder, "##### x = %f (%p), y = %f (%p), z = %f (%p), w = %f (%p)\n", 615 x, gep0, y, gep1, z, gep2, w, gep3);*/ 616 LLVMBuildStore(builder, x, gep0); 617 LLVMBuildStore(builder, y, gep1); 618 LLVMBuildStore(builder, z, gep2); 619 LLVMBuildStore(builder, w, gep3); 620 } 621#endif 622} 623 624static void 625store_aos_array(LLVMBuilderRef builder, 626 LLVMValueRef io_ptr, 627 LLVMValueRef aos[NUM_CHANNELS], 628 int attrib, 629 int num_outputs, 630 LLVMValueRef clipmask) 631{ 632 LLVMValueRef attr_index = LLVMConstInt(LLVMInt32Type(), attrib, 0); 633 LLVMValueRef ind0 = LLVMConstInt(LLVMInt32Type(), 0, 0); 634 LLVMValueRef ind1 = LLVMConstInt(LLVMInt32Type(), 1, 0); 635 LLVMValueRef ind2 = LLVMConstInt(LLVMInt32Type(), 2, 0); 636 LLVMValueRef ind3 = LLVMConstInt(LLVMInt32Type(), 3, 0); 637 LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr; 638 LLVMValueRef clipmask0, clipmask1, 
clipmask2, clipmask3; 639 640 debug_assert(NUM_CHANNELS == 4); 641 642 io0_ptr = LLVMBuildGEP(builder, io_ptr, 643 &ind0, 1, ""); 644 io1_ptr = LLVMBuildGEP(builder, io_ptr, 645 &ind1, 1, ""); 646 io2_ptr = LLVMBuildGEP(builder, io_ptr, 647 &ind2, 1, ""); 648 io3_ptr = LLVMBuildGEP(builder, io_ptr, 649 &ind3, 1, ""); 650 651 clipmask0 = LLVMBuildExtractElement(builder, clipmask, 652 ind0, ""); 653 clipmask1 = LLVMBuildExtractElement(builder, clipmask, 654 ind1, ""); 655 clipmask2 = LLVMBuildExtractElement(builder, clipmask, 656 ind2, ""); 657 clipmask3 = LLVMBuildExtractElement(builder, clipmask, 658 ind3, ""); 659 660#if DEBUG_STORE 661 lp_build_printf(builder, "io = %p, indexes[%d, %d, %d, %d]\n, clipmask0 = %x, clipmask1 = %x, clipmask2 = %x, clipmask3 = %x\n", 662 io_ptr, ind0, ind1, ind2, ind3, clipmask0, clipmask1, clipmask2, clipmask3); 663#endif 664 /* store for each of the 4 vertices */ 665 store_aos(builder, io0_ptr, attr_index, aos[0], clipmask0); 666 store_aos(builder, io1_ptr, attr_index, aos[1], clipmask1); 667 store_aos(builder, io2_ptr, attr_index, aos[2], clipmask2); 668 store_aos(builder, io3_ptr, attr_index, aos[3], clipmask3); 669} 670 671static void 672convert_to_aos(LLVMBuilderRef builder, 673 LLVMValueRef io, 674 LLVMValueRef (*outputs)[NUM_CHANNELS], 675 LLVMValueRef clipmask, 676 int num_outputs, 677 int max_vertices) 678{ 679 unsigned chan, attrib; 680 681#if DEBUG_STORE 682 lp_build_printf(builder, " # storing begin\n"); 683#endif 684 for (attrib = 0; attrib < num_outputs; ++attrib) { 685 LLVMValueRef soa[4]; 686 LLVMValueRef aos[4]; 687 for(chan = 0; chan < NUM_CHANNELS; ++chan) { 688 if(outputs[attrib][chan]) { 689 LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], ""); 690 lp_build_name(out, "output%u.%c", attrib, "xyzw"[chan]); 691 /*lp_build_printf(builder, "output %d : %d ", 692 LLVMConstInt(LLVMInt32Type(), attrib, 0), 693 LLVMConstInt(LLVMInt32Type(), chan, 0)); 694 print_vectorf(builder, out);*/ 695 soa[chan] = out; 
696 } else 697 soa[chan] = 0; 698 } 699 soa_to_aos(builder, soa, aos); 700 store_aos_array(builder, 701 io, 702 aos, 703 attrib, 704 num_outputs, 705 clipmask); 706 } 707#if DEBUG_STORE 708 lp_build_printf(builder, " # storing end\n"); 709#endif 710} 711 712/* 713 * Stores original vertex positions in clip coordinates 714 * There is probably a more efficient way to do this, 4 floats at once 715 * rather than extracting each element one by one. 716 */ 717static void 718store_clip(LLVMBuilderRef builder, 719 LLVMValueRef io_ptr, 720 LLVMValueRef (*outputs)[NUM_CHANNELS]) 721{ 722 LLVMValueRef out[4]; 723 LLVMValueRef indices[2]; 724 LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr; 725 LLVMValueRef clip_ptr0, clip_ptr1, clip_ptr2, clip_ptr3; 726 LLVMValueRef clip0_ptr, clip1_ptr, clip2_ptr, clip3_ptr; 727 LLVMValueRef out0elem, out1elem, out2elem, out3elem; 728 int i; 729 730 LLVMValueRef ind0 = LLVMConstInt(LLVMInt32Type(), 0, 0); 731 LLVMValueRef ind1 = LLVMConstInt(LLVMInt32Type(), 1, 0); 732 LLVMValueRef ind2 = LLVMConstInt(LLVMInt32Type(), 2, 0); 733 LLVMValueRef ind3 = LLVMConstInt(LLVMInt32Type(), 3, 0); 734 735 indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); 736 indices[1] = LLVMConstInt(LLVMInt32Type(), 0, 0); 737 738 out[0] = LLVMBuildLoad(builder, outputs[0][0], ""); /*x0 x1 x2 x3*/ 739 out[1] = LLVMBuildLoad(builder, outputs[0][1], ""); /*y0 y1 y2 y3*/ 740 out[2] = LLVMBuildLoad(builder, outputs[0][2], ""); /*z0 z1 z2 z3*/ 741 out[3] = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/ 742 743 io0_ptr = LLVMBuildGEP(builder, io_ptr, &ind0, 1, ""); 744 io1_ptr = LLVMBuildGEP(builder, io_ptr, &ind1, 1, ""); 745 io2_ptr = LLVMBuildGEP(builder, io_ptr, &ind2, 1, ""); 746 io3_ptr = LLVMBuildGEP(builder, io_ptr, &ind3, 1, ""); 747 748 clip_ptr0 = draw_jit_header_clip(builder, io0_ptr); 749 clip_ptr1 = draw_jit_header_clip(builder, io1_ptr); 750 clip_ptr2 = draw_jit_header_clip(builder, io2_ptr); 751 clip_ptr3 = draw_jit_header_clip(builder, io3_ptr); 
752 753 for (i = 0; i<4; i++){ 754 clip0_ptr = LLVMBuildGEP(builder, clip_ptr0, 755 indices, 2, ""); //x0 756 clip1_ptr = LLVMBuildGEP(builder, clip_ptr1, 757 indices, 2, ""); //x1 758 clip2_ptr = LLVMBuildGEP(builder, clip_ptr2, 759 indices, 2, ""); //x2 760 clip3_ptr = LLVMBuildGEP(builder, clip_ptr3, 761 indices, 2, ""); //x3 762 763 out0elem = LLVMBuildExtractElement(builder, out[i], 764 ind0, ""); //x0 765 out1elem = LLVMBuildExtractElement(builder, out[i], 766 ind1, ""); //x1 767 out2elem = LLVMBuildExtractElement(builder, out[i], 768 ind2, ""); //x2 769 out3elem = LLVMBuildExtractElement(builder, out[i], 770 ind3, ""); //x3 771 772 LLVMBuildStore(builder, out0elem, clip0_ptr); 773 LLVMBuildStore(builder, out1elem, clip1_ptr); 774 LLVMBuildStore(builder, out2elem, clip2_ptr); 775 LLVMBuildStore(builder, out3elem, clip3_ptr); 776 777 indices[1]= LLVMBuildAdd(builder, indices[1], ind1, ""); 778 } 779 780} 781 782/* Equivalent of _mm_set1_ps(a) 783 */ 784static LLVMValueRef vec4f_from_scalar(LLVMBuilderRef bld, 785 LLVMValueRef a, 786 const char *name) 787{ 788 LLVMValueRef res = LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)); 789 int i; 790 791 for(i = 0; i < 4; ++i) { 792 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); 793 res = LLVMBuildInsertElement(bld, res, a, index, i == 3 ? 
name : ""); 794 } 795 796 return res; 797} 798 799/* 800 * Transforms the outputs for viewport mapping 801 */ 802static void 803generate_viewport(struct draw_llvm *llvm, 804 LLVMBuilderRef builder, 805 LLVMValueRef (*outputs)[NUM_CHANNELS], 806 LLVMValueRef context_ptr) 807{ 808 int i; 809 struct lp_type f32_type = lp_type_float_vec(32); 810 LLVMValueRef out3 = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/ 811 LLVMValueRef const1 = lp_build_const_vec(f32_type, 1.0); /*1.0 1.0 1.0 1.0*/ 812 LLVMValueRef vp_ptr = draw_jit_context_viewport(builder, context_ptr); 813 814 /* for 1/w convention*/ 815 out3 = LLVMBuildFDiv(builder, const1, out3, ""); 816 LLVMBuildStore(builder, out3, outputs[0][3]); 817 818 /* Viewport Mapping */ 819 for (i=0; i<3; i++){ 820 LLVMValueRef out = LLVMBuildLoad(builder, outputs[0][i], ""); /*x0 x1 x2 x3*/ 821 LLVMValueRef scale; 822 LLVMValueRef trans; 823 LLVMValueRef scale_i; 824 LLVMValueRef trans_i; 825 LLVMValueRef index; 826 827 index = LLVMConstInt(LLVMInt32Type(), i, 0); 828 scale_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, ""); 829 830 index = LLVMConstInt(LLVMInt32Type(), i+4, 0); 831 trans_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, ""); 832 833 scale = vec4f_from_scalar(builder, LLVMBuildLoad(builder, scale_i, ""), "scale"); 834 trans = vec4f_from_scalar(builder, LLVMBuildLoad(builder, trans_i, ""), "trans"); 835 836 /* divide by w */ 837 out = LLVMBuildFMul(builder, out, out3, ""); 838 /* mult by scale */ 839 out = LLVMBuildFMul(builder, out, scale, ""); 840 /* add translation */ 841 out = LLVMBuildFAdd(builder, out, trans, ""); 842 843 /* store transformed outputs */ 844 LLVMBuildStore(builder, out, outputs[0][i]); 845 } 846 847} 848 849 850/* 851 * Returns clipmask as 4xi32 bitmask for the 4 vertices 852 */ 853static LLVMValueRef 854generate_clipmask(LLVMBuilderRef builder, 855 LLVMValueRef (*outputs)[NUM_CHANNELS], 856 boolean clip_xy, 857 boolean clip_z, 858 boolean clip_user, 859 boolean clip_halfz, 860 
unsigned nr, 861 LLVMValueRef context_ptr) 862{ 863 LLVMValueRef mask; /* stores the <4xi32> clipmasks */ 864 LLVMValueRef test, temp; 865 LLVMValueRef zero, shift; 866 LLVMValueRef pos_x, pos_y, pos_z, pos_w; 867 LLVMValueRef plane1, planes, plane_ptr, sum; 868 869 unsigned i; 870 871 struct lp_type f32_type = lp_type_float_vec(32); 872 873 mask = lp_build_const_int_vec(lp_type_int_vec(32), 0); 874 temp = lp_build_const_int_vec(lp_type_int_vec(32), 0); 875 zero = lp_build_const_vec(f32_type, 0); /* 0.0f 0.0f 0.0f 0.0f */ 876 shift = lp_build_const_int_vec(lp_type_int_vec(32), 1); /* 1 1 1 1 */ 877 878 /* Assuming position stored at output[0] */ 879 pos_x = LLVMBuildLoad(builder, outputs[0][0], ""); /*x0 x1 x2 x3*/ 880 pos_y = LLVMBuildLoad(builder, outputs[0][1], ""); /*y0 y1 y2 y3*/ 881 pos_z = LLVMBuildLoad(builder, outputs[0][2], ""); /*z0 z1 z2 z3*/ 882 pos_w = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/ 883 884 /* Cliptest, for hardwired planes */ 885 if (clip_xy){ 886 /* plane 1 */ 887 test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, pos_x , pos_w); 888 temp = shift; 889 test = LLVMBuildAnd(builder, test, temp, ""); 890 mask = test; 891 892 /* plane 2 */ 893 test = LLVMBuildFAdd(builder, pos_x, pos_w, ""); 894 test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, test); 895 temp = LLVMBuildShl(builder, temp, shift, ""); 896 test = LLVMBuildAnd(builder, test, temp, ""); 897 mask = LLVMBuildOr(builder, mask, test, ""); 898 899 /* plane 3 */ 900 test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, pos_y, pos_w); 901 temp = LLVMBuildShl(builder, temp, shift, ""); 902 test = LLVMBuildAnd(builder, test, temp, ""); 903 mask = LLVMBuildOr(builder, mask, test, ""); 904 905 /* plane 4 */ 906 test = LLVMBuildFAdd(builder, pos_y, pos_w, ""); 907 test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, test); 908 temp = LLVMBuildShl(builder, temp, shift, ""); 909 test = LLVMBuildAnd(builder, test, temp, ""); 
910 mask = LLVMBuildOr(builder, mask, test, ""); 911 } 912 913 if (clip_z){ 914 temp = lp_build_const_int_vec(lp_type_int_vec(32), 16); 915 if (clip_halfz){ 916 /* plane 5 */ 917 test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, pos_z); 918 test = LLVMBuildAnd(builder, test, temp, ""); 919 mask = LLVMBuildOr(builder, mask, test, ""); 920 } 921 else{ 922 /* plane 5 */ 923 test = LLVMBuildFAdd(builder, pos_z, pos_w, ""); 924 test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, test); 925 test = LLVMBuildAnd(builder, test, temp, ""); 926 mask = LLVMBuildOr(builder, mask, test, ""); 927 } 928 /* plane 6 */ 929 test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, pos_z, pos_w); 930 temp = LLVMBuildShl(builder, temp, shift, ""); 931 test = LLVMBuildAnd(builder, test, temp, ""); 932 mask = LLVMBuildOr(builder, mask, test, ""); 933 } 934 935 if (clip_user){ 936 LLVMValueRef planes_ptr = draw_jit_context_planes(builder, context_ptr); 937 LLVMValueRef indices[3]; 938 temp = lp_build_const_int_vec(lp_type_int_vec(32), 32); 939 940 /* userclip planes */ 941 for (i = 6; i < nr; i++) { 942 indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); 943 indices[1] = LLVMConstInt(LLVMInt32Type(), i, 0); 944 945 indices[2] = LLVMConstInt(LLVMInt32Type(), 0, 0); 946 plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, ""); 947 plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_x"); 948 planes = vec4f_from_scalar(builder, plane1, "plane4_x"); 949 sum = LLVMBuildFMul(builder, planes, pos_x, ""); 950 951 indices[2] = LLVMConstInt(LLVMInt32Type(), 1, 0); 952 plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, ""); 953 plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_y"); 954 planes = vec4f_from_scalar(builder, plane1, "plane4_y"); 955 test = LLVMBuildFMul(builder, planes, pos_y, ""); 956 sum = LLVMBuildFAdd(builder, sum, test, ""); 957 958 indices[2] = LLVMConstInt(LLVMInt32Type(), 2, 0); 959 plane_ptr = LLVMBuildGEP(builder, planes_ptr, 
indices, 3, ""); 960 plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_z"); 961 planes = vec4f_from_scalar(builder, plane1, "plane4_z"); 962 test = LLVMBuildFMul(builder, planes, pos_z, ""); 963 sum = LLVMBuildFAdd(builder, sum, test, ""); 964 965 indices[2] = LLVMConstInt(LLVMInt32Type(), 3, 0); 966 plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, ""); 967 plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_w"); 968 planes = vec4f_from_scalar(builder, plane1, "plane4_w"); 969 test = LLVMBuildFMul(builder, planes, pos_w, ""); 970 sum = LLVMBuildFAdd(builder, sum, test, ""); 971 972 test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, sum); 973 temp = LLVMBuildShl(builder, temp, shift, ""); 974 test = LLVMBuildAnd(builder, test, temp, ""); 975 mask = LLVMBuildOr(builder, mask, test, ""); 976 } 977 } 978 return mask; 979} 980 981/* 982 * Returns boolean if any clipping has occurred 983 * Used zero/non-zero i32 value to represent boolean 984 */ 985static void 986clipmask_bool(LLVMBuilderRef builder, 987 LLVMValueRef clipmask, 988 LLVMValueRef ret_ptr) 989{ 990 LLVMValueRef ret = LLVMBuildLoad(builder, ret_ptr, ""); 991 LLVMValueRef temp; 992 int i; 993 994 for (i=0; i<4; i++){ 995 temp = LLVMBuildExtractElement(builder, clipmask, 996 LLVMConstInt(LLVMInt32Type(), i, 0) , ""); 997 ret = LLVMBuildOr(builder, ret, temp, ""); 998 } 999 1000 LLVMBuildStore(builder, ret, ret_ptr); 1001} 1002 1003static void 1004draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) 1005{ 1006 LLVMTypeRef arg_types[8]; 1007 LLVMTypeRef func_type; 1008 LLVMValueRef context_ptr; 1009 LLVMBasicBlockRef block; 1010 LLVMBuilderRef builder; 1011 LLVMValueRef start, end, count, stride, step, io_itr; 1012 LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr; 1013 LLVMValueRef instance_id; 1014 struct draw_context *draw = llvm->draw; 1015 unsigned i, j; 1016 struct lp_build_context bld; 1017 struct lp_build_loop_state lp_loop; 1018 const int max_vertices = 4; 1019 
LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; 1020 void *code; 1021 struct lp_build_sampler_soa *sampler = 0; 1022 LLVMValueRef ret, ret_ptr; 1023 boolean bypass_viewport = variant->key.bypass_viewport; 1024 boolean enable_cliptest = variant->key.clip_xy || 1025 variant->key.clip_z || 1026 variant->key.clip_user; 1027 1028 arg_types[0] = llvm->context_ptr_type; /* context */ 1029 arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */ 1030 arg_types[2] = llvm->buffer_ptr_type; /* vbuffers */ 1031 arg_types[3] = LLVMInt32Type(); /* start */ 1032 arg_types[4] = LLVMInt32Type(); /* count */ 1033 arg_types[5] = LLVMInt32Type(); /* stride */ 1034 arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */ 1035 arg_types[7] = LLVMInt32Type(); /* instance_id */ 1036 1037 func_type = LLVMFunctionType(LLVMInt32Type(), arg_types, Elements(arg_types), 0); 1038 1039 variant->function = LLVMAddFunction(llvm->module, "draw_llvm_shader", func_type); 1040 LLVMSetFunctionCallConv(variant->function, LLVMCCallConv); 1041 for(i = 0; i < Elements(arg_types); ++i) 1042 if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) 1043 LLVMAddAttribute(LLVMGetParam(variant->function, i), LLVMNoAliasAttribute); 1044 1045 context_ptr = LLVMGetParam(variant->function, 0); 1046 io_ptr = LLVMGetParam(variant->function, 1); 1047 vbuffers_ptr = LLVMGetParam(variant->function, 2); 1048 start = LLVMGetParam(variant->function, 3); 1049 count = LLVMGetParam(variant->function, 4); 1050 stride = LLVMGetParam(variant->function, 5); 1051 vb_ptr = LLVMGetParam(variant->function, 6); 1052 instance_id = LLVMGetParam(variant->function, 7); 1053 1054 lp_build_name(context_ptr, "context"); 1055 lp_build_name(io_ptr, "io"); 1056 lp_build_name(vbuffers_ptr, "vbuffers"); 1057 lp_build_name(start, "start"); 1058 lp_build_name(count, "count"); 1059 lp_build_name(stride, "stride"); 1060 lp_build_name(vb_ptr, "vb"); 1061 lp_build_name(instance_id, "instance_id"); 1062 1063 /* 1064 * Function 
body 1065 */ 1066 1067 block = LLVMAppendBasicBlock(variant->function, "entry"); 1068 builder = LLVMCreateBuilder(); 1069 LLVMPositionBuilderAtEnd(builder, block); 1070 1071 lp_build_context_init(&bld, builder, lp_type_int(32)); 1072 1073 end = lp_build_add(&bld, start, count); 1074 1075 step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); 1076 1077 /* function will return non-zero i32 value if any clipped vertices */ 1078 ret_ptr = lp_build_alloca(builder, LLVMInt32Type(), ""); 1079 LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), ret_ptr); 1080 1081 /* code generated texture sampling */ 1082 sampler = draw_llvm_sampler_soa_create( 1083 draw_llvm_variant_key_samplers(&variant->key), 1084 context_ptr); 1085 1086#if DEBUG_STORE 1087 lp_build_printf(builder, "start = %d, end = %d, step = %d\n", 1088 start, end, step); 1089#endif 1090 lp_build_loop_begin(builder, start, &lp_loop); 1091 { 1092 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; 1093 LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } }; 1094 LLVMValueRef io; 1095 LLVMValueRef clipmask; /* holds the clipmask value */ 1096 const LLVMValueRef (*ptr_aos)[NUM_CHANNELS]; 1097 1098 io_itr = LLVMBuildSub(builder, lp_loop.counter, start, ""); 1099 io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, ""); 1100#if DEBUG_STORE 1101 lp_build_printf(builder, " --- io %d = %p, loop counter %d\n", 1102 io_itr, io, lp_loop.counter); 1103#endif 1104 for (i = 0; i < NUM_CHANNELS; ++i) { 1105 LLVMValueRef true_index = LLVMBuildAdd( 1106 builder, 1107 lp_loop.counter, 1108 LLVMConstInt(LLVMInt32Type(), i, 0), ""); 1109 for (j = 0; j < draw->pt.nr_vertex_elements; ++j) { 1110 struct pipe_vertex_element *velem = &draw->pt.vertex_element[j]; 1111 LLVMValueRef vb_index = LLVMConstInt(LLVMInt32Type(), 1112 velem->vertex_buffer_index, 1113 0); 1114 LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr, 1115 &vb_index, 1, ""); 1116 generate_fetch(builder, vbuffers_ptr, 1117 &aos_attribs[j][i], velem, 
vb, true_index, 1118 instance_id); 1119 } 1120 } 1121 convert_to_soa(builder, aos_attribs, inputs, 1122 draw->pt.nr_vertex_elements); 1123 1124 ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs; 1125 generate_vs(llvm, 1126 builder, 1127 outputs, 1128 ptr_aos, 1129 context_ptr, 1130 sampler); 1131 1132 /* store original positions in clip before further manipulation */ 1133 store_clip(builder, io, outputs); 1134 1135 /* do cliptest */ 1136 if (enable_cliptest){ 1137 /* allocate clipmask, assign it integer type */ 1138 clipmask = generate_clipmask(builder, outputs, 1139 variant->key.clip_xy, 1140 variant->key.clip_z, 1141 variant->key.clip_user, 1142 variant->key.clip_halfz, 1143 variant->key.nr_planes, 1144 context_ptr); 1145 /* return clipping boolean value for function */ 1146 clipmask_bool(builder, clipmask, ret_ptr); 1147 } 1148 else{ 1149 clipmask = lp_build_const_int_vec(lp_type_int_vec(32), 0); 1150 } 1151 1152 /* do viewport mapping */ 1153 if (!bypass_viewport){ 1154 generate_viewport(llvm, builder, outputs, context_ptr); 1155 } 1156 1157 /* store clipmask in vertex header and positions in data */ 1158 convert_to_aos(builder, io, outputs, clipmask, 1159 draw->vs.vertex_shader->info.num_outputs, 1160 max_vertices); 1161 } 1162 1163 lp_build_loop_end_cond(builder, end, step, LLVMIntUGE, &lp_loop); 1164 1165 sampler->destroy(sampler); 1166 1167#ifdef PIPE_ARCH_X86 1168 /* Avoid corrupting the FPU stack on 32bit OSes. */ 1169 lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0); 1170#endif 1171 1172 ret = LLVMBuildLoad(builder, ret_ptr,""); 1173 LLVMBuildRet(builder, ret); 1174 1175 LLVMDisposeBuilder(builder); 1176 1177 /* 1178 * Translate the LLVM IR into machine code. 
1179 */ 1180#ifdef DEBUG 1181 if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) { 1182 lp_debug_dump_value(variant->function); 1183 assert(0); 1184 } 1185#endif 1186 1187 LLVMRunFunctionPassManager(llvm->pass, variant->function); 1188 1189 if (gallivm_debug & GALLIVM_DEBUG_IR) { 1190 lp_debug_dump_value(variant->function); 1191 debug_printf("\n"); 1192 } 1193 1194 code = LLVMGetPointerToGlobal(llvm->draw->engine, variant->function); 1195 variant->jit_func = (draw_jit_vert_func)pointer_to_func(code); 1196 1197 if (gallivm_debug & GALLIVM_DEBUG_ASM) { 1198 lp_disassemble(code); 1199 } 1200 lp_func_delete_body(variant->function); 1201} 1202 1203 1204static void 1205draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *variant) 1206{ 1207 LLVMTypeRef arg_types[8]; 1208 LLVMTypeRef func_type; 1209 LLVMValueRef context_ptr; 1210 LLVMBasicBlockRef block; 1211 LLVMBuilderRef builder; 1212 LLVMValueRef fetch_elts, fetch_count, stride, step, io_itr; 1213 LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr; 1214 LLVMValueRef instance_id; 1215 struct draw_context *draw = llvm->draw; 1216 unsigned i, j; 1217 struct lp_build_context bld; 1218 struct lp_build_loop_state lp_loop; 1219 const int max_vertices = 4; 1220 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; 1221 LLVMValueRef fetch_max; 1222 void *code; 1223 struct lp_build_sampler_soa *sampler = 0; 1224 LLVMValueRef ret, ret_ptr; 1225 boolean bypass_viewport = variant->key.bypass_viewport; 1226 boolean enable_cliptest = variant->key.clip_xy || 1227 variant->key.clip_z || 1228 variant->key.clip_user; 1229 1230 arg_types[0] = llvm->context_ptr_type; /* context */ 1231 arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */ 1232 arg_types[2] = llvm->buffer_ptr_type; /* vbuffers */ 1233 arg_types[3] = LLVMPointerType(LLVMInt32Type(), 0); /* fetch_elts * */ 1234 arg_types[4] = LLVMInt32Type(); /* fetch_count */ 1235 arg_types[5] = LLVMInt32Type(); /* stride */ 1236 arg_types[6] = 
llvm->vb_ptr_type; /* pipe_vertex_buffer's */ 1237 arg_types[7] = LLVMInt32Type(); /* instance_id */ 1238 1239 func_type = LLVMFunctionType(LLVMInt32Type(), arg_types, Elements(arg_types), 0); 1240 1241 variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts", func_type); 1242 LLVMSetFunctionCallConv(variant->function_elts, LLVMCCallConv); 1243 for(i = 0; i < Elements(arg_types); ++i) 1244 if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) 1245 LLVMAddAttribute(LLVMGetParam(variant->function_elts, i), 1246 LLVMNoAliasAttribute); 1247 1248 context_ptr = LLVMGetParam(variant->function_elts, 0); 1249 io_ptr = LLVMGetParam(variant->function_elts, 1); 1250 vbuffers_ptr = LLVMGetParam(variant->function_elts, 2); 1251 fetch_elts = LLVMGetParam(variant->function_elts, 3); 1252 fetch_count = LLVMGetParam(variant->function_elts, 4); 1253 stride = LLVMGetParam(variant->function_elts, 5); 1254 vb_ptr = LLVMGetParam(variant->function_elts, 6); 1255 instance_id = LLVMGetParam(variant->function_elts, 7); 1256 1257 lp_build_name(context_ptr, "context"); 1258 lp_build_name(io_ptr, "io"); 1259 lp_build_name(vbuffers_ptr, "vbuffers"); 1260 lp_build_name(fetch_elts, "fetch_elts"); 1261 lp_build_name(fetch_count, "fetch_count"); 1262 lp_build_name(stride, "stride"); 1263 lp_build_name(vb_ptr, "vb"); 1264 lp_build_name(instance_id, "instance_id"); 1265 1266 /* 1267 * Function body 1268 */ 1269 1270 block = LLVMAppendBasicBlock(variant->function_elts, "entry"); 1271 builder = LLVMCreateBuilder(); 1272 LLVMPositionBuilderAtEnd(builder, block); 1273 1274 lp_build_context_init(&bld, builder, lp_type_int(32)); 1275 1276 step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); 1277 1278 /* code generated texture sampling */ 1279 sampler = draw_llvm_sampler_soa_create( 1280 draw_llvm_variant_key_samplers(&variant->key), 1281 context_ptr); 1282 1283 fetch_max = LLVMBuildSub(builder, fetch_count, 1284 LLVMConstInt(LLVMInt32Type(), 1, 0), 1285 "fetch_max"); 1286 1287 /* 
function returns non-zero i32 value if any clipped vertices */ 1288 ret_ptr = lp_build_alloca(builder, LLVMInt32Type(), ""); 1289 LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), ret_ptr); 1290 1291 lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), &lp_loop); 1292 { 1293 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; 1294 LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } }; 1295 LLVMValueRef io; 1296 LLVMValueRef clipmask; /* holds the clipmask value */ 1297 const LLVMValueRef (*ptr_aos)[NUM_CHANNELS]; 1298 1299 io_itr = lp_loop.counter; 1300 io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, ""); 1301#if DEBUG_STORE 1302 lp_build_printf(builder, " --- io %d = %p, loop counter %d\n", 1303 io_itr, io, lp_loop.counter); 1304#endif 1305 for (i = 0; i < NUM_CHANNELS; ++i) { 1306 LLVMValueRef true_index = LLVMBuildAdd( 1307 builder, 1308 lp_loop.counter, 1309 LLVMConstInt(LLVMInt32Type(), i, 0), ""); 1310 LLVMValueRef fetch_ptr; 1311 1312 /* make sure we're not out of bounds which can happen 1313 * if fetch_count % 4 != 0, because on the last iteration 1314 * a few of the 4 vertex fetches will be out of bounds */ 1315 true_index = lp_build_min(&bld, true_index, fetch_max); 1316 1317 fetch_ptr = LLVMBuildGEP(builder, fetch_elts, 1318 &true_index, 1, ""); 1319 true_index = LLVMBuildLoad(builder, fetch_ptr, "fetch_elt"); 1320 for (j = 0; j < draw->pt.nr_vertex_elements; ++j) { 1321 struct pipe_vertex_element *velem = &draw->pt.vertex_element[j]; 1322 LLVMValueRef vb_index = LLVMConstInt(LLVMInt32Type(), 1323 velem->vertex_buffer_index, 1324 0); 1325 LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr, 1326 &vb_index, 1, ""); 1327 generate_fetch(builder, vbuffers_ptr, 1328 &aos_attribs[j][i], velem, vb, true_index, 1329 instance_id); 1330 } 1331 } 1332 convert_to_soa(builder, aos_attribs, inputs, 1333 draw->pt.nr_vertex_elements); 1334 1335 ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs; 1336 generate_vs(llvm, 
1337 builder, 1338 outputs, 1339 ptr_aos, 1340 context_ptr, 1341 sampler); 1342 1343 /* store original positions in clip before further manipulation */ 1344 store_clip(builder, io, outputs); 1345 1346 /* do cliptest */ 1347 if (enable_cliptest){ 1348 /* allocate clipmask, assign it integer type */ 1349 clipmask = generate_clipmask(builder, outputs, 1350 variant->key.clip_xy, 1351 variant->key.clip_z, 1352 variant->key.clip_user, 1353 variant->key.clip_halfz, 1354 variant->key.nr_planes, 1355 context_ptr); 1356 /* return clipping boolean value for function */ 1357 clipmask_bool(builder, clipmask, ret_ptr); 1358 } 1359 else{ 1360 clipmask = lp_build_const_int_vec(lp_type_int_vec(32), 0); 1361 } 1362 1363 /* do viewport mapping */ 1364 if (!bypass_viewport){ 1365 generate_viewport(llvm, builder, outputs, context_ptr); 1366 } 1367 1368 /* store clipmask in vertex header, 1369 * original positions in clip 1370 * and transformed positions in data 1371 */ 1372 convert_to_aos(builder, io, outputs, clipmask, 1373 draw->vs.vertex_shader->info.num_outputs, 1374 max_vertices); 1375 } 1376 1377 lp_build_loop_end_cond(builder, fetch_count, step, LLVMIntUGE, &lp_loop); 1378 1379 sampler->destroy(sampler); 1380 1381#ifdef PIPE_ARCH_X86 1382 /* Avoid corrupting the FPU stack on 32bit OSes. */ 1383 lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0); 1384#endif 1385 1386 ret = LLVMBuildLoad(builder, ret_ptr,""); 1387 LLVMBuildRet(builder, ret); 1388 1389 LLVMDisposeBuilder(builder); 1390 1391 /* 1392 * Translate the LLVM IR into machine code. 
1393 */ 1394#ifdef DEBUG 1395 if(LLVMVerifyFunction(variant->function_elts, LLVMPrintMessageAction)) { 1396 lp_debug_dump_value(variant->function_elts); 1397 assert(0); 1398 } 1399#endif 1400 1401 LLVMRunFunctionPassManager(llvm->pass, variant->function_elts); 1402 1403 if (gallivm_debug & GALLIVM_DEBUG_IR) { 1404 lp_debug_dump_value(variant->function_elts); 1405 debug_printf("\n"); 1406 } 1407 1408 code = LLVMGetPointerToGlobal(llvm->draw->engine, variant->function_elts); 1409 variant->jit_func_elts = (draw_jit_vert_func_elts)pointer_to_func(code); 1410 1411 if (gallivm_debug & GALLIVM_DEBUG_ASM) { 1412 lp_disassemble(code); 1413 } 1414 lp_func_delete_body(variant->function_elts); 1415} 1416 1417 1418struct draw_llvm_variant_key * 1419draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store) 1420{ 1421 unsigned i; 1422 struct draw_llvm_variant_key *key; 1423 struct lp_sampler_static_state *sampler; 1424 1425 key = (struct draw_llvm_variant_key *)store; 1426 1427 /* Presumably all variants of the shader should have the same 1428 * number of vertex elements - ie the number of shader inputs. 1429 */ 1430 key->nr_vertex_elements = llvm->draw->pt.nr_vertex_elements; 1431 1432 /* will have to rig this up properly later */ 1433 key->clip_xy = llvm->draw->clip_xy; 1434 key->clip_z = llvm->draw->clip_z; 1435 key->clip_user = llvm->draw->clip_user; 1436 key->bypass_viewport = llvm->draw->identity_viewport; 1437 key->clip_halfz = !llvm->draw->rasterizer->gl_rasterization_rules; 1438 key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE); 1439 key->nr_planes = llvm->draw->nr_planes; 1440 key->pad = 0; 1441 1442 /* All variants of this shader will have the same value for 1443 * nr_samplers. Not yet trying to compact away holes in the 1444 * sampler array. 
1445 */ 1446 key->nr_samplers = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER] + 1; 1447 1448 sampler = draw_llvm_variant_key_samplers(key); 1449 1450 memcpy(key->vertex_element, 1451 llvm->draw->pt.vertex_element, 1452 sizeof(struct pipe_vertex_element) * key->nr_vertex_elements); 1453 1454 memset(sampler, 0, key->nr_samplers * sizeof *sampler); 1455 1456 for (i = 0 ; i < key->nr_samplers; i++) { 1457 lp_sampler_static_state(&sampler[i], 1458 llvm->draw->sampler_views[i], 1459 llvm->draw->samplers[i]); 1460 } 1461 1462 return key; 1463} 1464 1465void 1466draw_llvm_set_mapped_texture(struct draw_context *draw, 1467 unsigned sampler_idx, 1468 uint32_t width, uint32_t height, uint32_t depth, 1469 uint32_t last_level, 1470 uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS], 1471 uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS], 1472 const void *data[PIPE_MAX_TEXTURE_LEVELS]) 1473{ 1474 unsigned j; 1475 struct draw_jit_texture *jit_tex; 1476 1477 assert(sampler_idx < PIPE_MAX_VERTEX_SAMPLERS); 1478 1479 1480 jit_tex = &draw->llvm->jit_context.textures[sampler_idx]; 1481 1482 jit_tex->width = width; 1483 jit_tex->height = height; 1484 jit_tex->depth = depth; 1485 jit_tex->last_level = last_level; 1486 1487 for (j = 0; j <= last_level; j++) { 1488 jit_tex->data[j] = data[j]; 1489 jit_tex->row_stride[j] = row_stride[j]; 1490 jit_tex->img_stride[j] = img_stride[j]; 1491 } 1492} 1493 1494 1495void 1496draw_llvm_set_sampler_state(struct draw_context *draw) 1497{ 1498 unsigned i; 1499 1500 for (i = 0; i < draw->num_samplers; i++) { 1501 struct draw_jit_texture *jit_tex = &draw->llvm->jit_context.textures[i]; 1502 1503 if (draw->samplers[i]) { 1504 jit_tex->min_lod = draw->samplers[i]->min_lod; 1505 jit_tex->max_lod = draw->samplers[i]->max_lod; 1506 jit_tex->lod_bias = draw->samplers[i]->lod_bias; 1507 COPY_4V(jit_tex->border_color, draw->samplers[i]->border_color); 1508 } 1509 } 1510} 1511 1512 1513void 1514draw_llvm_destroy_variant(struct draw_llvm_variant *variant) 
1515{ 1516 struct draw_llvm *llvm = variant->llvm; 1517 struct draw_context *draw = llvm->draw; 1518 1519 if (variant->function_elts) { 1520 if (variant->function_elts) 1521 LLVMFreeMachineCodeForFunction(draw->engine, 1522 variant->function_elts); 1523 LLVMDeleteFunction(variant->function_elts); 1524 } 1525 1526 if (variant->function) { 1527 if (variant->function) 1528 LLVMFreeMachineCodeForFunction(draw->engine, 1529 variant->function); 1530 LLVMDeleteFunction(variant->function); 1531 } 1532 1533 remove_from_list(&variant->list_item_local); 1534 variant->shader->variants_cached--; 1535 remove_from_list(&variant->list_item_global); 1536 llvm->nr_variants--; 1537 FREE(variant); 1538} 1539