draw_llvm.c revision 815e79e72c1f4aa849c0ee6103621685b678bc9d
1/************************************************************************** 2 * 3 * Copyright 2010 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28#include "draw_llvm.h" 29 30#include "draw_context.h" 31#include "draw_vs.h" 32 33#include "gallivm/lp_bld_arit.h" 34#include "gallivm/lp_bld_struct.h" 35#include "gallivm/lp_bld_type.h" 36#include "gallivm/lp_bld_flow.h" 37#include "gallivm/lp_bld_debug.h" 38#include "gallivm/lp_bld_tgsi.h" 39#include "gallivm/lp_bld_printf.h" 40 41#include "tgsi/tgsi_exec.h" 42#include "tgsi/tgsi_dump.h" 43 44#include "util/u_cpu_detect.h" 45#include "util/u_pointer.h" 46#include "util/u_string.h" 47 48#include <llvm-c/Transforms/Scalar.h> 49 50#define DEBUG_STORE 0 51 52/* generates the draw jit function */ 53static void 54draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var); 55static void 56draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *var); 57 58static void 59init_globals(struct draw_llvm *llvm) 60{ 61 LLVMTypeRef texture_type; 62 63 /* struct draw_jit_texture */ 64 { 65 LLVMTypeRef elem_types[DRAW_JIT_TEXTURE_NUM_FIELDS]; 66 67 elem_types[DRAW_JIT_TEXTURE_WIDTH] = LLVMInt32Type(); 68 elem_types[DRAW_JIT_TEXTURE_HEIGHT] = LLVMInt32Type(); 69 elem_types[DRAW_JIT_TEXTURE_DEPTH] = LLVMInt32Type(); 70 elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32Type(); 71 elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] = 72 LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS); 73 elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] = 74 LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS); 75 elem_types[DRAW_JIT_TEXTURE_DATA] = 76 LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0), 77 DRAW_MAX_TEXTURE_LEVELS); 78 79 texture_type = LLVMStructType(elem_types, Elements(elem_types), 0); 80 81 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width, 82 llvm->target, texture_type, 83 DRAW_JIT_TEXTURE_WIDTH); 84 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height, 85 llvm->target, texture_type, 86 DRAW_JIT_TEXTURE_HEIGHT); 87 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, depth, 88 llvm->target, texture_type, 89 DRAW_JIT_TEXTURE_DEPTH); 90 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, last_level, 91 llvm->target, texture_type, 92 DRAW_JIT_TEXTURE_LAST_LEVEL); 93 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, row_stride, 94 llvm->target, texture_type, 95 DRAW_JIT_TEXTURE_ROW_STRIDE); 96 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, img_stride, 97 llvm->target, texture_type, 98 DRAW_JIT_TEXTURE_IMG_STRIDE); 99 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, data, 100 llvm->target, texture_type, 101 DRAW_JIT_TEXTURE_DATA); 102 LP_CHECK_STRUCT_SIZE(struct draw_jit_texture, 103 llvm->target, texture_type); 104 105 LLVMAddTypeName(llvm->module, "texture", texture_type); 106 } 107 108 109 /* struct draw_jit_context */ 110 { 111 LLVMTypeRef elem_types[3]; 112 LLVMTypeRef context_type; 113 114 elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */ 115 elem_types[1] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */ 116 elem_types[2] = LLVMArrayType(texture_type, 117 PIPE_MAX_VERTEX_SAMPLERS); /* textures */ 118 119 context_type = LLVMStructType(elem_types, Elements(elem_types), 0); 120 121 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants, 122 llvm->target, context_type, 0); 123 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, gs_constants, 124 llvm->target, context_type, 1); 125 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures, 126 llvm->target, context_type, 127 DRAW_JIT_CTX_TEXTURES); 128 LP_CHECK_STRUCT_SIZE(struct draw_jit_context, 129 llvm->target, context_type); 130 131 LLVMAddTypeName(llvm->module, "draw_jit_context", context_type); 132 133 llvm->context_ptr_type = LLVMPointerType(context_type, 0); 134 } 135 { 136 LLVMTypeRef buffer_ptr = LLVMPointerType(LLVMIntType(8), 0); 137 llvm->buffer_ptr_type = LLVMPointerType(buffer_ptr, 0); 138 } 139 /* struct pipe_vertex_buffer */ 140 { 141 LLVMTypeRef elem_types[4]; 142 LLVMTypeRef vb_type; 143 144 elem_types[0] = LLVMInt32Type(); 145 elem_types[1] = LLVMInt32Type(); 146 elem_types[2] = LLVMInt32Type(); 147 elem_types[3] = LLVMPointerType(LLVMOpaqueType(), 0); /* vs_constants */ 148 149 vb_type = LLVMStructType(elem_types, Elements(elem_types), 0); 150 151 LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride, 152 llvm->target, vb_type, 0); 153 LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset, 154 llvm->target, vb_type, 2); 155 LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer, 156 llvm->target, vb_type); 157 158 LLVMAddTypeName(llvm->module, "pipe_vertex_buffer", vb_type); 159 160 llvm->vb_ptr_type = LLVMPointerType(vb_type, 0); 161 } 162} 163 164static LLVMTypeRef 165create_vertex_header(struct draw_llvm *llvm, int data_elems) 166{ 167 /* struct vertex_header */ 168 LLVMTypeRef elem_types[3]; 169 LLVMTypeRef vertex_header; 170 char struct_name[24]; 171 172 util_snprintf(struct_name, 23, "vertex_header%d", data_elems); 173 174 elem_types[0] = LLVMIntType(32); 175 elem_types[1] = LLVMArrayType(LLVMFloatType(), 4); 176 elem_types[2] = LLVMArrayType(elem_types[1], data_elems); 177 178 vertex_header = LLVMStructType(elem_types, Elements(elem_types), 0); 179 180 /* these are bit-fields and we can't take address of them 181 LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask, 182 llvm->target, vertex_header, 183 DRAW_JIT_VERTEX_CLIPMASK); 184 LP_CHECK_MEMBER_OFFSET(struct vertex_header, edgeflag, 185 llvm->target, vertex_header, 186 DRAW_JIT_VERTEX_EDGEFLAG); 187 LP_CHECK_MEMBER_OFFSET(struct vertex_header, pad, 188 llvm->target, vertex_header, 189 DRAW_JIT_VERTEX_PAD); 190 LP_CHECK_MEMBER_OFFSET(struct vertex_header, vertex_id, 191 llvm->target, vertex_header, 192 DRAW_JIT_VERTEX_VERTEX_ID); 193 */ 194 LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip, 195 llvm->target, vertex_header, 196 DRAW_JIT_VERTEX_CLIP); 197 LP_CHECK_MEMBER_OFFSET(struct vertex_header, data, 198 llvm->target, vertex_header, 199 DRAW_JIT_VERTEX_DATA); 200 201 LLVMAddTypeName(llvm->module, struct_name, vertex_header); 202 203 return LLVMPointerType(vertex_header, 0); 204} 205 206struct draw_llvm * 207draw_llvm_create(struct draw_context *draw) 208{ 209 struct draw_llvm *llvm; 210 211#ifdef PIPE_ARCH_X86 212 util_cpu_detect(); 213 /* require SSE2 due to LLVM PR6960. */ 214 if (!util_cpu_caps.has_sse2) 215 return NULL; 216#endif 217 218 llvm = CALLOC_STRUCT( draw_llvm ); 219 if (!llvm) 220 return NULL; 221 222 llvm->draw = draw; 223 llvm->engine = draw->engine; 224 225 debug_assert(llvm->engine); 226 227 llvm->module = LLVMModuleCreateWithName("draw_llvm"); 228 llvm->provider = LLVMCreateModuleProviderForExistingModule(llvm->module); 229 230 LLVMAddModuleProvider(llvm->engine, llvm->provider); 231 232 llvm->target = LLVMGetExecutionEngineTargetData(llvm->engine); 233 234 llvm->pass = LLVMCreateFunctionPassManager(llvm->provider); 235 LLVMAddTargetData(llvm->target, llvm->pass); 236 237 if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) { 238 /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, 239 * but there are more on SVN. */ 240 /* TODO: Add more passes */ 241 242 LLVMAddCFGSimplificationPass(llvm->pass); 243 244 if (HAVE_LLVM >= 0x207 && sizeof(void*) == 4) { 245 /* For LLVM >= 2.7 and 32-bit build, use this order of passes to 246 * avoid generating bad code. 247 * Test with piglit glsl-vs-sqrt-zero test. 248 */ 249 LLVMAddConstantPropagationPass(llvm->pass); 250 LLVMAddPromoteMemoryToRegisterPass(llvm->pass); 251 } 252 else { 253 LLVMAddPromoteMemoryToRegisterPass(llvm->pass); 254 LLVMAddConstantPropagationPass(llvm->pass); 255 } 256 257 if(util_cpu_caps.has_sse4_1) { 258 /* FIXME: There is a bug in this pass, whereby the combination of fptosi 259 * and sitofp (necessary for trunc/floor/ceil/round implementation) 260 * somehow becomes invalid code. 261 */ 262 LLVMAddInstructionCombiningPass(llvm->pass); 263 } 264 LLVMAddGVNPass(llvm->pass); 265 } else { 266 /* We need at least this pass to prevent the backends to fail in 267 * unexpected ways. 268 */ 269 LLVMAddPromoteMemoryToRegisterPass(llvm->pass); 270 } 271 272 init_globals(llvm); 273 274 if (gallivm_debug & GALLIVM_DEBUG_IR) { 275 LLVMDumpModule(llvm->module); 276 } 277 278 llvm->nr_variants = 0; 279 make_empty_list(&llvm->vs_variants_list); 280 281 return llvm; 282} 283 284void 285draw_llvm_destroy(struct draw_llvm *llvm) 286{ 287 LLVMDisposePassManager(llvm->pass); 288 289 FREE(llvm); 290} 291 292struct draw_llvm_variant * 293draw_llvm_create_variant(struct draw_llvm *llvm, int num_inputs) 294{ 295 struct draw_llvm_variant *variant = MALLOC(sizeof(struct draw_llvm_variant)); 296 struct llvm_vertex_shader *shader = 297 llvm_vertex_shader(llvm->draw->vs.vertex_shader); 298 299 variant->llvm = llvm; 300 301 draw_llvm_make_variant_key(llvm, &variant->key); 302 303 llvm->vertex_header_ptr_type = create_vertex_header(llvm, num_inputs); 304 305 draw_llvm_generate(llvm, variant); 306 draw_llvm_generate_elts(llvm, variant); 307 308 variant->shader = shader; 309 variant->list_item_global.base = variant; 310 variant->list_item_local.base = variant; 311 /*variant->no = */shader->variants_created++; 312 variant->list_item_global.base = variant; 313 314 return variant; 315} 316 317static void 318generate_vs(struct draw_llvm *llvm, 319 LLVMBuilderRef builder, 320 LLVMValueRef (*outputs)[NUM_CHANNELS], 321 const LLVMValueRef (*inputs)[NUM_CHANNELS], 322 LLVMValueRef context_ptr, 323 struct lp_build_sampler_soa *draw_sampler) 324{ 325 const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens; 326 struct lp_type vs_type; 327 LLVMValueRef consts_ptr = draw_jit_context_vs_constants(builder, context_ptr); 328 struct lp_build_sampler_soa *sampler = 0; 329 330 memset(&vs_type, 0, sizeof vs_type); 331 vs_type.floating = TRUE; /* floating point values */ 332 vs_type.sign = TRUE; /* values are signed */ 333 vs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ 334 vs_type.width = 32; /* 32-bit float */ 335 vs_type.length = 4; /* 4 elements per vector */ 336#if 0 337 num_vs = 4; /* number of vertices per block */ 338#endif 339 340 if (gallivm_debug & GALLIVM_DEBUG_IR) { 341 tgsi_dump(tokens, 0); 342 } 343 344 if (llvm->draw->num_sampler_views && 345 llvm->draw->num_samplers) 346 sampler = draw_sampler; 347 348 lp_build_tgsi_soa(builder, 349 tokens, 350 vs_type, 351 NULL /*struct lp_build_mask_context *mask*/, 352 consts_ptr, 353 NULL /*pos*/, 354 inputs, 355 outputs, 356 sampler, 357 &llvm->draw->vs.vertex_shader->info); 358} 359 360#if DEBUG_STORE 361static void print_vectorf(LLVMBuilderRef builder, 362 LLVMValueRef vec) 363{ 364 LLVMValueRef val[4]; 365 val[0] = LLVMBuildExtractElement(builder, vec, 366 LLVMConstInt(LLVMInt32Type(), 0, 0), ""); 367 val[1] = LLVMBuildExtractElement(builder, vec, 368 LLVMConstInt(LLVMInt32Type(), 1, 0), ""); 369 val[2] = LLVMBuildExtractElement(builder, vec, 370 LLVMConstInt(LLVMInt32Type(), 2, 0), ""); 371 val[3] = LLVMBuildExtractElement(builder, vec, 372 LLVMConstInt(LLVMInt32Type(), 3, 0), ""); 373 lp_build_printf(builder, "vector = [%f, %f, %f, %f]\n", 374 val[0], val[1], val[2], val[3]); 375} 376#endif 377 378static void 379generate_fetch(LLVMBuilderRef builder, 380 LLVMValueRef vbuffers_ptr, 381 LLVMValueRef *res, 382 struct pipe_vertex_element *velem, 383 LLVMValueRef vbuf, 384 LLVMValueRef index, 385 LLVMValueRef instance_id) 386{ 387 LLVMValueRef indices = LLVMConstInt(LLVMInt64Type(), velem->vertex_buffer_index, 0); 388 LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr, 389 &indices, 1, ""); 390 LLVMValueRef vb_stride = draw_jit_vbuffer_stride(builder, vbuf); 391 LLVMValueRef vb_max_index = draw_jit_vbuffer_max_index(builder, vbuf); 392 LLVMValueRef vb_buffer_offset = draw_jit_vbuffer_offset(builder, vbuf); 393 LLVMValueRef cond; 394 LLVMValueRef stride; 395 396 if (velem->instance_divisor) { 397 /* array index = instance_id / instance_divisor */ 398 index = LLVMBuildUDiv(builder, instance_id, 399 LLVMConstInt(LLVMInt32Type(), velem->instance_divisor, 0), 400 "instance_divisor"); 401 } 402 403 /* limit index to min(inex, vb_max_index) */ 404 cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, ""); 405 index = LLVMBuildSelect(builder, cond, index, vb_max_index, ""); 406 407 stride = LLVMBuildMul(builder, vb_stride, index, ""); 408 409 vbuffer_ptr = LLVMBuildLoad(builder, vbuffer_ptr, "vbuffer"); 410 411 stride = LLVMBuildAdd(builder, stride, 412 vb_buffer_offset, 413 ""); 414 stride = LLVMBuildAdd(builder, stride, 415 LLVMConstInt(LLVMInt32Type(), velem->src_offset, 0), 416 ""); 417 418 /*lp_build_printf(builder, "vbuf index = %d, stride is %d\n", indices, stride);*/ 419 vbuffer_ptr = LLVMBuildGEP(builder, vbuffer_ptr, &stride, 1, ""); 420 421 *res = draw_llvm_translate_from(builder, vbuffer_ptr, velem->src_format); 422} 423 424static LLVMValueRef 425aos_to_soa(LLVMBuilderRef builder, 426 LLVMValueRef val0, 427 LLVMValueRef val1, 428 LLVMValueRef val2, 429 LLVMValueRef val3, 430 LLVMValueRef channel) 431{ 432 LLVMValueRef ex, res; 433 434 ex = LLVMBuildExtractElement(builder, val0, 435 channel, ""); 436 res = LLVMBuildInsertElement(builder, 437 LLVMConstNull(LLVMTypeOf(val0)), 438 ex, 439 LLVMConstInt(LLVMInt32Type(), 0, 0), 440 ""); 441 442 ex = LLVMBuildExtractElement(builder, val1, 443 channel, ""); 444 res = LLVMBuildInsertElement(builder, 445 res, ex, 446 LLVMConstInt(LLVMInt32Type(), 1, 0), 447 ""); 448 449 ex = LLVMBuildExtractElement(builder, val2, 450 channel, ""); 451 res = LLVMBuildInsertElement(builder, 452 res, ex, 453 LLVMConstInt(LLVMInt32Type(), 2, 0), 454 ""); 455 456 ex = LLVMBuildExtractElement(builder, val3, 457 channel, ""); 458 res = LLVMBuildInsertElement(builder, 459 res, ex, 460 LLVMConstInt(LLVMInt32Type(), 3, 0), 461 ""); 462 463 return res; 464} 465 466static void 467soa_to_aos(LLVMBuilderRef builder, 468 LLVMValueRef soa[NUM_CHANNELS], 469 LLVMValueRef aos[NUM_CHANNELS]) 470{ 471 LLVMValueRef comp; 472 int i = 0; 473 474 debug_assert(NUM_CHANNELS == 4); 475 476 aos[0] = LLVMConstNull(LLVMTypeOf(soa[0])); 477 aos[1] = aos[2] = aos[3] = aos[0]; 478 479 for (i = 0; i < NUM_CHANNELS; ++i) { 480 LLVMValueRef channel = LLVMConstInt(LLVMInt32Type(), i, 0); 481 482 comp = LLVMBuildExtractElement(builder, soa[i], 483 LLVMConstInt(LLVMInt32Type(), 0, 0), ""); 484 aos[0] = LLVMBuildInsertElement(builder, aos[0], comp, channel, ""); 485 486 comp = LLVMBuildExtractElement(builder, soa[i], 487 LLVMConstInt(LLVMInt32Type(), 1, 0), ""); 488 aos[1] = LLVMBuildInsertElement(builder, aos[1], comp, channel, ""); 489 490 comp = LLVMBuildExtractElement(builder, soa[i], 491 LLVMConstInt(LLVMInt32Type(), 2, 0), ""); 492 aos[2] = LLVMBuildInsertElement(builder, aos[2], comp, channel, ""); 493 494 comp = LLVMBuildExtractElement(builder, soa[i], 495 LLVMConstInt(LLVMInt32Type(), 3, 0), ""); 496 aos[3] = LLVMBuildInsertElement(builder, aos[3], comp, channel, ""); 497 498 } 499} 500 501static void 502convert_to_soa(LLVMBuilderRef builder, 503 LLVMValueRef (*aos)[NUM_CHANNELS], 504 LLVMValueRef (*soa)[NUM_CHANNELS], 505 int num_attribs) 506{ 507 int i; 508 509 debug_assert(NUM_CHANNELS == 4); 510 511 for (i = 0; i < num_attribs; ++i) { 512 LLVMValueRef val0 = aos[i][0]; 513 LLVMValueRef val1 = aos[i][1]; 514 LLVMValueRef val2 = aos[i][2]; 515 LLVMValueRef val3 = aos[i][3]; 516 517 soa[i][0] = aos_to_soa(builder, val0, val1, val2, val3, 518 LLVMConstInt(LLVMInt32Type(), 0, 0)); 519 soa[i][1] = aos_to_soa(builder, val0, val1, val2, val3, 520 LLVMConstInt(LLVMInt32Type(), 1, 0)); 521 soa[i][2] = aos_to_soa(builder, val0, val1, val2, val3, 522 LLVMConstInt(LLVMInt32Type(), 2, 0)); 523 soa[i][3] = aos_to_soa(builder, val0, val1, val2, val3, 524 LLVMConstInt(LLVMInt32Type(), 3, 0)); 525 } 526} 527 528static void 529store_aos(LLVMBuilderRef builder, 530 LLVMValueRef io_ptr, 531 LLVMValueRef index, 532 LLVMValueRef value) 533{ 534 LLVMValueRef id_ptr = draw_jit_header_id(builder, io_ptr); 535 LLVMValueRef data_ptr = draw_jit_header_data(builder, io_ptr); 536 LLVMValueRef indices[3]; 537 538 indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); 539 indices[1] = index; 540 indices[2] = LLVMConstInt(LLVMInt32Type(), 0, 0); 541 542 /* undefined vertex */ 543 LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), 544 0xffff, 0), id_ptr); 545 546#if DEBUG_STORE 547 lp_build_printf(builder, " ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr); 548#endif 549#if 0 550 /*lp_build_printf(builder, " ---- %p storing at %d (%p) ", io_ptr, index, data_ptr); 551 print_vectorf(builder, value);*/ 552 data_ptr = LLVMBuildBitCast(builder, data_ptr, 553 LLVMPointerType(LLVMArrayType(LLVMVectorType(LLVMFloatType(), 4), 0), 0), 554 "datavec"); 555 data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 2, ""); 556 557 LLVMBuildStore(builder, value, data_ptr); 558#else 559 { 560 LLVMValueRef x, y, z, w; 561 LLVMValueRef idx0, idx1, idx2, idx3; 562 LLVMValueRef gep0, gep1, gep2, gep3; 563 data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 3, ""); 564 565 idx0 = LLVMConstInt(LLVMInt32Type(), 0, 0); 566 idx1 = LLVMConstInt(LLVMInt32Type(), 1, 0); 567 idx2 = LLVMConstInt(LLVMInt32Type(), 2, 0); 568 idx3 = LLVMConstInt(LLVMInt32Type(), 3, 0); 569 570 x = LLVMBuildExtractElement(builder, value, 571 idx0, ""); 572 y = LLVMBuildExtractElement(builder, value, 573 idx1, ""); 574 z = LLVMBuildExtractElement(builder, value, 575 idx2, ""); 576 w = LLVMBuildExtractElement(builder, value, 577 idx3, ""); 578 579 gep0 = LLVMBuildGEP(builder, data_ptr, &idx0, 1, ""); 580 gep1 = LLVMBuildGEP(builder, data_ptr, &idx1, 1, ""); 581 gep2 = LLVMBuildGEP(builder, data_ptr, &idx2, 1, ""); 582 gep3 = LLVMBuildGEP(builder, data_ptr, &idx3, 1, ""); 583 584 /*lp_build_printf(builder, "##### x = %f (%p), y = %f (%p), z = %f (%p), w = %f (%p)\n", 585 x, gep0, y, gep1, z, gep2, w, gep3);*/ 586 LLVMBuildStore(builder, x, gep0); 587 LLVMBuildStore(builder, y, gep1); 588 LLVMBuildStore(builder, z, gep2); 589 LLVMBuildStore(builder, w, gep3); 590 } 591#endif 592} 593 594static void 595store_aos_array(LLVMBuilderRef builder, 596 LLVMValueRef io_ptr, 597 LLVMValueRef aos[NUM_CHANNELS], 598 int attrib, 599 int num_outputs) 600{ 601 LLVMValueRef attr_index = LLVMConstInt(LLVMInt32Type(), attrib, 0); 602 LLVMValueRef ind0 = LLVMConstInt(LLVMInt32Type(), 0, 0); 603 LLVMValueRef ind1 = LLVMConstInt(LLVMInt32Type(), 1, 0); 604 LLVMValueRef ind2 = LLVMConstInt(LLVMInt32Type(), 2, 0); 605 LLVMValueRef ind3 = LLVMConstInt(LLVMInt32Type(), 3, 0); 606 LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr; 607 608 debug_assert(NUM_CHANNELS == 4); 609 610 io0_ptr = LLVMBuildGEP(builder, io_ptr, 611 &ind0, 1, ""); 612 io1_ptr = LLVMBuildGEP(builder, io_ptr, 613 &ind1, 1, ""); 614 io2_ptr = LLVMBuildGEP(builder, io_ptr, 615 &ind2, 1, ""); 616 io3_ptr = LLVMBuildGEP(builder, io_ptr, 617 &ind3, 1, ""); 618 619#if DEBUG_STORE 620 lp_build_printf(builder, " io = %p, indexes[%d, %d, %d, %d]\n", 621 io_ptr, ind0, ind1, ind2, ind3); 622#endif 623 624 store_aos(builder, io0_ptr, attr_index, aos[0]); 625 store_aos(builder, io1_ptr, attr_index, aos[1]); 626 store_aos(builder, io2_ptr, attr_index, aos[2]); 627 store_aos(builder, io3_ptr, attr_index, aos[3]); 628} 629 630static void 631convert_to_aos(LLVMBuilderRef builder, 632 LLVMValueRef io, 633 LLVMValueRef (*outputs)[NUM_CHANNELS], 634 int num_outputs, 635 int max_vertices) 636{ 637 unsigned chan, attrib; 638 639#if DEBUG_STORE 640 lp_build_printf(builder, " # storing begin\n"); 641#endif 642 for (attrib = 0; attrib < num_outputs; ++attrib) { 643 LLVMValueRef soa[4]; 644 LLVMValueRef aos[4]; 645 for(chan = 0; chan < NUM_CHANNELS; ++chan) { 646 if(outputs[attrib][chan]) { 647 LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], ""); 648 lp_build_name(out, "output%u.%c", attrib, "xyzw"[chan]); 649 /*lp_build_printf(builder, "output %d : %d ", 650 LLVMConstInt(LLVMInt32Type(), attrib, 0), 651 LLVMConstInt(LLVMInt32Type(), chan, 0)); 652 print_vectorf(builder, out);*/ 653 soa[chan] = out; 654 } else 655 soa[chan] = 0; 656 } 657 soa_to_aos(builder, soa, aos); 658 store_aos_array(builder, 659 io, 660 aos, 661 attrib, 662 num_outputs); 663 } 664#if DEBUG_STORE 665 lp_build_printf(builder, " # storing end\n"); 666#endif 667} 668 669static void 670draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) 671{ 672 LLVMTypeRef arg_types[8]; 673 LLVMTypeRef func_type; 674 LLVMValueRef context_ptr; 675 LLVMBasicBlockRef block; 676 LLVMBuilderRef builder; 677 LLVMValueRef start, end, count, stride, step, io_itr; 678 LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr; 679 LLVMValueRef instance_id; 680 struct draw_context *draw = llvm->draw; 681 unsigned i, j; 682 struct lp_build_context bld; 683 struct lp_build_loop_state lp_loop; 684 struct lp_type vs_type = lp_type_float_vec(32); 685 const int max_vertices = 4; 686 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; 687 void *code; 688 struct lp_build_sampler_soa *sampler = 0; 689 690 arg_types[0] = llvm->context_ptr_type; /* context */ 691 arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */ 692 arg_types[2] = llvm->buffer_ptr_type; /* vbuffers */ 693 arg_types[3] = LLVMInt32Type(); /* start */ 694 arg_types[4] = LLVMInt32Type(); /* count */ 695 arg_types[5] = LLVMInt32Type(); /* stride */ 696 arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */ 697 arg_types[7] = LLVMInt32Type(); /* instance_id */ 698 699 func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); 700 701 variant->function = LLVMAddFunction(llvm->module, "draw_llvm_shader", func_type); 702 LLVMSetFunctionCallConv(variant->function, LLVMCCallConv); 703 for(i = 0; i < Elements(arg_types); ++i) 704 if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) 705 LLVMAddAttribute(LLVMGetParam(variant->function, i), LLVMNoAliasAttribute); 706 707 context_ptr = LLVMGetParam(variant->function, 0); 708 io_ptr = LLVMGetParam(variant->function, 1); 709 vbuffers_ptr = LLVMGetParam(variant->function, 2); 710 start = LLVMGetParam(variant->function, 3); 711 count = LLVMGetParam(variant->function, 4); 712 stride = LLVMGetParam(variant->function, 5); 713 vb_ptr = LLVMGetParam(variant->function, 6); 714 instance_id = LLVMGetParam(variant->function, 7); 715 716 lp_build_name(context_ptr, "context"); 717 lp_build_name(io_ptr, "io"); 718 lp_build_name(vbuffers_ptr, "vbuffers"); 719 lp_build_name(start, "start"); 720 lp_build_name(count, "count"); 721 lp_build_name(stride, "stride"); 722 lp_build_name(vb_ptr, "vb"); 723 lp_build_name(instance_id, "instance_id"); 724 725 /* 726 * Function body 727 */ 728 729 block = LLVMAppendBasicBlock(variant->function, "entry"); 730 builder = LLVMCreateBuilder(); 731 LLVMPositionBuilderAtEnd(builder, block); 732 733 lp_build_context_init(&bld, builder, vs_type); 734 735 end = lp_build_add(&bld, start, count); 736 737 step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); 738 739 /* code generated texture sampling */ 740 sampler = draw_llvm_sampler_soa_create(variant->key.sampler, 741 context_ptr); 742 743#if DEBUG_STORE 744 lp_build_printf(builder, "start = %d, end = %d, step = %d\n", 745 start, end, step); 746#endif 747 lp_build_loop_begin(builder, start, &lp_loop); 748 { 749 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; 750 LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } }; 751 LLVMValueRef io; 752 const LLVMValueRef (*ptr_aos)[NUM_CHANNELS]; 753 754 io_itr = LLVMBuildSub(builder, lp_loop.counter, start, ""); 755 io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, ""); 756#if DEBUG_STORE 757 lp_build_printf(builder, " --- io %d = %p, loop counter %d\n", 758 io_itr, io, lp_loop.counter); 759#endif 760 for (i = 0; i < NUM_CHANNELS; ++i) { 761 LLVMValueRef true_index = LLVMBuildAdd( 762 builder, 763 lp_loop.counter, 764 LLVMConstInt(LLVMInt32Type(), i, 0), ""); 765 for (j = 0; j < draw->pt.nr_vertex_elements; ++j) { 766 struct pipe_vertex_element *velem = &draw->pt.vertex_element[j]; 767 LLVMValueRef vb_index = LLVMConstInt(LLVMInt32Type(), 768 velem->vertex_buffer_index, 769 0); 770 LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr, 771 &vb_index, 1, ""); 772 generate_fetch(builder, vbuffers_ptr, 773 &aos_attribs[j][i], velem, vb, true_index, 774 instance_id); 775 } 776 } 777 convert_to_soa(builder, aos_attribs, inputs, 778 draw->pt.nr_vertex_elements); 779 780 ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs; 781 generate_vs(llvm, 782 builder, 783 outputs, 784 ptr_aos, 785 context_ptr, 786 sampler); 787 788 convert_to_aos(builder, io, outputs, 789 draw->vs.vertex_shader->info.num_outputs, 790 max_vertices); 791 } 792 lp_build_loop_end_cond(builder, end, step, LLVMIntUGE, &lp_loop); 793 794 sampler->destroy(sampler); 795 796 LLVMBuildRetVoid(builder); 797 798 LLVMDisposeBuilder(builder); 799 800 /* 801 * Translate the LLVM IR into machine code. 802 */ 803#ifdef DEBUG 804 if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) { 805 lp_debug_dump_value(variant->function); 806 assert(0); 807 } 808#endif 809 810 LLVMRunFunctionPassManager(llvm->pass, variant->function); 811 812 if (gallivm_debug & GALLIVM_DEBUG_IR) { 813 lp_debug_dump_value(variant->function); 814 debug_printf("\n"); 815 } 816 817 code = LLVMGetPointerToGlobal(llvm->draw->engine, variant->function); 818 variant->jit_func = (draw_jit_vert_func)pointer_to_func(code); 819 820 if (gallivm_debug & GALLIVM_DEBUG_ASM) { 821 lp_disassemble(code); 822 } 823} 824 825 826static void 827draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *variant) 828{ 829 LLVMTypeRef arg_types[8]; 830 LLVMTypeRef func_type; 831 LLVMValueRef context_ptr; 832 LLVMBasicBlockRef block; 833 LLVMBuilderRef builder; 834 LLVMValueRef fetch_elts, fetch_count, stride, step, io_itr; 835 LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr; 836 LLVMValueRef instance_id; 837 struct draw_context *draw = llvm->draw; 838 unsigned i, j; 839 struct lp_build_context bld; 840 struct lp_build_context bld_int; 841 struct lp_build_loop_state lp_loop; 842 struct lp_type vs_type = lp_type_float_vec(32); 843 const int max_vertices = 4; 844 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; 845 LLVMValueRef fetch_max; 846 void *code; 847 struct lp_build_sampler_soa *sampler = 0; 848 849 arg_types[0] = llvm->context_ptr_type; /* context */ 850 arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */ 851 arg_types[2] = llvm->buffer_ptr_type; /* vbuffers */ 852 arg_types[3] = LLVMPointerType(LLVMInt32Type(), 0); /* fetch_elts * */ 853 arg_types[4] = LLVMInt32Type(); /* fetch_count */ 854 arg_types[5] = LLVMInt32Type(); /* stride */ 855 arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */ 856 arg_types[7] = LLVMInt32Type(); /* instance_id */ 857 858 func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); 859 860 variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts", 861 func_type); 862 LLVMSetFunctionCallConv(variant->function_elts, LLVMCCallConv); 863 for(i = 0; i < Elements(arg_types); ++i) 864 if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) 865 LLVMAddAttribute(LLVMGetParam(variant->function_elts, i), 866 LLVMNoAliasAttribute); 867 868 context_ptr = LLVMGetParam(variant->function_elts, 0); 869 io_ptr = LLVMGetParam(variant->function_elts, 1); 870 vbuffers_ptr = LLVMGetParam(variant->function_elts, 2); 871 fetch_elts = LLVMGetParam(variant->function_elts, 3); 872 fetch_count = LLVMGetParam(variant->function_elts, 4); 873 stride = LLVMGetParam(variant->function_elts, 5); 874 vb_ptr = LLVMGetParam(variant->function_elts, 6); 875 instance_id = LLVMGetParam(variant->function_elts, 7); 876 877 lp_build_name(context_ptr, "context"); 878 lp_build_name(io_ptr, "io"); 879 lp_build_name(vbuffers_ptr, "vbuffers"); 880 lp_build_name(fetch_elts, "fetch_elts"); 881 lp_build_name(fetch_count, "fetch_count"); 882 lp_build_name(stride, "stride"); 883 lp_build_name(vb_ptr, "vb"); 884 lp_build_name(instance_id, "instance_id"); 885 886 /* 887 * Function body 888 */ 889 890 block = LLVMAppendBasicBlock(variant->function_elts, "entry"); 891 builder = LLVMCreateBuilder(); 892 LLVMPositionBuilderAtEnd(builder, block); 893 894 lp_build_context_init(&bld, builder, vs_type); 895 lp_build_context_init(&bld_int, builder, lp_type_int(32)); 896 897 step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); 898 899 /* code generated texture sampling */ 900 sampler = draw_llvm_sampler_soa_create(variant->key.sampler, 901 context_ptr); 902 903 fetch_max = LLVMBuildSub(builder, fetch_count, 904 LLVMConstInt(LLVMInt32Type(), 1, 0), 905 "fetch_max"); 906 907 lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), &lp_loop); 908 { 909 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; 910 LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } }; 911 LLVMValueRef io; 912 const LLVMValueRef (*ptr_aos)[NUM_CHANNELS]; 913 914 io_itr = lp_loop.counter; 915 io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, ""); 916#if DEBUG_STORE 917 lp_build_printf(builder, " --- io %d = %p, loop counter %d\n", 918 io_itr, io, lp_loop.counter); 919#endif 920 for (i = 0; i < NUM_CHANNELS; ++i) { 921 LLVMValueRef true_index = LLVMBuildAdd( 922 builder, 923 lp_loop.counter, 924 LLVMConstInt(LLVMInt32Type(), i, 0), ""); 925 LLVMValueRef fetch_ptr; 926 927 /* make sure we're not out of bounds which can happen 928 * if fetch_count % 4 != 0, because on the last iteration 929 * a few of the 4 vertex fetches will be out of bounds */ 930 true_index = lp_build_min(&bld_int, true_index, fetch_max); 931 932 fetch_ptr = LLVMBuildGEP(builder, fetch_elts, 933 &true_index, 1, ""); 934 true_index = LLVMBuildLoad(builder, fetch_ptr, "fetch_elt"); 935 for (j = 0; j < draw->pt.nr_vertex_elements; ++j) { 936 struct pipe_vertex_element *velem = &draw->pt.vertex_element[j]; 937 LLVMValueRef vb_index = LLVMConstInt(LLVMInt32Type(), 938 velem->vertex_buffer_index, 939 0); 940 LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr, 941 &vb_index, 1, ""); 942 generate_fetch(builder, vbuffers_ptr, 943 &aos_attribs[j][i], velem, vb, true_index, 944 instance_id); 945 } 946 } 947 convert_to_soa(builder, aos_attribs, inputs, 948 draw->pt.nr_vertex_elements); 949 950 ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs; 951 generate_vs(llvm, 952 builder, 953 outputs, 954 ptr_aos, 955 context_ptr, 956 sampler); 957 958 convert_to_aos(builder, io, outputs, 959 draw->vs.vertex_shader->info.num_outputs, 960 max_vertices); 961 } 962 lp_build_loop_end_cond(builder, fetch_count, step, LLVMIntUGE, &lp_loop); 963 964 sampler->destroy(sampler); 965 966 LLVMBuildRetVoid(builder); 967 968 LLVMDisposeBuilder(builder); 969 970 /* 971 * Translate the LLVM IR into machine code. 972 */ 973#ifdef DEBUG 974 if(LLVMVerifyFunction(variant->function_elts, LLVMPrintMessageAction)) { 975 lp_debug_dump_value(variant->function_elts); 976 assert(0); 977 } 978#endif 979 980 LLVMRunFunctionPassManager(llvm->pass, variant->function_elts); 981 982 if (gallivm_debug & GALLIVM_DEBUG_IR) { 983 lp_debug_dump_value(variant->function_elts); 984 debug_printf("\n"); 985 } 986 987 code = LLVMGetPointerToGlobal(llvm->draw->engine, variant->function_elts); 988 variant->jit_func_elts = (draw_jit_vert_func_elts)pointer_to_func(code); 989 990 if (gallivm_debug & GALLIVM_DEBUG_ASM) { 991 lp_disassemble(code); 992 } 993} 994 995void 996draw_llvm_make_variant_key(struct draw_llvm *llvm, 997 struct draw_llvm_variant_key *key) 998{ 999 unsigned i; 1000 1001 memset(key, 0, sizeof(struct draw_llvm_variant_key)); 1002 1003 key->nr_vertex_elements = llvm->draw->pt.nr_vertex_elements; 1004 1005 memcpy(key->vertex_element, 1006 llvm->draw->pt.vertex_element, 1007 sizeof(struct pipe_vertex_element) * key->nr_vertex_elements); 1008 1009 memcpy(&key->vs, 1010 &llvm->draw->vs.vertex_shader->state, 1011 sizeof(struct pipe_shader_state)); 1012 1013 /* if the driver implemented the sampling hooks then 1014 * setup our sampling state */ 1015 if (llvm->draw->num_sampler_views && llvm->draw->num_samplers) { 1016 for(i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; ++i) { 1017 struct draw_vertex_shader *shader = llvm->draw->vs.vertex_shader; 1018 if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) 1019 lp_sampler_static_state(&key->sampler[i], 1020 llvm->draw->sampler_views[i], 1021 llvm->draw->samplers[i]); 1022 } 1023 } 1024} 1025 1026void 1027draw_llvm_set_mapped_texture(struct draw_context *draw, 1028 unsigned sampler_idx, 1029 uint32_t width, uint32_t height, uint32_t depth, 1030 uint32_t last_level, 1031 uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS], 1032 uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS], 1033 const void *data[DRAW_MAX_TEXTURE_LEVELS]) 1034{ 1035 unsigned j; 1036 struct draw_jit_texture *jit_tex; 1037 1038 assert(sampler_idx < PIPE_MAX_VERTEX_SAMPLERS); 1039 1040 1041 jit_tex = &draw->llvm->jit_context.textures[sampler_idx]; 1042 1043 jit_tex->width = width; 1044 jit_tex->height = height; 1045 jit_tex->depth = depth; 1046 jit_tex->last_level = last_level; 1047 1048 for (j = 0; j <= last_level; j++) { 1049 jit_tex->data[j] = data[j]; 1050 jit_tex->row_stride[j] = row_stride[j]; 1051 jit_tex->img_stride[j] = img_stride[j]; 1052 } 1053} 1054 1055void 1056draw_llvm_destroy_variant(struct draw_llvm_variant *variant) 1057{ 1058 struct draw_llvm *llvm = variant->llvm; 1059 struct draw_context *draw = llvm->draw; 1060 1061 if (variant->function_elts) { 1062 if (variant->function_elts) 1063 LLVMFreeMachineCodeForFunction(draw->engine, 1064 variant->function_elts); 1065 LLVMDeleteFunction(variant->function_elts); 1066 } 1067 1068 if (variant->function) { 1069 if (variant->function) 1070 LLVMFreeMachineCodeForFunction(draw->engine, 1071 variant->function); 1072 LLVMDeleteFunction(variant->function); 1073 } 1074 1075 remove_from_list(&variant->list_item_local); 1076 variant->shader->variants_cached--; 1077 remove_from_list(&variant->list_item_global); 1078 llvm->nr_variants--; 1079 FREE(variant); 1080} 1081