draw_llvm.c revision 26ff7523b69ddb377ade29296d20abfc46e69489
1/************************************************************************** 2 * 3 * Copyright 2010 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28#include "draw_llvm.h" 29 30#include "draw_context.h" 31#include "draw_vs.h" 32 33#include "gallivm/lp_bld_arit.h" 34#include "gallivm/lp_bld_logic.h" 35#include "gallivm/lp_bld_const.h" 36#include "gallivm/lp_bld_swizzle.h" 37#include "gallivm/lp_bld_struct.h" 38#include "gallivm/lp_bld_type.h" 39#include "gallivm/lp_bld_flow.h" 40#include "gallivm/lp_bld_debug.h" 41#include "gallivm/lp_bld_tgsi.h" 42#include "gallivm/lp_bld_printf.h" 43#include "gallivm/lp_bld_intr.h" 44#include "gallivm/lp_bld_init.h" 45 46#include "tgsi/tgsi_exec.h" 47#include "tgsi/tgsi_dump.h" 48 49#include "util/u_cpu_detect.h" 50#include "util/u_pointer.h" 51#include "util/u_string.h" 52 53#include <llvm-c/Transforms/Scalar.h> 54 55#define DEBUG_STORE 0 56 57/* generates the draw jit function */ 58static void 59draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var); 60static void 61draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *var); 62 63static void 64init_globals(struct draw_llvm *llvm) 65{ 66 LLVMTypeRef texture_type; 67 68 /* struct draw_jit_texture */ 69 { 70 LLVMTypeRef elem_types[DRAW_JIT_TEXTURE_NUM_FIELDS]; 71 72 elem_types[DRAW_JIT_TEXTURE_WIDTH] = LLVMInt32Type(); 73 elem_types[DRAW_JIT_TEXTURE_HEIGHT] = LLVMInt32Type(); 74 elem_types[DRAW_JIT_TEXTURE_DEPTH] = LLVMInt32Type(); 75 elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32Type(); 76 elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] = 77 LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS); 78 elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] = 79 LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS); 80 elem_types[DRAW_JIT_TEXTURE_DATA] = 81 LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0), 82 DRAW_MAX_TEXTURE_LEVELS); 83 elem_types[DRAW_JIT_TEXTURE_MIN_LOD] = LLVMFloatType(); 84 elem_types[DRAW_JIT_TEXTURE_MAX_LOD] = LLVMFloatType(); 85 elem_types[DRAW_JIT_TEXTURE_LOD_BIAS] = LLVMFloatType(); 86 elem_types[DRAW_JIT_TEXTURE_BORDER_COLOR] = 87 LLVMArrayType(LLVMFloatType(), 4); 88 89 texture_type = LLVMStructType(elem_types, Elements(elem_types), 0); 90 91 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width, 92 llvm->target, texture_type, 93 DRAW_JIT_TEXTURE_WIDTH); 94 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height, 95 llvm->target, texture_type, 96 DRAW_JIT_TEXTURE_HEIGHT); 97 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, depth, 98 llvm->target, texture_type, 99 DRAW_JIT_TEXTURE_DEPTH); 100 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, last_level, 101 llvm->target, texture_type, 102 DRAW_JIT_TEXTURE_LAST_LEVEL); 103 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, row_stride, 104 llvm->target, texture_type, 105 DRAW_JIT_TEXTURE_ROW_STRIDE); 106 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, img_stride, 107 llvm->target, texture_type, 108 DRAW_JIT_TEXTURE_IMG_STRIDE); 109 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, data, 110 llvm->target, texture_type, 111 DRAW_JIT_TEXTURE_DATA); 112 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, min_lod, 113 llvm->target, texture_type, 114 DRAW_JIT_TEXTURE_MIN_LOD); 115 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, max_lod, 116 llvm->target, texture_type, 117 DRAW_JIT_TEXTURE_MAX_LOD); 118 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, lod_bias, 119 llvm->target, texture_type, 120 DRAW_JIT_TEXTURE_LOD_BIAS); 121 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, border_color, 122 llvm->target, texture_type, 123 DRAW_JIT_TEXTURE_BORDER_COLOR); 124 LP_CHECK_STRUCT_SIZE(struct draw_jit_texture, 125 llvm->target, texture_type); 126 127 LLVMAddTypeName(llvm->module, "texture", texture_type); 128 } 129 130 131 /* struct draw_jit_context */ 132 { 133 LLVMTypeRef elem_types[4]; 134 LLVMTypeRef context_type; 135 136 elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */ 137 elem_types[1] = LLVMPointerType(LLVMFloatType(), 0); /* gs_constants */ 138 elem_types[2] = LLVMPointerType(LLVMArrayType(LLVMArrayType(LLVMFloatType(), 4), 12), 0); /* planes */ 139 elem_types[3] = LLVMArrayType(texture_type, 140 PIPE_MAX_VERTEX_SAMPLERS); /* textures */ 141 142 context_type = LLVMStructType(elem_types, Elements(elem_types), 0); 143 144 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants, 145 llvm->target, context_type, 0); 146 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, gs_constants, 147 llvm->target, context_type, 1); 148 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, planes, 149 llvm->target, context_type, 2); 150 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures, 151 llvm->target, context_type, 152 DRAW_JIT_CTX_TEXTURES); 153 LP_CHECK_STRUCT_SIZE(struct draw_jit_context, 154 llvm->target, context_type); 155 156 LLVMAddTypeName(llvm->module, "draw_jit_context", context_type); 157 158 llvm->context_ptr_type = LLVMPointerType(context_type, 0); 159 } 160 { 161 LLVMTypeRef buffer_ptr = LLVMPointerType(LLVMIntType(8), 0); 162 llvm->buffer_ptr_type = LLVMPointerType(buffer_ptr, 0); 163 } 164 /* struct pipe_vertex_buffer */ 165 { 166 LLVMTypeRef elem_types[4]; 167 LLVMTypeRef vb_type; 168 169 elem_types[0] = LLVMInt32Type(); 170 elem_types[1] = LLVMInt32Type(); 171 elem_types[2] = LLVMInt32Type(); 172 elem_types[3] = LLVMPointerType(LLVMOpaqueType(), 0); /* vs_constants */ 173 174 vb_type = LLVMStructType(elem_types, Elements(elem_types), 0); 175 176 LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride, 177 llvm->target, vb_type, 0); 178 LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset, 179 llvm->target, vb_type, 2); 180 LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer, 181 llvm->target, vb_type); 182 183 LLVMAddTypeName(llvm->module, "pipe_vertex_buffer", vb_type); 184 185 llvm->vb_ptr_type = LLVMPointerType(vb_type, 0); 186 } 187} 188 189static LLVMTypeRef 190create_vertex_header(struct draw_llvm *llvm, int data_elems) 191{ 192 /* struct vertex_header */ 193 LLVMTypeRef elem_types[3]; 194 LLVMTypeRef vertex_header; 195 char struct_name[24]; 196 197 util_snprintf(struct_name, 23, "vertex_header%d", data_elems); 198 199 elem_types[0] = LLVMIntType(32); 200 elem_types[1] = LLVMArrayType(LLVMFloatType(), 4); 201 elem_types[2] = LLVMArrayType(elem_types[1], data_elems); 202 203 vertex_header = LLVMStructType(elem_types, Elements(elem_types), 0); 204 205 /* these are bit-fields and we can't take address of them 206 LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask, 207 llvm->target, vertex_header, 208 DRAW_JIT_VERTEX_CLIPMASK); 209 LP_CHECK_MEMBER_OFFSET(struct vertex_header, edgeflag, 210 llvm->target, vertex_header, 211 DRAW_JIT_VERTEX_EDGEFLAG); 212 LP_CHECK_MEMBER_OFFSET(struct vertex_header, pad, 213 llvm->target, vertex_header, 214 DRAW_JIT_VERTEX_PAD); 215 LP_CHECK_MEMBER_OFFSET(struct vertex_header, vertex_id, 216 llvm->target, vertex_header, 217 DRAW_JIT_VERTEX_VERTEX_ID); 218 */ 219 LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip, 220 llvm->target, vertex_header, 221 DRAW_JIT_VERTEX_CLIP); 222 LP_CHECK_MEMBER_OFFSET(struct vertex_header, data, 223 llvm->target, vertex_header, 224 DRAW_JIT_VERTEX_DATA); 225 226 LLVMAddTypeName(llvm->module, struct_name, vertex_header); 227 228 return LLVMPointerType(vertex_header, 0); 229} 230 231struct draw_llvm * 232draw_llvm_create(struct draw_context *draw) 233{ 234 struct draw_llvm *llvm; 235 236 llvm = CALLOC_STRUCT( draw_llvm ); 237 if (!llvm) 238 return NULL; 239 240 llvm->draw = draw; 241 llvm->engine = draw->engine; 242 243 debug_assert(llvm->engine); 244 245 llvm->module = LLVMModuleCreateWithName("draw_llvm"); 246 llvm->provider = LLVMCreateModuleProviderForExistingModule(llvm->module); 247 248 LLVMAddModuleProvider(llvm->engine, llvm->provider); 249 250 llvm->target = LLVMGetExecutionEngineTargetData(llvm->engine); 251 252 llvm->pass = LLVMCreateFunctionPassManager(llvm->provider); 253 LLVMAddTargetData(llvm->target, llvm->pass); 254 255 if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) { 256 /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, 257 * but there are more on SVN. */ 258 /* TODO: Add more passes */ 259 260 LLVMAddCFGSimplificationPass(llvm->pass); 261 262 if (HAVE_LLVM >= 0x207 && sizeof(void*) == 4) { 263 /* For LLVM >= 2.7 and 32-bit build, use this order of passes to 264 * avoid generating bad code. 265 * Test with piglit glsl-vs-sqrt-zero test. 266 */ 267 LLVMAddConstantPropagationPass(llvm->pass); 268 LLVMAddPromoteMemoryToRegisterPass(llvm->pass); 269 } 270 else { 271 LLVMAddPromoteMemoryToRegisterPass(llvm->pass); 272 LLVMAddConstantPropagationPass(llvm->pass); 273 } 274 275 if(util_cpu_caps.has_sse4_1) { 276 /* FIXME: There is a bug in this pass, whereby the combination of fptosi 277 * and sitofp (necessary for trunc/floor/ceil/round implementation) 278 * somehow becomes invalid code. 279 */ 280 LLVMAddInstructionCombiningPass(llvm->pass); 281 } 282 LLVMAddGVNPass(llvm->pass); 283 } else { 284 /* We need at least this pass to prevent the backends to fail in 285 * unexpected ways. 286 */ 287 LLVMAddPromoteMemoryToRegisterPass(llvm->pass); 288 } 289 290 init_globals(llvm); 291 292 if (gallivm_debug & GALLIVM_DEBUG_IR) { 293 LLVMDumpModule(llvm->module); 294 } 295 296 llvm->nr_variants = 0; 297 make_empty_list(&llvm->vs_variants_list); 298 299 return llvm; 300} 301 302void 303draw_llvm_destroy(struct draw_llvm *llvm) 304{ 305 LLVMDisposePassManager(llvm->pass); 306 307 FREE(llvm); 308} 309 310struct draw_llvm_variant * 311draw_llvm_create_variant(struct draw_llvm *llvm, 312 unsigned num_inputs, 313 const struct draw_llvm_variant_key *key) 314{ 315 struct draw_llvm_variant *variant; 316 struct llvm_vertex_shader *shader = 317 llvm_vertex_shader(llvm->draw->vs.vertex_shader); 318 319 variant = MALLOC(sizeof *variant + 320 shader->variant_key_size - 321 sizeof variant->key); 322 if (variant == NULL) 323 return NULL; 324 325 variant->llvm = llvm; 326 327 memcpy(&variant->key, key, shader->variant_key_size); 328 329 llvm->vertex_header_ptr_type = create_vertex_header(llvm, num_inputs); 330 331 draw_llvm_generate(llvm, variant); 332 draw_llvm_generate_elts(llvm, variant); 333 334 variant->shader = shader; 335 variant->list_item_global.base = variant; 336 variant->list_item_local.base = variant; 337 /*variant->no = */shader->variants_created++; 338 variant->list_item_global.base = variant; 339 340 return variant; 341} 342 343static void 344generate_vs(struct draw_llvm *llvm, 345 LLVMBuilderRef builder, 346 LLVMValueRef (*outputs)[NUM_CHANNELS], 347 const LLVMValueRef (*inputs)[NUM_CHANNELS], 348 LLVMValueRef context_ptr, 349 struct lp_build_sampler_soa *draw_sampler) 350{ 351 const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens; 352 struct lp_type vs_type; 353 LLVMValueRef consts_ptr = draw_jit_context_vs_constants(builder, context_ptr); 354 struct lp_build_sampler_soa *sampler = 0; 355 356 memset(&vs_type, 0, sizeof vs_type); 357 vs_type.floating = TRUE; /* floating point values */ 358 vs_type.sign = TRUE; /* values are signed */ 359 vs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ 360 vs_type.width = 32; /* 32-bit float */ 361 vs_type.length = 4; /* 4 elements per vector */ 362#if 0 363 num_vs = 4; /* number of vertices per block */ 364#endif 365 366 if (gallivm_debug & GALLIVM_DEBUG_IR) { 367 tgsi_dump(tokens, 0); 368 } 369 370 if (llvm->draw->num_sampler_views && 371 llvm->draw->num_samplers) 372 sampler = draw_sampler; 373 374 lp_build_tgsi_soa(builder, 375 tokens, 376 vs_type, 377 NULL /*struct lp_build_mask_context *mask*/, 378 consts_ptr, 379 NULL /*pos*/, 380 inputs, 381 outputs, 382 sampler, 383 &llvm->draw->vs.vertex_shader->info); 384} 385 386#if DEBUG_STORE 387static void print_vectorf(LLVMBuilderRef builder, 388 LLVMValueRef vec) 389{ 390 LLVMValueRef val[4]; 391 val[0] = LLVMBuildExtractElement(builder, vec, 392 LLVMConstInt(LLVMInt32Type(), 0, 0), ""); 393 val[1] = LLVMBuildExtractElement(builder, vec, 394 LLVMConstInt(LLVMInt32Type(), 1, 0), ""); 395 val[2] = LLVMBuildExtractElement(builder, vec, 396 LLVMConstInt(LLVMInt32Type(), 2, 0), ""); 397 val[3] = LLVMBuildExtractElement(builder, vec, 398 LLVMConstInt(LLVMInt32Type(), 3, 0), ""); 399 lp_build_printf(builder, "vector = [%f, %f, %f, %f]\n", 400 val[0], val[1], val[2], val[3]); 401} 402#endif 403 404static void 405generate_fetch(LLVMBuilderRef builder, 406 LLVMValueRef vbuffers_ptr, 407 LLVMValueRef *res, 408 struct pipe_vertex_element *velem, 409 LLVMValueRef vbuf, 410 LLVMValueRef index, 411 LLVMValueRef instance_id) 412{ 413 LLVMValueRef indices = LLVMConstInt(LLVMInt64Type(), velem->vertex_buffer_index, 0); 414 LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr, 415 &indices, 1, ""); 416 LLVMValueRef vb_stride = draw_jit_vbuffer_stride(builder, vbuf); 417 LLVMValueRef vb_max_index = draw_jit_vbuffer_max_index(builder, vbuf); 418 LLVMValueRef vb_buffer_offset = draw_jit_vbuffer_offset(builder, vbuf); 419 LLVMValueRef cond; 420 LLVMValueRef stride; 421 422 if (velem->instance_divisor) { 423 /* array index = instance_id / instance_divisor */ 424 index = LLVMBuildUDiv(builder, instance_id, 425 LLVMConstInt(LLVMInt32Type(), velem->instance_divisor, 0), 426 "instance_divisor"); 427 } 428 429 /* limit index to min(inex, vb_max_index) */ 430 cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, ""); 431 index = LLVMBuildSelect(builder, cond, index, vb_max_index, ""); 432 433 stride = LLVMBuildMul(builder, vb_stride, index, ""); 434 435 vbuffer_ptr = LLVMBuildLoad(builder, vbuffer_ptr, "vbuffer"); 436 437 stride = LLVMBuildAdd(builder, stride, 438 vb_buffer_offset, 439 ""); 440 stride = LLVMBuildAdd(builder, stride, 441 LLVMConstInt(LLVMInt32Type(), velem->src_offset, 0), 442 ""); 443 444 /*lp_build_printf(builder, "vbuf index = %d, stride is %d\n", indices, stride);*/ 445 vbuffer_ptr = LLVMBuildGEP(builder, vbuffer_ptr, &stride, 1, ""); 446 447 *res = draw_llvm_translate_from(builder, vbuffer_ptr, velem->src_format); 448} 449 450static LLVMValueRef 451aos_to_soa(LLVMBuilderRef builder, 452 LLVMValueRef val0, 453 LLVMValueRef val1, 454 LLVMValueRef val2, 455 LLVMValueRef val3, 456 LLVMValueRef channel) 457{ 458 LLVMValueRef ex, res; 459 460 ex = LLVMBuildExtractElement(builder, val0, 461 channel, ""); 462 res = LLVMBuildInsertElement(builder, 463 LLVMConstNull(LLVMTypeOf(val0)), 464 ex, 465 LLVMConstInt(LLVMInt32Type(), 0, 0), 466 ""); 467 468 ex = LLVMBuildExtractElement(builder, val1, 469 channel, ""); 470 res = LLVMBuildInsertElement(builder, 471 res, ex, 472 LLVMConstInt(LLVMInt32Type(), 1, 0), 473 ""); 474 475 ex = LLVMBuildExtractElement(builder, val2, 476 channel, ""); 477 res = LLVMBuildInsertElement(builder, 478 res, ex, 479 LLVMConstInt(LLVMInt32Type(), 2, 0), 480 ""); 481 482 ex = LLVMBuildExtractElement(builder, val3, 483 channel, ""); 484 res = LLVMBuildInsertElement(builder, 485 res, ex, 486 LLVMConstInt(LLVMInt32Type(), 3, 0), 487 ""); 488 489 return res; 490} 491 492static void 493soa_to_aos(LLVMBuilderRef builder, 494 LLVMValueRef soa[NUM_CHANNELS], 495 LLVMValueRef aos[NUM_CHANNELS]) 496{ 497 LLVMValueRef comp; 498 int i = 0; 499 500 debug_assert(NUM_CHANNELS == 4); 501 502 aos[0] = LLVMConstNull(LLVMTypeOf(soa[0])); 503 aos[1] = aos[2] = aos[3] = aos[0]; 504 505 for (i = 0; i < NUM_CHANNELS; ++i) { 506 LLVMValueRef channel = LLVMConstInt(LLVMInt32Type(), i, 0); 507 508 comp = LLVMBuildExtractElement(builder, soa[i], 509 LLVMConstInt(LLVMInt32Type(), 0, 0), ""); 510 aos[0] = LLVMBuildInsertElement(builder, aos[0], comp, channel, ""); 511 512 comp = LLVMBuildExtractElement(builder, soa[i], 513 LLVMConstInt(LLVMInt32Type(), 1, 0), ""); 514 aos[1] = LLVMBuildInsertElement(builder, aos[1], comp, channel, ""); 515 516 comp = LLVMBuildExtractElement(builder, soa[i], 517 LLVMConstInt(LLVMInt32Type(), 2, 0), ""); 518 aos[2] = LLVMBuildInsertElement(builder, aos[2], comp, channel, ""); 519 520 comp = LLVMBuildExtractElement(builder, soa[i], 521 LLVMConstInt(LLVMInt32Type(), 3, 0), ""); 522 aos[3] = LLVMBuildInsertElement(builder, aos[3], comp, channel, ""); 523 524 } 525} 526 527static void 528convert_to_soa(LLVMBuilderRef builder, 529 LLVMValueRef (*aos)[NUM_CHANNELS], 530 LLVMValueRef (*soa)[NUM_CHANNELS], 531 int num_attribs) 532{ 533 int i; 534 535 debug_assert(NUM_CHANNELS == 4); 536 537 for (i = 0; i < num_attribs; ++i) { 538 LLVMValueRef val0 = aos[i][0]; 539 LLVMValueRef val1 = aos[i][1]; 540 LLVMValueRef val2 = aos[i][2]; 541 LLVMValueRef val3 = aos[i][3]; 542 543 soa[i][0] = aos_to_soa(builder, val0, val1, val2, val3, 544 LLVMConstInt(LLVMInt32Type(), 0, 0)); 545 soa[i][1] = aos_to_soa(builder, val0, val1, val2, val3, 546 LLVMConstInt(LLVMInt32Type(), 1, 0)); 547 soa[i][2] = aos_to_soa(builder, val0, val1, val2, val3, 548 LLVMConstInt(LLVMInt32Type(), 2, 0)); 549 soa[i][3] = aos_to_soa(builder, val0, val1, val2, val3, 550 LLVMConstInt(LLVMInt32Type(), 3, 0)); 551 } 552} 553 554static void 555store_aos(LLVMBuilderRef builder, 556 LLVMValueRef io_ptr, 557 LLVMValueRef index, 558 LLVMValueRef value, 559 LLVMValueRef clipmask) 560{ 561 LLVMValueRef id_ptr = draw_jit_header_id(builder, io_ptr); 562 LLVMValueRef data_ptr = draw_jit_header_data(builder, io_ptr); 563 LLVMValueRef indices[3]; 564 LLVMValueRef val, shift; 565 566 indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); 567 indices[1] = index; 568 indices[2] = LLVMConstInt(LLVMInt32Type(), 0, 0); 569 570 /* initialize vertex id:16 = 0xffff, pad:3 = 0, edgeflag:1 = 1 */ 571 val = LLVMConstInt(LLVMInt32Type(), 0xffff1, 0); 572 shift = LLVMConstInt(LLVMInt32Type(), 12, 0); 573 val = LLVMBuildShl(builder, val, shift, ""); 574 /* add clipmask:12 */ 575 val = LLVMBuildOr(builder, val, clipmask, ""); 576 577 /* store vertex header */ 578 LLVMBuildStore(builder, val, id_ptr); 579 580 581#if DEBUG_STORE 582 lp_build_printf(builder, " ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr); 583#endif 584#if 0 585 /*lp_build_printf(builder, " ---- %p storing at %d (%p) ", io_ptr, index, data_ptr); 586 print_vectorf(builder, value);*/ 587 data_ptr = LLVMBuildBitCast(builder, data_ptr, 588 LLVMPointerType(LLVMArrayType(LLVMVectorType(LLVMFloatType(), 4), 0), 0), 589 "datavec"); 590 data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 2, ""); 591 592 LLVMBuildStore(builder, value, data_ptr); 593#else 594 { 595 LLVMValueRef x, y, z, w; 596 LLVMValueRef idx0, idx1, idx2, idx3; 597 LLVMValueRef gep0, gep1, gep2, gep3; 598 data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 3, ""); 599 600 idx0 = LLVMConstInt(LLVMInt32Type(), 0, 0); 601 idx1 = LLVMConstInt(LLVMInt32Type(), 1, 0); 602 idx2 = LLVMConstInt(LLVMInt32Type(), 2, 0); 603 idx3 = LLVMConstInt(LLVMInt32Type(), 3, 0); 604 605 x = LLVMBuildExtractElement(builder, value, 606 idx0, ""); 607 y = LLVMBuildExtractElement(builder, value, 608 idx1, ""); 609 z = LLVMBuildExtractElement(builder, value, 610 idx2, ""); 611 w = LLVMBuildExtractElement(builder, value, 612 idx3, ""); 613 614 gep0 = LLVMBuildGEP(builder, data_ptr, &idx0, 1, ""); 615 gep1 = LLVMBuildGEP(builder, data_ptr, &idx1, 1, ""); 616 gep2 = LLVMBuildGEP(builder, data_ptr, &idx2, 1, ""); 617 gep3 = LLVMBuildGEP(builder, data_ptr, &idx3, 1, ""); 618 619 /*lp_build_printf(builder, "##### x = %f (%p), y = %f (%p), z = %f (%p), w = %f (%p)\n", 620 x, gep0, y, gep1, z, gep2, w, gep3);*/ 621 LLVMBuildStore(builder, x, gep0); 622 LLVMBuildStore(builder, y, gep1); 623 LLVMBuildStore(builder, z, gep2); 624 LLVMBuildStore(builder, w, gep3); 625 } 626#endif 627} 628 629static void 630store_aos_array(LLVMBuilderRef builder, 631 LLVMValueRef io_ptr, 632 LLVMValueRef aos[NUM_CHANNELS], 633 int attrib, 634 int num_outputs, 635 LLVMValueRef clipmask) 636{ 637 LLVMValueRef attr_index = LLVMConstInt(LLVMInt32Type(), attrib, 0); 638 LLVMValueRef ind0 = LLVMConstInt(LLVMInt32Type(), 0, 0); 639 LLVMValueRef ind1 = LLVMConstInt(LLVMInt32Type(), 1, 0); 640 LLVMValueRef ind2 = LLVMConstInt(LLVMInt32Type(), 2, 0); 641 LLVMValueRef ind3 = LLVMConstInt(LLVMInt32Type(), 3, 0); 642 LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr; 643 LLVMValueRef clipmask0, clipmask1, clipmask2, clipmask3; 644 645 debug_assert(NUM_CHANNELS == 4); 646 647 io0_ptr = LLVMBuildGEP(builder, io_ptr, 648 &ind0, 1, ""); 649 io1_ptr = LLVMBuildGEP(builder, io_ptr, 650 &ind1, 1, ""); 651 io2_ptr = LLVMBuildGEP(builder, io_ptr, 652 &ind2, 1, ""); 653 io3_ptr = LLVMBuildGEP(builder, io_ptr, 654 &ind3, 1, ""); 655 656 clipmask0 = LLVMBuildExtractElement(builder, clipmask, 657 ind0, ""); 658 clipmask1 = LLVMBuildExtractElement(builder, clipmask, 659 ind1, ""); 660 clipmask2 = LLVMBuildExtractElement(builder, clipmask, 661 ind2, ""); 662 clipmask3 = LLVMBuildExtractElement(builder, clipmask, 663 ind3, ""); 664 665#if DEBUG_STORE 666 lp_build_printf(builder, "io = %p, indexes[%d, %d, %d, %d]\n, clipmask0 = %x, clipmask1 = %x, clipmask2 = %x, clipmask3 = %x\n", 667 io_ptr, ind0, ind1, ind2, ind3, clipmask0, clipmask1, clipmask2, clipmask3); 668#endif 669 /* store for each of the 4 vertices */ 670 store_aos(builder, io0_ptr, attr_index, aos[0], clipmask0); 671 store_aos(builder, io1_ptr, attr_index, aos[1], clipmask1); 672 store_aos(builder, io2_ptr, attr_index, aos[2], clipmask2); 673 store_aos(builder, io3_ptr, attr_index, aos[3], clipmask3); 674} 675 676static void 677convert_to_aos(LLVMBuilderRef builder, 678 LLVMValueRef io, 679 LLVMValueRef (*outputs)[NUM_CHANNELS], 680 LLVMValueRef clipmask, 681 int num_outputs, 682 int max_vertices) 683{ 684 unsigned chan, attrib; 685 686#if DEBUG_STORE 687 lp_build_printf(builder, " # storing begin\n"); 688#endif 689 for (attrib = 0; attrib < num_outputs; ++attrib) { 690 LLVMValueRef soa[4]; 691 LLVMValueRef aos[4]; 692 for(chan = 0; chan < NUM_CHANNELS; ++chan) { 693 if(outputs[attrib][chan]) { 694 LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], ""); 695 lp_build_name(out, "output%u.%c", attrib, "xyzw"[chan]); 696 /*lp_build_printf(builder, "output %d : %d ", 697 LLVMConstInt(LLVMInt32Type(), attrib, 0), 698 LLVMConstInt(LLVMInt32Type(), chan, 0)); 699 print_vectorf(builder, out);*/ 700 soa[chan] = out; 701 } else 702 soa[chan] = 0; 703 } 704 soa_to_aos(builder, soa, aos); 705 store_aos_array(builder, 706 io, 707 aos, 708 attrib, 709 num_outputs, 710 clipmask); 711 } 712#if DEBUG_STORE 713 lp_build_printf(builder, " # storing end\n"); 714#endif 715} 716 717/* 718 * Stores original vertex positions in clip coordinates 719 * There is probably a more efficient way to do this, 4 floats at once 720 * rather than extracting each element one by one. 721 */ 722static void 723store_clip(LLVMBuilderRef builder, 724 LLVMValueRef io_ptr, 725 LLVMValueRef (*outputs)[NUM_CHANNELS]) 726{ 727 LLVMValueRef out[4]; 728 LLVMValueRef indices[2]; 729 LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr; 730 LLVMValueRef clip_ptr0, clip_ptr1, clip_ptr2, clip_ptr3; 731 LLVMValueRef clip0_ptr, clip1_ptr, clip2_ptr, clip3_ptr; 732 LLVMValueRef out0elem, out1elem, out2elem, out3elem; 733 734 LLVMValueRef ind0 = LLVMConstInt(LLVMInt32Type(), 0, 0); 735 LLVMValueRef ind1 = LLVMConstInt(LLVMInt32Type(), 1, 0); 736 LLVMValueRef ind2 = LLVMConstInt(LLVMInt32Type(), 2, 0); 737 LLVMValueRef ind3 = LLVMConstInt(LLVMInt32Type(), 3, 0); 738 739 indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); 740 indices[1] = LLVMConstInt(LLVMInt32Type(), 0, 0); 741 742 out[0] = LLVMBuildLoad(builder, outputs[0][0], ""); /*x0 x1 x2 x3*/ 743 out[1] = LLVMBuildLoad(builder, outputs[0][1], ""); /*y0 y1 y2 y3*/ 744 out[2] = LLVMBuildLoad(builder, outputs[0][2], ""); /*z0 z1 z2 z3*/ 745 out[3] = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/ 746 747 io0_ptr = LLVMBuildGEP(builder, io_ptr, &ind0, 1, ""); 748 io1_ptr = LLVMBuildGEP(builder, io_ptr, &ind1, 1, ""); 749 io2_ptr = LLVMBuildGEP(builder, io_ptr, &ind2, 1, ""); 750 io3_ptr = LLVMBuildGEP(builder, io_ptr, &ind3, 1, ""); 751 752 clip_ptr0 = draw_jit_header_clip(builder, io0_ptr); 753 clip_ptr1 = draw_jit_header_clip(builder, io1_ptr); 754 clip_ptr2 = draw_jit_header_clip(builder, io2_ptr); 755 clip_ptr3 = draw_jit_header_clip(builder, io3_ptr); 756 757 for (int i = 0; i<4; i++){ 758 clip0_ptr = LLVMBuildGEP(builder, clip_ptr0, 759 indices, 2, ""); //x0 760 clip1_ptr = LLVMBuildGEP(builder, clip_ptr1, 761 indices, 2, ""); //x1 762 clip2_ptr = LLVMBuildGEP(builder, clip_ptr2, 763 indices, 2, ""); //x2 764 clip3_ptr = LLVMBuildGEP(builder, clip_ptr3, 765 indices, 2, ""); //x3 766 767 out0elem = LLVMBuildExtractElement(builder, out[i], 768 ind0, ""); //x0 769 out1elem = LLVMBuildExtractElement(builder, out[i], 770 ind1, ""); //x1 771 out2elem = LLVMBuildExtractElement(builder, out[i], 772 ind2, ""); //x2 773 out3elem = LLVMBuildExtractElement(builder, out[i], 774 ind3, ""); //x3 775 776 LLVMBuildStore(builder, out0elem, clip0_ptr); 777 LLVMBuildStore(builder, out1elem, clip1_ptr); 778 LLVMBuildStore(builder, out2elem, clip2_ptr); 779 LLVMBuildStore(builder, out3elem, clip3_ptr); 780 781 indices[1]= LLVMBuildAdd(builder, indices[1], ind1, ""); 782 } 783 784} 785 786/* 787 * Transforms the outputs for viewport mapping 788 */ 789static void 790generate_viewport(struct draw_llvm *llvm, 791 LLVMBuilderRef builder, 792 LLVMValueRef (*outputs)[NUM_CHANNELS]) 793{ 794 int i; 795 const float *scaleA = llvm->draw->viewport.scale; 796 const float *transA = llvm->draw->viewport.translate; 797 struct lp_type f32_type = lp_type_float_vec(32); 798 LLVMValueRef out3 = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/ 799 LLVMValueRef const1 = lp_build_const_vec(f32_type, 1.0); /*1.0 1.0 1.0 1.0*/ 800 801 /* for 1/w convention*/ 802 out3 = LLVMBuildFDiv(builder, const1, out3, ""); 803 804 /* Viewport Mapping */ 805 for (i=0; i<4; i++){ 806 LLVMValueRef out = LLVMBuildLoad(builder, outputs[0][i], ""); /*x0 x1 x2 x3*/ 807 LLVMValueRef scale = lp_build_const_vec(f32_type, scaleA[i]); /*sx sx sx sx*/ 808 LLVMValueRef trans = lp_build_const_vec(f32_type, transA[i]); /*tx tx tx tx*/ 809 810 /* divide by w */ 811 out = LLVMBuildMul(builder, out, out3, ""); 812 /* mult by scale */ 813 out = LLVMBuildMul(builder, out, scale, ""); 814 /* add translation */ 815 out = LLVMBuildAdd(builder, out, trans, ""); 816 817 /* store transformed outputs */ 818 LLVMBuildStore(builder, out, outputs[0][i]); 819 } 820 821} 822 823/* Equivalent of _mm_set1_ps(a) 824 */ 825static LLVMValueRef vec4f_from_scalar(LLVMBuilderRef bld, 826 LLVMValueRef a, 827 const char *name) 828{ 829 LLVMValueRef res = LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)); 830 int i; 831 832 for(i = 0; i < 4; ++i) { 833 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); 834 res = LLVMBuildInsertElement(bld, res, a, index, i == 3 ? name : ""); 835 } 836 837 return res; 838} 839 840/* 841 * Returns clipmask as 4xi32 bitmask for the 4 vertices 842 */ 843static LLVMValueRef 844generate_clipmask(LLVMBuilderRef builder, 845 LLVMValueRef (*outputs)[NUM_CHANNELS], 846 boolean clip_xy, 847 boolean clip_z, 848 boolean clip_user, 849 boolean enable_d3dclipping, 850 unsigned nr, 851 LLVMValueRef context_ptr) 852{ 853 LLVMValueRef mask; /* stores the <4xi32> clipmasks */ 854 LLVMValueRef test, temp; 855 LLVMValueRef zero, shift; 856 LLVMValueRef pos_x, pos_y, pos_z, pos_w; 857 LLVMValueRef plane1, planes, plane_ptr, sum; 858 859 unsigned i; 860 861 struct lp_type f32_type = lp_type_float_vec(32); 862 863 zero = lp_build_const_vec(f32_type, 0); /* 0.0f 0.0f 0.0f 0.0f */ 864 shift = lp_build_const_int_vec(lp_type_int_vec(32), 1); /* 1 1 1 1 */ 865 866 /* Assuming position stored at output[0] */ 867 pos_x = LLVMBuildLoad(builder, outputs[0][0], ""); /*x0 x1 x2 x3*/ 868 pos_y = LLVMBuildLoad(builder, outputs[0][1], ""); /*y0 y1 y2 y3*/ 869 pos_z = LLVMBuildLoad(builder, outputs[0][2], ""); /*z0 z1 z2 z3*/ 870 pos_w = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/ 871 872 /* Cliptest, for hardwired planes */ 873 if (clip_xy){ 874 /* plane 1 */ 875 test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, pos_x , pos_w); 876 temp = shift; 877 test = LLVMBuildAnd(builder, test, temp, ""); 878 mask = test; 879 880 /* plane 2 */ 881 test = LLVMBuildFAdd(builder, pos_x, pos_w, ""); 882 test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, test); 883 temp = LLVMBuildShl(builder, temp, shift, ""); 884 test = LLVMBuildAnd(builder, test, temp, ""); 885 mask = LLVMBuildOr(builder, mask, test, ""); 886 887 /* plane 3 */ 888 test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, pos_y, pos_w); 889 temp = LLVMBuildShl(builder, temp, shift, ""); 890 test = LLVMBuildAnd(builder, test, temp, ""); 891 mask = LLVMBuildOr(builder, mask, test, ""); 892 893 /* plane 4 */ 894 test = LLVMBuildFAdd(builder, pos_y, pos_w, ""); 895 test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, test); 896 temp = LLVMBuildShl(builder, temp, shift, ""); 897 test = LLVMBuildAnd(builder, test, temp, ""); 898 mask = LLVMBuildOr(builder, mask, test, ""); 899 } 900 901 if (clip_z){ 902 if (enable_d3dclipping){ 903 /* plane 5 */ 904 test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, pos_z); 905 temp = LLVMBuildShl(builder, temp, shift, ""); 906 test = LLVMBuildAnd(builder, test, temp, ""); 907 mask = LLVMBuildOr(builder, mask, test, ""); 908 } 909 else{ 910 /* plane 5 */ 911 test = LLVMBuildFAdd(builder, pos_z, pos_w, ""); 912 test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, test); 913 temp = LLVMBuildShl(builder, temp, shift, ""); 914 test = LLVMBuildAnd(builder, test, temp, ""); 915 mask = LLVMBuildOr(builder, mask, test, ""); 916 } 917 /* plane 6 */ 918 test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, pos_z, pos_w); 919 temp = LLVMBuildShl(builder, temp, shift, ""); 920 test = LLVMBuildAnd(builder, test, temp, ""); 921 mask = LLVMBuildOr(builder, mask, test, ""); 922 } 923 924 if (clip_user){ 925 LLVMValueRef planes_ptr = draw_jit_context_planes(builder, context_ptr); 926 LLVMValueRef indices[3]; 927 928 /* userclip planes */ 929 for (i = 6; i < nr; i++) { 930 indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); 931 indices[1] = LLVMConstInt(LLVMInt32Type(), i, 0); 932 933 indices[2] = LLVMConstInt(LLVMInt32Type(), 0, 0); 934 plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, ""); 935 plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_x"); 936 planes = vec4f_from_scalar(builder, plane1, "plane4_x"); 937 sum = LLVMBuildMul(builder, planes, pos_x, ""); 938 939 indices[2] = LLVMConstInt(LLVMInt32Type(), 1, 0); 940 plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, ""); 941 plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_y"); 942 planes = vec4f_from_scalar(builder, plane1, "plane4_y"); 943 test = LLVMBuildMul(builder, planes, pos_y, ""); 944 sum = LLVMBuildFAdd(builder, sum, test, ""); 945 946 indices[2] = LLVMConstInt(LLVMInt32Type(), 2, 0); 947 plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, ""); 948 plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_z"); 949 planes = vec4f_from_scalar(builder, plane1, "plane4_z"); 950 test = LLVMBuildMul(builder, planes, pos_z, ""); 951 sum = LLVMBuildFAdd(builder, sum, test, ""); 952 953 indices[2] = LLVMConstInt(LLVMInt32Type(), 3, 0); 954 plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, ""); 955 plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_w"); 956 planes = vec4f_from_scalar(builder, plane1, "plane4_w"); 957 test = LLVMBuildMul(builder, planes, pos_w, ""); 958 sum = LLVMBuildFAdd(builder, sum, test, ""); 959 960 test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, sum); 961 temp = LLVMBuildShl(builder, temp, shift, ""); 962 test = LLVMBuildAnd(builder, test, temp, ""); 963 mask = LLVMBuildOr(builder, mask, test, ""); 964 } 965 } 966 return mask; 967} 968 969/* 970 * Returns boolean if any clipping has occurred 971 * Used zero/non-zero i32 value to represent boolean 972 */ 973static void 974clipmask_bool(LLVMBuilderRef builder, 975 LLVMValueRef clipmask, 976 LLVMValueRef ret_ptr) 977{ 978 LLVMValueRef ret = LLVMBuildLoad(builder, ret_ptr, ""); 979 LLVMValueRef temp; 980 int i; 981 982 LLVMDumpValue(clipmask); 983 984 for (i=0; i<4; i++){ 985 temp = LLVMBuildExtractElement(builder, clipmask, 986 LLVMConstInt(LLVMInt32Type(), i, 0) , ""); 987 ret = LLVMBuildOr(builder, ret, temp, ""); 988 LLVMDumpValue(ret); 989 } 990 991 LLVMBuildStore(builder, ret, ret_ptr); 992 LLVMDumpValue(ret_ptr); 993 994} 995 996static void 997draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) 998{ 999 LLVMTypeRef arg_types[8]; 1000 LLVMTypeRef func_type; 1001 LLVMValueRef context_ptr; 1002 LLVMBasicBlockRef block; 1003 LLVMBuilderRef builder; 1004 LLVMValueRef start, end, count, stride, step, io_itr; 1005 LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr; 1006 LLVMValueRef instance_id; 1007 struct draw_context *draw = llvm->draw; 1008 unsigned i, j; 1009 struct lp_build_context bld; 1010 struct lp_build_loop_state lp_loop; 1011 const int max_vertices = 4; 1012 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; 1013 void *code; 1014 struct lp_build_sampler_soa *sampler = 0; 1015 LLVMValueRef ret, ret_ptr; 1016 boolean bypass_viewport = variant->key.bypass_viewport; 1017 boolean enable_cliptest = variant->key.clip_xy || 1018 variant->key.clip_z || 1019 variant->key.clip_user; 1020 1021 arg_types[0] = llvm->context_ptr_type; /* context */ 1022 arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */ 1023 arg_types[2] = llvm->buffer_ptr_type; /* vbuffers */ 1024 arg_types[3] = LLVMInt32Type(); /* start */ 1025 arg_types[4] = LLVMInt32Type(); /* count */ 1026 arg_types[5] = LLVMInt32Type(); /* stride */ 1027 arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */ 1028 arg_types[7] = LLVMInt32Type(); /* instance_id */ 1029 1030 func_type = LLVMFunctionType(LLVMInt32Type(), arg_types, Elements(arg_types), 0); 1031 1032 variant->function = LLVMAddFunction(llvm->module, "draw_llvm_shader", func_type); 1033 LLVMSetFunctionCallConv(variant->function, LLVMCCallConv); 1034 for(i = 0; i < Elements(arg_types); ++i) 1035 if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) 1036 LLVMAddAttribute(LLVMGetParam(variant->function, i), LLVMNoAliasAttribute); 1037 1038 context_ptr = LLVMGetParam(variant->function, 0); 1039 io_ptr = LLVMGetParam(variant->function, 1); 1040 vbuffers_ptr = LLVMGetParam(variant->function, 2); 1041 start = LLVMGetParam(variant->function, 3); 1042 count = LLVMGetParam(variant->function, 4); 1043 stride = LLVMGetParam(variant->function, 5); 1044 vb_ptr = LLVMGetParam(variant->function, 6); 1045 instance_id = LLVMGetParam(variant->function, 7); 1046 1047 lp_build_name(context_ptr, "context"); 1048 lp_build_name(io_ptr, "io"); 1049 lp_build_name(vbuffers_ptr, "vbuffers"); 1050 lp_build_name(start, "start"); 1051 lp_build_name(count, "count"); 1052 lp_build_name(stride, "stride"); 1053 lp_build_name(vb_ptr, "vb"); 1054 lp_build_name(instance_id, "instance_id"); 1055 1056 /* 1057 * Function body 1058 */ 1059 1060 block = LLVMAppendBasicBlock(variant->function, "entry"); 1061 builder = LLVMCreateBuilder(); 1062 LLVMPositionBuilderAtEnd(builder, block); 1063 1064 lp_build_context_init(&bld, builder, lp_type_int(32)); 1065 1066 end = lp_build_add(&bld, start, count); 1067 1068 step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); 1069 1070 /* function will return non-zero i32 value if any clipped vertices */ 1071 ret_ptr = lp_build_alloca(builder, LLVMInt32Type(), ""); 1072 LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), ret_ptr); 1073 1074 /* code generated texture sampling */ 1075 sampler = draw_llvm_sampler_soa_create( 1076 draw_llvm_variant_key_samplers(&variant->key), 1077 context_ptr); 1078 1079#if DEBUG_STORE 1080 lp_build_printf(builder, "start = %d, end = %d, step = %d\n", 1081 start, end, step); 1082#endif 1083 lp_build_loop_begin(builder, start, &lp_loop); 1084 { 1085 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; 1086 LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } }; 1087 LLVMValueRef io; 1088 LLVMValueRef clipmask; /* holds the clipmask value */ 1089 const LLVMValueRef (*ptr_aos)[NUM_CHANNELS]; 1090 1091 io_itr = LLVMBuildSub(builder, lp_loop.counter, start, ""); 1092 io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, ""); 1093#if DEBUG_STORE 1094 lp_build_printf(builder, " --- io %d = %p, loop counter %d\n", 1095 io_itr, io, lp_loop.counter); 1096#endif 1097 for (i = 0; i < NUM_CHANNELS; ++i) { 1098 LLVMValueRef true_index = LLVMBuildAdd( 1099 builder, 1100 lp_loop.counter, 1101 LLVMConstInt(LLVMInt32Type(), i, 0), ""); 1102 for (j = 0; j < draw->pt.nr_vertex_elements; ++j) { 1103 struct pipe_vertex_element *velem = &draw->pt.vertex_element[j]; 1104 LLVMValueRef vb_index = LLVMConstInt(LLVMInt32Type(), 1105 velem->vertex_buffer_index, 1106 0); 1107 LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr, 1108 &vb_index, 1, ""); 1109 generate_fetch(builder, vbuffers_ptr, 1110 &aos_attribs[j][i], velem, vb, true_index, 1111 instance_id); 1112 } 1113 } 1114 convert_to_soa(builder, aos_attribs, inputs, 1115 draw->pt.nr_vertex_elements); 1116 1117 ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs; 1118 generate_vs(llvm, 1119 builder, 1120 outputs, 1121 ptr_aos, 1122 context_ptr, 1123 sampler); 1124 1125 /* store original positions in clip before further manipulation */ 1126 store_clip(builder, io, outputs); 1127 1128 /* do cliptest */ 1129 if (enable_cliptest){ 1130 /* allocate clipmask, assign it integer type */ 1131 clipmask = generate_clipmask(builder, outputs, 1132 variant->key.clip_xy, 1133 variant->key.clip_z, 1134 variant->key.clip_user, 1135 variant->key.enable_d3dclipping, 1136 variant->key.nr_planes, 1137 context_ptr); 1138 /* return clipping boolean value for function */ 1139 clipmask_bool(builder, clipmask, ret_ptr); 1140 } 1141 else{ 1142 clipmask = lp_build_const_int_vec(lp_type_int_vec(32), 0); 1143 } 1144 1145 /* do viewport mapping */ 1146 if (!bypass_viewport){ 1147 generate_viewport(llvm, builder, outputs); 1148 } 1149 1150 /* store clipmask in vertex header and positions in data */ 1151 convert_to_aos(builder, io, outputs, clipmask, 1152 draw->vs.vertex_shader->info.num_outputs, 1153 max_vertices); 1154 } 1155 1156 lp_build_loop_end_cond(builder, end, step, LLVMIntUGE, &lp_loop); 1157 1158 sampler->destroy(sampler); 1159 1160#ifdef PIPE_ARCH_X86 1161 /* Avoid corrupting the FPU stack on 32bit OSes. */ 1162 lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0); 1163#endif 1164 1165 ret = LLVMBuildLoad(builder, ret_ptr,""); 1166 LLVMBuildRet(builder, ret); 1167 1168 LLVMDisposeBuilder(builder); 1169 1170 /* 1171 * Translate the LLVM IR into machine code. 1172 */ 1173#ifdef DEBUG 1174 if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) { 1175 lp_debug_dump_value(variant->function); 1176 assert(0); 1177 } 1178#endif 1179 1180 LLVMRunFunctionPassManager(llvm->pass, variant->function); 1181 1182 if (gallivm_debug & GALLIVM_DEBUG_IR) { 1183 lp_debug_dump_value(variant->function); 1184 debug_printf("\n"); 1185 } 1186 1187 code = LLVMGetPointerToGlobal(llvm->draw->engine, variant->function); 1188 variant->jit_func = (draw_jit_vert_func)pointer_to_func(code); 1189 1190 if (gallivm_debug & GALLIVM_DEBUG_ASM) { 1191 lp_disassemble(code); 1192 } 1193 lp_func_delete_body(variant->function); 1194} 1195 1196 1197static void 1198draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *variant) 1199{ 1200 LLVMTypeRef arg_types[8]; 1201 LLVMTypeRef func_type; 1202 LLVMValueRef context_ptr; 1203 LLVMBasicBlockRef block; 1204 LLVMBuilderRef builder; 1205 LLVMValueRef fetch_elts, fetch_count, stride, step, io_itr; 1206 LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr; 1207 LLVMValueRef instance_id; 1208 struct draw_context *draw = llvm->draw; 1209 unsigned i, j; 1210 struct lp_build_context bld; 1211 struct lp_build_loop_state lp_loop; 1212 const int max_vertices = 4; 1213 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; 1214 LLVMValueRef fetch_max; 1215 void *code; 1216 struct lp_build_sampler_soa *sampler = 0; 1217 LLVMValueRef ret, ret_ptr; 1218 boolean bypass_viewport = variant->key.bypass_viewport; 1219 boolean enable_cliptest = variant->key.clip_xy || 1220 variant->key.clip_z || 1221 variant->key.clip_user; 1222 1223 arg_types[0] = llvm->context_ptr_type; /* context */ 1224 arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */ 1225 arg_types[2] = llvm->buffer_ptr_type; /* vbuffers */ 1226 arg_types[3] = LLVMPointerType(LLVMInt32Type(), 0); /* fetch_elts * */ 1227 arg_types[4] = LLVMInt32Type(); /* fetch_count */ 1228 arg_types[5] = LLVMInt32Type(); /* stride */ 1229 arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */ 1230 arg_types[7] = LLVMInt32Type(); /* instance_id */ 1231 1232 func_type = LLVMFunctionType(LLVMInt32Type(), arg_types, Elements(arg_types), 0); 1233 1234 variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts", func_type); 1235 LLVMSetFunctionCallConv(variant->function_elts, LLVMCCallConv); 1236 for(i = 0; i < Elements(arg_types); ++i) 1237 if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) 1238 LLVMAddAttribute(LLVMGetParam(variant->function_elts, i), 1239 LLVMNoAliasAttribute); 1240 1241 context_ptr = LLVMGetParam(variant->function_elts, 0); 1242 io_ptr = LLVMGetParam(variant->function_elts, 1); 1243 vbuffers_ptr = LLVMGetParam(variant->function_elts, 2); 1244 fetch_elts = LLVMGetParam(variant->function_elts, 3); 1245 fetch_count = LLVMGetParam(variant->function_elts, 4); 1246 stride = LLVMGetParam(variant->function_elts, 5); 1247 vb_ptr = LLVMGetParam(variant->function_elts, 6); 1248 instance_id = LLVMGetParam(variant->function_elts, 7); 1249 1250 lp_build_name(context_ptr, "context"); 1251 lp_build_name(io_ptr, "io"); 1252 lp_build_name(vbuffers_ptr, "vbuffers"); 1253 lp_build_name(fetch_elts, "fetch_elts"); 1254 lp_build_name(fetch_count, "fetch_count"); 1255 lp_build_name(stride, "stride"); 1256 lp_build_name(vb_ptr, "vb"); 1257 lp_build_name(instance_id, "instance_id"); 1258 1259 /* 1260 * Function body 1261 */ 1262 1263 block = LLVMAppendBasicBlock(variant->function_elts, "entry"); 1264 builder = LLVMCreateBuilder(); 1265 LLVMPositionBuilderAtEnd(builder, block); 1266 1267 lp_build_context_init(&bld, builder, lp_type_int(32)); 1268 1269 step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); 1270 1271 /* code generated texture sampling */ 1272 sampler = draw_llvm_sampler_soa_create( 1273 draw_llvm_variant_key_samplers(&variant->key), 1274 context_ptr); 1275 1276 fetch_max = LLVMBuildSub(builder, fetch_count, 1277 LLVMConstInt(LLVMInt32Type(), 1, 0), 1278 "fetch_max"); 1279 1280 /* function returns non-zero i32 value if any clipped vertices */ 1281 ret_ptr = lp_build_alloca(builder, LLVMInt32Type(), ""); 1282 LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), ret_ptr); 1283 1284 lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), &lp_loop); 1285 { 1286 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; 1287 LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } }; 1288 LLVMValueRef io; 1289 LLVMValueRef clipmask; /* holds the clipmask value */ 1290 const LLVMValueRef (*ptr_aos)[NUM_CHANNELS]; 1291 1292 io_itr = lp_loop.counter; 1293 io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, ""); 1294#if DEBUG_STORE 1295 lp_build_printf(builder, " --- io %d = %p, loop counter %d\n", 1296 io_itr, io, lp_loop.counter); 1297#endif 1298 for (i = 0; i < NUM_CHANNELS; ++i) { 1299 LLVMValueRef true_index = LLVMBuildAdd( 1300 builder, 1301 lp_loop.counter, 1302 LLVMConstInt(LLVMInt32Type(), i, 0), ""); 1303 LLVMValueRef fetch_ptr; 1304 1305 /* make sure we're not out of bounds which can happen 1306 * if fetch_count % 4 != 0, because on the last iteration 1307 * a few of the 4 vertex fetches will be out of bounds */ 1308 true_index = lp_build_min(&bld, true_index, fetch_max); 1309 1310 fetch_ptr = LLVMBuildGEP(builder, fetch_elts, 1311 &true_index, 1, ""); 1312 true_index = LLVMBuildLoad(builder, fetch_ptr, "fetch_elt"); 1313 for (j = 0; j < draw->pt.nr_vertex_elements; ++j) { 1314 struct pipe_vertex_element *velem = &draw->pt.vertex_element[j]; 1315 LLVMValueRef vb_index = LLVMConstInt(LLVMInt32Type(), 1316 velem->vertex_buffer_index, 1317 0); 1318 LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr, 1319 &vb_index, 1, ""); 1320 generate_fetch(builder, vbuffers_ptr, 1321 &aos_attribs[j][i], velem, vb, true_index, 1322 instance_id); 1323 } 1324 } 1325 convert_to_soa(builder, aos_attribs, inputs, 1326 draw->pt.nr_vertex_elements); 1327 1328 ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs; 1329 generate_vs(llvm, 1330 builder, 1331 outputs, 1332 ptr_aos, 1333 context_ptr, 1334 sampler); 1335 1336 /* store original positions in clip before further manipulation */ 1337 store_clip(builder, io, outputs); 1338 1339 /* do cliptest */ 1340 if (enable_cliptest){ 1341 /* allocate clipmask, assign it integer type */ 1342 clipmask = generate_clipmask(builder, outputs, 1343 variant->key.clip_xy, 1344 variant->key.clip_z, 1345 variant->key.clip_user, 1346 variant->key.enable_d3dclipping, 1347 variant->key.nr_planes, 1348 context_ptr); 1349 /* return clipping boolean value for function */ 1350 clipmask_bool(builder, clipmask, ret_ptr); 1351 } 1352 else{ 1353 clipmask = lp_build_const_int_vec(lp_type_int_vec(32), 0); 1354 } 1355 1356 /* do viewport mapping */ 1357 if (!bypass_viewport){ 1358 generate_viewport(llvm, builder, outputs); 1359 } 1360 1361 /* store clipmask in vertex header, 1362 * original positions in clip 1363 * and transformed positions in data 1364 */ 1365 convert_to_aos(builder, io, outputs, clipmask, 1366 draw->vs.vertex_shader->info.num_outputs, 1367 max_vertices); 1368 } 1369 1370 lp_build_loop_end_cond(builder, fetch_count, step, LLVMIntUGE, &lp_loop); 1371 1372 sampler->destroy(sampler); 1373 1374#ifdef PIPE_ARCH_X86 1375 /* Avoid corrupting the FPU stack on 32bit OSes. */ 1376 lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0); 1377#endif 1378 1379 ret = LLVMBuildLoad(builder, ret_ptr,""); 1380 LLVMBuildRet(builder, ret); 1381 1382 LLVMDisposeBuilder(builder); 1383 1384 /* 1385 * Translate the LLVM IR into machine code. 1386 */ 1387#ifdef DEBUG 1388 if(LLVMVerifyFunction(variant->function_elts, LLVMPrintMessageAction)) { 1389 lp_debug_dump_value(variant->function_elts); 1390 assert(0); 1391 } 1392#endif 1393 1394 LLVMRunFunctionPassManager(llvm->pass, variant->function_elts); 1395 1396 if (gallivm_debug & GALLIVM_DEBUG_IR) { 1397 lp_debug_dump_value(variant->function_elts); 1398 debug_printf("\n"); 1399 } 1400 1401 code = LLVMGetPointerToGlobal(llvm->draw->engine, variant->function_elts); 1402 variant->jit_func_elts = (draw_jit_vert_func_elts)pointer_to_func(code); 1403 1404 if (gallivm_debug & GALLIVM_DEBUG_ASM) { 1405 lp_disassemble(code); 1406 } 1407 lp_func_delete_body(variant->function_elts); 1408} 1409 1410 1411struct draw_llvm_variant_key * 1412draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store) 1413{ 1414 unsigned i; 1415 struct draw_llvm_variant_key *key; 1416 struct lp_sampler_static_state *sampler; 1417 1418 key = (struct draw_llvm_variant_key *)store; 1419 1420 /* Presumably all variants of the shader should have the same 1421 * number of vertex elements - ie the number of shader inputs. 1422 */ 1423 key->nr_vertex_elements = llvm->draw->pt.nr_vertex_elements; 1424 1425 /* will have to rig this up properly later */ 1426 key->clip_xy = llvm->draw->clip_xy; 1427 key->clip_z = llvm->draw->clip_z; 1428 key->clip_user = llvm->draw->clip_user; 1429 key->bypass_viewport = llvm->draw->identity_viewport; 1430 key->enable_d3dclipping = (boolean)!llvm->draw->rasterizer->gl_rasterization_rules; 1431 key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE); 1432 key->nr_planes = llvm->draw->nr_planes; 1433 key->pad = 0; 1434 1435 /* All variants of this shader will have the same value for 1436 * nr_samplers. Not yet trying to compact away holes in the 1437 * sampler array. 1438 */ 1439 key->nr_samplers = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER] + 1; 1440 1441 sampler = draw_llvm_variant_key_samplers(key); 1442 1443 memcpy(key->vertex_element, 1444 llvm->draw->pt.vertex_element, 1445 sizeof(struct pipe_vertex_element) * key->nr_vertex_elements); 1446 1447 memset(sampler, 0, key->nr_samplers * sizeof *sampler); 1448 1449 for (i = 0 ; i < key->nr_samplers; i++) { 1450 lp_sampler_static_state(&sampler[i], 1451 llvm->draw->sampler_views[i], 1452 llvm->draw->samplers[i]); 1453 } 1454 1455 return key; 1456} 1457 1458void 1459draw_llvm_set_mapped_texture(struct draw_context *draw, 1460 unsigned sampler_idx, 1461 uint32_t width, uint32_t height, uint32_t depth, 1462 uint32_t last_level, 1463 uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS], 1464 uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS], 1465 const void *data[DRAW_MAX_TEXTURE_LEVELS]) 1466{ 1467 unsigned j; 1468 struct draw_jit_texture *jit_tex; 1469 1470 assert(sampler_idx < PIPE_MAX_VERTEX_SAMPLERS); 1471 1472 1473 jit_tex = &draw->llvm->jit_context.textures[sampler_idx]; 1474 1475 jit_tex->width = width; 1476 jit_tex->height = height; 1477 jit_tex->depth = depth; 1478 jit_tex->last_level = last_level; 1479 1480 for (j = 0; j <= last_level; j++) { 1481 jit_tex->data[j] = data[j]; 1482 jit_tex->row_stride[j] = row_stride[j]; 1483 jit_tex->img_stride[j] = img_stride[j]; 1484 } 1485} 1486 1487void 1488draw_llvm_destroy_variant(struct draw_llvm_variant *variant) 1489{ 1490 struct draw_llvm *llvm = variant->llvm; 1491 struct draw_context *draw = llvm->draw; 1492 1493 if (variant->function_elts) { 1494 if (variant->function_elts) 1495 LLVMFreeMachineCodeForFunction(draw->engine, 1496 variant->function_elts); 1497 LLVMDeleteFunction(variant->function_elts); 1498 } 1499 1500 if (variant->function) { 1501 if (variant->function) 1502 LLVMFreeMachineCodeForFunction(draw->engine, 1503 variant->function); 1504 LLVMDeleteFunction(variant->function); 1505 } 1506 1507 remove_from_list(&variant->list_item_local); 1508 variant->shader->variants_cached--; 1509 remove_from_list(&variant->list_item_global); 1510 llvm->nr_variants--; 1511 FREE(variant); 1512} 1513