draw_llvm.c revision 1d6f3543a063ab9e740fd0c149dcce26c282d773
1/************************************************************************** 2 * 3 * Copyright 2010 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28#include "draw_llvm.h" 29 30#include "draw_context.h" 31#include "draw_vs.h" 32 33#include "gallivm/lp_bld_arit.h" 34#include "gallivm/lp_bld_logic.h" 35#include "gallivm/lp_bld_const.h" 36#include "gallivm/lp_bld_swizzle.h" 37#include "gallivm/lp_bld_struct.h" 38#include "gallivm/lp_bld_type.h" 39#include "gallivm/lp_bld_flow.h" 40#include "gallivm/lp_bld_debug.h" 41#include "gallivm/lp_bld_tgsi.h" 42#include "gallivm/lp_bld_printf.h" 43#include "gallivm/lp_bld_intr.h" 44#include "gallivm/lp_bld_init.h" 45 46#include "tgsi/tgsi_exec.h" 47#include "tgsi/tgsi_dump.h" 48 49#include "util/u_math.h" 50#include "util/u_pointer.h" 51#include "util/u_string.h" 52 53#include <llvm-c/Transforms/Scalar.h> 54 55#define DEBUG_STORE 0 56 57/* generates the draw jit function */ 58static void 59draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var); 60static void 61draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *var); 62 63static void 64init_globals(struct draw_llvm *llvm) 65{ 66 LLVMTypeRef texture_type; 67 68 /* struct draw_jit_texture */ 69 { 70 LLVMTypeRef elem_types[DRAW_JIT_TEXTURE_NUM_FIELDS]; 71 72 elem_types[DRAW_JIT_TEXTURE_WIDTH] = LLVMInt32Type(); 73 elem_types[DRAW_JIT_TEXTURE_HEIGHT] = LLVMInt32Type(); 74 elem_types[DRAW_JIT_TEXTURE_DEPTH] = LLVMInt32Type(); 75 elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32Type(); 76 elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] = 77 LLVMArrayType(LLVMInt32Type(), PIPE_MAX_TEXTURE_LEVELS); 78 elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] = 79 LLVMArrayType(LLVMInt32Type(), PIPE_MAX_TEXTURE_LEVELS); 80 elem_types[DRAW_JIT_TEXTURE_DATA] = 81 LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0), 82 PIPE_MAX_TEXTURE_LEVELS); 83 elem_types[DRAW_JIT_TEXTURE_MIN_LOD] = LLVMFloatType(); 84 elem_types[DRAW_JIT_TEXTURE_MAX_LOD] = LLVMFloatType(); 85 elem_types[DRAW_JIT_TEXTURE_LOD_BIAS] = LLVMFloatType(); 86 elem_types[DRAW_JIT_TEXTURE_BORDER_COLOR] = 87 LLVMArrayType(LLVMFloatType(), 4); 88 89 texture_type = LLVMStructType(elem_types, Elements(elem_types), 0); 90 91 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width, 92 llvm->target, texture_type, 93 DRAW_JIT_TEXTURE_WIDTH); 94 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height, 95 llvm->target, texture_type, 96 DRAW_JIT_TEXTURE_HEIGHT); 97 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, depth, 98 llvm->target, texture_type, 99 DRAW_JIT_TEXTURE_DEPTH); 100 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, last_level, 101 llvm->target, texture_type, 102 DRAW_JIT_TEXTURE_LAST_LEVEL); 103 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, row_stride, 104 llvm->target, texture_type, 105 DRAW_JIT_TEXTURE_ROW_STRIDE); 106 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, img_stride, 107 llvm->target, texture_type, 108 DRAW_JIT_TEXTURE_IMG_STRIDE); 109 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, data, 110 llvm->target, texture_type, 111 DRAW_JIT_TEXTURE_DATA); 112 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, min_lod, 113 llvm->target, texture_type, 114 DRAW_JIT_TEXTURE_MIN_LOD); 115 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, max_lod, 116 llvm->target, texture_type, 117 DRAW_JIT_TEXTURE_MAX_LOD); 118 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, lod_bias, 119 llvm->target, texture_type, 120 DRAW_JIT_TEXTURE_LOD_BIAS); 121 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, border_color, 122 llvm->target, texture_type, 123 DRAW_JIT_TEXTURE_BORDER_COLOR); 124 LP_CHECK_STRUCT_SIZE(struct draw_jit_texture, 125 llvm->target, texture_type); 126 127 LLVMAddTypeName(llvm->module, "texture", texture_type); 128 } 129 130 131 /* struct draw_jit_context */ 132 { 133 LLVMTypeRef elem_types[5]; 134 LLVMTypeRef context_type; 135 136 elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */ 137 elem_types[1] = LLVMPointerType(LLVMFloatType(), 0); /* gs_constants */ 138 elem_types[2] = LLVMPointerType(LLVMArrayType(LLVMArrayType(LLVMFloatType(), 4), 12), 0); /* planes */ 139 elem_types[3] = LLVMPointerType(LLVMFloatType(), 0); /* viewport */ 140 elem_types[4] = LLVMArrayType(texture_type, 141 PIPE_MAX_VERTEX_SAMPLERS); /* textures */ 142 143 context_type = LLVMStructType(elem_types, Elements(elem_types), 0); 144 145 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants, 146 llvm->target, context_type, 0); 147 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, gs_constants, 148 llvm->target, context_type, 1); 149 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, planes, 150 llvm->target, context_type, 2); 151 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures, 152 llvm->target, context_type, 153 DRAW_JIT_CTX_TEXTURES); 154 LP_CHECK_STRUCT_SIZE(struct draw_jit_context, 155 llvm->target, context_type); 156 157 LLVMAddTypeName(llvm->module, "draw_jit_context", context_type); 158 159 llvm->context_ptr_type = LLVMPointerType(context_type, 0); 160 } 161 { 162 LLVMTypeRef buffer_ptr = LLVMPointerType(LLVMIntType(8), 0); 163 llvm->buffer_ptr_type = LLVMPointerType(buffer_ptr, 0); 164 } 165 /* struct pipe_vertex_buffer */ 166 { 167 LLVMTypeRef elem_types[4]; 168 LLVMTypeRef vb_type; 169 170 elem_types[0] = LLVMInt32Type(); 171 elem_types[1] = LLVMInt32Type(); 172 elem_types[2] = LLVMInt32Type(); 173 elem_types[3] = LLVMPointerType(LLVMOpaqueType(), 0); /* vs_constants */ 174 175 vb_type = LLVMStructType(elem_types, Elements(elem_types), 0); 176 177 LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride, 178 llvm->target, vb_type, 0); 179 LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset, 180 llvm->target, vb_type, 2); 181 LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer, 182 llvm->target, vb_type); 183 184 LLVMAddTypeName(llvm->module, "pipe_vertex_buffer", vb_type); 185 186 llvm->vb_ptr_type = LLVMPointerType(vb_type, 0); 187 } 188} 189 190static LLVMTypeRef 191create_vertex_header(struct draw_llvm *llvm, int data_elems) 192{ 193 /* struct vertex_header */ 194 LLVMTypeRef elem_types[3]; 195 LLVMTypeRef vertex_header; 196 char struct_name[24]; 197 198 util_snprintf(struct_name, 23, "vertex_header%d", data_elems); 199 200 elem_types[0] = LLVMIntType(32); 201 elem_types[1] = LLVMArrayType(LLVMFloatType(), 4); 202 elem_types[2] = LLVMArrayType(elem_types[1], data_elems); 203 204 vertex_header = LLVMStructType(elem_types, Elements(elem_types), 0); 205 206 /* these are bit-fields and we can't take address of them 207 LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask, 208 llvm->target, vertex_header, 209 DRAW_JIT_VERTEX_CLIPMASK); 210 LP_CHECK_MEMBER_OFFSET(struct vertex_header, edgeflag, 211 llvm->target, vertex_header, 212 DRAW_JIT_VERTEX_EDGEFLAG); 213 LP_CHECK_MEMBER_OFFSET(struct vertex_header, pad, 214 llvm->target, vertex_header, 215 DRAW_JIT_VERTEX_PAD); 216 LP_CHECK_MEMBER_OFFSET(struct vertex_header, vertex_id, 217 llvm->target, vertex_header, 218 DRAW_JIT_VERTEX_VERTEX_ID); 219 */ 220 LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip, 221 llvm->target, vertex_header, 222 DRAW_JIT_VERTEX_CLIP); 223 LP_CHECK_MEMBER_OFFSET(struct vertex_header, data, 224 llvm->target, vertex_header, 225 DRAW_JIT_VERTEX_DATA); 226 227 LLVMAddTypeName(llvm->module, struct_name, vertex_header); 228 229 return LLVMPointerType(vertex_header, 0); 230} 231 232struct draw_llvm * 233draw_llvm_create(struct draw_context *draw) 234{ 235 struct draw_llvm *llvm; 236 237 llvm = CALLOC_STRUCT( draw_llvm ); 238 if (!llvm) 239 return NULL; 240 241 llvm->draw = draw; 242 llvm->engine = draw->engine; 243 244 debug_assert(llvm->engine); 245 246 llvm->module = LLVMModuleCreateWithName("draw_llvm"); 247 llvm->provider = LLVMCreateModuleProviderForExistingModule(llvm->module); 248 249 LLVMAddModuleProvider(llvm->engine, llvm->provider); 250 251 llvm->target = LLVMGetExecutionEngineTargetData(llvm->engine); 252 253 llvm->pass = LLVMCreateFunctionPassManager(llvm->provider); 254 LLVMAddTargetData(llvm->target, llvm->pass); 255 256 if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) { 257 /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, 258 * but there are more on SVN. */ 259 /* TODO: Add more passes */ 260 261 LLVMAddCFGSimplificationPass(llvm->pass); 262 263 if (HAVE_LLVM >= 0x207 && sizeof(void*) == 4) { 264 /* For LLVM >= 2.7 and 32-bit build, use this order of passes to 265 * avoid generating bad code. 266 * Test with piglit glsl-vs-sqrt-zero test. 267 */ 268 LLVMAddConstantPropagationPass(llvm->pass); 269 LLVMAddPromoteMemoryToRegisterPass(llvm->pass); 270 } 271 else { 272 LLVMAddPromoteMemoryToRegisterPass(llvm->pass); 273 LLVMAddConstantPropagationPass(llvm->pass); 274 } 275 276 LLVMAddInstructionCombiningPass(llvm->pass); 277 LLVMAddGVNPass(llvm->pass); 278 } else { 279 /* We need at least this pass to prevent the backends to fail in 280 * unexpected ways. 281 */ 282 LLVMAddPromoteMemoryToRegisterPass(llvm->pass); 283 } 284 285 init_globals(llvm); 286 287 if (gallivm_debug & GALLIVM_DEBUG_IR) { 288 LLVMDumpModule(llvm->module); 289 } 290 291 llvm->nr_variants = 0; 292 make_empty_list(&llvm->vs_variants_list); 293 294 return llvm; 295} 296 297void 298draw_llvm_destroy(struct draw_llvm *llvm) 299{ 300 LLVMDisposePassManager(llvm->pass); 301 302 FREE(llvm); 303} 304 305struct draw_llvm_variant * 306draw_llvm_create_variant(struct draw_llvm *llvm, 307 unsigned num_inputs, 308 const struct draw_llvm_variant_key *key) 309{ 310 struct draw_llvm_variant *variant; 311 struct llvm_vertex_shader *shader = 312 llvm_vertex_shader(llvm->draw->vs.vertex_shader); 313 314 variant = MALLOC(sizeof *variant + 315 shader->variant_key_size - 316 sizeof variant->key); 317 if (variant == NULL) 318 return NULL; 319 320 variant->llvm = llvm; 321 322 memcpy(&variant->key, key, shader->variant_key_size); 323 324 llvm->vertex_header_ptr_type = create_vertex_header(llvm, num_inputs); 325 326 draw_llvm_generate(llvm, variant); 327 draw_llvm_generate_elts(llvm, variant); 328 329 variant->shader = shader; 330 variant->list_item_global.base = variant; 331 variant->list_item_local.base = variant; 332 /*variant->no = */shader->variants_created++; 333 variant->list_item_global.base = variant; 334 335 return variant; 336} 337 338static void 339generate_vs(struct draw_llvm *llvm, 340 LLVMBuilderRef builder, 341 LLVMValueRef (*outputs)[NUM_CHANNELS], 342 const LLVMValueRef (*inputs)[NUM_CHANNELS], 343 LLVMValueRef system_values_array, 344 LLVMValueRef context_ptr, 345 struct lp_build_sampler_soa *draw_sampler) 346{ 347 const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens; 348 struct lp_type vs_type; 349 LLVMValueRef consts_ptr = draw_jit_context_vs_constants(builder, context_ptr); 350 struct lp_build_sampler_soa *sampler = 0; 351 352 memset(&vs_type, 0, sizeof vs_type); 353 vs_type.floating = TRUE; /* floating point values */ 354 vs_type.sign = TRUE; /* values are signed */ 355 vs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ 356 vs_type.width = 32; /* 32-bit float */ 357 vs_type.length = 4; /* 4 elements per vector */ 358#if 0 359 num_vs = 4; /* number of vertices per block */ 360#endif 361 362 if (gallivm_debug & GALLIVM_DEBUG_IR) { 363 tgsi_dump(tokens, 0); 364 } 365 366 if (llvm->draw->num_sampler_views && 367 llvm->draw->num_samplers) 368 sampler = draw_sampler; 369 370 lp_build_tgsi_soa(builder, 371 tokens, 372 vs_type, 373 NULL /*struct lp_build_mask_context *mask*/, 374 consts_ptr, 375 system_values_array, 376 NULL /*pos*/, 377 inputs, 378 outputs, 379 sampler, 380 &llvm->draw->vs.vertex_shader->info); 381} 382 383#if DEBUG_STORE 384static void print_vectorf(LLVMBuilderRef builder, 385 LLVMValueRef vec) 386{ 387 LLVMValueRef val[4]; 388 val[0] = LLVMBuildExtractElement(builder, vec, 389 LLVMConstInt(LLVMInt32Type(), 0, 0), ""); 390 val[1] = LLVMBuildExtractElement(builder, vec, 391 LLVMConstInt(LLVMInt32Type(), 1, 0), ""); 392 val[2] = LLVMBuildExtractElement(builder, vec, 393 LLVMConstInt(LLVMInt32Type(), 2, 0), ""); 394 val[3] = LLVMBuildExtractElement(builder, vec, 395 LLVMConstInt(LLVMInt32Type(), 3, 0), ""); 396 lp_build_printf(builder, "vector = [%f, %f, %f, %f]\n", 397 val[0], val[1], val[2], val[3]); 398} 399#endif 400 401static void 402generate_fetch(LLVMBuilderRef builder, 403 LLVMValueRef vbuffers_ptr, 404 LLVMValueRef *res, 405 struct pipe_vertex_element *velem, 406 LLVMValueRef vbuf, 407 LLVMValueRef index, 408 LLVMValueRef instance_id) 409{ 410 LLVMValueRef indices = LLVMConstInt(LLVMInt64Type(), velem->vertex_buffer_index, 0); 411 LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr, 412 &indices, 1, ""); 413 LLVMValueRef vb_stride = draw_jit_vbuffer_stride(builder, vbuf); 414 LLVMValueRef vb_max_index = draw_jit_vbuffer_max_index(builder, vbuf); 415 LLVMValueRef vb_buffer_offset = draw_jit_vbuffer_offset(builder, vbuf); 416 LLVMValueRef cond; 417 LLVMValueRef stride; 418 419 if (velem->instance_divisor) { 420 /* array index = instance_id / instance_divisor */ 421 index = LLVMBuildUDiv(builder, instance_id, 422 LLVMConstInt(LLVMInt32Type(), velem->instance_divisor, 0), 423 "instance_divisor"); 424 } 425 426 /* limit index to min(index, vb_max_index) */ 427 cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, ""); 428 index = LLVMBuildSelect(builder, cond, index, vb_max_index, ""); 429 430 stride = LLVMBuildMul(builder, vb_stride, index, ""); 431 432 vbuffer_ptr = LLVMBuildLoad(builder, vbuffer_ptr, "vbuffer"); 433 434 stride = LLVMBuildAdd(builder, stride, 435 vb_buffer_offset, 436 ""); 437 stride = LLVMBuildAdd(builder, stride, 438 LLVMConstInt(LLVMInt32Type(), velem->src_offset, 0), 439 ""); 440 441 /*lp_build_printf(builder, "vbuf index = %d, stride is %d\n", indices, stride);*/ 442 vbuffer_ptr = LLVMBuildGEP(builder, vbuffer_ptr, &stride, 1, ""); 443 444 *res = draw_llvm_translate_from(builder, vbuffer_ptr, velem->src_format); 445} 446 447static LLVMValueRef 448aos_to_soa(LLVMBuilderRef builder, 449 LLVMValueRef val0, 450 LLVMValueRef val1, 451 LLVMValueRef val2, 452 LLVMValueRef val3, 453 LLVMValueRef channel) 454{ 455 LLVMValueRef ex, res; 456 457 ex = LLVMBuildExtractElement(builder, val0, 458 channel, ""); 459 res = LLVMBuildInsertElement(builder, 460 LLVMConstNull(LLVMTypeOf(val0)), 461 ex, 462 LLVMConstInt(LLVMInt32Type(), 0, 0), 463 ""); 464 465 ex = LLVMBuildExtractElement(builder, val1, 466 channel, ""); 467 res = LLVMBuildInsertElement(builder, 468 res, ex, 469 LLVMConstInt(LLVMInt32Type(), 1, 0), 470 ""); 471 472 ex = LLVMBuildExtractElement(builder, val2, 473 channel, ""); 474 res = LLVMBuildInsertElement(builder, 475 res, ex, 476 LLVMConstInt(LLVMInt32Type(), 2, 0), 477 ""); 478 479 ex = LLVMBuildExtractElement(builder, val3, 480 channel, ""); 481 res = LLVMBuildInsertElement(builder, 482 res, ex, 483 LLVMConstInt(LLVMInt32Type(), 3, 0), 484 ""); 485 486 return res; 487} 488 489static void 490soa_to_aos(LLVMBuilderRef builder, 491 LLVMValueRef soa[NUM_CHANNELS], 492 LLVMValueRef aos[NUM_CHANNELS]) 493{ 494 LLVMValueRef comp; 495 int i = 0; 496 497 debug_assert(NUM_CHANNELS == 4); 498 499 aos[0] = LLVMConstNull(LLVMTypeOf(soa[0])); 500 aos[1] = aos[2] = aos[3] = aos[0]; 501 502 for (i = 0; i < NUM_CHANNELS; ++i) { 503 LLVMValueRef channel = LLVMConstInt(LLVMInt32Type(), i, 0); 504 505 comp = LLVMBuildExtractElement(builder, soa[i], 506 LLVMConstInt(LLVMInt32Type(), 0, 0), ""); 507 aos[0] = LLVMBuildInsertElement(builder, aos[0], comp, channel, ""); 508 509 comp = LLVMBuildExtractElement(builder, soa[i], 510 LLVMConstInt(LLVMInt32Type(), 1, 0), ""); 511 aos[1] = LLVMBuildInsertElement(builder, aos[1], comp, channel, ""); 512 513 comp = LLVMBuildExtractElement(builder, soa[i], 514 LLVMConstInt(LLVMInt32Type(), 2, 0), ""); 515 aos[2] = LLVMBuildInsertElement(builder, aos[2], comp, channel, ""); 516 517 comp = LLVMBuildExtractElement(builder, soa[i], 518 LLVMConstInt(LLVMInt32Type(), 3, 0), ""); 519 aos[3] = LLVMBuildInsertElement(builder, aos[3], comp, channel, ""); 520 521 } 522} 523 524static void 525convert_to_soa(LLVMBuilderRef builder, 526 LLVMValueRef (*aos)[NUM_CHANNELS], 527 LLVMValueRef (*soa)[NUM_CHANNELS], 528 int num_attribs) 529{ 530 int i; 531 532 debug_assert(NUM_CHANNELS == 4); 533 534 for (i = 0; i < num_attribs; ++i) { 535 LLVMValueRef val0 = aos[i][0]; 536 LLVMValueRef val1 = aos[i][1]; 537 LLVMValueRef val2 = aos[i][2]; 538 LLVMValueRef val3 = aos[i][3]; 539 540 soa[i][0] = aos_to_soa(builder, val0, val1, val2, val3, 541 LLVMConstInt(LLVMInt32Type(), 0, 0)); 542 soa[i][1] = aos_to_soa(builder, val0, val1, val2, val3, 543 LLVMConstInt(LLVMInt32Type(), 1, 0)); 544 soa[i][2] = aos_to_soa(builder, val0, val1, val2, val3, 545 LLVMConstInt(LLVMInt32Type(), 2, 0)); 546 soa[i][3] = aos_to_soa(builder, val0, val1, val2, val3, 547 LLVMConstInt(LLVMInt32Type(), 3, 0)); 548 } 549} 550 551static void 552store_aos(LLVMBuilderRef builder, 553 LLVMValueRef io_ptr, 554 LLVMValueRef index, 555 LLVMValueRef value, 556 LLVMValueRef clipmask) 557{ 558 LLVMValueRef id_ptr = draw_jit_header_id(builder, io_ptr); 559 LLVMValueRef data_ptr = draw_jit_header_data(builder, io_ptr); 560 LLVMValueRef indices[3]; 561 LLVMValueRef val, shift; 562 563 indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); 564 indices[1] = index; 565 indices[2] = LLVMConstInt(LLVMInt32Type(), 0, 0); 566 567 /* initialize vertex id:16 = 0xffff, pad:3 = 0, edgeflag:1 = 1 */ 568 val = LLVMConstInt(LLVMInt32Type(), 0xffff1, 0); 569 shift = LLVMConstInt(LLVMInt32Type(), 12, 0); 570 val = LLVMBuildShl(builder, val, shift, ""); 571 /* add clipmask:12 */ 572 val = LLVMBuildOr(builder, val, clipmask, ""); 573 574 /* store vertex header */ 575 LLVMBuildStore(builder, val, id_ptr); 576 577 578#if DEBUG_STORE 579 lp_build_printf(builder, " ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr); 580#endif 581#if 0 582 /*lp_build_printf(builder, " ---- %p storing at %d (%p) ", io_ptr, index, data_ptr); 583 print_vectorf(builder, value);*/ 584 data_ptr = LLVMBuildBitCast(builder, data_ptr, 585 LLVMPointerType(LLVMArrayType(LLVMVectorType(LLVMFloatType(), 4), 0), 0), 586 "datavec"); 587 data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 2, ""); 588 589 LLVMBuildStore(builder, value, data_ptr); 590#else 591 { 592 LLVMValueRef x, y, z, w; 593 LLVMValueRef idx0, idx1, idx2, idx3; 594 LLVMValueRef gep0, gep1, gep2, gep3; 595 data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 3, ""); 596 597 idx0 = LLVMConstInt(LLVMInt32Type(), 0, 0); 598 idx1 = LLVMConstInt(LLVMInt32Type(), 1, 0); 599 idx2 = LLVMConstInt(LLVMInt32Type(), 2, 0); 600 idx3 = LLVMConstInt(LLVMInt32Type(), 3, 0); 601 602 x = LLVMBuildExtractElement(builder, value, 603 idx0, ""); 604 y = LLVMBuildExtractElement(builder, value, 605 idx1, ""); 606 z = LLVMBuildExtractElement(builder, value, 607 idx2, ""); 608 w = LLVMBuildExtractElement(builder, value, 609 idx3, ""); 610 611 gep0 = LLVMBuildGEP(builder, data_ptr, &idx0, 1, ""); 612 gep1 = LLVMBuildGEP(builder, data_ptr, &idx1, 1, ""); 613 gep2 = LLVMBuildGEP(builder, data_ptr, &idx2, 1, ""); 614 gep3 = LLVMBuildGEP(builder, data_ptr, &idx3, 1, ""); 615 616 /*lp_build_printf(builder, "##### x = %f (%p), y = %f (%p), z = %f (%p), w = %f (%p)\n", 617 x, gep0, y, gep1, z, gep2, w, gep3);*/ 618 LLVMBuildStore(builder, x, gep0); 619 LLVMBuildStore(builder, y, gep1); 620 LLVMBuildStore(builder, z, gep2); 621 LLVMBuildStore(builder, w, gep3); 622 } 623#endif 624} 625 626static void 627store_aos_array(LLVMBuilderRef builder, 628 LLVMValueRef io_ptr, 629 LLVMValueRef aos[NUM_CHANNELS], 630 int attrib, 631 int num_outputs, 632 LLVMValueRef clipmask) 633{ 634 LLVMValueRef attr_index = LLVMConstInt(LLVMInt32Type(), attrib, 0); 635 LLVMValueRef ind0 = LLVMConstInt(LLVMInt32Type(), 0, 0); 636 LLVMValueRef ind1 = LLVMConstInt(LLVMInt32Type(), 1, 0); 637 LLVMValueRef ind2 = LLVMConstInt(LLVMInt32Type(), 2, 0); 638 LLVMValueRef ind3 = LLVMConstInt(LLVMInt32Type(), 3, 0); 639 LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr; 640 LLVMValueRef clipmask0, clipmask1, clipmask2, clipmask3; 641 642 debug_assert(NUM_CHANNELS == 4); 643 644 io0_ptr = LLVMBuildGEP(builder, io_ptr, 645 &ind0, 1, ""); 646 io1_ptr = LLVMBuildGEP(builder, io_ptr, 647 &ind1, 1, ""); 648 io2_ptr = LLVMBuildGEP(builder, io_ptr, 649 &ind2, 1, ""); 650 io3_ptr = LLVMBuildGEP(builder, io_ptr, 651 &ind3, 1, ""); 652 653 clipmask0 = LLVMBuildExtractElement(builder, clipmask, 654 ind0, ""); 655 clipmask1 = LLVMBuildExtractElement(builder, clipmask, 656 ind1, ""); 657 clipmask2 = LLVMBuildExtractElement(builder, clipmask, 658 ind2, ""); 659 clipmask3 = LLVMBuildExtractElement(builder, clipmask, 660 ind3, ""); 661 662#if DEBUG_STORE 663 lp_build_printf(builder, "io = %p, indexes[%d, %d, %d, %d]\n, clipmask0 = %x, clipmask1 = %x, clipmask2 = %x, clipmask3 = %x\n", 664 io_ptr, ind0, ind1, ind2, ind3, clipmask0, clipmask1, clipmask2, clipmask3); 665#endif 666 /* store for each of the 4 vertices */ 667 store_aos(builder, io0_ptr, attr_index, aos[0], clipmask0); 668 store_aos(builder, io1_ptr, attr_index, aos[1], clipmask1); 669 store_aos(builder, io2_ptr, attr_index, aos[2], clipmask2); 670 store_aos(builder, io3_ptr, attr_index, aos[3], clipmask3); 671} 672 673static void 674convert_to_aos(LLVMBuilderRef builder, 675 LLVMValueRef io, 676 LLVMValueRef (*outputs)[NUM_CHANNELS], 677 LLVMValueRef clipmask, 678 int num_outputs, 679 int max_vertices) 680{ 681 unsigned chan, attrib; 682 683#if DEBUG_STORE 684 lp_build_printf(builder, " # storing begin\n"); 685#endif 686 for (attrib = 0; attrib < num_outputs; ++attrib) { 687 LLVMValueRef soa[4]; 688 LLVMValueRef aos[4]; 689 for(chan = 0; chan < NUM_CHANNELS; ++chan) { 690 if(outputs[attrib][chan]) { 691 LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], ""); 692 lp_build_name(out, "output%u.%c", attrib, "xyzw"[chan]); 693 /*lp_build_printf(builder, "output %d : %d ", 694 LLVMConstInt(LLVMInt32Type(), attrib, 0), 695 LLVMConstInt(LLVMInt32Type(), chan, 0)); 696 print_vectorf(builder, out);*/ 697 soa[chan] = out; 698 } else 699 soa[chan] = 0; 700 } 701 soa_to_aos(builder, soa, aos); 702 store_aos_array(builder, 703 io, 704 aos, 705 attrib, 706 num_outputs, 707 clipmask); 708 } 709#if DEBUG_STORE 710 lp_build_printf(builder, " # storing end\n"); 711#endif 712} 713 714/* 715 * Stores original vertex positions in clip coordinates 716 * There is probably a more efficient way to do this, 4 floats at once 717 * rather than extracting each element one by one. 718 */ 719static void 720store_clip(LLVMBuilderRef builder, 721 LLVMValueRef io_ptr, 722 LLVMValueRef (*outputs)[NUM_CHANNELS]) 723{ 724 LLVMValueRef out[4]; 725 LLVMValueRef indices[2]; 726 LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr; 727 LLVMValueRef clip_ptr0, clip_ptr1, clip_ptr2, clip_ptr3; 728 LLVMValueRef clip0_ptr, clip1_ptr, clip2_ptr, clip3_ptr; 729 LLVMValueRef out0elem, out1elem, out2elem, out3elem; 730 int i; 731 732 LLVMValueRef ind0 = LLVMConstInt(LLVMInt32Type(), 0, 0); 733 LLVMValueRef ind1 = LLVMConstInt(LLVMInt32Type(), 1, 0); 734 LLVMValueRef ind2 = LLVMConstInt(LLVMInt32Type(), 2, 0); 735 LLVMValueRef ind3 = LLVMConstInt(LLVMInt32Type(), 3, 0); 736 737 indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); 738 indices[1] = LLVMConstInt(LLVMInt32Type(), 0, 0); 739 740 out[0] = LLVMBuildLoad(builder, outputs[0][0], ""); /*x0 x1 x2 x3*/ 741 out[1] = LLVMBuildLoad(builder, outputs[0][1], ""); /*y0 y1 y2 y3*/ 742 out[2] = LLVMBuildLoad(builder, outputs[0][2], ""); /*z0 z1 z2 z3*/ 743 out[3] = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/ 744 745 io0_ptr = LLVMBuildGEP(builder, io_ptr, &ind0, 1, ""); 746 io1_ptr = LLVMBuildGEP(builder, io_ptr, &ind1, 1, ""); 747 io2_ptr = LLVMBuildGEP(builder, io_ptr, &ind2, 1, ""); 748 io3_ptr = LLVMBuildGEP(builder, io_ptr, &ind3, 1, ""); 749 750 clip_ptr0 = draw_jit_header_clip(builder, io0_ptr); 751 clip_ptr1 = draw_jit_header_clip(builder, io1_ptr); 752 clip_ptr2 = draw_jit_header_clip(builder, io2_ptr); 753 clip_ptr3 = draw_jit_header_clip(builder, io3_ptr); 754 755 for (i = 0; i<4; i++){ 756 clip0_ptr = LLVMBuildGEP(builder, clip_ptr0, 757 indices, 2, ""); //x0 758 clip1_ptr = LLVMBuildGEP(builder, clip_ptr1, 759 indices, 2, ""); //x1 760 clip2_ptr = LLVMBuildGEP(builder, clip_ptr2, 761 indices, 2, ""); //x2 762 clip3_ptr = LLVMBuildGEP(builder, clip_ptr3, 763 indices, 2, ""); //x3 764 765 out0elem = LLVMBuildExtractElement(builder, out[i], 766 ind0, ""); //x0 767 out1elem = LLVMBuildExtractElement(builder, out[i], 768 ind1, ""); //x1 769 out2elem = LLVMBuildExtractElement(builder, out[i], 770 ind2, ""); //x2 771 out3elem = LLVMBuildExtractElement(builder, out[i], 772 ind3, ""); //x3 773 774 LLVMBuildStore(builder, out0elem, clip0_ptr); 775 LLVMBuildStore(builder, out1elem, clip1_ptr); 776 LLVMBuildStore(builder, out2elem, clip2_ptr); 777 LLVMBuildStore(builder, out3elem, clip3_ptr); 778 779 indices[1]= LLVMBuildAdd(builder, indices[1], ind1, ""); 780 } 781 782} 783 784/* Equivalent of _mm_set1_ps(a) 785 */ 786static LLVMValueRef vec4f_from_scalar(LLVMBuilderRef bld, 787 LLVMValueRef a, 788 const char *name) 789{ 790 LLVMValueRef res = LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)); 791 int i; 792 793 for(i = 0; i < 4; ++i) { 794 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); 795 res = LLVMBuildInsertElement(bld, res, a, index, i == 3 ? name : ""); 796 } 797 798 return res; 799} 800 801/* 802 * Transforms the outputs for viewport mapping 803 */ 804static void 805generate_viewport(struct draw_llvm *llvm, 806 LLVMBuilderRef builder, 807 LLVMValueRef (*outputs)[NUM_CHANNELS], 808 LLVMValueRef context_ptr) 809{ 810 int i; 811 struct lp_type f32_type = lp_type_float_vec(32); 812 LLVMValueRef out3 = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/ 813 LLVMValueRef const1 = lp_build_const_vec(f32_type, 1.0); /*1.0 1.0 1.0 1.0*/ 814 LLVMValueRef vp_ptr = draw_jit_context_viewport(builder, context_ptr); 815 816 /* for 1/w convention*/ 817 out3 = LLVMBuildFDiv(builder, const1, out3, ""); 818 LLVMBuildStore(builder, out3, outputs[0][3]); 819 820 /* Viewport Mapping */ 821 for (i=0; i<3; i++){ 822 LLVMValueRef out = LLVMBuildLoad(builder, outputs[0][i], ""); /*x0 x1 x2 x3*/ 823 LLVMValueRef scale; 824 LLVMValueRef trans; 825 LLVMValueRef scale_i; 826 LLVMValueRef trans_i; 827 LLVMValueRef index; 828 829 index = LLVMConstInt(LLVMInt32Type(), i, 0); 830 scale_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, ""); 831 832 index = LLVMConstInt(LLVMInt32Type(), i+4, 0); 833 trans_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, ""); 834 835 scale = vec4f_from_scalar(builder, LLVMBuildLoad(builder, scale_i, ""), "scale"); 836 trans = vec4f_from_scalar(builder, LLVMBuildLoad(builder, trans_i, ""), "trans"); 837 838 /* divide by w */ 839 out = LLVMBuildFMul(builder, out, out3, ""); 840 /* mult by scale */ 841 out = LLVMBuildFMul(builder, out, scale, ""); 842 /* add translation */ 843 out = LLVMBuildFAdd(builder, out, trans, ""); 844 845 /* store transformed outputs */ 846 LLVMBuildStore(builder, out, outputs[0][i]); 847 } 848 849} 850 851 852/* 853 * Returns clipmask as 4xi32 bitmask for the 4 vertices 854 */ 855static LLVMValueRef 856generate_clipmask(LLVMBuilderRef builder, 857 LLVMValueRef (*outputs)[NUM_CHANNELS], 858 boolean clip_xy, 859 boolean clip_z, 860 boolean clip_user, 861 boolean clip_halfz, 862 unsigned nr, 863 LLVMValueRef context_ptr) 864{ 865 LLVMValueRef mask; /* stores the <4xi32> clipmasks */ 866 LLVMValueRef test, temp; 867 LLVMValueRef zero, shift; 868 LLVMValueRef pos_x, pos_y, pos_z, pos_w; 869 LLVMValueRef plane1, planes, plane_ptr, sum; 870 871 unsigned i; 872 873 struct lp_type f32_type = lp_type_float_vec(32); 874 875 mask = lp_build_const_int_vec(lp_type_int_vec(32), 0); 876 temp = lp_build_const_int_vec(lp_type_int_vec(32), 0); 877 zero = lp_build_const_vec(f32_type, 0); /* 0.0f 0.0f 0.0f 0.0f */ 878 shift = lp_build_const_int_vec(lp_type_int_vec(32), 1); /* 1 1 1 1 */ 879 880 /* Assuming position stored at output[0] */ 881 pos_x = LLVMBuildLoad(builder, outputs[0][0], ""); /*x0 x1 x2 x3*/ 882 pos_y = LLVMBuildLoad(builder, outputs[0][1], ""); /*y0 y1 y2 y3*/ 883 pos_z = LLVMBuildLoad(builder, outputs[0][2], ""); /*z0 z1 z2 z3*/ 884 pos_w = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/ 885 886 /* Cliptest, for hardwired planes */ 887 if (clip_xy){ 888 /* plane 1 */ 889 test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, pos_x , pos_w); 890 temp = shift; 891 test = LLVMBuildAnd(builder, test, temp, ""); 892 mask = test; 893 894 /* plane 2 */ 895 test = LLVMBuildFAdd(builder, pos_x, pos_w, ""); 896 test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, test); 897 temp = LLVMBuildShl(builder, temp, shift, ""); 898 test = LLVMBuildAnd(builder, test, temp, ""); 899 mask = LLVMBuildOr(builder, mask, test, ""); 900 901 /* plane 3 */ 902 test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, pos_y, pos_w); 903 temp = LLVMBuildShl(builder, temp, shift, ""); 904 test = LLVMBuildAnd(builder, test, temp, ""); 905 mask = LLVMBuildOr(builder, mask, test, ""); 906 907 /* plane 4 */ 908 test = LLVMBuildFAdd(builder, pos_y, pos_w, ""); 909 test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, test); 910 temp = LLVMBuildShl(builder, temp, shift, ""); 911 test = LLVMBuildAnd(builder, test, temp, ""); 912 mask = LLVMBuildOr(builder, mask, test, ""); 913 } 914 915 if (clip_z){ 916 temp = lp_build_const_int_vec(lp_type_int_vec(32), 16); 917 if (clip_halfz){ 918 /* plane 5 */ 919 test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, pos_z); 920 test = LLVMBuildAnd(builder, test, temp, ""); 921 mask = LLVMBuildOr(builder, mask, test, ""); 922 } 923 else{ 924 /* plane 5 */ 925 test = LLVMBuildFAdd(builder, pos_z, pos_w, ""); 926 test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, test); 927 test = LLVMBuildAnd(builder, test, temp, ""); 928 mask = LLVMBuildOr(builder, mask, test, ""); 929 } 930 /* plane 6 */ 931 test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, pos_z, pos_w); 932 temp = LLVMBuildShl(builder, temp, shift, ""); 933 test = LLVMBuildAnd(builder, test, temp, ""); 934 mask = LLVMBuildOr(builder, mask, test, ""); 935 } 936 937 if (clip_user){ 938 LLVMValueRef planes_ptr = draw_jit_context_planes(builder, context_ptr); 939 LLVMValueRef indices[3]; 940 temp = lp_build_const_int_vec(lp_type_int_vec(32), 32); 941 942 /* userclip planes */ 943 for (i = 6; i < nr; i++) { 944 indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); 945 indices[1] = LLVMConstInt(LLVMInt32Type(), i, 0); 946 947 indices[2] = LLVMConstInt(LLVMInt32Type(), 0, 0); 948 plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, ""); 949 plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_x"); 950 planes = vec4f_from_scalar(builder, plane1, "plane4_x"); 951 sum = LLVMBuildFMul(builder, planes, pos_x, ""); 952 953 indices[2] = LLVMConstInt(LLVMInt32Type(), 1, 0); 954 plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, ""); 955 plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_y"); 956 planes = vec4f_from_scalar(builder, plane1, "plane4_y"); 957 test = LLVMBuildFMul(builder, planes, pos_y, ""); 958 sum = LLVMBuildFAdd(builder, sum, test, ""); 959 960 indices[2] = LLVMConstInt(LLVMInt32Type(), 2, 0); 961 plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, ""); 962 plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_z"); 963 planes = vec4f_from_scalar(builder, plane1, "plane4_z"); 964 test = LLVMBuildFMul(builder, planes, pos_z, ""); 965 sum = LLVMBuildFAdd(builder, sum, test, ""); 966 967 indices[2] = LLVMConstInt(LLVMInt32Type(), 3, 0); 968 plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, ""); 969 plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_w"); 970 planes = vec4f_from_scalar(builder, plane1, "plane4_w"); 971 test = LLVMBuildFMul(builder, planes, pos_w, ""); 972 sum = LLVMBuildFAdd(builder, sum, test, ""); 973 974 test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, sum); 975 temp = LLVMBuildShl(builder, temp, shift, ""); 976 test = LLVMBuildAnd(builder, test, temp, ""); 977 mask = LLVMBuildOr(builder, mask, test, ""); 978 } 979 } 980 return mask; 981} 982 983/* 984 * Returns boolean if any clipping has occurred 985 * Used zero/non-zero i32 value to represent boolean 986 */ 987static void 988clipmask_bool(LLVMBuilderRef builder, 989 LLVMValueRef clipmask, 990 LLVMValueRef ret_ptr) 991{ 992 LLVMValueRef ret = LLVMBuildLoad(builder, ret_ptr, ""); 993 LLVMValueRef temp; 994 int i; 995 996 for (i=0; i<4; i++){ 997 temp = LLVMBuildExtractElement(builder, clipmask, 998 LLVMConstInt(LLVMInt32Type(), i, 0) , ""); 999 ret = LLVMBuildOr(builder, ret, temp, ""); 1000 } 1001 1002 LLVMBuildStore(builder, ret, ret_ptr); 1003} 1004 1005static void 1006draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) 1007{ 1008 LLVMTypeRef arg_types[8]; 1009 LLVMTypeRef func_type; 1010 LLVMValueRef context_ptr; 1011 LLVMBasicBlockRef block; 1012 LLVMBuilderRef builder; 1013 LLVMValueRef start, end, count, stride, step, io_itr; 1014 LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr; 1015 LLVMValueRef instance_id; 1016 LLVMValueRef system_values_array; 1017 struct draw_context *draw = llvm->draw; 1018 const struct tgsi_shader_info *vs_info = &draw->vs.vertex_shader->info; 1019 unsigned i, j; 1020 struct lp_build_context bld; 1021 struct lp_build_loop_state lp_loop; 1022 const int max_vertices = 4; 1023 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; 1024 void *code; 1025 struct lp_build_sampler_soa *sampler = 0; 1026 LLVMValueRef ret, ret_ptr; 1027 boolean bypass_viewport = variant->key.bypass_viewport; 1028 boolean enable_cliptest = variant->key.clip_xy || 1029 variant->key.clip_z || 1030 variant->key.clip_user; 1031 1032 arg_types[0] = llvm->context_ptr_type; /* context */ 1033 arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */ 1034 arg_types[2] = llvm->buffer_ptr_type; /* vbuffers */ 1035 arg_types[3] = LLVMInt32Type(); /* start */ 1036 arg_types[4] = LLVMInt32Type(); /* count */ 1037 arg_types[5] = LLVMInt32Type(); /* stride */ 1038 arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */ 1039 arg_types[7] = LLVMInt32Type(); /* instance_id */ 1040 1041 func_type = LLVMFunctionType(LLVMInt32Type(), arg_types, Elements(arg_types), 0); 1042 1043 variant->function = LLVMAddFunction(llvm->module, "draw_llvm_shader", func_type); 1044 LLVMSetFunctionCallConv(variant->function, LLVMCCallConv); 1045 for(i = 0; i < Elements(arg_types); ++i) 1046 if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) 1047 LLVMAddAttribute(LLVMGetParam(variant->function, i), LLVMNoAliasAttribute); 1048 1049 context_ptr = LLVMGetParam(variant->function, 0); 1050 io_ptr = LLVMGetParam(variant->function, 1); 1051 vbuffers_ptr = LLVMGetParam(variant->function, 2); 1052 start = LLVMGetParam(variant->function, 3); 1053 count = LLVMGetParam(variant->function, 4); 1054 stride = LLVMGetParam(variant->function, 5); 1055 vb_ptr = LLVMGetParam(variant->function, 6); 1056 instance_id = LLVMGetParam(variant->function, 7); 1057 1058 lp_build_name(context_ptr, "context"); 1059 lp_build_name(io_ptr, "io"); 1060 lp_build_name(vbuffers_ptr, "vbuffers"); 1061 lp_build_name(start, "start"); 1062 lp_build_name(count, "count"); 1063 lp_build_name(stride, "stride"); 1064 lp_build_name(vb_ptr, "vb"); 1065 lp_build_name(instance_id, "instance_id"); 1066 1067 /* 1068 * Function body 1069 */ 1070 1071 block = LLVMAppendBasicBlock(variant->function, "entry"); 1072 builder = LLVMCreateBuilder(); 1073 LLVMPositionBuilderAtEnd(builder, block); 1074 1075 lp_build_context_init(&bld, builder, lp_type_int(32)); 1076 1077 system_values_array = lp_build_system_values_array(builder, vs_info, 1078 instance_id, NULL); 1079 1080 end = lp_build_add(&bld, start, count); 1081 1082 step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); 1083 1084 /* function will return non-zero i32 value if any clipped vertices */ 1085 ret_ptr = lp_build_alloca(builder, LLVMInt32Type(), ""); 1086 LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), ret_ptr); 1087 1088 /* code generated texture sampling */ 1089 sampler = draw_llvm_sampler_soa_create( 1090 draw_llvm_variant_key_samplers(&variant->key), 1091 context_ptr); 1092 1093#if DEBUG_STORE 1094 lp_build_printf(builder, "start = %d, end = %d, step = %d\n", 1095 start, end, step); 1096#endif 1097 lp_build_loop_begin(builder, start, &lp_loop); 1098 { 1099 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; 1100 LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } }; 1101 LLVMValueRef io; 1102 LLVMValueRef clipmask; /* holds the clipmask value */ 1103 const LLVMValueRef (*ptr_aos)[NUM_CHANNELS]; 1104 1105 io_itr = LLVMBuildSub(builder, lp_loop.counter, start, ""); 1106 io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, ""); 1107#if DEBUG_STORE 1108 lp_build_printf(builder, " --- io %d = %p, loop counter %d\n", 1109 io_itr, io, lp_loop.counter); 1110#endif 1111 for (i = 0; i < NUM_CHANNELS; ++i) { 1112 LLVMValueRef true_index = LLVMBuildAdd( 1113 builder, 1114 lp_loop.counter, 1115 LLVMConstInt(LLVMInt32Type(), i, 0), ""); 1116 for (j = 0; j < draw->pt.nr_vertex_elements; ++j) { 1117 struct pipe_vertex_element *velem = &draw->pt.vertex_element[j]; 1118 LLVMValueRef vb_index = LLVMConstInt(LLVMInt32Type(), 1119 velem->vertex_buffer_index, 1120 0); 1121 LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr, 1122 &vb_index, 1, ""); 1123 generate_fetch(builder, vbuffers_ptr, 1124 &aos_attribs[j][i], velem, vb, true_index, 1125 instance_id); 1126 } 1127 } 1128 convert_to_soa(builder, aos_attribs, inputs, 1129 draw->pt.nr_vertex_elements); 1130 1131 ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs; 1132 generate_vs(llvm, 1133 builder, 1134 outputs, 1135 ptr_aos, 1136 system_values_array, 1137 context_ptr, 1138 sampler); 1139 1140 /* store original positions in clip before further manipulation */ 1141 store_clip(builder, io, outputs); 1142 1143 /* do cliptest */ 1144 if (enable_cliptest){ 1145 /* allocate clipmask, assign it integer type */ 1146 clipmask = generate_clipmask(builder, outputs, 1147 variant->key.clip_xy, 1148 variant->key.clip_z, 1149 variant->key.clip_user, 1150 variant->key.clip_halfz, 1151 variant->key.nr_planes, 1152 context_ptr); 1153 /* return clipping boolean value for function */ 1154 clipmask_bool(builder, clipmask, ret_ptr); 1155 } 1156 else{ 1157 clipmask = lp_build_const_int_vec(lp_type_int_vec(32), 0); 1158 } 1159 1160 /* do viewport mapping */ 1161 if (!bypass_viewport){ 1162 generate_viewport(llvm, builder, outputs, context_ptr); 1163 } 1164 1165 /* store clipmask in vertex header and positions in data */ 1166 convert_to_aos(builder, io, outputs, clipmask, 1167 vs_info->num_outputs, max_vertices); 1168 } 1169 1170 lp_build_loop_end_cond(builder, end, step, LLVMIntUGE, &lp_loop); 1171 1172 sampler->destroy(sampler); 1173 1174 ret = LLVMBuildLoad(builder, ret_ptr,""); 1175 LLVMBuildRet(builder, ret); 1176 1177 LLVMDisposeBuilder(builder); 1178 1179 /* 1180 * Translate the LLVM IR into machine code. 1181 */ 1182#ifdef DEBUG 1183 if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) { 1184 lp_debug_dump_value(variant->function); 1185 assert(0); 1186 } 1187#endif 1188 1189 LLVMRunFunctionPassManager(llvm->pass, variant->function); 1190 1191 if (gallivm_debug & GALLIVM_DEBUG_IR) { 1192 lp_debug_dump_value(variant->function); 1193 debug_printf("\n"); 1194 } 1195 1196 code = LLVMGetPointerToGlobal(llvm->draw->engine, variant->function); 1197 variant->jit_func = (draw_jit_vert_func)pointer_to_func(code); 1198 1199 if (gallivm_debug & GALLIVM_DEBUG_ASM) { 1200 lp_disassemble(code); 1201 } 1202 lp_func_delete_body(variant->function); 1203} 1204 1205 1206static void 1207draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *variant) 1208{ 1209 LLVMTypeRef arg_types[8]; 1210 LLVMTypeRef func_type; 1211 LLVMValueRef context_ptr; 1212 LLVMBasicBlockRef block; 1213 LLVMBuilderRef builder; 1214 LLVMValueRef fetch_elts, fetch_count, stride, step, io_itr; 1215 LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr; 1216 LLVMValueRef instance_id; 1217 LLVMValueRef system_values_array; 1218 struct draw_context *draw = llvm->draw; 1219 const struct tgsi_shader_info *vs_info = &draw->vs.vertex_shader->info; 1220 unsigned i, j; 1221 struct lp_build_context bld; 1222 struct lp_build_loop_state lp_loop; 1223 const int max_vertices = 4; 1224 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; 1225 LLVMValueRef fetch_max; 1226 void *code; 1227 struct lp_build_sampler_soa *sampler = 0; 1228 LLVMValueRef ret, ret_ptr; 1229 boolean bypass_viewport = variant->key.bypass_viewport; 1230 boolean enable_cliptest = variant->key.clip_xy || 1231 variant->key.clip_z || 1232 variant->key.clip_user; 1233 1234 arg_types[0] = llvm->context_ptr_type; /* context */ 1235 arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */ 1236 arg_types[2] = llvm->buffer_ptr_type; /* vbuffers */ 1237 arg_types[3] = LLVMPointerType(LLVMInt32Type(), 0); /* fetch_elts * */ 1238 arg_types[4] = LLVMInt32Type(); /* fetch_count */ 1239 arg_types[5] = LLVMInt32Type(); /* stride */ 1240 arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */ 1241 arg_types[7] = LLVMInt32Type(); /* instance_id */ 1242 1243 func_type = LLVMFunctionType(LLVMInt32Type(), arg_types, Elements(arg_types), 0); 1244 1245 variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts", func_type); 1246 LLVMSetFunctionCallConv(variant->function_elts, LLVMCCallConv); 1247 for(i = 0; i < Elements(arg_types); ++i) 1248 if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) 1249 LLVMAddAttribute(LLVMGetParam(variant->function_elts, i), 1250 LLVMNoAliasAttribute); 1251 1252 context_ptr = LLVMGetParam(variant->function_elts, 0); 1253 io_ptr = LLVMGetParam(variant->function_elts, 1); 1254 vbuffers_ptr = LLVMGetParam(variant->function_elts, 2); 1255 fetch_elts = LLVMGetParam(variant->function_elts, 3); 1256 fetch_count = LLVMGetParam(variant->function_elts, 4); 1257 stride = LLVMGetParam(variant->function_elts, 5); 1258 vb_ptr = LLVMGetParam(variant->function_elts, 6); 1259 instance_id = LLVMGetParam(variant->function_elts, 7); 1260 1261 lp_build_name(context_ptr, "context"); 1262 lp_build_name(io_ptr, "io"); 1263 lp_build_name(vbuffers_ptr, "vbuffers"); 1264 lp_build_name(fetch_elts, "fetch_elts"); 1265 lp_build_name(fetch_count, "fetch_count"); 1266 lp_build_name(stride, "stride"); 1267 lp_build_name(vb_ptr, "vb"); 1268 lp_build_name(instance_id, "instance_id"); 1269 1270 /* 1271 * Function body 1272 */ 1273 1274 block = LLVMAppendBasicBlock(variant->function_elts, "entry"); 1275 builder = LLVMCreateBuilder(); 1276 LLVMPositionBuilderAtEnd(builder, block); 1277 1278 lp_build_context_init(&bld, builder, lp_type_int(32)); 1279 1280 system_values_array = lp_build_system_values_array(builder, vs_info, 1281 instance_id, NULL); 1282 1283 1284 step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); 1285 1286 /* code generated texture sampling */ 1287 sampler = draw_llvm_sampler_soa_create( 1288 draw_llvm_variant_key_samplers(&variant->key), 1289 context_ptr); 1290 1291 fetch_max = LLVMBuildSub(builder, fetch_count, 1292 LLVMConstInt(LLVMInt32Type(), 1, 0), 1293 "fetch_max"); 1294 1295 /* function returns non-zero i32 value if any clipped vertices */ 1296 ret_ptr = lp_build_alloca(builder, LLVMInt32Type(), ""); 1297 LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), ret_ptr); 1298 1299 lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), &lp_loop); 1300 { 1301 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; 1302 LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } }; 1303 LLVMValueRef io; 1304 LLVMValueRef clipmask; /* holds the clipmask value */ 1305 const LLVMValueRef (*ptr_aos)[NUM_CHANNELS]; 1306 1307 io_itr = lp_loop.counter; 1308 io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, ""); 1309#if DEBUG_STORE 1310 lp_build_printf(builder, " --- io %d = %p, loop counter %d\n", 1311 io_itr, io, lp_loop.counter); 1312#endif 1313 for (i = 0; i < NUM_CHANNELS; ++i) { 1314 LLVMValueRef true_index = LLVMBuildAdd( 1315 builder, 1316 lp_loop.counter, 1317 LLVMConstInt(LLVMInt32Type(), i, 0), ""); 1318 LLVMValueRef fetch_ptr; 1319 1320 /* make sure we're not out of bounds which can happen 1321 * if fetch_count % 4 != 0, because on the last iteration 1322 * a few of the 4 vertex fetches will be out of bounds */ 1323 true_index = lp_build_min(&bld, true_index, fetch_max); 1324 1325 fetch_ptr = LLVMBuildGEP(builder, fetch_elts, 1326 &true_index, 1, ""); 1327 true_index = LLVMBuildLoad(builder, fetch_ptr, "fetch_elt"); 1328 for (j = 0; j < draw->pt.nr_vertex_elements; ++j) { 1329 struct pipe_vertex_element *velem = &draw->pt.vertex_element[j]; 1330 LLVMValueRef vb_index = LLVMConstInt(LLVMInt32Type(), 1331 velem->vertex_buffer_index, 1332 0); 1333 LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr, 1334 &vb_index, 1, ""); 1335 generate_fetch(builder, vbuffers_ptr, 1336 &aos_attribs[j][i], velem, vb, true_index, 1337 instance_id); 1338 } 1339 } 1340 convert_to_soa(builder, aos_attribs, inputs, 1341 draw->pt.nr_vertex_elements); 1342 1343 ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs; 1344 generate_vs(llvm, 1345 builder, 1346 outputs, 1347 ptr_aos, 1348 system_values_array, 1349 context_ptr, 1350 sampler); 1351 1352 /* store original positions in clip before further manipulation */ 1353 store_clip(builder, io, outputs); 1354 1355 /* do cliptest */ 1356 if (enable_cliptest){ 1357 /* allocate clipmask, assign it integer type */ 1358 clipmask = generate_clipmask(builder, outputs, 1359 variant->key.clip_xy, 1360 variant->key.clip_z, 1361 variant->key.clip_user, 1362 variant->key.clip_halfz, 1363 variant->key.nr_planes, 1364 context_ptr); 1365 /* return clipping boolean value for function */ 1366 clipmask_bool(builder, clipmask, ret_ptr); 1367 } 1368 else{ 1369 clipmask = lp_build_const_int_vec(lp_type_int_vec(32), 0); 1370 } 1371 1372 /* do viewport mapping */ 1373 if (!bypass_viewport){ 1374 generate_viewport(llvm, builder, outputs, context_ptr); 1375 } 1376 1377 /* store clipmask in vertex header, 1378 * original positions in clip 1379 * and transformed positions in data 1380 */ 1381 convert_to_aos(builder, io, outputs, clipmask, 1382 vs_info->num_outputs, max_vertices); 1383 } 1384 1385 lp_build_loop_end_cond(builder, fetch_count, step, LLVMIntUGE, &lp_loop); 1386 1387 sampler->destroy(sampler); 1388 1389 ret = LLVMBuildLoad(builder, ret_ptr,""); 1390 LLVMBuildRet(builder, ret); 1391 1392 LLVMDisposeBuilder(builder); 1393 1394 /* 1395 * Translate the LLVM IR into machine code. 1396 */ 1397#ifdef DEBUG 1398 if(LLVMVerifyFunction(variant->function_elts, LLVMPrintMessageAction)) { 1399 lp_debug_dump_value(variant->function_elts); 1400 assert(0); 1401 } 1402#endif 1403 1404 LLVMRunFunctionPassManager(llvm->pass, variant->function_elts); 1405 1406 if (gallivm_debug & GALLIVM_DEBUG_IR) { 1407 lp_debug_dump_value(variant->function_elts); 1408 debug_printf("\n"); 1409 } 1410 1411 code = LLVMGetPointerToGlobal(llvm->draw->engine, variant->function_elts); 1412 variant->jit_func_elts = (draw_jit_vert_func_elts)pointer_to_func(code); 1413 1414 if (gallivm_debug & GALLIVM_DEBUG_ASM) { 1415 lp_disassemble(code); 1416 } 1417 lp_func_delete_body(variant->function_elts); 1418} 1419 1420 1421struct draw_llvm_variant_key * 1422draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store) 1423{ 1424 unsigned i; 1425 struct draw_llvm_variant_key *key; 1426 struct lp_sampler_static_state *sampler; 1427 1428 key = (struct draw_llvm_variant_key *)store; 1429 1430 /* Presumably all variants of the shader should have the same 1431 * number of vertex elements - ie the number of shader inputs. 1432 */ 1433 key->nr_vertex_elements = llvm->draw->pt.nr_vertex_elements; 1434 1435 /* will have to rig this up properly later */ 1436 key->clip_xy = llvm->draw->clip_xy; 1437 key->clip_z = llvm->draw->clip_z; 1438 key->clip_user = llvm->draw->clip_user; 1439 key->bypass_viewport = llvm->draw->identity_viewport; 1440 key->clip_halfz = !llvm->draw->rasterizer->gl_rasterization_rules; 1441 key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE); 1442 key->nr_planes = llvm->draw->nr_planes; 1443 key->pad = 0; 1444 1445 /* All variants of this shader will have the same value for 1446 * nr_samplers. Not yet trying to compact away holes in the 1447 * sampler array. 1448 */ 1449 key->nr_samplers = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER] + 1; 1450 1451 sampler = draw_llvm_variant_key_samplers(key); 1452 1453 memcpy(key->vertex_element, 1454 llvm->draw->pt.vertex_element, 1455 sizeof(struct pipe_vertex_element) * key->nr_vertex_elements); 1456 1457 memset(sampler, 0, key->nr_samplers * sizeof *sampler); 1458 1459 for (i = 0 ; i < key->nr_samplers; i++) { 1460 lp_sampler_static_state(&sampler[i], 1461 llvm->draw->sampler_views[i], 1462 llvm->draw->samplers[i]); 1463 } 1464 1465 return key; 1466} 1467 1468void 1469draw_llvm_set_mapped_texture(struct draw_context *draw, 1470 unsigned sampler_idx, 1471 uint32_t width, uint32_t height, uint32_t depth, 1472 uint32_t last_level, 1473 uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS], 1474 uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS], 1475 const void *data[PIPE_MAX_TEXTURE_LEVELS]) 1476{ 1477 unsigned j; 1478 struct draw_jit_texture *jit_tex; 1479 1480 assert(sampler_idx < PIPE_MAX_VERTEX_SAMPLERS); 1481 1482 1483 jit_tex = &draw->llvm->jit_context.textures[sampler_idx]; 1484 1485 jit_tex->width = width; 1486 jit_tex->height = height; 1487 jit_tex->depth = depth; 1488 jit_tex->last_level = last_level; 1489 1490 for (j = 0; j <= last_level; j++) { 1491 jit_tex->data[j] = data[j]; 1492 jit_tex->row_stride[j] = row_stride[j]; 1493 jit_tex->img_stride[j] = img_stride[j]; 1494 } 1495} 1496 1497 1498void 1499draw_llvm_set_sampler_state(struct draw_context *draw) 1500{ 1501 unsigned i; 1502 1503 for (i = 0; i < draw->num_samplers; i++) { 1504 struct draw_jit_texture *jit_tex = &draw->llvm->jit_context.textures[i]; 1505 1506 if (draw->samplers[i]) { 1507 jit_tex->min_lod = draw->samplers[i]->min_lod; 1508 jit_tex->max_lod = draw->samplers[i]->max_lod; 1509 jit_tex->lod_bias = draw->samplers[i]->lod_bias; 1510 COPY_4V(jit_tex->border_color, draw->samplers[i]->border_color); 1511 } 1512 } 1513} 1514 1515 1516void 1517draw_llvm_destroy_variant(struct draw_llvm_variant *variant) 1518{ 1519 struct draw_llvm *llvm = variant->llvm; 1520 struct draw_context *draw = llvm->draw; 1521 1522 if (variant->function_elts) { 1523 if (variant->function_elts) 1524 LLVMFreeMachineCodeForFunction(draw->engine, 1525 variant->function_elts); 1526 LLVMDeleteFunction(variant->function_elts); 1527 } 1528 1529 if (variant->function) { 1530 if (variant->function) 1531 LLVMFreeMachineCodeForFunction(draw->engine, 1532 variant->function); 1533 LLVMDeleteFunction(variant->function); 1534 } 1535 1536 remove_from_list(&variant->list_item_local); 1537 variant->shader->variants_cached--; 1538 remove_from_list(&variant->list_item_global); 1539 llvm->nr_variants--; 1540 FREE(variant); 1541} 1542