1/* 2 * Copyright 2011 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Authors: Tom Stellard <thomas.stellard@amd.com> 24 * 25 */ 26#include "radeon_llvm.h" 27 28#include "gallivm/lp_bld_const.h" 29#include "gallivm/lp_bld_gather.h" 30#include "gallivm/lp_bld_flow.h" 31#include "gallivm/lp_bld_init.h" 32#include "gallivm/lp_bld_intr.h" 33#include "gallivm/lp_bld_swizzle.h" 34#include "tgsi/tgsi_info.h" 35#include "tgsi/tgsi_parse.h" 36#include "util/u_math.h" 37#include "util/u_memory.h" 38#include "util/u_debug.h" 39 40#include <llvm-c/Core.h> 41#include <llvm-c/Transforms/Scalar.h> 42 43static struct radeon_llvm_loop * get_current_loop(struct radeon_llvm_context * ctx) 44{ 45 return ctx->loop_depth > 0 ? ctx->loop + (ctx->loop_depth - 1) : NULL; 46} 47 48static struct radeon_llvm_branch * get_current_branch( 49 struct radeon_llvm_context * ctx) 50{ 51 return ctx->branch_depth > 0 ? 52 ctx->branch + (ctx->branch_depth - 1) : NULL; 53} 54 55unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan) 56{ 57 return (index * 4) + chan; 58} 59 60static LLVMValueRef emit_swizzle( 61 struct lp_build_tgsi_context * bld_base, 62 LLVMValueRef value, 63 unsigned swizzle_x, 64 unsigned swizzle_y, 65 unsigned swizzle_z, 66 unsigned swizzle_w) 67{ 68 LLVMValueRef swizzles[4]; 69 LLVMTypeRef i32t = 70 LLVMInt32TypeInContext(bld_base->base.gallivm->context); 71 72 swizzles[0] = LLVMConstInt(i32t, swizzle_x, 0); 73 swizzles[1] = LLVMConstInt(i32t, swizzle_y, 0); 74 swizzles[2] = LLVMConstInt(i32t, swizzle_z, 0); 75 swizzles[3] = LLVMConstInt(i32t, swizzle_w, 0); 76 77 return LLVMBuildShuffleVector(bld_base->base.gallivm->builder, 78 value, 79 LLVMGetUndef(LLVMTypeOf(value)), 80 LLVMConstVector(swizzles, 4), ""); 81} 82 83static LLVMValueRef 84emit_array_index( 85 struct lp_build_tgsi_soa_context *bld, 86 const struct tgsi_full_src_register *reg, 87 unsigned swizzle) 88{ 89 struct gallivm_state * gallivm = bld->bld_base.base.gallivm; 90 91 LLVMValueRef addr = LLVMBuildLoad(gallivm->builder, 92 bld->addr[reg->Indirect.Index][swizzle], ""); 93 LLVMValueRef offset = lp_build_const_int32(gallivm, reg->Register.Index); 94 LLVMValueRef hw_index = LLVMBuildAdd(gallivm->builder, addr, offset, ""); 95 LLVMValueRef soa_index = LLVMBuildMul(gallivm->builder, hw_index, 96 lp_build_const_int32(gallivm, 4), ""); 97 LLVMValueRef array_index = LLVMBuildAdd(gallivm->builder, soa_index, 98 lp_build_const_int32(gallivm, swizzle), ""); 99 100 return array_index; 101} 102 103static LLVMValueRef 104emit_fetch_immediate( 105 struct lp_build_tgsi_context *bld_base, 106 const struct tgsi_full_src_register *reg, 107 enum tgsi_opcode_type type, 108 unsigned swizzle) 109{ 110 LLVMTypeRef ctype; 111 LLVMContextRef ctx = bld_base->base.gallivm->context; 112 113 switch (type) { 114 case TGSI_TYPE_UNSIGNED: 115 case TGSI_TYPE_SIGNED: 116 ctype = LLVMInt32TypeInContext(ctx); 117 break; 118 case TGSI_TYPE_UNTYPED: 119 case TGSI_TYPE_FLOAT: 120 ctype = LLVMFloatTypeInContext(ctx); 121 break; 122 default: 123 ctype = 0; 124 break; 125 } 126 127 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); 128 return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype); 129} 130 131static LLVMValueRef 132emit_fetch_input( 133 struct lp_build_tgsi_context *bld_base, 134 const struct tgsi_full_src_register *reg, 135 enum tgsi_opcode_type type, 136 unsigned swizzle) 137{ 138 struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); 139 if (swizzle == ~0) { 140 LLVMValueRef values[TGSI_NUM_CHANNELS] = {}; 141 unsigned chan; 142 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 143 values[chan] = ctx->inputs[radeon_llvm_reg_index_soa( 144 reg->Register.Index, chan)]; 145 } 146 return lp_build_gather_values(bld_base->base.gallivm, values, 147 TGSI_NUM_CHANNELS); 148 } else { 149 return bitcast(bld_base, type, ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)]); 150 } 151} 152 153static LLVMValueRef 154emit_fetch_temporary( 155 struct lp_build_tgsi_context *bld_base, 156 const struct tgsi_full_src_register *reg, 157 enum tgsi_opcode_type type, 158 unsigned swizzle) 159{ 160 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); 161 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 162 if (swizzle == ~0) { 163 LLVMValueRef values[TGSI_NUM_CHANNELS] = {}; 164 unsigned chan; 165 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 166 values[chan] = emit_fetch_temporary(bld_base, reg, type, chan); 167 } 168 return lp_build_gather_values(bld_base->base.gallivm, values, 169 TGSI_NUM_CHANNELS); 170 } 171 172 if (reg->Register.Indirect) { 173 LLVMValueRef array_index = emit_array_index(bld, reg, swizzle); 174 LLVMValueRef ptr = LLVMBuildGEP(builder, bld->temps_array, &array_index, 175 1, ""); 176 return LLVMBuildLoad(builder, ptr, ""); 177 } else { 178 LLVMValueRef temp_ptr; 179 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle); 180 return bitcast(bld_base,type,LLVMBuildLoad(builder, temp_ptr, "")); 181 } 182} 183 184static LLVMValueRef 185emit_fetch_output( 186 struct lp_build_tgsi_context *bld_base, 187 const struct tgsi_full_src_register *reg, 188 enum tgsi_opcode_type type, 189 unsigned swizzle) 190{ 191 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); 192 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 193 if (reg->Register.Indirect) { 194 LLVMValueRef array_index = emit_array_index(bld, reg, swizzle); 195 LLVMValueRef ptr = LLVMBuildGEP(builder, bld->outputs_array, &array_index, 196 1, ""); 197 return LLVMBuildLoad(builder, ptr, ""); 198 } else { 199 LLVMValueRef temp_ptr; 200 temp_ptr = lp_get_output_ptr(bld, reg->Register.Index, swizzle); 201 return LLVMBuildLoad(builder, temp_ptr, ""); 202 } 203} 204 205static void emit_declaration( 206 struct lp_build_tgsi_context * bld_base, 207 const struct tgsi_full_declaration *decl) 208{ 209 struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); 210 switch(decl->Declaration.File) { 211 case TGSI_FILE_ADDRESS: 212 { 213 unsigned idx; 214 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) { 215 unsigned chan; 216 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 217 ctx->soa.addr[idx][chan] = lp_build_alloca( 218 &ctx->gallivm, 219 ctx->soa.bld_base.uint_bld.elem_type, ""); 220 } 221 } 222 break; 223 } 224 225 case TGSI_FILE_TEMPORARY: 226 lp_emit_declaration_soa(bld_base, decl); 227 break; 228 229 case TGSI_FILE_INPUT: 230 { 231 unsigned idx; 232 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) { 233 ctx->load_input(ctx, idx, decl); 234 } 235 } 236 break; 237 238 case TGSI_FILE_SYSTEM_VALUE: 239 { 240 unsigned idx; 241 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) { 242 ctx->load_system_value(ctx, idx, decl); 243 } 244 } 245 break; 246 247 case TGSI_FILE_OUTPUT: 248 { 249 unsigned idx; 250 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) { 251 unsigned chan; 252 assert(idx < RADEON_LLVM_MAX_OUTPUTS); 253 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 254 ctx->soa.outputs[idx][chan] = lp_build_alloca(&ctx->gallivm, 255 ctx->soa.bld_base.base.elem_type, ""); 256 } 257 } 258 259 ctx->output_reg_count = MAX2(ctx->output_reg_count, 260 decl->Range.Last + 1); 261 break; 262 } 263 264 default: 265 break; 266 } 267} 268 269static void 270emit_store( 271 struct lp_build_tgsi_context * bld_base, 272 const struct tgsi_full_instruction * inst, 273 const struct tgsi_opcode_info * info, 274 LLVMValueRef dst[4]) 275{ 276 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); 277 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 278 struct lp_build_context base = bld->bld_base.base; 279 const struct tgsi_full_dst_register *reg = &inst->Dst[0]; 280 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; 281 LLVMValueRef temp_ptr; 282 unsigned chan, chan_index; 283 boolean is_vec_store = FALSE; 284 if (dst[0]) { 285 LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0])); 286 is_vec_store = (k == LLVMVectorTypeKind); 287 } 288 289 if (is_vec_store) { 290 LLVMValueRef values[4] = {}; 291 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan) { 292 LLVMValueRef index = lp_build_const_int32(gallivm, chan); 293 values[chan] = LLVMBuildExtractElement(gallivm->builder, 294 dst[0], index, ""); 295 } 296 bld_base->emit_store(bld_base, inst, info, values); 297 return; 298 } 299 300 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 301 LLVMValueRef value = dst[chan_index]; 302 303 if (inst->Instruction.Saturate != TGSI_SAT_NONE) { 304 struct lp_build_emit_data clamp_emit_data; 305 306 memset(&clamp_emit_data, 0, sizeof(clamp_emit_data)); 307 clamp_emit_data.arg_count = 3; 308 clamp_emit_data.args[0] = value; 309 clamp_emit_data.args[2] = base.one; 310 311 switch(inst->Instruction.Saturate) { 312 case TGSI_SAT_ZERO_ONE: 313 clamp_emit_data.args[1] = base.zero; 314 break; 315 case TGSI_SAT_MINUS_PLUS_ONE: 316 clamp_emit_data.args[1] = LLVMConstReal( 317 base.elem_type, -1.0f); 318 break; 319 default: 320 assert(0); 321 } 322 value = lp_build_emit_llvm(bld_base, TGSI_OPCODE_CLAMP, 323 &clamp_emit_data); 324 } 325 326 switch(reg->Register.File) { 327 case TGSI_FILE_OUTPUT: 328 temp_ptr = bld->outputs[reg->Register.Index][chan_index]; 329 break; 330 331 case TGSI_FILE_TEMPORARY: 332 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index); 333 break; 334 335 default: 336 return; 337 } 338 339 value = bitcast(bld_base, TGSI_TYPE_FLOAT, value); 340 341 LLVMBuildStore(builder, value, temp_ptr); 342 } 343} 344 345static void bgnloop_emit( 346 const struct lp_build_tgsi_action * action, 347 struct lp_build_tgsi_context * bld_base, 348 struct lp_build_emit_data * emit_data) 349{ 350 struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); 351 struct gallivm_state * gallivm = bld_base->base.gallivm; 352 LLVMBasicBlockRef loop_block; 353 LLVMBasicBlockRef endloop_block; 354 endloop_block = LLVMAppendBasicBlockInContext(gallivm->context, 355 ctx->main_fn, "ENDLOOP"); 356 loop_block = LLVMInsertBasicBlockInContext(gallivm->context, 357 endloop_block, "LOOP"); 358 LLVMBuildBr(gallivm->builder, loop_block); 359 LLVMPositionBuilderAtEnd(gallivm->builder, loop_block); 360 ctx->loop_depth++; 361 ctx->loop[ctx->loop_depth - 1].loop_block = loop_block; 362 ctx->loop[ctx->loop_depth - 1].endloop_block = endloop_block; 363} 364 365static void brk_emit( 366 const struct lp_build_tgsi_action * action, 367 struct lp_build_tgsi_context * bld_base, 368 struct lp_build_emit_data * emit_data) 369{ 370 struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); 371 struct gallivm_state * gallivm = bld_base->base.gallivm; 372 struct radeon_llvm_loop * current_loop = get_current_loop(ctx); 373 374 LLVMBuildBr(gallivm->builder, current_loop->endloop_block); 375} 376 377static void cont_emit( 378 const struct lp_build_tgsi_action * action, 379 struct lp_build_tgsi_context * bld_base, 380 struct lp_build_emit_data * emit_data) 381{ 382 struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); 383 struct gallivm_state * gallivm = bld_base->base.gallivm; 384 struct radeon_llvm_loop * current_loop = get_current_loop(ctx); 385 386 LLVMBuildBr(gallivm->builder, current_loop->loop_block); 387} 388 389static void else_emit( 390 const struct lp_build_tgsi_action * action, 391 struct lp_build_tgsi_context * bld_base, 392 struct lp_build_emit_data * emit_data) 393{ 394 struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); 395 struct gallivm_state * gallivm = bld_base->base.gallivm; 396 struct radeon_llvm_branch * current_branch = get_current_branch(ctx); 397 LLVMBasicBlockRef current_block = LLVMGetInsertBlock(gallivm->builder); 398 399 /* We need to add a terminator to the current block if the previous 400 * instruction was an ENDIF.Example: 401 * IF 402 * [code] 403 * IF 404 * [code] 405 * ELSE 406 * [code] 407 * ENDIF <-- 408 * ELSE<-- 409 * [code] 410 * ENDIF 411 */ 412 413 if (current_block != current_branch->if_block) { 414 LLVMBuildBr(gallivm->builder, current_branch->endif_block); 415 } 416 if (!LLVMGetBasicBlockTerminator(current_branch->if_block)) { 417 LLVMBuildBr(gallivm->builder, current_branch->endif_block); 418 } 419 current_branch->has_else = 1; 420 LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->else_block); 421} 422 423static void endif_emit( 424 const struct lp_build_tgsi_action * action, 425 struct lp_build_tgsi_context * bld_base, 426 struct lp_build_emit_data * emit_data) 427{ 428 struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); 429 struct gallivm_state * gallivm = bld_base->base.gallivm; 430 struct radeon_llvm_branch * current_branch = get_current_branch(ctx); 431 LLVMBasicBlockRef current_block = LLVMGetInsertBlock(gallivm->builder); 432 433 /* If we have consecutive ENDIF instructions, then the first ENDIF 434 * will not have a terminator, so we need to add one. */ 435 if (current_block != current_branch->if_block 436 && current_block != current_branch->else_block 437 && !LLVMGetBasicBlockTerminator(current_block)) { 438 439 LLVMBuildBr(gallivm->builder, current_branch->endif_block); 440 } 441 if (!LLVMGetBasicBlockTerminator(current_branch->else_block)) { 442 LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->else_block); 443 LLVMBuildBr(gallivm->builder, current_branch->endif_block); 444 } 445 446 if (!LLVMGetBasicBlockTerminator(current_branch->if_block)) { 447 LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->if_block); 448 LLVMBuildBr(gallivm->builder, current_branch->endif_block); 449 } 450 451 LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->endif_block); 452 ctx->branch_depth--; 453} 454 455static void endloop_emit( 456 const struct lp_build_tgsi_action * action, 457 struct lp_build_tgsi_context * bld_base, 458 struct lp_build_emit_data * emit_data) 459{ 460 struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); 461 struct gallivm_state * gallivm = bld_base->base.gallivm; 462 struct radeon_llvm_loop * current_loop = get_current_loop(ctx); 463 464 if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(gallivm->builder))) { 465 LLVMBuildBr(gallivm->builder, current_loop->loop_block); 466 } 467 468 LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->endloop_block); 469 ctx->loop_depth--; 470} 471 472static void if_emit( 473 const struct lp_build_tgsi_action * action, 474 struct lp_build_tgsi_context * bld_base, 475 struct lp_build_emit_data * emit_data) 476{ 477 struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); 478 struct gallivm_state * gallivm = bld_base->base.gallivm; 479 LLVMValueRef cond; 480 LLVMBasicBlockRef if_block, else_block, endif_block; 481 482 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, 483 bitcast(bld_base, TGSI_TYPE_UNSIGNED, emit_data->args[0]), 484 bld_base->int_bld.zero, ""); 485 486 endif_block = LLVMAppendBasicBlockInContext(gallivm->context, 487 ctx->main_fn, "ENDIF"); 488 if_block = LLVMInsertBasicBlockInContext(gallivm->context, 489 endif_block, "IF"); 490 else_block = LLVMInsertBasicBlockInContext(gallivm->context, 491 endif_block, "ELSE"); 492 LLVMBuildCondBr(gallivm->builder, cond, if_block, else_block); 493 LLVMPositionBuilderAtEnd(gallivm->builder, if_block); 494 495 ctx->branch_depth++; 496 ctx->branch[ctx->branch_depth - 1].endif_block = endif_block; 497 ctx->branch[ctx->branch_depth - 1].if_block = if_block; 498 ctx->branch[ctx->branch_depth - 1].else_block = else_block; 499 ctx->branch[ctx->branch_depth - 1].has_else = 0; 500} 501 502static void kil_emit( 503 const struct lp_build_tgsi_action * action, 504 struct lp_build_tgsi_context * bld_base, 505 struct lp_build_emit_data * emit_data) 506{ 507 unsigned i; 508 for (i = 0; i < emit_data->arg_count; i++) { 509 emit_data->output[i] = lp_build_intrinsic_unary( 510 bld_base->base.gallivm->builder, 511 action->intr_name, 512 emit_data->dst_type, emit_data->args[i]); 513 } 514} 515 516 517static void emit_prepare_cube_coords( 518 struct lp_build_tgsi_context * bld_base, 519 struct lp_build_emit_data * emit_data) 520{ 521 boolean shadowcube = (emit_data->inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE); 522 struct gallivm_state * gallivm = bld_base->base.gallivm; 523 LLVMBuilderRef builder = gallivm->builder; 524 LLVMTypeRef type = bld_base->base.elem_type; 525 LLVMValueRef coords[4]; 526 LLVMValueRef mad_args[3]; 527 unsigned i, cnt; 528 529 LLVMValueRef v = build_intrinsic(builder, "llvm.AMDGPU.cube", 530 LLVMVectorType(type, 4), 531 &emit_data->args[0],1, LLVMReadNoneAttribute); 532 533 /* save src.w for shadow cube */ 534 cnt = shadowcube ? 3 : 4; 535 536 for (i = 0; i < cnt; ++i) { 537 LLVMValueRef idx = lp_build_const_int32(gallivm, i); 538 coords[i] = LLVMBuildExtractElement(builder, v, idx, ""); 539 } 540 541 coords[2] = build_intrinsic(builder, "llvm.AMDIL.fabs.", 542 type, &coords[2], 1, LLVMReadNoneAttribute); 543 coords[2] = build_intrinsic(builder, "llvm.AMDGPU.rcp", 544 type, &coords[2], 1, LLVMReadNoneAttribute); 545 546 mad_args[1] = coords[2]; 547 mad_args[2] = LLVMConstReal(type, 1.5); 548 549 mad_args[0] = coords[0]; 550 coords[0] = build_intrinsic(builder, "llvm.AMDIL.mad.", 551 type, mad_args, 3, LLVMReadNoneAttribute); 552 553 mad_args[0] = coords[1]; 554 coords[1] = build_intrinsic(builder, "llvm.AMDIL.mad.", 555 type, mad_args, 3, LLVMReadNoneAttribute); 556 557 /* apply yxwy swizzle to cooords */ 558 coords[2] = coords[3]; 559 coords[3] = coords[1]; 560 coords[1] = coords[0]; 561 coords[0] = coords[3]; 562 563 emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm, 564 coords, 4); 565} 566 567static void txd_fetch_args( 568 struct lp_build_tgsi_context * bld_base, 569 struct lp_build_emit_data * emit_data) 570{ 571 const struct tgsi_full_instruction * inst = emit_data->inst; 572 573 LLVMValueRef coords[4]; 574 unsigned chan, src; 575 for (src = 0; src < 3; src++) { 576 for (chan = 0; chan < 4; chan++) 577 coords[chan] = lp_build_emit_fetch(bld_base, inst, src, chan); 578 579 emit_data->args[src] = lp_build_gather_values(bld_base->base.gallivm, 580 coords, 4); 581 } 582 emit_data->arg_count = 3; 583 emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4); 584} 585 586 587static void txp_fetch_args( 588 struct lp_build_tgsi_context * bld_base, 589 struct lp_build_emit_data * emit_data) 590{ 591 const struct tgsi_full_instruction * inst = emit_data->inst; 592 LLVMValueRef src_w; 593 unsigned chan; 594 LLVMValueRef coords[4]; 595 596 emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4); 597 src_w = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W); 598 599 for (chan = 0; chan < 3; chan++ ) { 600 LLVMValueRef arg = lp_build_emit_fetch(bld_base, 601 emit_data->inst, 0, chan); 602 coords[chan] = lp_build_emit_llvm_binary(bld_base, 603 TGSI_OPCODE_DIV, arg, src_w); 604 } 605 coords[3] = bld_base->base.one; 606 emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm, 607 coords, 4); 608 emit_data->arg_count = 1; 609 610 if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE || 611 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) && 612 inst->Instruction.Opcode != TGSI_OPCODE_TXQ) { 613 emit_prepare_cube_coords(bld_base, emit_data); 614 } 615} 616 617static void tex_fetch_args( 618 struct lp_build_tgsi_context * bld_base, 619 struct lp_build_emit_data * emit_data) 620{ 621 /* XXX: lp_build_swizzle_aos() was failing with wrong arg types, 622 * when we used CHAN_ALL. We should be able to get this to work, 623 * but for now we will swizzle it ourselves 624 emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, 625 0, CHAN_ALL); 626 627 */ 628 629 const struct tgsi_full_instruction * inst = emit_data->inst; 630 631 LLVMValueRef coords[4]; 632 unsigned chan; 633 for (chan = 0; chan < 4; chan++) { 634 coords[chan] = lp_build_emit_fetch(bld_base, inst, 0, chan); 635 } 636 637 emit_data->arg_count = 1; 638 emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm, 639 coords, 4); 640 emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4); 641 642 if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE || 643 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) && 644 inst->Instruction.Opcode != TGSI_OPCODE_TXQ) { 645 emit_prepare_cube_coords(bld_base, emit_data); 646 } 647} 648 649static void txf_fetch_args( 650 struct lp_build_tgsi_context * bld_base, 651 struct lp_build_emit_data * emit_data) 652{ 653 const struct tgsi_full_instruction * inst = emit_data->inst; 654 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); 655 const struct tgsi_texture_offset * off = inst->TexOffsets; 656 LLVMTypeRef offset_type = bld_base->int_bld.elem_type; 657 658 /* fetch tex coords */ 659 tex_fetch_args(bld_base, emit_data); 660 661 /* fetch tex offsets */ 662 if (inst->Texture.NumOffsets) { 663 assert(inst->Texture.NumOffsets == 1); 664 665 emit_data->args[1] = LLVMConstBitCast( 666 bld->immediates[off->Index][off->SwizzleX], 667 offset_type); 668 emit_data->args[2] = LLVMConstBitCast( 669 bld->immediates[off->Index][off->SwizzleY], 670 offset_type); 671 emit_data->args[3] = LLVMConstBitCast( 672 bld->immediates[off->Index][off->SwizzleZ], 673 offset_type); 674 } else { 675 emit_data->args[1] = bld_base->int_bld.zero; 676 emit_data->args[2] = bld_base->int_bld.zero; 677 emit_data->args[3] = bld_base->int_bld.zero; 678 } 679 680 emit_data->arg_count = 4; 681} 682 683static void emit_icmp( 684 const struct lp_build_tgsi_action * action, 685 struct lp_build_tgsi_context * bld_base, 686 struct lp_build_emit_data * emit_data) 687{ 688 unsigned pred; 689 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 690 LLVMContextRef context = bld_base->base.gallivm->context; 691 692 switch (emit_data->inst->Instruction.Opcode) { 693 case TGSI_OPCODE_USEQ: pred = LLVMIntEQ; break; 694 case TGSI_OPCODE_USNE: pred = LLVMIntNE; break; 695 case TGSI_OPCODE_USGE: pred = LLVMIntUGE; break; 696 case TGSI_OPCODE_USLT: pred = LLVMIntULT; break; 697 case TGSI_OPCODE_ISGE: pred = LLVMIntSGE; break; 698 case TGSI_OPCODE_ISLT: pred = LLVMIntSLT; break; 699 default: 700 assert(!"unknown instruction"); 701 } 702 703 LLVMValueRef v = LLVMBuildICmp(builder, pred, 704 emit_data->args[0], emit_data->args[1],""); 705 706 v = LLVMBuildSExtOrBitCast(builder, v, 707 LLVMInt32TypeInContext(context), ""); 708 709 emit_data->output[emit_data->chan] = v; 710} 711 712static void emit_cmp( 713 const struct lp_build_tgsi_action *action, 714 struct lp_build_tgsi_context * bld_base, 715 struct lp_build_emit_data * emit_data) 716{ 717 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 718 LLVMRealPredicate pred; 719 LLVMValueRef cond; 720 721 /* XXX I'm not sure whether to do unordered or ordered comparisons, 722 * but llvmpipe uses unordered comparisons, so for consistency we use 723 * unordered. (The authors of llvmpipe aren't sure about using 724 * unordered vs ordered comparisons either. 725 */ 726 switch (emit_data->inst->Instruction.Opcode) { 727 case TGSI_OPCODE_SGE: pred = LLVMRealUGE; break; 728 case TGSI_OPCODE_SEQ: pred = LLVMRealUEQ; break; 729 case TGSI_OPCODE_SLE: pred = LLVMRealULE; break; 730 case TGSI_OPCODE_SLT: pred = LLVMRealULT; break; 731 case TGSI_OPCODE_SNE: pred = LLVMRealUNE; break; 732 case TGSI_OPCODE_SGT: pred = LLVMRealUGT; break; 733 default: assert(!"unknown instruction"); 734 } 735 736 cond = LLVMBuildFCmp(builder, 737 pred, emit_data->args[0], emit_data->args[1], ""); 738 739 emit_data->output[emit_data->chan] = LLVMBuildSelect(builder, 740 cond, bld_base->base.one, bld_base->base.zero, ""); 741} 742 743static void emit_not( 744 const struct lp_build_tgsi_action * action, 745 struct lp_build_tgsi_context * bld_base, 746 struct lp_build_emit_data * emit_data) 747{ 748 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 749 LLVMValueRef v = bitcast(bld_base, TGSI_TYPE_UNSIGNED, 750 emit_data->args[0]); 751 emit_data->output[emit_data->chan] = LLVMBuildNot(builder, v, ""); 752} 753 754static void emit_and( 755 const struct lp_build_tgsi_action * action, 756 struct lp_build_tgsi_context * bld_base, 757 struct lp_build_emit_data * emit_data) 758{ 759 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 760 emit_data->output[emit_data->chan] = LLVMBuildAnd(builder, 761 emit_data->args[0], emit_data->args[1], ""); 762} 763 764static void emit_or( 765 const struct lp_build_tgsi_action * action, 766 struct lp_build_tgsi_context * bld_base, 767 struct lp_build_emit_data * emit_data) 768{ 769 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 770 emit_data->output[emit_data->chan] = LLVMBuildOr(builder, 771 emit_data->args[0], emit_data->args[1], ""); 772} 773 774static void emit_uadd( 775 const struct lp_build_tgsi_action * action, 776 struct lp_build_tgsi_context * bld_base, 777 struct lp_build_emit_data * emit_data) 778{ 779 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 780 emit_data->output[emit_data->chan] = LLVMBuildAdd(builder, 781 emit_data->args[0], emit_data->args[1], ""); 782} 783 784static void emit_udiv( 785 const struct lp_build_tgsi_action * action, 786 struct lp_build_tgsi_context * bld_base, 787 struct lp_build_emit_data * emit_data) 788{ 789 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 790 emit_data->output[emit_data->chan] = LLVMBuildUDiv(builder, 791 emit_data->args[0], emit_data->args[1], ""); 792} 793 794static void emit_idiv( 795 const struct lp_build_tgsi_action * action, 796 struct lp_build_tgsi_context * bld_base, 797 struct lp_build_emit_data * emit_data) 798{ 799 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 800 emit_data->output[emit_data->chan] = LLVMBuildSDiv(builder, 801 emit_data->args[0], emit_data->args[1], ""); 802} 803 804static void emit_mod( 805 const struct lp_build_tgsi_action * action, 806 struct lp_build_tgsi_context * bld_base, 807 struct lp_build_emit_data * emit_data) 808{ 809 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 810 emit_data->output[emit_data->chan] = LLVMBuildSRem(builder, 811 emit_data->args[0], emit_data->args[1], ""); 812} 813 814static void emit_umod( 815 const struct lp_build_tgsi_action * action, 816 struct lp_build_tgsi_context * bld_base, 817 struct lp_build_emit_data * emit_data) 818{ 819 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 820 emit_data->output[emit_data->chan] = LLVMBuildURem(builder, 821 emit_data->args[0], emit_data->args[1], ""); 822} 823 824static void emit_shl( 825 const struct lp_build_tgsi_action * action, 826 struct lp_build_tgsi_context * bld_base, 827 struct lp_build_emit_data * emit_data) 828{ 829 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 830 emit_data->output[emit_data->chan] = LLVMBuildShl(builder, 831 emit_data->args[0], emit_data->args[1], ""); 832} 833 834static void emit_ushr( 835 const struct lp_build_tgsi_action * action, 836 struct lp_build_tgsi_context * bld_base, 837 struct lp_build_emit_data * emit_data) 838{ 839 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 840 emit_data->output[emit_data->chan] = LLVMBuildLShr(builder, 841 emit_data->args[0], emit_data->args[1], ""); 842} 843static void emit_ishr( 844 const struct lp_build_tgsi_action * action, 845 struct lp_build_tgsi_context * bld_base, 846 struct lp_build_emit_data * emit_data) 847{ 848 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 849 emit_data->output[emit_data->chan] = LLVMBuildAShr(builder, 850 emit_data->args[0], emit_data->args[1], ""); 851} 852 853static void emit_xor( 854 const struct lp_build_tgsi_action * action, 855 struct lp_build_tgsi_context * bld_base, 856 struct lp_build_emit_data * emit_data) 857{ 858 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 859 emit_data->output[emit_data->chan] = LLVMBuildXor(builder, 860 emit_data->args[0], emit_data->args[1], ""); 861} 862 863static void emit_ssg( 864 const struct lp_build_tgsi_action * action, 865 struct lp_build_tgsi_context * bld_base, 866 struct lp_build_emit_data * emit_data) 867{ 868 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 869 870 LLVMValueRef cmp, val; 871 872 if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ISSG) { 873 cmp = LLVMBuildICmp(builder, LLVMIntSGT, emit_data->args[0], bld_base->int_bld.zero, ""); 874 val = LLVMBuildSelect(builder, cmp, bld_base->int_bld.one, emit_data->args[0], ""); 875 cmp = LLVMBuildICmp(builder, LLVMIntSGE, val, bld_base->int_bld.zero, ""); 876 val = LLVMBuildSelect(builder, cmp, val, LLVMConstInt(bld_base->int_bld.elem_type, -1, true), ""); 877 } else { // float SSG 878 cmp = LLVMBuildFCmp(builder, LLVMRealUGT, emit_data->args[0], bld_base->int_bld.zero, ""); 879 val = LLVMBuildSelect(builder, cmp, bld_base->base.one, emit_data->args[0], ""); 880 cmp = LLVMBuildFCmp(builder, LLVMRealUGE, val, bld_base->base.zero, ""); 881 val = LLVMBuildSelect(builder, cmp, val, LLVMConstReal(bld_base->base.elem_type, -1), ""); 882 } 883 884 emit_data->output[emit_data->chan] = val; 885} 886 887static void emit_ineg( 888 const struct lp_build_tgsi_action * action, 889 struct lp_build_tgsi_context * bld_base, 890 struct lp_build_emit_data * emit_data) 891{ 892 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 893 emit_data->output[emit_data->chan] = LLVMBuildNeg(builder, 894 emit_data->args[0], ""); 895} 896 897static void emit_f2i( 898 const struct lp_build_tgsi_action * action, 899 struct lp_build_tgsi_context * bld_base, 900 struct lp_build_emit_data * emit_data) 901{ 902 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 903 emit_data->output[emit_data->chan] = LLVMBuildFPToSI(builder, 904 emit_data->args[0], bld_base->int_bld.elem_type, ""); 905} 906 907static void emit_f2u( 908 const struct lp_build_tgsi_action * action, 909 struct lp_build_tgsi_context * bld_base, 910 struct lp_build_emit_data * emit_data) 911{ 912 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 913 emit_data->output[emit_data->chan] = LLVMBuildFPToUI(builder, 914 emit_data->args[0], bld_base->uint_bld.elem_type, ""); 915} 916 917static void emit_i2f( 918 const struct lp_build_tgsi_action * action, 919 struct lp_build_tgsi_context * bld_base, 920 struct lp_build_emit_data * emit_data) 921{ 922 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 923 emit_data->output[emit_data->chan] = LLVMBuildSIToFP(builder, 924 emit_data->args[0], bld_base->base.elem_type, ""); 925} 926 927static void emit_u2f( 928 const struct lp_build_tgsi_action * action, 929 struct lp_build_tgsi_context * bld_base, 930 struct lp_build_emit_data * emit_data) 931{ 932 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 933 emit_data->output[emit_data->chan] = LLVMBuildUIToFP(builder, 934 emit_data->args[0], bld_base->base.elem_type, ""); 935} 936 937static void emit_immediate(struct lp_build_tgsi_context * bld_base, 938 const struct tgsi_full_immediate *imm) 939{ 940 unsigned i; 941 struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); 942 943 for (i = 0; i < 4; ++i) { 944 ctx->soa.immediates[ctx->soa.num_immediates][i] = 945 LLVMConstInt(bld_base->uint_bld.elem_type, imm->u[i].Uint, false ); 946 } 947 948 ctx->soa.num_immediates++; 949} 950 951LLVMValueRef 952build_intrinsic(LLVMBuilderRef builder, 953 const char *name, 954 LLVMTypeRef ret_type, 955 LLVMValueRef *args, 956 unsigned num_args, 957 LLVMAttribute attr) 958{ 959 LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder))); 960 LLVMValueRef function; 961 962 function = LLVMGetNamedFunction(module, name); 963 if(!function) { 964 LLVMTypeRef arg_types[LP_MAX_FUNC_ARGS]; 965 unsigned i; 966 967 assert(num_args <= LP_MAX_FUNC_ARGS); 968 969 for(i = 0; i < num_args; ++i) { 970 assert(args[i]); 971 arg_types[i] = LLVMTypeOf(args[i]); 972 } 973 974 function = lp_declare_intrinsic(module, name, ret_type, arg_types, num_args); 975 976 if (attr) 977 LLVMAddFunctionAttr(function, attr); 978 } 979 980 return LLVMBuildCall(builder, function, args, num_args, ""); 981} 982 983void 984build_tgsi_intrinsic_nomem( 985 const struct lp_build_tgsi_action * action, 986 struct lp_build_tgsi_context * bld_base, 987 struct lp_build_emit_data * emit_data) 988{ 989 struct lp_build_context * base = &bld_base->base; 990 emit_data->output[emit_data->chan] = build_intrinsic( 991 base->gallivm->builder, action->intr_name, 992 emit_data->dst_type, emit_data->args, 993 emit_data->arg_count, LLVMReadNoneAttribute); 994} 995 996void radeon_llvm_context_init(struct radeon_llvm_context * ctx) 997{ 998 struct lp_type type; 999 LLVMTypeRef main_fn_type; 1000 LLVMBasicBlockRef main_fn_body; 1001 1002 /* Initialize the gallivm object: 1003 * We are only using the module, context, and builder fields of this struct. 1004 * This should be enough for us to be able to pass our gallivm struct to the 1005 * helper functions in the gallivm module. 1006 */ 1007 memset(&ctx->gallivm, 0, sizeof (ctx->gallivm)); 1008 memset(&ctx->soa, 0, sizeof(ctx->soa)); 1009 ctx->gallivm.context = LLVMContextCreate(); 1010 ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi", 1011 ctx->gallivm.context); 1012 ctx->gallivm.builder = LLVMCreateBuilderInContext(ctx->gallivm.context); 1013 1014 /* Setup the module */ 1015 main_fn_type = LLVMFunctionType(LLVMVoidTypeInContext(ctx->gallivm.context), 1016 NULL, 0, 0); 1017 ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, "main", main_fn_type); 1018 main_fn_body = LLVMAppendBasicBlockInContext(ctx->gallivm.context, 1019 ctx->main_fn, "main_body"); 1020 LLVMPositionBuilderAtEnd(ctx->gallivm.builder, main_fn_body); 1021 1022 ctx->store_output_intr = "llvm.AMDGPU.store.output."; 1023 ctx->swizzle_intr = "llvm.AMDGPU.swizzle"; 1024 struct lp_build_tgsi_context * bld_base = &ctx->soa.bld_base; 1025 1026 /* XXX: We need to revisit this.I think the correct way to do this is 1027 * to use length = 4 here and use the elem_bld for everything. */ 1028 type.floating = TRUE; 1029 type.sign = TRUE; 1030 type.width = 32; 1031 type.length = 1; 1032 1033 lp_build_context_init(&bld_base->base, &ctx->gallivm, type); 1034 lp_build_context_init(&ctx->soa.bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type)); 1035 lp_build_context_init(&ctx->soa.bld_base.int_bld, &ctx->gallivm, lp_int_type(type)); 1036 1037 bld_base->soa = 1; 1038 bld_base->emit_store = emit_store; 1039 bld_base->emit_swizzle = emit_swizzle; 1040 bld_base->emit_declaration = emit_declaration; 1041 bld_base->emit_immediate = emit_immediate; 1042 1043 bld_base->emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate; 1044 bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input; 1045 bld_base->emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary; 1046 bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = emit_fetch_output; 1047 1048 /* Allocate outputs */ 1049 ctx->soa.outputs = ctx->outputs; 1050 1051 /* XXX: Is there a better way to initialize all this ? */ 1052 1053 lp_set_default_actions(bld_base); 1054 1055 bld_base->op_actions[TGSI_OPCODE_IABS].emit = build_tgsi_intrinsic_nomem; 1056 bld_base->op_actions[TGSI_OPCODE_IABS].intr_name = "llvm.AMDIL.abs."; 1057 bld_base->op_actions[TGSI_OPCODE_NOT].emit = emit_not; 1058 bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and; 1059 bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor; 1060 bld_base->op_actions[TGSI_OPCODE_OR].emit = emit_or; 1061 bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd; 1062 bld_base->op_actions[TGSI_OPCODE_UDIV].emit = emit_udiv; 1063 bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv; 1064 bld_base->op_actions[TGSI_OPCODE_MOD].emit = emit_mod; 1065 bld_base->op_actions[TGSI_OPCODE_UMOD].emit = emit_umod; 1066 bld_base->op_actions[TGSI_OPCODE_INEG].emit = emit_ineg; 1067 bld_base->op_actions[TGSI_OPCODE_SHL].emit = emit_shl; 1068 bld_base->op_actions[TGSI_OPCODE_ISHR].emit = emit_ishr; 1069 bld_base->op_actions[TGSI_OPCODE_USHR].emit = emit_ushr; 1070 bld_base->op_actions[TGSI_OPCODE_SSG].emit = emit_ssg; 1071 bld_base->op_actions[TGSI_OPCODE_ISSG].emit = emit_ssg; 1072 bld_base->op_actions[TGSI_OPCODE_I2F].emit = emit_i2f; 1073 bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f; 1074 bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i; 1075 bld_base->op_actions[TGSI_OPCODE_F2U].emit = emit_f2u; 1076 bld_base->op_actions[TGSI_OPCODE_DDX].intr_name = "llvm.AMDGPU.ddx"; 1077 bld_base->op_actions[TGSI_OPCODE_DDX].fetch_args = tex_fetch_args; 1078 bld_base->op_actions[TGSI_OPCODE_DDY].intr_name = "llvm.AMDGPU.ddy"; 1079 bld_base->op_actions[TGSI_OPCODE_DDY].fetch_args = tex_fetch_args; 1080 bld_base->op_actions[TGSI_OPCODE_USEQ].emit = emit_icmp; 1081 bld_base->op_actions[TGSI_OPCODE_USGE].emit = emit_icmp; 1082 bld_base->op_actions[TGSI_OPCODE_USLT].emit = emit_icmp; 1083 bld_base->op_actions[TGSI_OPCODE_USNE].emit = emit_icmp; 1084 bld_base->op_actions[TGSI_OPCODE_ISGE].emit = emit_icmp; 1085 bld_base->op_actions[TGSI_OPCODE_ISLT].emit = emit_icmp; 1086 bld_base->op_actions[TGSI_OPCODE_ROUND].emit = build_tgsi_intrinsic_nomem; 1087 bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.AMDIL.round.nearest."; 1088 bld_base->op_actions[TGSI_OPCODE_MIN].emit = build_tgsi_intrinsic_nomem; 1089 bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.AMDIL.min."; 1090 bld_base->op_actions[TGSI_OPCODE_MAX].emit = build_tgsi_intrinsic_nomem; 1091 bld_base->op_actions[TGSI_OPCODE_MAX].intr_name = "llvm.AMDIL.max."; 1092 bld_base->op_actions[TGSI_OPCODE_IMIN].emit = build_tgsi_intrinsic_nomem; 1093 bld_base->op_actions[TGSI_OPCODE_IMIN].intr_name = "llvm.AMDGPU.imin"; 1094 bld_base->op_actions[TGSI_OPCODE_IMAX].emit = build_tgsi_intrinsic_nomem; 1095 bld_base->op_actions[TGSI_OPCODE_IMAX].intr_name = "llvm.AMDGPU.imax"; 1096 bld_base->op_actions[TGSI_OPCODE_UMIN].emit = build_tgsi_intrinsic_nomem; 1097 bld_base->op_actions[TGSI_OPCODE_UMIN].intr_name = "llvm.AMDGPU.umin"; 1098 bld_base->op_actions[TGSI_OPCODE_UMAX].emit = build_tgsi_intrinsic_nomem; 1099 bld_base->op_actions[TGSI_OPCODE_UMAX].intr_name = "llvm.AMDGPU.umax"; 1100 bld_base->op_actions[TGSI_OPCODE_TXF].fetch_args = txf_fetch_args; 1101 bld_base->op_actions[TGSI_OPCODE_TXF].intr_name = "llvm.AMDGPU.txf"; 1102 bld_base->op_actions[TGSI_OPCODE_TXQ].fetch_args = tex_fetch_args; 1103 bld_base->op_actions[TGSI_OPCODE_TXQ].intr_name = "llvm.AMDGPU.txq"; 1104 bld_base->op_actions[TGSI_OPCODE_CEIL].emit = build_tgsi_intrinsic_nomem; 1105 bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "llvm.AMDIL.round.posinf."; 1106 1107 1108 1109 bld_base->op_actions[TGSI_OPCODE_ABS].emit = build_tgsi_intrinsic_nomem; 1110 bld_base->op_actions[TGSI_OPCODE_ABS].intr_name = "llvm.AMDIL.fabs."; 1111 bld_base->op_actions[TGSI_OPCODE_ARL].emit = build_tgsi_intrinsic_nomem; 1112 bld_base->op_actions[TGSI_OPCODE_ARL].intr_name = "llvm.AMDGPU.arl"; 1113 bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit; 1114 bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit; 1115 bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit; 1116 bld_base->op_actions[TGSI_OPCODE_CLAMP].emit = build_tgsi_intrinsic_nomem; 1117 bld_base->op_actions[TGSI_OPCODE_CLAMP].intr_name = "llvm.AMDIL.clamp."; 1118 bld_base->op_actions[TGSI_OPCODE_CMP].emit = build_tgsi_intrinsic_nomem; 1119 bld_base->op_actions[TGSI_OPCODE_CMP].intr_name = "llvm.AMDGPU.cndlt"; 1120 bld_base->op_actions[TGSI_OPCODE_COS].emit = build_tgsi_intrinsic_nomem; 1121 bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.AMDGPU.cos"; 1122 bld_base->op_actions[TGSI_OPCODE_DIV].emit = build_tgsi_intrinsic_nomem; 1123 bld_base->op_actions[TGSI_OPCODE_DIV].intr_name = "llvm.AMDGPU.div"; 1124 bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit; 1125 bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit; 1126 bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit; 1127 bld_base->op_actions[TGSI_OPCODE_EX2].emit = build_tgsi_intrinsic_nomem; 1128 bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = "llvm.AMDIL.exp."; 1129 bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_nomem; 1130 bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "llvm.AMDGPU.floor"; 1131 bld_base->op_actions[TGSI_OPCODE_FRC].emit = build_tgsi_intrinsic_nomem; 1132 bld_base->op_actions[TGSI_OPCODE_FRC].intr_name = "llvm.AMDIL.fraction."; 1133 bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit; 1134 bld_base->op_actions[TGSI_OPCODE_KIL].emit = kil_emit; 1135 bld_base->op_actions[TGSI_OPCODE_KIL].intr_name = "llvm.AMDGPU.kill"; 1136 bld_base->op_actions[TGSI_OPCODE_KILP].emit = lp_build_tgsi_intrinsic; 1137 bld_base->op_actions[TGSI_OPCODE_KILP].intr_name = "llvm.AMDGPU.kilp"; 1138 bld_base->op_actions[TGSI_OPCODE_LG2].emit = build_tgsi_intrinsic_nomem; 1139 bld_base->op_actions[TGSI_OPCODE_LG2].intr_name = "llvm.AMDIL.log."; 1140 bld_base->op_actions[TGSI_OPCODE_LRP].emit = build_tgsi_intrinsic_nomem; 1141 bld_base->op_actions[TGSI_OPCODE_LRP].intr_name = "llvm.AMDGPU.lrp"; 1142 bld_base->op_actions[TGSI_OPCODE_MIN].emit = build_tgsi_intrinsic_nomem; 1143 bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.AMDIL.min."; 1144 bld_base->op_actions[TGSI_OPCODE_MAD].emit = build_tgsi_intrinsic_nomem; 1145 bld_base->op_actions[TGSI_OPCODE_MAD].intr_name = "llvm.AMDIL.mad."; 1146 bld_base->op_actions[TGSI_OPCODE_MAX].emit = build_tgsi_intrinsic_nomem; 1147 bld_base->op_actions[TGSI_OPCODE_MAX].intr_name = "llvm.AMDIL.max."; 1148 bld_base->op_actions[TGSI_OPCODE_MUL].emit = build_tgsi_intrinsic_nomem; 1149 bld_base->op_actions[TGSI_OPCODE_MUL].intr_name = "llvm.AMDGPU.mul"; 1150 bld_base->op_actions[TGSI_OPCODE_POW].emit = build_tgsi_intrinsic_nomem; 1151 bld_base->op_actions[TGSI_OPCODE_POW].intr_name = "llvm.AMDGPU.pow"; 1152 bld_base->op_actions[TGSI_OPCODE_RCP].emit = build_tgsi_intrinsic_nomem; 1153 bld_base->op_actions[TGSI_OPCODE_RCP].intr_name = "llvm.AMDGPU.rcp"; 1154 bld_base->op_actions[TGSI_OPCODE_SSG].emit = build_tgsi_intrinsic_nomem; 1155 bld_base->op_actions[TGSI_OPCODE_SSG].intr_name = "llvm.AMDGPU.ssg"; 1156 bld_base->op_actions[TGSI_OPCODE_SGE].emit = emit_cmp; 1157 bld_base->op_actions[TGSI_OPCODE_SEQ].emit = emit_cmp; 1158 bld_base->op_actions[TGSI_OPCODE_SLE].emit = emit_cmp; 1159 bld_base->op_actions[TGSI_OPCODE_SLT].emit = emit_cmp; 1160 bld_base->op_actions[TGSI_OPCODE_SNE].emit = emit_cmp; 1161 bld_base->op_actions[TGSI_OPCODE_SGT].emit = emit_cmp; 1162 bld_base->op_actions[TGSI_OPCODE_SIN].emit = build_tgsi_intrinsic_nomem; 1163 bld_base->op_actions[TGSI_OPCODE_SIN].intr_name = "llvm.AMDGPU.sin"; 1164 bld_base->op_actions[TGSI_OPCODE_TEX].fetch_args = tex_fetch_args; 1165 bld_base->op_actions[TGSI_OPCODE_TEX].intr_name = "llvm.AMDGPU.tex"; 1166 bld_base->op_actions[TGSI_OPCODE_TXB].fetch_args = tex_fetch_args; 1167 bld_base->op_actions[TGSI_OPCODE_TXB].intr_name = "llvm.AMDGPU.txb"; 1168 bld_base->op_actions[TGSI_OPCODE_TXD].fetch_args = txd_fetch_args; 1169 bld_base->op_actions[TGSI_OPCODE_TXD].intr_name = "llvm.AMDGPU.txd"; 1170 bld_base->op_actions[TGSI_OPCODE_TXL].fetch_args = tex_fetch_args; 1171 bld_base->op_actions[TGSI_OPCODE_TXL].intr_name = "llvm.AMDGPU.txl"; 1172 bld_base->op_actions[TGSI_OPCODE_TXP].fetch_args = txp_fetch_args; 1173 bld_base->op_actions[TGSI_OPCODE_TXP].intr_name = "llvm.AMDGPU.tex"; 1174 bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = build_tgsi_intrinsic_nomem; 1175 bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.AMDGPU.trunc"; 1176 1177 bld_base->rsq_action.emit = build_tgsi_intrinsic_nomem; 1178 bld_base->rsq_action.intr_name = "llvm.AMDGPU.rsq"; 1179} 1180 1181void radeon_llvm_finalize_module(struct radeon_llvm_context * ctx) 1182{ 1183 struct gallivm_state * gallivm = ctx->soa.bld_base.base.gallivm; 1184 /* End the main function with Return*/ 1185 LLVMBuildRetVoid(gallivm->builder); 1186 1187 /* Create the pass manager */ 1188 ctx->gallivm.passmgr = LLVMCreateFunctionPassManagerForModule( 1189 gallivm->module); 1190 1191 /* This pass should eliminate all the load and store instructions */ 1192 LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr); 1193 1194 /* Add some optimization passes */ 1195 LLVMAddScalarReplAggregatesPass(gallivm->passmgr); 1196 LLVMAddCFGSimplificationPass(gallivm->passmgr); 1197 1198 /* Run the passs */ 1199 LLVMRunFunctionPassManager(gallivm->passmgr, ctx->main_fn); 1200 1201 LLVMDisposeBuilder(gallivm->builder); 1202 LLVMDisposePassManager(gallivm->passmgr); 1203 1204} 1205 1206void radeon_llvm_dispose(struct radeon_llvm_context * ctx) 1207{ 1208 LLVMDisposeModule(ctx->soa.bld_base.base.gallivm->module); 1209 LLVMContextDispose(ctx->soa.bld_base.base.gallivm->context); 1210} 1211