1/* 2 * Copyright 2016 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23 24#include "si_shader_internal.h" 25#include "gallivm/lp_bld_const.h" 26#include "gallivm/lp_bld_intr.h" 27#include "gallivm/lp_bld_gather.h" 28#include "tgsi/tgsi_parse.h" 29 30static void kill_if_fetch_args(struct lp_build_tgsi_context *bld_base, 31 struct lp_build_emit_data *emit_data) 32{ 33 const struct tgsi_full_instruction *inst = emit_data->inst; 34 struct gallivm_state *gallivm = bld_base->base.gallivm; 35 LLVMBuilderRef builder = gallivm->builder; 36 unsigned i; 37 LLVMValueRef conds[TGSI_NUM_CHANNELS]; 38 39 for (i = 0; i < TGSI_NUM_CHANNELS; i++) { 40 LLVMValueRef value = lp_build_emit_fetch(bld_base, inst, 0, i); 41 conds[i] = LLVMBuildFCmp(builder, LLVMRealOLT, value, 42 bld_base->base.zero, ""); 43 } 44 45 /* Or the conditions together */ 46 for (i = TGSI_NUM_CHANNELS - 1; i > 0; i--) { 47 conds[i - 1] = LLVMBuildOr(builder, conds[i], conds[i - 1], ""); 48 } 49 50 emit_data->dst_type = LLVMVoidTypeInContext(gallivm->context); 51 emit_data->arg_count = 1; 52 emit_data->args[0] = LLVMBuildSelect(builder, conds[0], 53 lp_build_const_float(gallivm, -1.0f), 54 bld_base->base.zero, ""); 55} 56 57static void kil_emit(const struct lp_build_tgsi_action *action, 58 struct lp_build_tgsi_context *bld_base, 59 struct lp_build_emit_data *emit_data) 60{ 61 unsigned i; 62 for (i = 0; i < emit_data->arg_count; i++) { 63 emit_data->output[i] = lp_build_intrinsic_unary( 64 bld_base->base.gallivm->builder, 65 action->intr_name, 66 emit_data->dst_type, emit_data->args[i]); 67 } 68} 69 70static void emit_icmp(const struct lp_build_tgsi_action *action, 71 struct lp_build_tgsi_context *bld_base, 72 struct lp_build_emit_data *emit_data) 73{ 74 unsigned pred; 75 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 76 LLVMContextRef context = bld_base->base.gallivm->context; 77 78 switch (emit_data->inst->Instruction.Opcode) { 79 case TGSI_OPCODE_USEQ: 80 case TGSI_OPCODE_U64SEQ: pred = LLVMIntEQ; break; 81 case TGSI_OPCODE_USNE: 82 case TGSI_OPCODE_U64SNE: pred = LLVMIntNE; break; 83 case TGSI_OPCODE_USGE: 84 case TGSI_OPCODE_U64SGE: pred = LLVMIntUGE; break; 85 case TGSI_OPCODE_USLT: 86 case TGSI_OPCODE_U64SLT: pred = LLVMIntULT; break; 87 case TGSI_OPCODE_ISGE: 88 case TGSI_OPCODE_I64SGE: pred = LLVMIntSGE; break; 89 case TGSI_OPCODE_ISLT: 90 case TGSI_OPCODE_I64SLT: pred = LLVMIntSLT; break; 91 default: 92 assert(!"unknown instruction"); 93 pred = 0; 94 break; 95 } 96 97 LLVMValueRef v = LLVMBuildICmp(builder, pred, 98 emit_data->args[0], emit_data->args[1],""); 99 100 v = LLVMBuildSExtOrBitCast(builder, v, 101 LLVMInt32TypeInContext(context), ""); 102 103 emit_data->output[emit_data->chan] = v; 104} 105 106static void emit_ucmp(const struct lp_build_tgsi_action *action, 107 struct lp_build_tgsi_context *bld_base, 108 struct lp_build_emit_data *emit_data) 109{ 110 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 111 112 LLVMValueRef arg0 = LLVMBuildBitCast(builder, emit_data->args[0], 113 bld_base->uint_bld.elem_type, ""); 114 115 LLVMValueRef v = LLVMBuildICmp(builder, LLVMIntNE, arg0, 116 bld_base->uint_bld.zero, ""); 117 118 emit_data->output[emit_data->chan] = 119 LLVMBuildSelect(builder, v, emit_data->args[1], emit_data->args[2], ""); 120} 121 122static void emit_cmp(const struct lp_build_tgsi_action *action, 123 struct lp_build_tgsi_context *bld_base, 124 struct lp_build_emit_data *emit_data) 125{ 126 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 127 LLVMValueRef cond, *args = emit_data->args; 128 129 cond = LLVMBuildFCmp(builder, LLVMRealOLT, args[0], 130 bld_base->base.zero, ""); 131 132 emit_data->output[emit_data->chan] = 133 LLVMBuildSelect(builder, cond, args[1], args[2], ""); 134} 135 136static void emit_set_cond(const struct lp_build_tgsi_action *action, 137 struct lp_build_tgsi_context *bld_base, 138 struct lp_build_emit_data *emit_data) 139{ 140 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 141 LLVMRealPredicate pred; 142 LLVMValueRef cond; 143 144 /* Use ordered for everything but NE (which is usual for 145 * float comparisons) 146 */ 147 switch (emit_data->inst->Instruction.Opcode) { 148 case TGSI_OPCODE_SGE: pred = LLVMRealOGE; break; 149 case TGSI_OPCODE_SEQ: pred = LLVMRealOEQ; break; 150 case TGSI_OPCODE_SLE: pred = LLVMRealOLE; break; 151 case TGSI_OPCODE_SLT: pred = LLVMRealOLT; break; 152 case TGSI_OPCODE_SNE: pred = LLVMRealUNE; break; 153 case TGSI_OPCODE_SGT: pred = LLVMRealOGT; break; 154 default: assert(!"unknown instruction"); pred = 0; break; 155 } 156 157 cond = LLVMBuildFCmp(builder, 158 pred, emit_data->args[0], emit_data->args[1], ""); 159 160 emit_data->output[emit_data->chan] = LLVMBuildSelect(builder, 161 cond, bld_base->base.one, bld_base->base.zero, ""); 162} 163 164static void emit_fcmp(const struct lp_build_tgsi_action *action, 165 struct lp_build_tgsi_context *bld_base, 166 struct lp_build_emit_data *emit_data) 167{ 168 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 169 LLVMContextRef context = bld_base->base.gallivm->context; 170 LLVMRealPredicate pred; 171 172 /* Use ordered for everything but NE (which is usual for 173 * float comparisons) 174 */ 175 switch (emit_data->inst->Instruction.Opcode) { 176 case TGSI_OPCODE_FSEQ: pred = LLVMRealOEQ; break; 177 case TGSI_OPCODE_FSGE: pred = LLVMRealOGE; break; 178 case TGSI_OPCODE_FSLT: pred = LLVMRealOLT; break; 179 case TGSI_OPCODE_FSNE: pred = LLVMRealUNE; break; 180 default: assert(!"unknown instruction"); pred = 0; break; 181 } 182 183 LLVMValueRef v = LLVMBuildFCmp(builder, pred, 184 emit_data->args[0], emit_data->args[1],""); 185 186 v = LLVMBuildSExtOrBitCast(builder, v, 187 LLVMInt32TypeInContext(context), ""); 188 189 emit_data->output[emit_data->chan] = v; 190} 191 192static void emit_dcmp(const struct lp_build_tgsi_action *action, 193 struct lp_build_tgsi_context *bld_base, 194 struct lp_build_emit_data *emit_data) 195{ 196 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 197 LLVMContextRef context = bld_base->base.gallivm->context; 198 LLVMRealPredicate pred; 199 200 /* Use ordered for everything but NE (which is usual for 201 * float comparisons) 202 */ 203 switch (emit_data->inst->Instruction.Opcode) { 204 case TGSI_OPCODE_DSEQ: pred = LLVMRealOEQ; break; 205 case TGSI_OPCODE_DSGE: pred = LLVMRealOGE; break; 206 case TGSI_OPCODE_DSLT: pred = LLVMRealOLT; break; 207 case TGSI_OPCODE_DSNE: pred = LLVMRealUNE; break; 208 default: assert(!"unknown instruction"); pred = 0; break; 209 } 210 211 LLVMValueRef v = LLVMBuildFCmp(builder, pred, 212 emit_data->args[0], emit_data->args[1],""); 213 214 v = LLVMBuildSExtOrBitCast(builder, v, 215 LLVMInt32TypeInContext(context), ""); 216 217 emit_data->output[emit_data->chan] = v; 218} 219 220static void emit_not(const struct lp_build_tgsi_action *action, 221 struct lp_build_tgsi_context *bld_base, 222 struct lp_build_emit_data *emit_data) 223{ 224 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 225 LLVMValueRef v = bitcast(bld_base, TGSI_TYPE_UNSIGNED, 226 emit_data->args[0]); 227 emit_data->output[emit_data->chan] = LLVMBuildNot(builder, v, ""); 228} 229 230static void emit_arl(const struct lp_build_tgsi_action *action, 231 struct lp_build_tgsi_context *bld_base, 232 struct lp_build_emit_data *emit_data) 233{ 234 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 235 LLVMValueRef floor_index = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR, emit_data->args[0]); 236 emit_data->output[emit_data->chan] = LLVMBuildFPToSI(builder, 237 floor_index, bld_base->base.int_elem_type , ""); 238} 239 240static void emit_and(const struct lp_build_tgsi_action *action, 241 struct lp_build_tgsi_context *bld_base, 242 struct lp_build_emit_data *emit_data) 243{ 244 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 245 emit_data->output[emit_data->chan] = LLVMBuildAnd(builder, 246 emit_data->args[0], emit_data->args[1], ""); 247} 248 249static void emit_or(const struct lp_build_tgsi_action *action, 250 struct lp_build_tgsi_context *bld_base, 251 struct lp_build_emit_data *emit_data) 252{ 253 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 254 emit_data->output[emit_data->chan] = LLVMBuildOr(builder, 255 emit_data->args[0], emit_data->args[1], ""); 256} 257 258static void emit_uadd(const struct lp_build_tgsi_action *action, 259 struct lp_build_tgsi_context *bld_base, 260 struct lp_build_emit_data *emit_data) 261{ 262 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 263 emit_data->output[emit_data->chan] = LLVMBuildAdd(builder, 264 emit_data->args[0], emit_data->args[1], ""); 265} 266 267static void emit_udiv(const struct lp_build_tgsi_action *action, 268 struct lp_build_tgsi_context *bld_base, 269 struct lp_build_emit_data *emit_data) 270{ 271 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 272 emit_data->output[emit_data->chan] = LLVMBuildUDiv(builder, 273 emit_data->args[0], emit_data->args[1], ""); 274} 275 276static void emit_idiv(const struct lp_build_tgsi_action *action, 277 struct lp_build_tgsi_context *bld_base, 278 struct lp_build_emit_data *emit_data) 279{ 280 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 281 emit_data->output[emit_data->chan] = LLVMBuildSDiv(builder, 282 emit_data->args[0], emit_data->args[1], ""); 283} 284 285static void emit_mod(const struct lp_build_tgsi_action *action, 286 struct lp_build_tgsi_context *bld_base, 287 struct lp_build_emit_data *emit_data) 288{ 289 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 290 emit_data->output[emit_data->chan] = LLVMBuildSRem(builder, 291 emit_data->args[0], emit_data->args[1], ""); 292} 293 294static void emit_umod(const struct lp_build_tgsi_action *action, 295 struct lp_build_tgsi_context *bld_base, 296 struct lp_build_emit_data *emit_data) 297{ 298 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 299 emit_data->output[emit_data->chan] = LLVMBuildURem(builder, 300 emit_data->args[0], emit_data->args[1], ""); 301} 302 303static void emit_shl(const struct lp_build_tgsi_action *action, 304 struct lp_build_tgsi_context *bld_base, 305 struct lp_build_emit_data *emit_data) 306{ 307 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 308 emit_data->output[emit_data->chan] = LLVMBuildShl(builder, 309 emit_data->args[0], emit_data->args[1], ""); 310} 311 312static void emit_ushr(const struct lp_build_tgsi_action *action, 313 struct lp_build_tgsi_context *bld_base, 314 struct lp_build_emit_data *emit_data) 315{ 316 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 317 emit_data->output[emit_data->chan] = LLVMBuildLShr(builder, 318 emit_data->args[0], emit_data->args[1], ""); 319} 320static void emit_ishr(const struct lp_build_tgsi_action *action, 321 struct lp_build_tgsi_context *bld_base, 322 struct lp_build_emit_data *emit_data) 323{ 324 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 325 emit_data->output[emit_data->chan] = LLVMBuildAShr(builder, 326 emit_data->args[0], emit_data->args[1], ""); 327} 328 329static void emit_xor(const struct lp_build_tgsi_action *action, 330 struct lp_build_tgsi_context *bld_base, 331 struct lp_build_emit_data *emit_data) 332{ 333 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 334 emit_data->output[emit_data->chan] = LLVMBuildXor(builder, 335 emit_data->args[0], emit_data->args[1], ""); 336} 337 338static void emit_ssg(const struct lp_build_tgsi_action *action, 339 struct lp_build_tgsi_context *bld_base, 340 struct lp_build_emit_data *emit_data) 341{ 342 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 343 344 LLVMValueRef cmp, val; 345 346 if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_I64SSG) { 347 cmp = LLVMBuildICmp(builder, LLVMIntSGT, emit_data->args[0], bld_base->int64_bld.zero, ""); 348 val = LLVMBuildSelect(builder, cmp, bld_base->int64_bld.one, emit_data->args[0], ""); 349 cmp = LLVMBuildICmp(builder, LLVMIntSGE, val, bld_base->int64_bld.zero, ""); 350 val = LLVMBuildSelect(builder, cmp, val, LLVMConstInt(bld_base->int64_bld.elem_type, -1, true), ""); 351 } else if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ISSG) { 352 cmp = LLVMBuildICmp(builder, LLVMIntSGT, emit_data->args[0], bld_base->int_bld.zero, ""); 353 val = LLVMBuildSelect(builder, cmp, bld_base->int_bld.one, emit_data->args[0], ""); 354 cmp = LLVMBuildICmp(builder, LLVMIntSGE, val, bld_base->int_bld.zero, ""); 355 val = LLVMBuildSelect(builder, cmp, val, LLVMConstInt(bld_base->int_bld.elem_type, -1, true), ""); 356 } else { // float SSG 357 cmp = LLVMBuildFCmp(builder, LLVMRealOGT, emit_data->args[0], bld_base->base.zero, ""); 358 val = LLVMBuildSelect(builder, cmp, bld_base->base.one, emit_data->args[0], ""); 359 cmp = LLVMBuildFCmp(builder, LLVMRealOGE, val, bld_base->base.zero, ""); 360 val = LLVMBuildSelect(builder, cmp, val, LLVMConstReal(bld_base->base.elem_type, -1), ""); 361 } 362 363 emit_data->output[emit_data->chan] = val; 364} 365 366static void emit_ineg(const struct lp_build_tgsi_action *action, 367 struct lp_build_tgsi_context *bld_base, 368 struct lp_build_emit_data *emit_data) 369{ 370 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 371 emit_data->output[emit_data->chan] = LLVMBuildNeg(builder, 372 emit_data->args[0], ""); 373} 374 375static void emit_dneg(const struct lp_build_tgsi_action *action, 376 struct lp_build_tgsi_context *bld_base, 377 struct lp_build_emit_data *emit_data) 378{ 379 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 380 emit_data->output[emit_data->chan] = LLVMBuildFNeg(builder, 381 emit_data->args[0], ""); 382} 383 384static void emit_frac(const struct lp_build_tgsi_action *action, 385 struct lp_build_tgsi_context *bld_base, 386 struct lp_build_emit_data *emit_data) 387{ 388 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 389 char *intr; 390 391 if (emit_data->info->opcode == TGSI_OPCODE_FRC) 392 intr = "llvm.floor.f32"; 393 else if (emit_data->info->opcode == TGSI_OPCODE_DFRAC) 394 intr = "llvm.floor.f64"; 395 else { 396 assert(0); 397 return; 398 } 399 400 LLVMValueRef floor = lp_build_intrinsic(builder, intr, emit_data->dst_type, 401 &emit_data->args[0], 1, 402 LP_FUNC_ATTR_READNONE); 403 emit_data->output[emit_data->chan] = LLVMBuildFSub(builder, 404 emit_data->args[0], floor, ""); 405} 406 407static void emit_f2i(const struct lp_build_tgsi_action *action, 408 struct lp_build_tgsi_context *bld_base, 409 struct lp_build_emit_data *emit_data) 410{ 411 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 412 emit_data->output[emit_data->chan] = LLVMBuildFPToSI(builder, 413 emit_data->args[0], bld_base->int_bld.elem_type, ""); 414} 415 416static void emit_f2u(const struct lp_build_tgsi_action *action, 417 struct lp_build_tgsi_context *bld_base, 418 struct lp_build_emit_data *emit_data) 419{ 420 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 421 emit_data->output[emit_data->chan] = LLVMBuildFPToUI(builder, 422 emit_data->args[0], bld_base->uint_bld.elem_type, ""); 423} 424 425static void emit_i2f(const struct lp_build_tgsi_action *action, 426 struct lp_build_tgsi_context *bld_base, 427 struct lp_build_emit_data *emit_data) 428{ 429 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 430 emit_data->output[emit_data->chan] = LLVMBuildSIToFP(builder, 431 emit_data->args[0], bld_base->base.elem_type, ""); 432} 433 434static void emit_u2f(const struct lp_build_tgsi_action *action, 435 struct lp_build_tgsi_context *bld_base, 436 struct lp_build_emit_data *emit_data) 437{ 438 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 439 emit_data->output[emit_data->chan] = LLVMBuildUIToFP(builder, 440 emit_data->args[0], bld_base->base.elem_type, ""); 441} 442 443static void 444build_tgsi_intrinsic_nomem(const struct lp_build_tgsi_action *action, 445 struct lp_build_tgsi_context *bld_base, 446 struct lp_build_emit_data *emit_data) 447{ 448 struct lp_build_context *base = &bld_base->base; 449 emit_data->output[emit_data->chan] = 450 lp_build_intrinsic(base->gallivm->builder, action->intr_name, 451 emit_data->dst_type, emit_data->args, 452 emit_data->arg_count, LP_FUNC_ATTR_READNONE); 453} 454 455static void emit_bfi(const struct lp_build_tgsi_action *action, 456 struct lp_build_tgsi_context *bld_base, 457 struct lp_build_emit_data *emit_data) 458{ 459 struct gallivm_state *gallivm = bld_base->base.gallivm; 460 LLVMBuilderRef builder = gallivm->builder; 461 LLVMValueRef bfi_args[3]; 462 LLVMValueRef bfi_sm5; 463 LLVMValueRef cond; 464 465 // Calculate the bitmask: (((1 << src3) - 1) << src2 466 bfi_args[0] = LLVMBuildShl(builder, 467 LLVMBuildSub(builder, 468 LLVMBuildShl(builder, 469 bld_base->int_bld.one, 470 emit_data->args[3], ""), 471 bld_base->int_bld.one, ""), 472 emit_data->args[2], ""); 473 474 bfi_args[1] = LLVMBuildShl(builder, emit_data->args[1], 475 emit_data->args[2], ""); 476 477 bfi_args[2] = emit_data->args[0]; 478 479 /* Calculate: 480 * (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2) 481 * Use the right-hand side, which the LLVM backend can convert to V_BFI. 482 */ 483 bfi_sm5 = 484 LLVMBuildXor(builder, bfi_args[2], 485 LLVMBuildAnd(builder, bfi_args[0], 486 LLVMBuildXor(builder, bfi_args[1], bfi_args[2], 487 ""), ""), ""); 488 489 /* Since shifts of >= 32 bits are undefined in LLVM IR, the backend 490 * uses the convenient V_BFI lowering for the above, which follows SM5 491 * and disagrees with GLSL semantics when bits (src3) is 32. 492 */ 493 cond = LLVMBuildICmp(builder, LLVMIntUGE, emit_data->args[3], 494 lp_build_const_int32(gallivm, 32), ""); 495 emit_data->output[emit_data->chan] = 496 LLVMBuildSelect(builder, cond, emit_data->args[1], bfi_sm5, ""); 497} 498 499static void emit_bfe(const struct lp_build_tgsi_action *action, 500 struct lp_build_tgsi_context *bld_base, 501 struct lp_build_emit_data *emit_data) 502{ 503 struct gallivm_state *gallivm = bld_base->base.gallivm; 504 LLVMBuilderRef builder = gallivm->builder; 505 LLVMValueRef bfe_sm5; 506 LLVMValueRef cond; 507 508 bfe_sm5 = lp_build_intrinsic(builder, action->intr_name, 509 emit_data->dst_type, emit_data->args, 510 emit_data->arg_count, LP_FUNC_ATTR_READNONE); 511 512 /* Correct for GLSL semantics. */ 513 cond = LLVMBuildICmp(builder, LLVMIntUGE, emit_data->args[2], 514 lp_build_const_int32(gallivm, 32), ""); 515 emit_data->output[emit_data->chan] = 516 LLVMBuildSelect(builder, cond, emit_data->args[0], bfe_sm5, ""); 517} 518 519/* this is ffs in C */ 520static void emit_lsb(const struct lp_build_tgsi_action *action, 521 struct lp_build_tgsi_context *bld_base, 522 struct lp_build_emit_data *emit_data) 523{ 524 struct gallivm_state *gallivm = bld_base->base.gallivm; 525 LLVMBuilderRef builder = gallivm->builder; 526 LLVMValueRef args[2] = { 527 emit_data->args[0], 528 529 /* The value of 1 means that ffs(x=0) = undef, so LLVM won't 530 * add special code to check for x=0. The reason is that 531 * the LLVM behavior for x=0 is different from what we 532 * need here. However, LLVM also assumes that ffs(x) is 533 * in [0, 31], but GLSL expects that ffs(0) = -1, so 534 * a conditional assignment to handle 0 is still required. 535 */ 536 LLVMConstInt(LLVMInt1TypeInContext(gallivm->context), 1, 0) 537 }; 538 539 LLVMValueRef lsb = 540 lp_build_intrinsic(gallivm->builder, "llvm.cttz.i32", 541 emit_data->dst_type, args, ARRAY_SIZE(args), 542 LP_FUNC_ATTR_READNONE); 543 544 /* TODO: We need an intrinsic to skip this conditional. */ 545 /* Check for zero: */ 546 emit_data->output[emit_data->chan] = 547 LLVMBuildSelect(builder, 548 LLVMBuildICmp(builder, LLVMIntEQ, args[0], 549 bld_base->uint_bld.zero, ""), 550 lp_build_const_int32(gallivm, -1), lsb, ""); 551} 552 553/* Find the last bit set. */ 554static void emit_umsb(const struct lp_build_tgsi_action *action, 555 struct lp_build_tgsi_context *bld_base, 556 struct lp_build_emit_data *emit_data) 557{ 558 struct gallivm_state *gallivm = bld_base->base.gallivm; 559 LLVMBuilderRef builder = gallivm->builder; 560 LLVMValueRef args[2] = { 561 emit_data->args[0], 562 /* Don't generate code for handling zero: */ 563 LLVMConstInt(LLVMInt1TypeInContext(gallivm->context), 1, 0) 564 }; 565 566 LLVMValueRef msb = 567 lp_build_intrinsic(builder, "llvm.ctlz.i32", 568 emit_data->dst_type, args, ARRAY_SIZE(args), 569 LP_FUNC_ATTR_READNONE); 570 571 /* The HW returns the last bit index from MSB, but TGSI wants 572 * the index from LSB. Invert it by doing "31 - msb". */ 573 msb = LLVMBuildSub(builder, lp_build_const_int32(gallivm, 31), 574 msb, ""); 575 576 /* Check for zero: */ 577 emit_data->output[emit_data->chan] = 578 LLVMBuildSelect(builder, 579 LLVMBuildICmp(builder, LLVMIntEQ, args[0], 580 bld_base->uint_bld.zero, ""), 581 lp_build_const_int32(gallivm, -1), msb, ""); 582} 583 584/* Find the last bit opposite of the sign bit. */ 585static void emit_imsb(const struct lp_build_tgsi_action *action, 586 struct lp_build_tgsi_context *bld_base, 587 struct lp_build_emit_data *emit_data) 588{ 589 struct gallivm_state *gallivm = bld_base->base.gallivm; 590 LLVMBuilderRef builder = gallivm->builder; 591 LLVMValueRef arg = emit_data->args[0]; 592 593 LLVMValueRef msb = 594 lp_build_intrinsic(builder, "llvm.AMDGPU.flbit.i32", 595 emit_data->dst_type, &arg, 1, 596 LP_FUNC_ATTR_READNONE); 597 598 /* The HW returns the last bit index from MSB, but TGSI wants 599 * the index from LSB. Invert it by doing "31 - msb". */ 600 msb = LLVMBuildSub(builder, lp_build_const_int32(gallivm, 31), 601 msb, ""); 602 603 /* If arg == 0 || arg == -1 (0xffffffff), return -1. */ 604 LLVMValueRef all_ones = lp_build_const_int32(gallivm, -1); 605 606 LLVMValueRef cond = 607 LLVMBuildOr(builder, 608 LLVMBuildICmp(builder, LLVMIntEQ, arg, 609 bld_base->uint_bld.zero, ""), 610 LLVMBuildICmp(builder, LLVMIntEQ, arg, 611 all_ones, ""), ""); 612 613 emit_data->output[emit_data->chan] = 614 LLVMBuildSelect(builder, cond, all_ones, msb, ""); 615} 616 617static void emit_iabs(const struct lp_build_tgsi_action *action, 618 struct lp_build_tgsi_context *bld_base, 619 struct lp_build_emit_data *emit_data) 620{ 621 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 622 623 emit_data->output[emit_data->chan] = 624 lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_IMAX, 625 emit_data->args[0], 626 LLVMBuildNeg(builder, 627 emit_data->args[0], "")); 628} 629 630static void emit_minmax_int(const struct lp_build_tgsi_action *action, 631 struct lp_build_tgsi_context *bld_base, 632 struct lp_build_emit_data *emit_data) 633{ 634 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 635 LLVMIntPredicate op; 636 637 switch (emit_data->info->opcode) { 638 default: 639 assert(0); 640 case TGSI_OPCODE_IMAX: 641 case TGSI_OPCODE_I64MAX: 642 op = LLVMIntSGT; 643 break; 644 case TGSI_OPCODE_IMIN: 645 case TGSI_OPCODE_I64MIN: 646 op = LLVMIntSLT; 647 break; 648 case TGSI_OPCODE_UMAX: 649 case TGSI_OPCODE_U64MAX: 650 op = LLVMIntUGT; 651 break; 652 case TGSI_OPCODE_UMIN: 653 case TGSI_OPCODE_U64MIN: 654 op = LLVMIntULT; 655 break; 656 } 657 658 emit_data->output[emit_data->chan] = 659 LLVMBuildSelect(builder, 660 LLVMBuildICmp(builder, op, emit_data->args[0], 661 emit_data->args[1], ""), 662 emit_data->args[0], 663 emit_data->args[1], ""); 664} 665 666static void pk2h_fetch_args(struct lp_build_tgsi_context *bld_base, 667 struct lp_build_emit_data *emit_data) 668{ 669 emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, 670 0, TGSI_CHAN_X); 671 emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst, 672 0, TGSI_CHAN_Y); 673} 674 675static void emit_pk2h(const struct lp_build_tgsi_action *action, 676 struct lp_build_tgsi_context *bld_base, 677 struct lp_build_emit_data *emit_data) 678{ 679 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 680 LLVMContextRef context = bld_base->base.gallivm->context; 681 struct lp_build_context *uint_bld = &bld_base->uint_bld; 682 LLVMTypeRef fp16, i16; 683 LLVMValueRef const16, comp[2]; 684 unsigned i; 685 686 fp16 = LLVMHalfTypeInContext(context); 687 i16 = LLVMInt16TypeInContext(context); 688 const16 = lp_build_const_int32(uint_bld->gallivm, 16); 689 690 for (i = 0; i < 2; i++) { 691 comp[i] = LLVMBuildFPTrunc(builder, emit_data->args[i], fp16, ""); 692 comp[i] = LLVMBuildBitCast(builder, comp[i], i16, ""); 693 comp[i] = LLVMBuildZExt(builder, comp[i], uint_bld->elem_type, ""); 694 } 695 696 comp[1] = LLVMBuildShl(builder, comp[1], const16, ""); 697 comp[0] = LLVMBuildOr(builder, comp[0], comp[1], ""); 698 699 emit_data->output[emit_data->chan] = comp[0]; 700} 701 702static void up2h_fetch_args(struct lp_build_tgsi_context *bld_base, 703 struct lp_build_emit_data *emit_data) 704{ 705 emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, 706 0, TGSI_CHAN_X); 707} 708 709static void emit_up2h(const struct lp_build_tgsi_action *action, 710 struct lp_build_tgsi_context *bld_base, 711 struct lp_build_emit_data *emit_data) 712{ 713 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 714 LLVMContextRef context = bld_base->base.gallivm->context; 715 struct lp_build_context *uint_bld = &bld_base->uint_bld; 716 LLVMTypeRef fp16, i16; 717 LLVMValueRef const16, input, val; 718 unsigned i; 719 720 fp16 = LLVMHalfTypeInContext(context); 721 i16 = LLVMInt16TypeInContext(context); 722 const16 = lp_build_const_int32(uint_bld->gallivm, 16); 723 input = emit_data->args[0]; 724 725 for (i = 0; i < 2; i++) { 726 val = i == 1 ? LLVMBuildLShr(builder, input, const16, "") : input; 727 val = LLVMBuildTrunc(builder, val, i16, ""); 728 val = LLVMBuildBitCast(builder, val, fp16, ""); 729 emit_data->output[i] = 730 LLVMBuildFPExt(builder, val, bld_base->base.elem_type, ""); 731 } 732} 733 734static void emit_fdiv(const struct lp_build_tgsi_action *action, 735 struct lp_build_tgsi_context *bld_base, 736 struct lp_build_emit_data *emit_data) 737{ 738 struct si_shader_context *ctx = si_shader_context(bld_base); 739 740 emit_data->output[emit_data->chan] = 741 LLVMBuildFDiv(bld_base->base.gallivm->builder, 742 emit_data->args[0], emit_data->args[1], ""); 743 744 /* Use v_rcp_f32 instead of precise division. */ 745 if (HAVE_LLVM >= 0x0309 && 746 !LLVMIsConstant(emit_data->output[emit_data->chan])) 747 LLVMSetMetadata(emit_data->output[emit_data->chan], 748 ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp); 749} 750 751/* 1/sqrt is translated to rsq for f32 if fp32 denormals are not enabled in 752 * the target machine. f64 needs global unsafe math flags to get rsq. */ 753static void emit_rsq(const struct lp_build_tgsi_action *action, 754 struct lp_build_tgsi_context *bld_base, 755 struct lp_build_emit_data *emit_data) 756{ 757 LLVMValueRef sqrt = 758 lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_SQRT, 759 emit_data->args[0]); 760 761 emit_data->output[emit_data->chan] = 762 lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_DIV, 763 bld_base->base.one, sqrt); 764} 765 766void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base) 767{ 768 lp_set_default_actions(bld_base); 769 770 bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and; 771 bld_base->op_actions[TGSI_OPCODE_ARL].emit = emit_arl; 772 bld_base->op_actions[TGSI_OPCODE_BFI].emit = emit_bfi; 773 bld_base->op_actions[TGSI_OPCODE_BREV].emit = build_tgsi_intrinsic_nomem; 774 bld_base->op_actions[TGSI_OPCODE_BREV].intr_name = 775 HAVE_LLVM >= 0x0308 ? "llvm.bitreverse.i32" : "llvm.AMDGPU.brev"; 776 bld_base->op_actions[TGSI_OPCODE_CEIL].emit = build_tgsi_intrinsic_nomem; 777 bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "llvm.ceil.f32"; 778 bld_base->op_actions[TGSI_OPCODE_CLAMP].emit = build_tgsi_intrinsic_nomem; 779 bld_base->op_actions[TGSI_OPCODE_CLAMP].intr_name = 780 HAVE_LLVM >= 0x0308 ? "llvm.AMDGPU.clamp." : "llvm.AMDIL.clamp."; 781 bld_base->op_actions[TGSI_OPCODE_CMP].emit = emit_cmp; 782 bld_base->op_actions[TGSI_OPCODE_COS].emit = build_tgsi_intrinsic_nomem; 783 bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.cos.f32"; 784 bld_base->op_actions[TGSI_OPCODE_DABS].emit = build_tgsi_intrinsic_nomem; 785 bld_base->op_actions[TGSI_OPCODE_DABS].intr_name = "llvm.fabs.f64"; 786 bld_base->op_actions[TGSI_OPCODE_DFMA].emit = build_tgsi_intrinsic_nomem; 787 bld_base->op_actions[TGSI_OPCODE_DFMA].intr_name = "llvm.fma.f64"; 788 bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = emit_frac; 789 bld_base->op_actions[TGSI_OPCODE_DIV].emit = emit_fdiv; 790 bld_base->op_actions[TGSI_OPCODE_DNEG].emit = emit_dneg; 791 bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = emit_dcmp; 792 bld_base->op_actions[TGSI_OPCODE_DSGE].emit = emit_dcmp; 793 bld_base->op_actions[TGSI_OPCODE_DSLT].emit = emit_dcmp; 794 bld_base->op_actions[TGSI_OPCODE_DSNE].emit = emit_dcmp; 795 bld_base->op_actions[TGSI_OPCODE_DRSQ].emit = build_tgsi_intrinsic_nomem; 796 bld_base->op_actions[TGSI_OPCODE_DRSQ].intr_name = 797 HAVE_LLVM >= 0x0309 ? "llvm.amdgcn.rsq.f64" : "llvm.AMDGPU.rsq.f64"; 798 bld_base->op_actions[TGSI_OPCODE_DSQRT].emit = build_tgsi_intrinsic_nomem; 799 bld_base->op_actions[TGSI_OPCODE_DSQRT].intr_name = "llvm.sqrt.f64"; 800 bld_base->op_actions[TGSI_OPCODE_EX2].emit = build_tgsi_intrinsic_nomem; 801 bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = 802 HAVE_LLVM >= 0x0308 ? "llvm.exp2.f32" : "llvm.AMDIL.exp."; 803 bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_nomem; 804 bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "llvm.floor.f32"; 805 bld_base->op_actions[TGSI_OPCODE_FMA].emit = 806 bld_base->op_actions[TGSI_OPCODE_MAD].emit; 807 bld_base->op_actions[TGSI_OPCODE_FRC].emit = emit_frac; 808 bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i; 809 bld_base->op_actions[TGSI_OPCODE_F2U].emit = emit_f2u; 810 bld_base->op_actions[TGSI_OPCODE_FSEQ].emit = emit_fcmp; 811 bld_base->op_actions[TGSI_OPCODE_FSGE].emit = emit_fcmp; 812 bld_base->op_actions[TGSI_OPCODE_FSLT].emit = emit_fcmp; 813 bld_base->op_actions[TGSI_OPCODE_FSNE].emit = emit_fcmp; 814 bld_base->op_actions[TGSI_OPCODE_IABS].emit = emit_iabs; 815 bld_base->op_actions[TGSI_OPCODE_IBFE].emit = emit_bfe; 816 bld_base->op_actions[TGSI_OPCODE_IBFE].intr_name = "llvm.AMDGPU.bfe.i32"; 817 bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv; 818 bld_base->op_actions[TGSI_OPCODE_IMAX].emit = emit_minmax_int; 819 bld_base->op_actions[TGSI_OPCODE_IMIN].emit = emit_minmax_int; 820 bld_base->op_actions[TGSI_OPCODE_IMSB].emit = emit_imsb; 821 bld_base->op_actions[TGSI_OPCODE_INEG].emit = emit_ineg; 822 bld_base->op_actions[TGSI_OPCODE_ISHR].emit = emit_ishr; 823 bld_base->op_actions[TGSI_OPCODE_ISGE].emit = emit_icmp; 824 bld_base->op_actions[TGSI_OPCODE_ISLT].emit = emit_icmp; 825 bld_base->op_actions[TGSI_OPCODE_ISSG].emit = emit_ssg; 826 bld_base->op_actions[TGSI_OPCODE_I2F].emit = emit_i2f; 827 bld_base->op_actions[TGSI_OPCODE_KILL_IF].fetch_args = kill_if_fetch_args; 828 bld_base->op_actions[TGSI_OPCODE_KILL_IF].emit = kil_emit; 829 bld_base->op_actions[TGSI_OPCODE_KILL_IF].intr_name = "llvm.AMDGPU.kill"; 830 bld_base->op_actions[TGSI_OPCODE_KILL].emit = lp_build_tgsi_intrinsic; 831 bld_base->op_actions[TGSI_OPCODE_KILL].intr_name = "llvm.AMDGPU.kilp"; 832 bld_base->op_actions[TGSI_OPCODE_LSB].emit = emit_lsb; 833 bld_base->op_actions[TGSI_OPCODE_LG2].emit = build_tgsi_intrinsic_nomem; 834 bld_base->op_actions[TGSI_OPCODE_LG2].intr_name = "llvm.log2.f32"; 835 bld_base->op_actions[TGSI_OPCODE_MAX].emit = build_tgsi_intrinsic_nomem; 836 bld_base->op_actions[TGSI_OPCODE_MAX].intr_name = "llvm.maxnum.f32"; 837 bld_base->op_actions[TGSI_OPCODE_MIN].emit = build_tgsi_intrinsic_nomem; 838 bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.minnum.f32"; 839 bld_base->op_actions[TGSI_OPCODE_MOD].emit = emit_mod; 840 bld_base->op_actions[TGSI_OPCODE_UMSB].emit = emit_umsb; 841 bld_base->op_actions[TGSI_OPCODE_NOT].emit = emit_not; 842 bld_base->op_actions[TGSI_OPCODE_OR].emit = emit_or; 843 bld_base->op_actions[TGSI_OPCODE_PK2H].fetch_args = pk2h_fetch_args; 844 bld_base->op_actions[TGSI_OPCODE_PK2H].emit = emit_pk2h; 845 bld_base->op_actions[TGSI_OPCODE_POPC].emit = build_tgsi_intrinsic_nomem; 846 bld_base->op_actions[TGSI_OPCODE_POPC].intr_name = "llvm.ctpop.i32"; 847 bld_base->op_actions[TGSI_OPCODE_POW].emit = build_tgsi_intrinsic_nomem; 848 bld_base->op_actions[TGSI_OPCODE_POW].intr_name = "llvm.pow.f32"; 849 bld_base->op_actions[TGSI_OPCODE_ROUND].emit = build_tgsi_intrinsic_nomem; 850 bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.rint.f32"; 851 bld_base->op_actions[TGSI_OPCODE_RSQ].emit = emit_rsq; 852 bld_base->op_actions[TGSI_OPCODE_SGE].emit = emit_set_cond; 853 bld_base->op_actions[TGSI_OPCODE_SEQ].emit = emit_set_cond; 854 bld_base->op_actions[TGSI_OPCODE_SHL].emit = emit_shl; 855 bld_base->op_actions[TGSI_OPCODE_SLE].emit = emit_set_cond; 856 bld_base->op_actions[TGSI_OPCODE_SLT].emit = emit_set_cond; 857 bld_base->op_actions[TGSI_OPCODE_SNE].emit = emit_set_cond; 858 bld_base->op_actions[TGSI_OPCODE_SGT].emit = emit_set_cond; 859 bld_base->op_actions[TGSI_OPCODE_SIN].emit = build_tgsi_intrinsic_nomem; 860 bld_base->op_actions[TGSI_OPCODE_SIN].intr_name = "llvm.sin.f32"; 861 bld_base->op_actions[TGSI_OPCODE_SQRT].emit = build_tgsi_intrinsic_nomem; 862 bld_base->op_actions[TGSI_OPCODE_SQRT].intr_name = "llvm.sqrt.f32"; 863 bld_base->op_actions[TGSI_OPCODE_SSG].emit = emit_ssg; 864 bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = build_tgsi_intrinsic_nomem; 865 bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.trunc.f32"; 866 bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd; 867 bld_base->op_actions[TGSI_OPCODE_UBFE].emit = emit_bfe; 868 bld_base->op_actions[TGSI_OPCODE_UBFE].intr_name = "llvm.AMDGPU.bfe.u32"; 869 bld_base->op_actions[TGSI_OPCODE_UDIV].emit = emit_udiv; 870 bld_base->op_actions[TGSI_OPCODE_UMAX].emit = emit_minmax_int; 871 bld_base->op_actions[TGSI_OPCODE_UMIN].emit = emit_minmax_int; 872 bld_base->op_actions[TGSI_OPCODE_UMOD].emit = emit_umod; 873 bld_base->op_actions[TGSI_OPCODE_USEQ].emit = emit_icmp; 874 bld_base->op_actions[TGSI_OPCODE_USGE].emit = emit_icmp; 875 bld_base->op_actions[TGSI_OPCODE_USHR].emit = emit_ushr; 876 bld_base->op_actions[TGSI_OPCODE_USLT].emit = emit_icmp; 877 bld_base->op_actions[TGSI_OPCODE_USNE].emit = emit_icmp; 878 bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f; 879 bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor; 880 bld_base->op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp; 881 bld_base->op_actions[TGSI_OPCODE_UP2H].fetch_args = up2h_fetch_args; 882 bld_base->op_actions[TGSI_OPCODE_UP2H].emit = emit_up2h; 883 884 bld_base->op_actions[TGSI_OPCODE_I64MAX].emit = emit_minmax_int; 885 bld_base->op_actions[TGSI_OPCODE_I64MIN].emit = emit_minmax_int; 886 bld_base->op_actions[TGSI_OPCODE_U64MAX].emit = emit_minmax_int; 887 bld_base->op_actions[TGSI_OPCODE_U64MIN].emit = emit_minmax_int; 888 bld_base->op_actions[TGSI_OPCODE_I64ABS].emit = emit_iabs; 889 bld_base->op_actions[TGSI_OPCODE_I64SSG].emit = emit_ssg; 890 bld_base->op_actions[TGSI_OPCODE_I64NEG].emit = emit_ineg; 891 892 bld_base->op_actions[TGSI_OPCODE_U64SEQ].emit = emit_icmp; 893 bld_base->op_actions[TGSI_OPCODE_U64SNE].emit = emit_icmp; 894 bld_base->op_actions[TGSI_OPCODE_U64SGE].emit = emit_icmp; 895 bld_base->op_actions[TGSI_OPCODE_U64SLT].emit = emit_icmp; 896 bld_base->op_actions[TGSI_OPCODE_I64SGE].emit = emit_icmp; 897 bld_base->op_actions[TGSI_OPCODE_I64SLT].emit = emit_icmp; 898 899 bld_base->op_actions[TGSI_OPCODE_U64ADD].emit = emit_uadd; 900 bld_base->op_actions[TGSI_OPCODE_U64SHL].emit = emit_shl; 901 bld_base->op_actions[TGSI_OPCODE_U64SHR].emit = emit_ushr; 902 bld_base->op_actions[TGSI_OPCODE_I64SHR].emit = emit_ishr; 903 904 bld_base->op_actions[TGSI_OPCODE_U64MOD].emit = emit_umod; 905 bld_base->op_actions[TGSI_OPCODE_I64MOD].emit = emit_mod; 906 bld_base->op_actions[TGSI_OPCODE_U64DIV].emit = emit_udiv; 907 bld_base->op_actions[TGSI_OPCODE_I64DIV].emit = emit_idiv; 908} 909