1/* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the 6 * "Software"), to deal in the Software without restriction, including 7 * without limitation the rights to use, copy, modify, merge, publish, 8 * distribute, sub license, and/or sell copies of the Software, and to 9 * permit persons to whom the Software is furnished to do so, subject to 10 * the following conditions: 11 * 12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 15 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 16 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 17 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 18 * USE OR OTHER DEALINGS IN THE SOFTWARE. 19 * 20 * The above copyright notice and this permission notice (including the 21 * next paragraph) shall be included in all copies or substantial portions 22 * of the Software. 23 * 24 */ 25/* based on pieces from si_pipe.c and radeon_llvm_emit.c */ 26#include "ac_llvm_util.h" 27 28#include <llvm-c/Core.h> 29 30#include "c11/threads.h" 31 32#include <assert.h> 33#include <stdio.h> 34 35#include "util/bitscan.h" 36#include "util/macros.h" 37 38static void ac_init_llvm_target() 39{ 40#if HAVE_LLVM < 0x0307 41 LLVMInitializeR600TargetInfo(); 42 LLVMInitializeR600Target(); 43 LLVMInitializeR600TargetMC(); 44 LLVMInitializeR600AsmPrinter(); 45#else 46 LLVMInitializeAMDGPUTargetInfo(); 47 LLVMInitializeAMDGPUTarget(); 48 LLVMInitializeAMDGPUTargetMC(); 49 LLVMInitializeAMDGPUAsmPrinter(); 50#endif 51} 52 53static once_flag ac_init_llvm_target_once_flag = ONCE_FLAG_INIT; 54 55static LLVMTargetRef ac_get_llvm_target(const char *triple) 56{ 57 LLVMTargetRef target = NULL; 58 char *err_message = NULL; 59 60 call_once(&ac_init_llvm_target_once_flag, ac_init_llvm_target); 61 62 if (LLVMGetTargetFromTriple(triple, &target, &err_message)) { 63 fprintf(stderr, "Cannot find target for triple %s ", triple); 64 if (err_message) { 65 fprintf(stderr, "%s\n", err_message); 66 } 67 LLVMDisposeMessage(err_message); 68 return NULL; 69 } 70 return target; 71} 72 73static const char *ac_get_llvm_processor_name(enum radeon_family family) 74{ 75 switch (family) { 76 case CHIP_TAHITI: 77 return "tahiti"; 78 case CHIP_PITCAIRN: 79 return "pitcairn"; 80 case CHIP_VERDE: 81 return "verde"; 82 case CHIP_OLAND: 83 return "oland"; 84 case CHIP_HAINAN: 85 return "hainan"; 86 case CHIP_BONAIRE: 87 return "bonaire"; 88 case CHIP_KABINI: 89 return "kabini"; 90 case CHIP_KAVERI: 91 return "kaveri"; 92 case CHIP_HAWAII: 93 return "hawaii"; 94 case CHIP_MULLINS: 95 return "mullins"; 96 case CHIP_TONGA: 97 return "tonga"; 98 case CHIP_ICELAND: 99 return "iceland"; 100 case CHIP_CARRIZO: 101 return "carrizo"; 102#if HAVE_LLVM <= 0x0307 103 case CHIP_FIJI: 104 return "tonga"; 105 case CHIP_STONEY: 106 return "carrizo"; 107#else 108 case CHIP_FIJI: 109 return "fiji"; 110 case CHIP_STONEY: 111 return "stoney"; 112#endif 113#if HAVE_LLVM <= 0x0308 114 case CHIP_POLARIS10: 115 return "tonga"; 116 case CHIP_POLARIS11: 117 return "tonga"; 118#else 119 case CHIP_POLARIS10: 120 return "polaris10"; 121 case CHIP_POLARIS11: 122 return "polaris11"; 123#endif 124 default: 125 return ""; 126 } 127} 128 129LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family) 130{ 131 assert(family >= CHIP_TAHITI); 132 133 const char *triple = "amdgcn--"; 134 LLVMTargetRef target = ac_get_llvm_target(triple); 135 LLVMTargetMachineRef tm = LLVMCreateTargetMachine( 136 target, 137 triple, 138 ac_get_llvm_processor_name(family), 139 "+DumpCode,+vgpr-spilling", 140 LLVMCodeGenLevelDefault, 141 LLVMRelocDefault, 142 LLVMCodeModelDefault); 143 144 return tm; 145} 146 147/* Initialize module-independent parts of the context. 148 * 149 * The caller is responsible for initializing ctx::module and ctx::builder. 150 */ 151void 152ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context) 153{ 154 LLVMValueRef args[1]; 155 156 ctx->context = context; 157 ctx->module = NULL; 158 ctx->builder = NULL; 159 160 ctx->i32 = LLVMIntTypeInContext(ctx->context, 32); 161 ctx->f32 = LLVMFloatTypeInContext(ctx->context); 162 163 ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->context, "fpmath", 6); 164 165 args[0] = LLVMConstReal(ctx->f32, 2.5); 166 ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->context, args, 1); 167} 168 169#if HAVE_LLVM < 0x0400 170static LLVMAttribute ac_attr_to_llvm_attr(enum ac_func_attr attr) 171{ 172 switch (attr) { 173 case AC_FUNC_ATTR_ALWAYSINLINE: return LLVMAlwaysInlineAttribute; 174 case AC_FUNC_ATTR_BYVAL: return LLVMByValAttribute; 175 case AC_FUNC_ATTR_INREG: return LLVMInRegAttribute; 176 case AC_FUNC_ATTR_NOALIAS: return LLVMNoAliasAttribute; 177 case AC_FUNC_ATTR_NOUNWIND: return LLVMNoUnwindAttribute; 178 case AC_FUNC_ATTR_READNONE: return LLVMReadNoneAttribute; 179 case AC_FUNC_ATTR_READONLY: return LLVMReadOnlyAttribute; 180 default: 181 fprintf(stderr, "Unhandled function attribute: %x\n", attr); 182 return 0; 183 } 184} 185 186#else 187 188static const char *attr_to_str(enum ac_func_attr attr) 189{ 190 switch (attr) { 191 case AC_FUNC_ATTR_ALWAYSINLINE: return "alwaysinline"; 192 case AC_FUNC_ATTR_BYVAL: return "byval"; 193 case AC_FUNC_ATTR_INREG: return "inreg"; 194 case AC_FUNC_ATTR_NOALIAS: return "noalias"; 195 case AC_FUNC_ATTR_NOUNWIND: return "nounwind"; 196 case AC_FUNC_ATTR_READNONE: return "readnone"; 197 case AC_FUNC_ATTR_READONLY: return "readonly"; 198 default: 199 fprintf(stderr, "Unhandled function attribute: %x\n", attr); 200 return 0; 201 } 202} 203 204#endif 205 206void 207ac_add_function_attr(LLVMValueRef function, 208 int attr_idx, 209 enum ac_func_attr attr) 210{ 211 212#if HAVE_LLVM < 0x0400 213 LLVMAttribute llvm_attr = ac_attr_to_llvm_attr(attr); 214 if (attr_idx == -1) { 215 LLVMAddFunctionAttr(function, llvm_attr); 216 } else { 217 LLVMAddAttribute(LLVMGetParam(function, attr_idx - 1), llvm_attr); 218 } 219#else 220 LLVMContextRef context = LLVMGetModuleContext(LLVMGetGlobalParent(function)); 221 const char *attr_name = attr_to_str(attr); 222 unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name, 223 strlen(attr_name)); 224 LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(context, kind_id, 0); 225 LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr); 226#endif 227} 228 229LLVMValueRef 230ac_emit_llvm_intrinsic(struct ac_llvm_context *ctx, const char *name, 231 LLVMTypeRef return_type, LLVMValueRef *params, 232 unsigned param_count, unsigned attrib_mask) 233{ 234 LLVMValueRef function; 235 236 function = LLVMGetNamedFunction(ctx->module, name); 237 if (!function) { 238 LLVMTypeRef param_types[32], function_type; 239 unsigned i; 240 241 assert(param_count <= 32); 242 243 for (i = 0; i < param_count; ++i) { 244 assert(params[i]); 245 param_types[i] = LLVMTypeOf(params[i]); 246 } 247 function_type = 248 LLVMFunctionType(return_type, param_types, param_count, 0); 249 function = LLVMAddFunction(ctx->module, name, function_type); 250 251 LLVMSetFunctionCallConv(function, LLVMCCallConv); 252 LLVMSetLinkage(function, LLVMExternalLinkage); 253 254 attrib_mask |= AC_FUNC_ATTR_NOUNWIND; 255 while (attrib_mask) { 256 enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask); 257 ac_add_function_attr(function, -1, attr); 258 } 259 } 260 return LLVMBuildCall(ctx->builder, function, params, param_count, ""); 261} 262 263LLVMValueRef 264ac_build_gather_values_extended(struct ac_llvm_context *ctx, 265 LLVMValueRef *values, 266 unsigned value_count, 267 unsigned value_stride, 268 bool load) 269{ 270 LLVMBuilderRef builder = ctx->builder; 271 LLVMValueRef vec; 272 unsigned i; 273 274 275 if (value_count == 1) { 276 if (load) 277 return LLVMBuildLoad(builder, values[0], ""); 278 return values[0]; 279 } else if (!value_count) 280 unreachable("value_count is 0"); 281 282 for (i = 0; i < value_count; i++) { 283 LLVMValueRef value = values[i * value_stride]; 284 if (load) 285 value = LLVMBuildLoad(builder, value, ""); 286 287 if (!i) 288 vec = LLVMGetUndef( LLVMVectorType(LLVMTypeOf(value), value_count)); 289 LLVMValueRef index = LLVMConstInt(ctx->i32, i, false); 290 vec = LLVMBuildInsertElement(builder, vec, value, index, ""); 291 } 292 return vec; 293} 294 295LLVMValueRef 296ac_build_gather_values(struct ac_llvm_context *ctx, 297 LLVMValueRef *values, 298 unsigned value_count) 299{ 300 return ac_build_gather_values_extended(ctx, values, value_count, 1, false); 301} 302 303LLVMValueRef 304ac_emit_fdiv(struct ac_llvm_context *ctx, 305 LLVMValueRef num, 306 LLVMValueRef den) 307{ 308 LLVMValueRef ret = LLVMBuildFDiv(ctx->builder, num, den, ""); 309 310 if (!LLVMIsConstant(ret)) 311 LLVMSetMetadata(ret, ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp); 312 return ret; 313} 314 315/* Coordinates for cube map selection. sc, tc, and ma are as in Table 8.27 316 * of the OpenGL 4.5 (Compatibility Profile) specification, except ma is 317 * already multiplied by two. id is the cube face number. 318 */ 319struct cube_selection_coords { 320 LLVMValueRef stc[2]; 321 LLVMValueRef ma; 322 LLVMValueRef id; 323}; 324 325static void 326build_cube_intrinsic(struct ac_llvm_context *ctx, 327 LLVMValueRef in[3], 328 struct cube_selection_coords *out) 329{ 330 LLVMBuilderRef builder = ctx->builder; 331 332 if (HAVE_LLVM >= 0x0309) { 333 LLVMTypeRef f32 = ctx->f32; 334 335 out->stc[1] = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubetc", 336 f32, in, 3, AC_FUNC_ATTR_READNONE); 337 out->stc[0] = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubesc", 338 f32, in, 3, AC_FUNC_ATTR_READNONE); 339 out->ma = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubema", 340 f32, in, 3, AC_FUNC_ATTR_READNONE); 341 out->id = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubeid", 342 f32, in, 3, AC_FUNC_ATTR_READNONE); 343 } else { 344 LLVMValueRef c[4] = { 345 in[0], 346 in[1], 347 in[2], 348 LLVMGetUndef(LLVMTypeOf(in[0])) 349 }; 350 LLVMValueRef vec = ac_build_gather_values(ctx, c, 4); 351 352 LLVMValueRef tmp = 353 ac_emit_llvm_intrinsic(ctx, "llvm.AMDGPU.cube", 354 LLVMTypeOf(vec), &vec, 1, 355 AC_FUNC_ATTR_READNONE); 356 357 out->stc[1] = LLVMBuildExtractElement(builder, tmp, 358 LLVMConstInt(ctx->i32, 0, 0), ""); 359 out->stc[0] = LLVMBuildExtractElement(builder, tmp, 360 LLVMConstInt(ctx->i32, 1, 0), ""); 361 out->ma = LLVMBuildExtractElement(builder, tmp, 362 LLVMConstInt(ctx->i32, 2, 0), ""); 363 out->id = LLVMBuildExtractElement(builder, tmp, 364 LLVMConstInt(ctx->i32, 3, 0), ""); 365 } 366} 367 368/** 369 * Build a manual selection sequence for cube face sc/tc coordinates and 370 * major axis vector (multiplied by 2 for consistency) for the given 371 * vec3 \p coords, for the face implied by \p selcoords. 372 * 373 * For the major axis, we always adjust the sign to be in the direction of 374 * selcoords.ma; i.e., a positive out_ma means that coords is pointed towards 375 * the selcoords major axis. 376 */ 377static void build_cube_select(LLVMBuilderRef builder, 378 const struct cube_selection_coords *selcoords, 379 const LLVMValueRef *coords, 380 LLVMValueRef *out_st, 381 LLVMValueRef *out_ma) 382{ 383 LLVMTypeRef f32 = LLVMTypeOf(coords[0]); 384 LLVMValueRef is_ma_positive; 385 LLVMValueRef sgn_ma; 386 LLVMValueRef is_ma_z, is_not_ma_z; 387 LLVMValueRef is_ma_y; 388 LLVMValueRef is_ma_x; 389 LLVMValueRef sgn; 390 LLVMValueRef tmp; 391 392 is_ma_positive = LLVMBuildFCmp(builder, LLVMRealUGE, 393 selcoords->ma, LLVMConstReal(f32, 0.0), ""); 394 sgn_ma = LLVMBuildSelect(builder, is_ma_positive, 395 LLVMConstReal(f32, 1.0), LLVMConstReal(f32, -1.0), ""); 396 397 is_ma_z = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 4.0), ""); 398 is_not_ma_z = LLVMBuildNot(builder, is_ma_z, ""); 399 is_ma_y = LLVMBuildAnd(builder, is_not_ma_z, 400 LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 2.0), ""), ""); 401 is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, is_ma_y, ""), ""); 402 403 /* Select sc */ 404 tmp = LLVMBuildSelect(builder, is_ma_z, coords[2], coords[0], ""); 405 sgn = LLVMBuildSelect(builder, is_ma_y, LLVMConstReal(f32, 1.0), 406 LLVMBuildSelect(builder, is_ma_x, sgn_ma, 407 LLVMBuildFNeg(builder, sgn_ma, ""), ""), ""); 408 out_st[0] = LLVMBuildFMul(builder, tmp, sgn, ""); 409 410 /* Select tc */ 411 tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], ""); 412 sgn = LLVMBuildSelect(builder, is_ma_y, LLVMBuildFNeg(builder, sgn_ma, ""), 413 LLVMConstReal(f32, -1.0), ""); 414 out_st[1] = LLVMBuildFMul(builder, tmp, sgn, ""); 415 416 /* Select ma */ 417 tmp = LLVMBuildSelect(builder, is_ma_z, coords[2], 418 LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), ""); 419 sgn = LLVMBuildSelect(builder, is_ma_positive, 420 LLVMConstReal(f32, 2.0), LLVMConstReal(f32, -2.0), ""); 421 *out_ma = LLVMBuildFMul(builder, tmp, sgn, ""); 422} 423 424void 425ac_prepare_cube_coords(struct ac_llvm_context *ctx, 426 bool is_deriv, bool is_array, 427 LLVMValueRef *coords_arg, 428 LLVMValueRef *derivs_arg) 429{ 430 431 LLVMBuilderRef builder = ctx->builder; 432 struct cube_selection_coords selcoords; 433 LLVMValueRef coords[3]; 434 LLVMValueRef invma; 435 436 build_cube_intrinsic(ctx, coords_arg, &selcoords); 437 438 invma = ac_emit_llvm_intrinsic(ctx, "llvm.fabs.f32", 439 ctx->f32, &selcoords.ma, 1, AC_FUNC_ATTR_READNONE); 440 invma = ac_emit_fdiv(ctx, LLVMConstReal(ctx->f32, 1.0), invma); 441 442 for (int i = 0; i < 2; ++i) 443 coords[i] = LLVMBuildFMul(builder, selcoords.stc[i], invma, ""); 444 445 coords[2] = selcoords.id; 446 447 if (is_deriv && derivs_arg) { 448 LLVMValueRef derivs[4]; 449 int axis; 450 451 /* Convert cube derivatives to 2D derivatives. */ 452 for (axis = 0; axis < 2; axis++) { 453 LLVMValueRef deriv_st[2]; 454 LLVMValueRef deriv_ma; 455 456 /* Transform the derivative alongside the texture 457 * coordinate. Mathematically, the correct formula is 458 * as follows. Assume we're projecting onto the +Z face 459 * and denote by dx/dh the derivative of the (original) 460 * X texture coordinate with respect to horizontal 461 * window coordinates. The projection onto the +Z face 462 * plane is: 463 * 464 * f(x,z) = x/z 465 * 466 * Then df/dh = df/dx * dx/dh + df/dz * dz/dh 467 * = 1/z * dx/dh - x/z * 1/z * dz/dh. 468 * 469 * This motivatives the implementation below. 470 * 471 * Whether this actually gives the expected results for 472 * apps that might feed in derivatives obtained via 473 * finite differences is anyone's guess. The OpenGL spec 474 * seems awfully quiet about how textureGrad for cube 475 * maps should be handled. 476 */ 477 build_cube_select(builder, &selcoords, &derivs_arg[axis * 3], 478 deriv_st, &deriv_ma); 479 480 deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, ""); 481 482 for (int i = 0; i < 2; ++i) 483 derivs[axis * 2 + i] = 484 LLVMBuildFSub(builder, 485 LLVMBuildFMul(builder, deriv_st[i], invma, ""), 486 LLVMBuildFMul(builder, deriv_ma, coords[i], ""), ""); 487 } 488 489 memcpy(derivs_arg, derivs, sizeof(derivs)); 490 } 491 492 /* Shift the texture coordinate. This must be applied after the 493 * derivative calculation. 494 */ 495 for (int i = 0; i < 2; ++i) 496 coords[i] = LLVMBuildFAdd(builder, coords[i], LLVMConstReal(ctx->f32, 1.5), ""); 497 498 if (is_array) { 499 /* for cube arrays coord.z = coord.w(array_index) * 8 + face */ 500 /* coords_arg.w component - array_index for cube arrays */ 501 LLVMValueRef tmp = LLVMBuildFMul(ctx->builder, coords_arg[3], LLVMConstReal(ctx->f32, 8.0), ""); 502 coords[2] = LLVMBuildFAdd(ctx->builder, tmp, coords[2], ""); 503 } 504 505 memcpy(coords_arg, coords, sizeof(coords)); 506} 507