swr_shader.cpp revision efdaf5fa3e74ca4f3d9217dc6955aef6dc698a68
1/**************************************************************************** 2 * Copyright (C) 2015 Intel Corporation. All Rights Reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 ***************************************************************************/ 23 24// llvm redefines DEBUG 25#pragma push_macro("DEBUG") 26#undef DEBUG 27#include "JitManager.h" 28#include "llvm-c/Core.h" 29#include "llvm/Support/CBindingWrapping.h" 30#pragma pop_macro("DEBUG") 31 32#include "state.h" 33#include "state_llvm.h" 34#include "builder.h" 35 36#include "tgsi/tgsi_strings.h" 37#include "gallivm/lp_bld_init.h" 38#include "gallivm/lp_bld_flow.h" 39#include "gallivm/lp_bld_struct.h" 40#include "gallivm/lp_bld_tgsi.h" 41 42#include "swr_context.h" 43#include "swr_context_llvm.h" 44#include "swr_state.h" 45#include "swr_screen.h" 46 47static unsigned 48locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info); 49 50bool operator==(const swr_jit_fs_key &lhs, const swr_jit_fs_key &rhs) 51{ 52 return !memcmp(&lhs, &rhs, sizeof(lhs)); 53} 54 55bool operator==(const swr_jit_vs_key &lhs, const swr_jit_vs_key &rhs) 56{ 57 return !memcmp(&lhs, &rhs, sizeof(lhs)); 58} 59 60static void 61swr_generate_sampler_key(const struct lp_tgsi_info &info, 62 struct swr_context *ctx, 63 unsigned shader_type, 64 struct swr_jit_sampler_key &key) 65{ 66 key.nr_samplers = info.base.file_max[TGSI_FILE_SAMPLER] + 1; 67 68 for (unsigned i = 0; i < key.nr_samplers; i++) { 69 if (info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) { 70 lp_sampler_static_sampler_state( 71 &key.sampler[i].sampler_state, 72 ctx->samplers[shader_type][i]); 73 } 74 } 75 76 /* 77 * XXX If TGSI_FILE_SAMPLER_VIEW exists assume all texture opcodes 78 * are dx10-style? Can't really have mixed opcodes, at least not 79 * if we want to skip the holes here (without rescanning tgsi). 80 */ 81 if (info.base.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) { 82 key.nr_sampler_views = 83 info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1; 84 for (unsigned i = 0; i < key.nr_sampler_views; i++) { 85 if (info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1 << i)) { 86 lp_sampler_static_texture_state( 87 &key.sampler[i].texture_state, 88 ctx->sampler_views[shader_type][i]); 89 } 90 } 91 } else { 92 key.nr_sampler_views = key.nr_samplers; 93 for (unsigned i = 0; i < key.nr_sampler_views; i++) { 94 if (info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) { 95 lp_sampler_static_texture_state( 96 &key.sampler[i].texture_state, 97 ctx->sampler_views[shader_type][i]); 98 } 99 } 100 } 101} 102 103void 104swr_generate_fs_key(struct swr_jit_fs_key &key, 105 struct swr_context *ctx, 106 swr_fragment_shader *swr_fs) 107{ 108 memset(&key, 0, sizeof(key)); 109 110 key.nr_cbufs = ctx->framebuffer.nr_cbufs; 111 key.light_twoside = ctx->rasterizer->light_twoside; 112 memcpy(&key.vs_output_semantic_name, 113 &ctx->vs->info.base.output_semantic_name, 114 sizeof(key.vs_output_semantic_name)); 115 memcpy(&key.vs_output_semantic_idx, 116 &ctx->vs->info.base.output_semantic_index, 117 sizeof(key.vs_output_semantic_idx)); 118 119 swr_generate_sampler_key(swr_fs->info, ctx, PIPE_SHADER_FRAGMENT, key); 120} 121 122void 123swr_generate_vs_key(struct swr_jit_vs_key &key, 124 struct swr_context *ctx, 125 swr_vertex_shader *swr_vs) 126{ 127 memset(&key, 0, sizeof(key)); 128 129 key.clip_plane_mask = 130 swr_vs->info.base.clipdist_writemask ? 131 swr_vs->info.base.clipdist_writemask & ctx->rasterizer->clip_plane_enable : 132 ctx->rasterizer->clip_plane_enable; 133 134 swr_generate_sampler_key(swr_vs->info, ctx, PIPE_SHADER_VERTEX, key); 135} 136 137struct BuilderSWR : public Builder { 138 BuilderSWR(JitManager *pJitMgr, const char *pName) 139 : Builder(pJitMgr) 140 { 141 pJitMgr->SetupNewModule(); 142 gallivm = gallivm_create(pName, wrap(&JM()->mContext)); 143 pJitMgr->mpCurrentModule = unwrap(gallivm->module); 144 } 145 146 ~BuilderSWR() { 147 gallivm_free_ir(gallivm); 148 } 149 150 struct gallivm_state *gallivm; 151 PFN_VERTEX_FUNC CompileVS(struct swr_context *ctx, swr_jit_vs_key &key); 152 PFN_PIXEL_KERNEL CompileFS(struct swr_context *ctx, swr_jit_fs_key &key); 153}; 154 155PFN_VERTEX_FUNC 156BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key) 157{ 158 struct swr_vertex_shader *swr_vs = ctx->vs; 159 160 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; 161 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; 162 163 memset(outputs, 0, sizeof(outputs)); 164 165 AttrBuilder attrBuilder; 166 attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float)); 167 AttributeSet attrSet = AttributeSet::get( 168 JM()->mContext, AttributeSet::FunctionIndex, attrBuilder); 169 170 std::vector<Type *> vsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0), 171 PointerType::get(Gen_SWR_VS_CONTEXT(JM()), 0)}; 172 FunctionType *vsFuncType = 173 FunctionType::get(Type::getVoidTy(JM()->mContext), vsArgs, false); 174 175 // create new vertex shader function 176 auto pFunction = Function::Create(vsFuncType, 177 GlobalValue::ExternalLinkage, 178 "VS", 179 JM()->mpCurrentModule); 180 pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet); 181 182 BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction); 183 IRB()->SetInsertPoint(block); 184 LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block)); 185 186 auto argitr = pFunction->arg_begin(); 187 Value *hPrivateData = &*argitr++; 188 hPrivateData->setName("hPrivateData"); 189 Value *pVsCtx = &*argitr++; 190 pVsCtx->setName("vsCtx"); 191 192 Value *consts_ptr = GEP(hPrivateData, {C(0), C(swr_draw_context_constantVS)}); 193 194 consts_ptr->setName("vs_constants"); 195 Value *const_sizes_ptr = 196 GEP(hPrivateData, {0, swr_draw_context_num_constantsVS}); 197 const_sizes_ptr->setName("num_vs_constants"); 198 199 Value *vtxInput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVin}); 200 201 for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; attrib++) { 202 const unsigned mask = swr_vs->info.base.input_usage_mask[attrib]; 203 for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) { 204 if (mask & (1 << channel)) { 205 inputs[attrib][channel] = 206 wrap(LOAD(vtxInput, {0, 0, attrib, channel})); 207 } 208 } 209 } 210 211 struct lp_build_sampler_soa *sampler = 212 swr_sampler_soa_create(key.sampler, PIPE_SHADER_VERTEX); 213 214 struct lp_bld_tgsi_system_values system_values; 215 memset(&system_values, 0, sizeof(system_values)); 216 system_values.instance_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_InstanceID})); 217 system_values.vertex_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_VertexID})); 218 219 lp_build_tgsi_soa(gallivm, 220 swr_vs->pipe.tokens, 221 lp_type_float_vec(32, 32 * 8), 222 NULL, // mask 223 wrap(consts_ptr), 224 wrap(const_sizes_ptr), 225 &system_values, 226 inputs, 227 outputs, 228 wrap(hPrivateData), // (sampler context) 229 NULL, // thread data 230 sampler, // sampler 231 &swr_vs->info.base, 232 NULL); // geometry shader face 233 234 sampler->destroy(sampler); 235 236 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 237 238 Value *vtxOutput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVout}); 239 240 for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) { 241 for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_OUTPUTS; attrib++) { 242 if (!outputs[attrib][channel]) 243 continue; 244 245 Value *val = LOAD(unwrap(outputs[attrib][channel])); 246 247 uint32_t outSlot = attrib; 248 if (swr_vs->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_PSIZE) 249 outSlot = VERTEX_POINT_SIZE_SLOT; 250 STORE(val, vtxOutput, {0, 0, outSlot, channel}); 251 } 252 } 253 254 if (ctx->rasterizer->clip_plane_enable || 255 swr_vs->info.base.culldist_writemask) { 256 unsigned clip_mask = ctx->rasterizer->clip_plane_enable; 257 258 unsigned cv = 0; 259 if (swr_vs->info.base.writes_clipvertex) { 260 cv = 1 + locate_linkage(TGSI_SEMANTIC_CLIPVERTEX, 0, 261 &swr_vs->info.base); 262 } else { 263 for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) { 264 if (swr_vs->info.base.output_semantic_name[i] == TGSI_SEMANTIC_POSITION && 265 swr_vs->info.base.output_semantic_index[i] == 0) { 266 cv = i; 267 break; 268 } 269 } 270 } 271 LLVMValueRef cx = LLVMBuildLoad(gallivm->builder, outputs[cv][0], ""); 272 LLVMValueRef cy = LLVMBuildLoad(gallivm->builder, outputs[cv][1], ""); 273 LLVMValueRef cz = LLVMBuildLoad(gallivm->builder, outputs[cv][2], ""); 274 LLVMValueRef cw = LLVMBuildLoad(gallivm->builder, outputs[cv][3], ""); 275 276 for (unsigned val = 0; val < PIPE_MAX_CLIP_PLANES; val++) { 277 // clip distance overrides user clip planes 278 if ((swr_vs->info.base.clipdist_writemask & clip_mask & (1 << val)) || 279 ((swr_vs->info.base.culldist_writemask << swr_vs->info.base.num_written_clipdistance) & (1 << val))) { 280 unsigned cv = 1 + locate_linkage(TGSI_SEMANTIC_CLIPDIST, val < 4 ? 0 : 1, 281 &swr_vs->info.base); 282 if (val < 4) { 283 LLVMValueRef dist = LLVMBuildLoad(gallivm->builder, outputs[cv][val], ""); 284 STORE(unwrap(dist), vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_LO_SLOT, val}); 285 } else { 286 LLVMValueRef dist = LLVMBuildLoad(gallivm->builder, outputs[cv][val - 4], ""); 287 STORE(unwrap(dist), vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4}); 288 } 289 continue; 290 } 291 292 if (!(clip_mask & (1 << val))) 293 continue; 294 295 Value *px = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 0})); 296 Value *py = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 1})); 297 Value *pz = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 2})); 298 Value *pw = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 3})); 299 Value *dist = FADD(FMUL(unwrap(cx), VBROADCAST(px)), 300 FADD(FMUL(unwrap(cy), VBROADCAST(py)), 301 FADD(FMUL(unwrap(cz), VBROADCAST(pz)), 302 FMUL(unwrap(cw), VBROADCAST(pw))))); 303 304 if (val < 4) 305 STORE(dist, vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_LO_SLOT, val}); 306 else 307 STORE(dist, vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4}); 308 } 309 } 310 311 RET_VOID(); 312 313 gallivm_verify_function(gallivm, wrap(pFunction)); 314 gallivm_compile_module(gallivm); 315 316 // lp_debug_dump_value(func); 317 318 PFN_VERTEX_FUNC pFunc = 319 (PFN_VERTEX_FUNC)gallivm_jit_function(gallivm, wrap(pFunction)); 320 321 debug_printf("vert shader %p\n", pFunc); 322 assert(pFunc && "Error: VertShader = NULL"); 323 324#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR >= 5) 325 JM()->mIsModuleFinalized = true; 326#endif 327 328 return pFunc; 329} 330 331PFN_VERTEX_FUNC 332swr_compile_vs(struct swr_context *ctx, swr_jit_vs_key &key) 333{ 334 BuilderSWR builder( 335 reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr), 336 "VS"); 337 PFN_VERTEX_FUNC func = builder.CompileVS(ctx, key); 338 339 ctx->vs->map.insert(std::make_pair(key, make_unique<VariantVS>(builder.gallivm, func))); 340 return func; 341} 342 343static unsigned 344locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info) 345{ 346 for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) { 347 if ((info->output_semantic_name[i] == name) 348 && (info->output_semantic_index[i] == index)) { 349 return i - 1; // position is not part of the linkage 350 } 351 } 352 353 if (name == TGSI_SEMANTIC_COLOR) { // BCOLOR fallback 354 for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) { 355 if ((info->output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) 356 && (info->output_semantic_index[i] == index)) { 357 return i - 1; // position is not part of the linkage 358 } 359 } 360 } 361 362 return 0xFFFFFFFF; 363} 364 365PFN_PIXEL_KERNEL 366BuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_fs_key &key) 367{ 368 struct swr_fragment_shader *swr_fs = ctx->fs; 369 370 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; 371 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; 372 373 memset(inputs, 0, sizeof(inputs)); 374 memset(outputs, 0, sizeof(outputs)); 375 376 struct lp_build_sampler_soa *sampler = NULL; 377 378 AttrBuilder attrBuilder; 379 attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float)); 380 AttributeSet attrSet = AttributeSet::get( 381 JM()->mContext, AttributeSet::FunctionIndex, attrBuilder); 382 383 std::vector<Type *> fsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0), 384 PointerType::get(Gen_SWR_PS_CONTEXT(JM()), 0)}; 385 FunctionType *funcType = 386 FunctionType::get(Type::getVoidTy(JM()->mContext), fsArgs, false); 387 388 auto pFunction = Function::Create(funcType, 389 GlobalValue::ExternalLinkage, 390 "FS", 391 JM()->mpCurrentModule); 392 pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet); 393 394 BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction); 395 IRB()->SetInsertPoint(block); 396 LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block)); 397 398 auto args = pFunction->arg_begin(); 399 Value *hPrivateData = &*args++; 400 hPrivateData->setName("hPrivateData"); 401 Value *pPS = &*args++; 402 pPS->setName("psCtx"); 403 404 Value *consts_ptr = GEP(hPrivateData, {0, swr_draw_context_constantFS}); 405 consts_ptr->setName("fs_constants"); 406 Value *const_sizes_ptr = 407 GEP(hPrivateData, {0, swr_draw_context_num_constantsFS}); 408 const_sizes_ptr->setName("num_fs_constants"); 409 410 // load *pAttribs, *pPerspAttribs 411 Value *pRawAttribs = LOAD(pPS, {0, SWR_PS_CONTEXT_pAttribs}, "pRawAttribs"); 412 Value *pPerspAttribs = 413 LOAD(pPS, {0, SWR_PS_CONTEXT_pPerspAttribs}, "pPerspAttribs"); 414 415 swr_fs->constantMask = 0; 416 swr_fs->flatConstantMask = 0; 417 swr_fs->pointSpriteMask = 0; 418 419 for (int attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; attrib++) { 420 const unsigned mask = swr_fs->info.base.input_usage_mask[attrib]; 421 const unsigned interpMode = swr_fs->info.base.input_interpolate[attrib]; 422 const unsigned interpLoc = swr_fs->info.base.input_interpolate_loc[attrib]; 423 424 if (!mask) 425 continue; 426 427 // load i,j 428 Value *vi = nullptr, *vj = nullptr; 429 switch (interpLoc) { 430 case TGSI_INTERPOLATE_LOC_CENTER: 431 vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_center}, "i"); 432 vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_center}, "j"); 433 break; 434 case TGSI_INTERPOLATE_LOC_CENTROID: 435 vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_centroid}, "i"); 436 vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_centroid}, "j"); 437 break; 438 case TGSI_INTERPOLATE_LOC_SAMPLE: 439 vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_sample}, "i"); 440 vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_sample}, "j"); 441 break; 442 } 443 444 // load/compute w 445 Value *vw = nullptr, *pAttribs; 446 if (interpMode == TGSI_INTERPOLATE_PERSPECTIVE) { 447 pAttribs = pPerspAttribs; 448 switch (interpLoc) { 449 case TGSI_INTERPOLATE_LOC_CENTER: 450 vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_center})); 451 break; 452 case TGSI_INTERPOLATE_LOC_CENTROID: 453 vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_centroid})); 454 break; 455 case TGSI_INTERPOLATE_LOC_SAMPLE: 456 vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_sample})); 457 break; 458 } 459 } else { 460 pAttribs = pRawAttribs; 461 vw = VIMMED1(1.f); 462 } 463 464 vw->setName("w"); 465 466 ubyte semantic_name = swr_fs->info.base.input_semantic_name[attrib]; 467 ubyte semantic_idx = swr_fs->info.base.input_semantic_index[attrib]; 468 469 if (semantic_name == TGSI_SEMANTIC_FACE) { 470 Value *ff = 471 UI_TO_FP(LOAD(pPS, {0, SWR_PS_CONTEXT_frontFace}), mFP32Ty); 472 ff = FSUB(FMUL(ff, C(2.0f)), C(1.0f)); 473 ff = VECTOR_SPLAT(JM()->mVWidth, ff, "vFrontFace"); 474 475 inputs[attrib][0] = wrap(ff); 476 inputs[attrib][1] = wrap(VIMMED1(0.0f)); 477 inputs[attrib][2] = wrap(VIMMED1(0.0f)); 478 inputs[attrib][3] = wrap(VIMMED1(1.0f)); 479 continue; 480 } else if (semantic_name == TGSI_SEMANTIC_POSITION) { // gl_FragCoord 481 inputs[attrib][0] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_center}, "vX")); 482 inputs[attrib][1] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_center}, "vY")); 483 inputs[attrib][2] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vZ}, "vZ")); 484 inputs[attrib][3] = 485 wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_center}, "vOneOverW")); 486 continue; 487 } else if (semantic_name == TGSI_SEMANTIC_PRIMID) { 488 Value *primID = LOAD(pPS, {0, SWR_PS_CONTEXT_primID}, "primID"); 489 inputs[attrib][0] = wrap(VECTOR_SPLAT(JM()->mVWidth, primID)); 490 inputs[attrib][1] = wrap(VIMMED1(0)); 491 inputs[attrib][2] = wrap(VIMMED1(0)); 492 inputs[attrib][3] = wrap(VIMMED1(0)); 493 continue; 494 } 495 496 unsigned linkedAttrib = 497 locate_linkage(semantic_name, semantic_idx, &ctx->vs->info.base); 498 if (linkedAttrib == 0xFFFFFFFF) { 499 // not found - check for point sprite 500 if (ctx->rasterizer->sprite_coord_enable) { 501 linkedAttrib = ctx->vs->info.base.num_outputs - 1; 502 swr_fs->pointSpriteMask |= (1 << linkedAttrib); 503 } else { 504 fprintf(stderr, 505 "Missing %s[%d]\n", 506 tgsi_semantic_names[semantic_name], 507 semantic_idx); 508 assert(0 && "attribute linkage not found"); 509 } 510 } 511 512 if (interpMode == TGSI_INTERPOLATE_CONSTANT) { 513 swr_fs->constantMask |= 1 << linkedAttrib; 514 } else if (interpMode == TGSI_INTERPOLATE_COLOR) { 515 swr_fs->flatConstantMask |= 1 << linkedAttrib; 516 } 517 518 for (int channel = 0; channel < TGSI_NUM_CHANNELS; channel++) { 519 if (mask & (1 << channel)) { 520 Value *indexA = C(linkedAttrib * 12 + channel); 521 Value *indexB = C(linkedAttrib * 12 + channel + 4); 522 Value *indexC = C(linkedAttrib * 12 + channel + 8); 523 524 if ((semantic_name == TGSI_SEMANTIC_COLOR) 525 && ctx->rasterizer->light_twoside) { 526 unsigned bcolorAttrib = locate_linkage( 527 TGSI_SEMANTIC_BCOLOR, semantic_idx, &ctx->vs->info.base); 528 529 unsigned diff = 12 * (bcolorAttrib - linkedAttrib); 530 531 Value *back = 532 XOR(C(1), LOAD(pPS, {0, SWR_PS_CONTEXT_frontFace}), "backFace"); 533 534 Value *offset = MUL(back, C(diff)); 535 offset->setName("offset"); 536 537 indexA = ADD(indexA, offset); 538 indexB = ADD(indexB, offset); 539 indexC = ADD(indexC, offset); 540 541 if (interpMode == TGSI_INTERPOLATE_CONSTANT) { 542 swr_fs->constantMask |= 1 << bcolorAttrib; 543 } else if (interpMode == TGSI_INTERPOLATE_COLOR) { 544 swr_fs->flatConstantMask |= 1 << bcolorAttrib; 545 } 546 } 547 548 Value *va = VBROADCAST(LOAD(GEP(pAttribs, indexA))); 549 Value *vb = VBROADCAST(LOAD(GEP(pAttribs, indexB))); 550 Value *vc = VBROADCAST(LOAD(GEP(pAttribs, indexC))); 551 552 if (interpMode == TGSI_INTERPOLATE_CONSTANT) { 553 inputs[attrib][channel] = wrap(va); 554 } else { 555 Value *vk = FSUB(FSUB(VIMMED1(1.0f), vi), vj); 556 557 vc = FMUL(vk, vc); 558 559 Value *interp = FMUL(va, vi); 560 Value *interp1 = FMUL(vb, vj); 561 interp = FADD(interp, interp1); 562 interp = FADD(interp, vc); 563 if (interpMode == TGSI_INTERPOLATE_PERSPECTIVE) 564 interp = FMUL(interp, vw); 565 inputs[attrib][channel] = wrap(interp); 566 } 567 } 568 } 569 } 570 571 sampler = swr_sampler_soa_create(key.sampler, PIPE_SHADER_FRAGMENT); 572 573 struct lp_bld_tgsi_system_values system_values; 574 memset(&system_values, 0, sizeof(system_values)); 575 576 struct lp_build_mask_context mask; 577 578 if (swr_fs->info.base.uses_kill) { 579 Value *mask_val = LOAD(pPS, {0, SWR_PS_CONTEXT_activeMask}, "activeMask"); 580 lp_build_mask_begin( 581 &mask, gallivm, lp_type_float_vec(32, 32 * 8), wrap(mask_val)); 582 } 583 584 lp_build_tgsi_soa(gallivm, 585 swr_fs->pipe.tokens, 586 lp_type_float_vec(32, 32 * 8), 587 swr_fs->info.base.uses_kill ? &mask : NULL, // mask 588 wrap(consts_ptr), 589 wrap(const_sizes_ptr), 590 &system_values, 591 inputs, 592 outputs, 593 wrap(hPrivateData), 594 NULL, // thread data 595 sampler, // sampler 596 &swr_fs->info.base, 597 NULL); // geometry shader face 598 599 sampler->destroy(sampler); 600 601 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 602 603 for (uint32_t attrib = 0; attrib < swr_fs->info.base.num_outputs; 604 attrib++) { 605 switch (swr_fs->info.base.output_semantic_name[attrib]) { 606 case TGSI_SEMANTIC_POSITION: { 607 // write z 608 LLVMValueRef outZ = 609 LLVMBuildLoad(gallivm->builder, outputs[attrib][2], ""); 610 STORE(unwrap(outZ), pPS, {0, SWR_PS_CONTEXT_vZ}); 611 break; 612 } 613 case TGSI_SEMANTIC_COLOR: { 614 for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) { 615 if (!outputs[attrib][channel]) 616 continue; 617 618 LLVMValueRef out = 619 LLVMBuildLoad(gallivm->builder, outputs[attrib][channel], ""); 620 if (swr_fs->info.base.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]) { 621 for (uint32_t rt = 0; rt < key.nr_cbufs; rt++) { 622 STORE(unwrap(out), 623 pPS, 624 {0, SWR_PS_CONTEXT_shaded, rt, channel}); 625 } 626 } else { 627 STORE(unwrap(out), 628 pPS, 629 {0, 630 SWR_PS_CONTEXT_shaded, 631 swr_fs->info.base.output_semantic_index[attrib], 632 channel}); 633 } 634 } 635 break; 636 } 637 default: { 638 fprintf(stderr, 639 "unknown output from FS %s[%d]\n", 640 tgsi_semantic_names[swr_fs->info.base 641 .output_semantic_name[attrib]], 642 swr_fs->info.base.output_semantic_index[attrib]); 643 break; 644 } 645 } 646 } 647 648 LLVMValueRef mask_result = 0; 649 if (swr_fs->info.base.uses_kill) { 650 mask_result = lp_build_mask_end(&mask); 651 } 652 653 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 654 655 if (swr_fs->info.base.uses_kill) { 656 STORE(unwrap(mask_result), pPS, {0, SWR_PS_CONTEXT_activeMask}); 657 } 658 659 RET_VOID(); 660 661 gallivm_verify_function(gallivm, wrap(pFunction)); 662 663 gallivm_compile_module(gallivm); 664 665 PFN_PIXEL_KERNEL kernel = 666 (PFN_PIXEL_KERNEL)gallivm_jit_function(gallivm, wrap(pFunction)); 667 debug_printf("frag shader %p\n", kernel); 668 assert(kernel && "Error: FragShader = NULL"); 669 670#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR >= 5) 671 JM()->mIsModuleFinalized = true; 672#endif 673 674 return kernel; 675} 676 677PFN_PIXEL_KERNEL 678swr_compile_fs(struct swr_context *ctx, swr_jit_fs_key &key) 679{ 680 BuilderSWR builder( 681 reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr), 682 "FS"); 683 PFN_PIXEL_KERNEL func = builder.CompileFS(ctx, key); 684 685 ctx->fs->map.insert(std::make_pair(key, make_unique<VariantFS>(builder.gallivm, func))); 686 return func; 687} 688