blend_jit.cpp revision d3d97f8395513bf365d2fe8e4292c8098290586f
1/**************************************************************************** 2* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. 3* 4* Permission is hereby granted, free of charge, to any person obtaining a 5* copy of this software and associated documentation files (the "Software"), 6* to deal in the Software without restriction, including without limitation 7* the rights to use, copy, modify, merge, publish, distribute, sublicense, 8* and/or sell copies of the Software, and to permit persons to whom the 9* Software is furnished to do so, subject to the following conditions: 10* 11* The above copyright notice and this permission notice (including the next 12* paragraph) shall be included in all copies or substantial portions of the 13* Software. 14* 15* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21* IN THE SOFTWARE. 22* 23* @file blend_jit.cpp 24* 25* @brief Implementation of the blend jitter 26* 27* Notes: 28* 29******************************************************************************/ 30#include "jit_api.h" 31#include "blend_jit.h" 32#include "builder.h" 33#include "state_llvm.h" 34 35#include <sstream> 36 37// components with bit-widths <= the QUANTIZE_THRESHOLD will be quantized 38#define QUANTIZE_THRESHOLD 2 39 40////////////////////////////////////////////////////////////////////////// 41/// Interface to Jitting a blend shader 42////////////////////////////////////////////////////////////////////////// 43struct BlendJit : public Builder 44{ 45 BlendJit(JitManager* pJitMgr) : Builder(pJitMgr){}; 46 47 template<bool Color, bool Alpha> 48 void GenerateBlendFactor(SWR_BLEND_FACTOR factor, Value* constColor[4], Value* src[4], Value* src1[4], Value* dst[4], Value* result[4]) 49 { 50 Value* out[4]; 51 52 switch (factor) 53 { 54 case BLENDFACTOR_ONE: 55 out[0] = out[1] = out[2] = out[3] = VIMMED1(1.0f); 56 break; 57 case BLENDFACTOR_SRC_COLOR: 58 out[0] = src[0]; 59 out[1] = src[1]; 60 out[2] = src[2]; 61 out[3] = src[3]; 62 break; 63 case BLENDFACTOR_SRC_ALPHA: 64 out[0] = out[1] = out[2] = out[3] = src[3]; 65 break; 66 case BLENDFACTOR_DST_ALPHA: 67 out[0] = out[1] = out[2] = out[3] = dst[3]; 68 break; 69 case BLENDFACTOR_DST_COLOR: 70 out[0] = dst[0]; 71 out[1] = dst[1]; 72 out[2] = dst[2]; 73 out[3] = dst[3]; 74 break; 75 case BLENDFACTOR_SRC_ALPHA_SATURATE: 76 out[0] = out[1] = out[2] = VMINPS(src[3], FSUB(VIMMED1(1.0f), dst[3])); 77 out[3] = VIMMED1(1.0f); 78 break; 79 case BLENDFACTOR_CONST_COLOR: 80 out[0] = constColor[0]; 81 out[1] = constColor[1]; 82 out[2] = constColor[2]; 83 out[3] = constColor[3]; 84 break; 85 case BLENDFACTOR_CONST_ALPHA: 86 out[0] = out[1] = out[2] = out[3] = constColor[3]; 87 break; 88 case BLENDFACTOR_SRC1_COLOR: 89 out[0] = src1[0]; 90 out[1] = src1[1]; 91 out[2] = src1[2]; 92 out[3] = src1[3]; 93 break; 94 case BLENDFACTOR_SRC1_ALPHA: 95 out[0] = out[1] = out[2] = out[3] = src1[3]; 96 break; 97 case BLENDFACTOR_ZERO: 98 out[0] = out[1] = out[2] = out[3] = VIMMED1(0.0f); 99 break; 100 case BLENDFACTOR_INV_SRC_COLOR: 101 out[0] = FSUB(VIMMED1(1.0f), src[0]); 102 out[1] = FSUB(VIMMED1(1.0f), src[1]); 103 out[2] = FSUB(VIMMED1(1.0f), src[2]); 104 out[3] = FSUB(VIMMED1(1.0f), src[3]); 105 break; 106 case BLENDFACTOR_INV_SRC_ALPHA: 107 out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), src[3]); 108 break; 109 case BLENDFACTOR_INV_DST_ALPHA: 110 out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), dst[3]); 111 break; 112 case BLENDFACTOR_INV_DST_COLOR: 113 out[0] = FSUB(VIMMED1(1.0f), dst[0]); 114 out[1] = FSUB(VIMMED1(1.0f), dst[1]); 115 out[2] = FSUB(VIMMED1(1.0f), dst[2]); 116 out[3] = FSUB(VIMMED1(1.0f), dst[3]); 117 break; 118 case BLENDFACTOR_INV_CONST_COLOR: 119 out[0] = FSUB(VIMMED1(1.0f), constColor[0]); 120 out[1] = FSUB(VIMMED1(1.0f), constColor[1]); 121 out[2] = FSUB(VIMMED1(1.0f), constColor[2]); 122 out[3] = FSUB(VIMMED1(1.0f), constColor[3]); 123 break; 124 case BLENDFACTOR_INV_CONST_ALPHA: 125 out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), constColor[3]); 126 break; 127 case BLENDFACTOR_INV_SRC1_COLOR: 128 out[0] = FSUB(VIMMED1(1.0f), src1[0]); 129 out[1] = FSUB(VIMMED1(1.0f), src1[1]); 130 out[2] = FSUB(VIMMED1(1.0f), src1[2]); 131 out[3] = FSUB(VIMMED1(1.0f), src1[3]); 132 break; 133 case BLENDFACTOR_INV_SRC1_ALPHA: 134 out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), src1[3]); 135 break; 136 default: 137 SWR_ASSERT(false, "Unsupported blend factor: %d", factor); 138 out[0] = out[1] = out[2] = out[3] = VIMMED1(0.0f); 139 break; 140 } 141 142 if (Color) 143 { 144 result[0] = out[0]; 145 result[1] = out[1]; 146 result[2] = out[2]; 147 } 148 149 if (Alpha) 150 { 151 result[3] = out[3]; 152 } 153 } 154 155 void Clamp(SWR_FORMAT format, Value* src[4]) 156 { 157 const SWR_FORMAT_INFO& info = GetFormatInfo(format); 158 SWR_TYPE type = info.type[0]; 159 160 switch (type) 161 { 162 case SWR_TYPE_FLOAT: 163 break; 164 165 case SWR_TYPE_UNORM: 166 src[0] = VMINPS(VMAXPS(src[0], VIMMED1(0.0f)), VIMMED1(1.0f)); 167 src[1] = VMINPS(VMAXPS(src[1], VIMMED1(0.0f)), VIMMED1(1.0f)); 168 src[2] = VMINPS(VMAXPS(src[2], VIMMED1(0.0f)), VIMMED1(1.0f)); 169 src[3] = VMINPS(VMAXPS(src[3], VIMMED1(0.0f)), VIMMED1(1.0f)); 170 break; 171 172 case SWR_TYPE_SNORM: 173 src[0] = VMINPS(VMAXPS(src[0], VIMMED1(-1.0f)), VIMMED1(1.0f)); 174 src[1] = VMINPS(VMAXPS(src[1], VIMMED1(-1.0f)), VIMMED1(1.0f)); 175 src[2] = VMINPS(VMAXPS(src[2], VIMMED1(-1.0f)), VIMMED1(1.0f)); 176 src[3] = VMINPS(VMAXPS(src[3], VIMMED1(-1.0f)), VIMMED1(1.0f)); 177 break; 178 179 default: SWR_ASSERT(false, "Unsupport format type: %d", type); 180 } 181 } 182 183 void ApplyDefaults(SWR_FORMAT format, Value* src[4]) 184 { 185 const SWR_FORMAT_INFO& info = GetFormatInfo(format); 186 187 bool valid[] = { false, false, false, false }; 188 for (uint32_t c = 0; c < info.numComps; ++c) 189 { 190 valid[info.swizzle[c]] = true; 191 } 192 193 for (uint32_t c = 0; c < 4; ++c) 194 { 195 if (!valid[c]) 196 { 197 src[c] = BITCAST(VIMMED1((int)info.defaults[c]), mSimdFP32Ty); 198 } 199 } 200 } 201 202 void ApplyUnusedDefaults(SWR_FORMAT format, Value* src[4]) 203 { 204 const SWR_FORMAT_INFO& info = GetFormatInfo(format); 205 206 for (uint32_t c = 0; c < info.numComps; ++c) 207 { 208 if (info.type[c] == SWR_TYPE_UNUSED) 209 { 210 src[info.swizzle[c]] = BITCAST(VIMMED1((int)info.defaults[info.swizzle[c]]), mSimdFP32Ty); 211 } 212 } 213 } 214 215 void Quantize(SWR_FORMAT format, Value* src[4]) 216 { 217 const SWR_FORMAT_INFO& info = GetFormatInfo(format); 218 for (uint32_t c = 0; c < info.numComps; ++c) 219 { 220 if (info.bpc[c] <= QUANTIZE_THRESHOLD) 221 { 222 uint32_t swizComp = info.swizzle[c]; 223 float factor = (float)((1 << info.bpc[c]) - 1); 224 switch (info.type[c]) 225 { 226 case SWR_TYPE_UNORM: 227 src[swizComp] = FADD(FMUL(src[swizComp], VIMMED1(factor)), VIMMED1(0.5f)); 228 src[swizComp] = VROUND(src[swizComp], C(_MM_FROUND_TO_ZERO)); 229 src[swizComp] = FMUL(src[swizComp], VIMMED1(1.0f /factor)); 230 break; 231 default: SWR_ASSERT(false, "Unsupported format type: %d", info.type[c]); 232 } 233 } 234 } 235 } 236 237 template<bool Color, bool Alpha> 238 void BlendFunc(SWR_BLEND_OP blendOp, Value* src[4], Value* srcFactor[4], Value* dst[4], Value* dstFactor[4], Value* result[4]) 239 { 240 Value* out[4]; 241 Value* srcBlend[4]; 242 Value* dstBlend[4]; 243 for (uint32_t i = 0; i < 4; ++i) 244 { 245 srcBlend[i] = FMUL(src[i], srcFactor[i]); 246 dstBlend[i] = FMUL(dst[i], dstFactor[i]); 247 } 248 249 switch (blendOp) 250 { 251 case BLENDOP_ADD: 252 out[0] = FADD(srcBlend[0], dstBlend[0]); 253 out[1] = FADD(srcBlend[1], dstBlend[1]); 254 out[2] = FADD(srcBlend[2], dstBlend[2]); 255 out[3] = FADD(srcBlend[3], dstBlend[3]); 256 break; 257 258 case BLENDOP_SUBTRACT: 259 out[0] = FSUB(srcBlend[0], dstBlend[0]); 260 out[1] = FSUB(srcBlend[1], dstBlend[1]); 261 out[2] = FSUB(srcBlend[2], dstBlend[2]); 262 out[3] = FSUB(srcBlend[3], dstBlend[3]); 263 break; 264 265 case BLENDOP_REVSUBTRACT: 266 out[0] = FSUB(dstBlend[0], srcBlend[0]); 267 out[1] = FSUB(dstBlend[1], srcBlend[1]); 268 out[2] = FSUB(dstBlend[2], srcBlend[2]); 269 out[3] = FSUB(dstBlend[3], srcBlend[3]); 270 break; 271 272 case BLENDOP_MIN: 273 out[0] = VMINPS(src[0], dst[0]); 274 out[1] = VMINPS(src[1], dst[1]); 275 out[2] = VMINPS(src[2], dst[2]); 276 out[3] = VMINPS(src[3], dst[3]); 277 break; 278 279 case BLENDOP_MAX: 280 out[0] = VMAXPS(src[0], dst[0]); 281 out[1] = VMAXPS(src[1], dst[1]); 282 out[2] = VMAXPS(src[2], dst[2]); 283 out[3] = VMAXPS(src[3], dst[3]); 284 break; 285 286 default: 287 SWR_ASSERT(false, "Unsupported blend operation: %d", blendOp); 288 out[0] = out[1] = out[2] = out[3] = VIMMED1(0.0f); 289 break; 290 } 291 292 if (Color) 293 { 294 result[0] = out[0]; 295 result[1] = out[1]; 296 result[2] = out[2]; 297 } 298 299 if (Alpha) 300 { 301 result[3] = out[3]; 302 } 303 } 304 305 void LogicOpFunc(SWR_LOGIC_OP logicOp, Value* src[4], Value* dst[4], Value* result[4]) 306 { 307 // Op: (s == PS output, d = RT contents) 308 switch(logicOp) 309 { 310 case LOGICOP_CLEAR: 311 result[0] = VIMMED1(0); 312 result[1] = VIMMED1(0); 313 result[2] = VIMMED1(0); 314 result[3] = VIMMED1(0); 315 break; 316 317 case LOGICOP_NOR: 318 // ~(s | d) 319 result[0] = XOR(OR(src[0], dst[0]), VIMMED1(0xFFFFFFFF)); 320 result[1] = XOR(OR(src[1], dst[1]), VIMMED1(0xFFFFFFFF)); 321 result[2] = XOR(OR(src[2], dst[2]), VIMMED1(0xFFFFFFFF)); 322 result[3] = XOR(OR(src[3], dst[3]), VIMMED1(0xFFFFFFFF)); 323 break; 324 325 case LOGICOP_AND_INVERTED: 326 // ~s & d 327 // todo: use avx andnot instr when I can find the intrinsic to call 328 result[0] = AND(XOR(src[0], VIMMED1(0xFFFFFFFF)), dst[0]); 329 result[1] = AND(XOR(src[1], VIMMED1(0xFFFFFFFF)), dst[1]); 330 result[2] = AND(XOR(src[2], VIMMED1(0xFFFFFFFF)), dst[2]); 331 result[3] = AND(XOR(src[3], VIMMED1(0xFFFFFFFF)), dst[3]); 332 break; 333 334 case LOGICOP_COPY_INVERTED: 335 // ~s 336 result[0] = XOR(src[0], VIMMED1(0xFFFFFFFF)); 337 result[1] = XOR(src[1], VIMMED1(0xFFFFFFFF)); 338 result[2] = XOR(src[2], VIMMED1(0xFFFFFFFF)); 339 result[3] = XOR(src[3], VIMMED1(0xFFFFFFFF)); 340 break; 341 342 case LOGICOP_AND_REVERSE: 343 // s & ~d 344 // todo: use avx andnot instr when I can find the intrinsic to call 345 result[0] = AND(XOR(dst[0], VIMMED1(0xFFFFFFFF)), src[0]); 346 result[1] = AND(XOR(dst[1], VIMMED1(0xFFFFFFFF)), src[1]); 347 result[2] = AND(XOR(dst[2], VIMMED1(0xFFFFFFFF)), src[2]); 348 result[3] = AND(XOR(dst[3], VIMMED1(0xFFFFFFFF)), src[3]); 349 break; 350 351 case LOGICOP_INVERT: 352 // ~d 353 result[0] = XOR(dst[0], VIMMED1(0xFFFFFFFF)); 354 result[1] = XOR(dst[1], VIMMED1(0xFFFFFFFF)); 355 result[2] = XOR(dst[2], VIMMED1(0xFFFFFFFF)); 356 result[3] = XOR(dst[3], VIMMED1(0xFFFFFFFF)); 357 break; 358 359 case LOGICOP_XOR: 360 // s ^ d 361 result[0] = XOR(src[0], dst[0]); 362 result[1] = XOR(src[1], dst[1]); 363 result[2] = XOR(src[2], dst[2]); 364 result[3] = XOR(src[3], dst[3]); 365 break; 366 367 case LOGICOP_NAND: 368 // ~(s & d) 369 result[0] = XOR(AND(src[0], dst[0]), VIMMED1(0xFFFFFFFF)); 370 result[1] = XOR(AND(src[1], dst[1]), VIMMED1(0xFFFFFFFF)); 371 result[2] = XOR(AND(src[2], dst[2]), VIMMED1(0xFFFFFFFF)); 372 result[3] = XOR(AND(src[3], dst[3]), VIMMED1(0xFFFFFFFF)); 373 break; 374 375 case LOGICOP_AND: 376 // s & d 377 result[0] = AND(src[0], dst[0]); 378 result[1] = AND(src[1], dst[1]); 379 result[2] = AND(src[2], dst[2]); 380 result[3] = AND(src[3], dst[3]); 381 break; 382 383 case LOGICOP_EQUIV: 384 // ~(s ^ d) 385 result[0] = XOR(XOR(src[0], dst[0]), VIMMED1(0xFFFFFFFF)); 386 result[1] = XOR(XOR(src[1], dst[1]), VIMMED1(0xFFFFFFFF)); 387 result[2] = XOR(XOR(src[2], dst[2]), VIMMED1(0xFFFFFFFF)); 388 result[3] = XOR(XOR(src[3], dst[3]), VIMMED1(0xFFFFFFFF)); 389 break; 390 391 case LOGICOP_NOOP: 392 result[0] = dst[0]; 393 result[1] = dst[1]; 394 result[2] = dst[2]; 395 result[3] = dst[3]; 396 break; 397 398 case LOGICOP_OR_INVERTED: 399 // ~s | d 400 result[0] = OR(XOR(src[0], VIMMED1(0xFFFFFFFF)), dst[0]); 401 result[1] = OR(XOR(src[1], VIMMED1(0xFFFFFFFF)), dst[1]); 402 result[2] = OR(XOR(src[2], VIMMED1(0xFFFFFFFF)), dst[2]); 403 result[3] = OR(XOR(src[3], VIMMED1(0xFFFFFFFF)), dst[3]); 404 break; 405 406 case LOGICOP_COPY: 407 result[0] = src[0]; 408 result[1] = src[1]; 409 result[2] = src[2]; 410 result[3] = src[3]; 411 break; 412 413 case LOGICOP_OR_REVERSE: 414 // s | ~d 415 result[0] = OR(XOR(dst[0], VIMMED1(0xFFFFFFFF)), src[0]); 416 result[1] = OR(XOR(dst[1], VIMMED1(0xFFFFFFFF)), src[1]); 417 result[2] = OR(XOR(dst[2], VIMMED1(0xFFFFFFFF)), src[2]); 418 result[3] = OR(XOR(dst[3], VIMMED1(0xFFFFFFFF)), src[3]); 419 break; 420 421 case LOGICOP_OR: 422 // s | d 423 result[0] = OR(src[0], dst[0]); 424 result[1] = OR(src[1], dst[1]); 425 result[2] = OR(src[2], dst[2]); 426 result[3] = OR(src[3], dst[3]); 427 break; 428 429 case LOGICOP_SET: 430 result[0] = VIMMED1(0xFFFFFFFF); 431 result[1] = VIMMED1(0xFFFFFFFF); 432 result[2] = VIMMED1(0xFFFFFFFF); 433 result[3] = VIMMED1(0xFFFFFFFF); 434 break; 435 436 default: 437 SWR_ASSERT(false, "Unsupported logic operation: %d", logicOp); 438 result[0] = result[1] = result[2] = result[3] = VIMMED1(0.0f); 439 break; 440 } 441 } 442 443 void AlphaTest(const BLEND_COMPILE_STATE& state, Value* pBlendState, Value* pAlpha, Value* ppMask) 444 { 445 // load uint32_t reference 446 Value* pRef = VBROADCAST(LOAD(pBlendState, { 0, SWR_BLEND_STATE_alphaTestReference })); 447 448 Value* pTest = nullptr; 449 if (state.alphaTestFormat == ALPHA_TEST_UNORM8) 450 { 451 // convert float alpha to unorm8 452 Value* pAlphaU8 = FMUL(pAlpha, VIMMED1(256.0f)); 453 pAlphaU8 = FP_TO_UI(pAlphaU8, mSimdInt32Ty); 454 455 // compare 456 switch (state.alphaTestFunction) 457 { 458 case ZFUNC_ALWAYS: pTest = VIMMED1(true); break; 459 case ZFUNC_NEVER: pTest = VIMMED1(false); break; 460 case ZFUNC_LT: pTest = ICMP_ULT(pAlphaU8, pRef); break; 461 case ZFUNC_EQ: pTest = ICMP_EQ(pAlphaU8, pRef); break; 462 case ZFUNC_LE: pTest = ICMP_ULE(pAlphaU8, pRef); break; 463 case ZFUNC_GT: pTest = ICMP_UGT(pAlphaU8, pRef); break; 464 case ZFUNC_NE: pTest = ICMP_NE(pAlphaU8, pRef); break; 465 case ZFUNC_GE: pTest = ICMP_UGE(pAlphaU8, pRef); break; 466 default: 467 SWR_ASSERT(false, "Invalid alpha test function"); 468 break; 469 } 470 } 471 else 472 { 473 // cast ref to float 474 pRef = BITCAST(pRef, mSimdFP32Ty); 475 476 // compare 477 switch (state.alphaTestFunction) 478 { 479 case ZFUNC_ALWAYS: pTest = VIMMED1(true); break; 480 case ZFUNC_NEVER: pTest = VIMMED1(false); break; 481 case ZFUNC_LT: pTest = FCMP_OLT(pAlpha, pRef); break; 482 case ZFUNC_EQ: pTest = FCMP_OEQ(pAlpha, pRef); break; 483 case ZFUNC_LE: pTest = FCMP_OLE(pAlpha, pRef); break; 484 case ZFUNC_GT: pTest = FCMP_OGT(pAlpha, pRef); break; 485 case ZFUNC_NE: pTest = FCMP_ONE(pAlpha, pRef); break; 486 case ZFUNC_GE: pTest = FCMP_OGE(pAlpha, pRef); break; 487 default: 488 SWR_ASSERT(false, "Invalid alpha test function"); 489 break; 490 } 491 } 492 493 // load current mask 494 Value* pMask = LOAD(ppMask); 495 496 // convert to int1 mask 497 pMask = MASK(pMask); 498 499 // and with alpha test result 500 pMask = AND(pMask, pTest); 501 502 // convert back to vector mask 503 pMask = VMASK(pMask); 504 505 // store new mask 506 STORE(pMask, ppMask); 507 } 508 509 Function* Create(const BLEND_COMPILE_STATE& state) 510 { 511 static std::size_t jitNum = 0; 512 513 std::stringstream fnName("BlendShader", std::ios_base::in | std::ios_base::out | std::ios_base::ate); 514 fnName << jitNum++; 515 516 // blend function signature 517 //typedef void(*PFN_BLEND_JIT_FUNC)(const SWR_BLEND_STATE*, simdvector&, simdvector&, uint32_t, BYTE*, simdvector&, simdscalari*, simdscalari*); 518 519 std::vector<Type*> args{ 520 PointerType::get(Gen_SWR_BLEND_STATE(JM()), 0), // SWR_BLEND_STATE* 521 PointerType::get(mSimdFP32Ty, 0), // simdvector& src 522 PointerType::get(mSimdFP32Ty, 0), // simdvector& src1 523 Type::getInt32Ty(JM()->mContext), // sampleNum 524 PointerType::get(mSimdFP32Ty, 0), // uint8_t* pDst 525 PointerType::get(mSimdFP32Ty, 0), // simdvector& result 526 PointerType::get(mSimdInt32Ty, 0), // simdscalari* oMask 527 PointerType::get(mSimdInt32Ty, 0), // simdscalari* pMask 528 }; 529 530 FunctionType* fTy = FunctionType::get(IRB()->getVoidTy(), args, false); 531 Function* blendFunc = Function::Create(fTy, GlobalValue::ExternalLinkage, fnName.str(), JM()->mpCurrentModule); 532 533 BasicBlock* entry = BasicBlock::Create(JM()->mContext, "entry", blendFunc); 534 535 IRB()->SetInsertPoint(entry); 536 537 // arguments 538 auto argitr = blendFunc->getArgumentList().begin(); 539 Value* pBlendState = &*argitr++; 540 pBlendState->setName("pBlendState"); 541 Value* pSrc = &*argitr++; 542 pSrc->setName("src"); 543 Value* pSrc1 = &*argitr++; 544 pSrc1->setName("src1"); 545 Value* sampleNum = &*argitr++; 546 sampleNum->setName("sampleNum"); 547 Value* pDst = &*argitr++; 548 pDst->setName("pDst"); 549 Value* pResult = &*argitr++; 550 pResult->setName("result"); 551 Value* ppoMask = &*argitr++; 552 ppoMask->setName("ppoMask"); 553 Value* ppMask = &*argitr++; 554 ppMask->setName("pMask"); 555 556 static_assert(KNOB_COLOR_HOT_TILE_FORMAT == R32G32B32A32_FLOAT, "Unsupported hot tile format"); 557 Value* dst[4]; 558 Value* constantColor[4]; 559 Value* src[4]; 560 Value* src1[4]; 561 Value* result[4]; 562 for (uint32_t i = 0; i < 4; ++i) 563 { 564 // load hot tile 565 dst[i] = LOAD(pDst, { i }); 566 567 // load constant color 568 constantColor[i] = VBROADCAST(LOAD(pBlendState, { 0, SWR_BLEND_STATE_constantColor, i })); 569 570 // load src 571 src[i] = LOAD(pSrc, { i }); 572 573 // load src1 574 src1[i] = LOAD(pSrc1, { i }); 575 } 576 Value* currentMask = VIMMED1(-1); 577 if (state.desc.alphaToCoverageEnable) 578 { 579 Value* pClampedSrc = FCLAMP(src[3], 0.0f, 1.0f); 580 uint32_t bits = (1 << state.desc.numSamples) - 1; 581 currentMask = FMUL(pClampedSrc, VBROADCAST(C((float)bits))); 582 currentMask = FP_TO_SI(FADD(currentMask, VIMMED1(0.5f)), mSimdInt32Ty); 583 } 584 585 // alpha test 586 if (state.desc.alphaTestEnable) 587 { 588 AlphaTest(state, pBlendState, src[3], ppMask); 589 } 590 591 // color blend 592 if (state.blendState.blendEnable) 593 { 594 // clamp sources 595 Clamp(state.format, src); 596 Clamp(state.format, src1); 597 Clamp(state.format, dst); 598 Clamp(state.format, constantColor); 599 600 // apply defaults to hottile contents to take into account missing components 601 ApplyDefaults(state.format, dst); 602 603 // Force defaults for unused 'X' components 604 ApplyUnusedDefaults(state.format, dst); 605 606 // Quantize low precision components 607 Quantize(state.format, dst); 608 609 // special case clamping for R11G11B10_float which has no sign bit 610 if (state.format == R11G11B10_FLOAT) 611 { 612 dst[0] = VMAXPS(dst[0], VIMMED1(0.0f)); 613 dst[1] = VMAXPS(dst[1], VIMMED1(0.0f)); 614 dst[2] = VMAXPS(dst[2], VIMMED1(0.0f)); 615 dst[3] = VMAXPS(dst[3], VIMMED1(0.0f)); 616 } 617 618 Value* srcFactor[4]; 619 Value* dstFactor[4]; 620 if (state.desc.independentAlphaBlendEnable) 621 { 622 GenerateBlendFactor<true, false>(state.blendState.sourceBlendFactor, constantColor, src, src1, dst, srcFactor); 623 GenerateBlendFactor<false, true>(state.blendState.sourceAlphaBlendFactor, constantColor, src, src1, dst, srcFactor); 624 625 GenerateBlendFactor<true, false>(state.blendState.destBlendFactor, constantColor, src, src1, dst, dstFactor); 626 GenerateBlendFactor<false, true>(state.blendState.destAlphaBlendFactor, constantColor, src, src1, dst, dstFactor); 627 628 BlendFunc<true, false>(state.blendState.colorBlendFunc, src, srcFactor, dst, dstFactor, result); 629 BlendFunc<false, true>(state.blendState.alphaBlendFunc, src, srcFactor, dst, dstFactor, result); 630 } 631 else 632 { 633 GenerateBlendFactor<true, true>(state.blendState.sourceBlendFactor, constantColor, src, src1, dst, srcFactor); 634 GenerateBlendFactor<true, true>(state.blendState.destBlendFactor, constantColor, src, src1, dst, dstFactor); 635 636 BlendFunc<true, true>(state.blendState.colorBlendFunc, src, srcFactor, dst, dstFactor, result); 637 } 638 639 // store results out 640 for (uint32_t i = 0; i < 4; ++i) 641 { 642 STORE(result[i], pResult, { i }); 643 } 644 } 645 646 if(state.blendState.logicOpEnable) 647 { 648 const SWR_FORMAT_INFO& info = GetFormatInfo(state.format); 649 SWR_ASSERT(info.type[0] == SWR_TYPE_UINT); 650 Value* vMask[4]; 651 for(uint32_t i = 0; i < 4; i++) 652 { 653 switch(info.bpc[i]) 654 { 655 case 0: vMask[i] = VIMMED1(0x00000000); break; 656 case 2: vMask[i] = VIMMED1(0x00000003); break; 657 case 5: vMask[i] = VIMMED1(0x0000001F); break; 658 case 6: vMask[i] = VIMMED1(0x0000003F); break; 659 case 8: vMask[i] = VIMMED1(0x000000FF); break; 660 case 10: vMask[i] = VIMMED1(0x000003FF); break; 661 case 11: vMask[i] = VIMMED1(0x000007FF); break; 662 case 16: vMask[i] = VIMMED1(0x0000FFFF); break; 663 case 24: vMask[i] = VIMMED1(0x00FFFFFF); break; 664 case 32: vMask[i] = VIMMED1(0xFFFFFFFF); break; 665 default: 666 vMask[i] = VIMMED1(0x0); 667 SWR_ASSERT(0, "Unsupported bpc for logic op\n"); 668 break; 669 } 670 src[i] = BITCAST(src[i], mSimdInt32Ty);//, vMask[i]); 671 dst[i] = BITCAST(dst[i], mSimdInt32Ty); 672 } 673 674 LogicOpFunc(state.blendState.logicOpFunc, src, dst, result); 675 676 // store results out 677 for(uint32_t i = 0; i < 4; ++i) 678 { 679 // clear upper bits from PS output not in RT format after doing logic op 680 result[i] = AND(result[i], vMask[i]); 681 682 STORE(BITCAST(result[i], mSimdFP32Ty), pResult, {i}); 683 } 684 } 685 686 if(state.desc.oMaskEnable) 687 { 688 assert(!(state.desc.alphaToCoverageEnable)); 689 // load current mask 690 Value* oMask = LOAD(ppoMask); 691 Value* sampleMasked = VBROADCAST(SHL(C(1), sampleNum)); 692 oMask = AND(oMask, sampleMasked); 693 currentMask = AND(oMask, currentMask); 694 } 695 696 if(state.desc.sampleMaskEnable) 697 { 698 Value* sampleMask = LOAD(pBlendState, { 0, SWR_BLEND_STATE_sampleMask}); 699 Value* sampleMasked = SHL(C(1), sampleNum); 700 sampleMask = AND(sampleMask, sampleMasked); 701 sampleMask = VBROADCAST(ICMP_SGT(sampleMask, C(0))); 702 sampleMask = S_EXT(sampleMask, mSimdInt32Ty); 703 currentMask = AND(sampleMask, currentMask); 704 } 705 706 if (state.desc.alphaToCoverageEnable) 707 { 708 Value* sampleMasked = SHL(C(1), sampleNum); 709 currentMask = AND(currentMask, VBROADCAST(sampleMasked)); 710 } 711 712 if(state.desc.sampleMaskEnable || state.desc.alphaToCoverageEnable || 713 state.desc.oMaskEnable) 714 { 715 // load current mask 716 Value* pMask = LOAD(ppMask); 717 currentMask = S_EXT(ICMP_SGT(currentMask, VBROADCAST(C(0))), mSimdInt32Ty); 718 Value* outputMask = AND(pMask, currentMask); 719 // store new mask 720 STORE(outputMask, GEP(ppMask, C(0))); 721 } 722 723 RET_VOID(); 724 725 JitManager::DumpToFile(blendFunc, ""); 726 727 ::FunctionPassManager passes(JM()->mpCurrentModule); 728 729 passes.add(createBreakCriticalEdgesPass()); 730 passes.add(createCFGSimplificationPass()); 731 passes.add(createEarlyCSEPass()); 732 passes.add(createPromoteMemoryToRegisterPass()); 733 passes.add(createCFGSimplificationPass()); 734 passes.add(createEarlyCSEPass()); 735 passes.add(createInstructionCombiningPass()); 736 passes.add(createInstructionSimplifierPass()); 737 passes.add(createConstantPropagationPass()); 738 passes.add(createSCCPPass()); 739 passes.add(createAggressiveDCEPass()); 740 741 passes.run(*blendFunc); 742 743 JitManager::DumpToFile(blendFunc, "optimized"); 744 745 return blendFunc; 746 } 747}; 748 749////////////////////////////////////////////////////////////////////////// 750/// @brief JITs from fetch shader IR 751/// @param hJitMgr - JitManager handle 752/// @param func - LLVM function IR 753/// @return PFN_FETCH_FUNC - pointer to fetch code 754PFN_BLEND_JIT_FUNC JitBlendFunc(HANDLE hJitMgr, const HANDLE hFunc) 755{ 756 const llvm::Function *func = (const llvm::Function*)hFunc; 757 JitManager* pJitMgr = reinterpret_cast<JitManager*>(hJitMgr); 758 PFN_BLEND_JIT_FUNC pfnBlend; 759 pfnBlend = (PFN_BLEND_JIT_FUNC)(pJitMgr->mpExec->getFunctionAddress(func->getName().str())); 760 // MCJIT finalizes modules the first time you JIT code from them. After finalized, you cannot add new IR to the module 761 pJitMgr->mIsModuleFinalized = true; 762 763 return pfnBlend; 764} 765 766////////////////////////////////////////////////////////////////////////// 767/// @brief JIT compiles blend shader 768/// @param hJitMgr - JitManager handle 769/// @param state - blend state to build function from 770extern "C" PFN_BLEND_JIT_FUNC JITCALL JitCompileBlend(HANDLE hJitMgr, const BLEND_COMPILE_STATE& state) 771{ 772 JitManager* pJitMgr = reinterpret_cast<JitManager*>(hJitMgr); 773 774 pJitMgr->SetupNewModule(); 775 776 BlendJit theJit(pJitMgr); 777 HANDLE hFunc = theJit.Create(state); 778 779 return JitBlendFunc(hJitMgr, hFunc); 780} 781