1c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/**************************************************************************** 2c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. 3c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* 4c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* Permission is hereby granted, free of charge, to any person obtaining a 5c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* copy of this software and associated documentation files (the "Software"), 6c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* to deal in the Software without restriction, including without limitation 7c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* the rights to use, copy, modify, merge, publish, distribute, sublicense, 8c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* and/or sell copies of the Software, and to permit persons to whom the 9c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* Software is furnished to do so, subject to the following conditions: 10c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* 11c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* The above copyright notice and this permission notice (including the next 12c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* paragraph) shall be included in all copies or substantial portions of the 13c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* Software. 14c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* 15c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* IN THE SOFTWARE. 22c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* 23c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* @file blend_jit.cpp 24c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* 25c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* @brief Implementation of the blend jitter 26c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* 27c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* Notes: 28c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* 29c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley******************************************************************************/ 30c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley#include "jit_api.h" 31c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley#include "blend_jit.h" 32c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley#include "builder.h" 33c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley#include "state_llvm.h" 34c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 35c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley#include <sstream> 36c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 37c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley// components with bit-widths <= the QUANTIZE_THRESHOLD will be quantized 38c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley#define QUANTIZE_THRESHOLD 2 39c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 40aaeb07989ee30c589b034a551f71e23e7b8056b2Tim Rowleyusing namespace llvm; 41aaeb07989ee30c589b034a551f71e23e7b8056b2Tim Rowleyusing namespace SwrJit; 42aaeb07989ee30c589b034a551f71e23e7b8056b2Tim Rowley 43c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley////////////////////////////////////////////////////////////////////////// 44c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// Interface to Jitting a blend shader 45c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley////////////////////////////////////////////////////////////////////////// 46c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowleystruct BlendJit : public Builder 47c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley{ 48c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley BlendJit(JitManager* pJitMgr) : Builder(pJitMgr){}; 49c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 50c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley template<bool Color, bool Alpha> 51c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley void GenerateBlendFactor(SWR_BLEND_FACTOR factor, Value* constColor[4], Value* src[4], Value* src1[4], Value* dst[4], Value* result[4]) 52c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 53c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley Value* out[4]; 54c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 55c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley switch (factor) 56c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 57c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case BLENDFACTOR_ONE: 58c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[0] = out[1] = out[2] = out[3] = VIMMED1(1.0f); 59c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 60c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case BLENDFACTOR_SRC_COLOR: 61c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[0] = src[0]; 62c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[1] = src[1]; 63c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[2] = src[2]; 64c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[3] = src[3]; 65c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 66c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case BLENDFACTOR_SRC_ALPHA: 67c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[0] = out[1] = out[2] = out[3] = src[3]; 68c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 69c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case BLENDFACTOR_DST_ALPHA: 70c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[0] = out[1] = out[2] = out[3] = dst[3]; 71c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 72c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case BLENDFACTOR_DST_COLOR: 73c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[0] = dst[0]; 74c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[1] = dst[1]; 75c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[2] = dst[2]; 76c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[3] = dst[3]; 77c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 78c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case BLENDFACTOR_SRC_ALPHA_SATURATE: 79c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[0] = out[1] = out[2] = VMINPS(src[3], FSUB(VIMMED1(1.0f), dst[3])); 80c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[3] = VIMMED1(1.0f); 81c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 82c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case BLENDFACTOR_CONST_COLOR: 83c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[0] = constColor[0]; 84c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[1] = constColor[1]; 85c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[2] = constColor[2]; 86c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[3] = constColor[3]; 87c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 88c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case BLENDFACTOR_CONST_ALPHA: 89c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[0] = out[1] = out[2] = out[3] = constColor[3]; 90c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 91c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case BLENDFACTOR_SRC1_COLOR: 92c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[0] = src1[0]; 93c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[1] = src1[1]; 94c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[2] = src1[2]; 95c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[3] = src1[3]; 96c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 97c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case BLENDFACTOR_SRC1_ALPHA: 98c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[0] = out[1] = out[2] = out[3] = src1[3]; 99c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 100c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case BLENDFACTOR_ZERO: 101c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[0] = out[1] = out[2] = out[3] = VIMMED1(0.0f); 102c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 103c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case BLENDFACTOR_INV_SRC_COLOR: 104c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[0] = FSUB(VIMMED1(1.0f), src[0]); 105c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[1] = FSUB(VIMMED1(1.0f), src[1]); 106c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[2] = FSUB(VIMMED1(1.0f), src[2]); 107c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[3] = FSUB(VIMMED1(1.0f), src[3]); 108c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 109c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case BLENDFACTOR_INV_SRC_ALPHA: 110c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), src[3]); 111c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 112c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case BLENDFACTOR_INV_DST_ALPHA: 113c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), dst[3]); 114c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 115c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case BLENDFACTOR_INV_DST_COLOR: 116c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[0] = FSUB(VIMMED1(1.0f), dst[0]); 117c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[1] = FSUB(VIMMED1(1.0f), dst[1]); 118c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[2] = FSUB(VIMMED1(1.0f), dst[2]); 119c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[3] = FSUB(VIMMED1(1.0f), dst[3]); 120c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 121c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case BLENDFACTOR_INV_CONST_COLOR: 122c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[0] = FSUB(VIMMED1(1.0f), constColor[0]); 123c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[1] = FSUB(VIMMED1(1.0f), constColor[1]); 124c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[2] = FSUB(VIMMED1(1.0f), constColor[2]); 125c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[3] = FSUB(VIMMED1(1.0f), constColor[3]); 126c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 127c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case BLENDFACTOR_INV_CONST_ALPHA: 128c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), constColor[3]); 129c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 130c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case BLENDFACTOR_INV_SRC1_COLOR: 131c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[0] = FSUB(VIMMED1(1.0f), src1[0]); 132c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[1] = FSUB(VIMMED1(1.0f), src1[1]); 133c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[2] = FSUB(VIMMED1(1.0f), src1[2]); 134c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[3] = FSUB(VIMMED1(1.0f), src1[3]); 135c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 136c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case BLENDFACTOR_INV_SRC1_ALPHA: 137c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), src1[3]); 138c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 139c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley default: 140c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_ASSERT(false, "Unsupported blend factor: %d", factor); 141c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[0] = out[1] = out[2] = out[3] = VIMMED1(0.0f); 142c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 143c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 144c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 145c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley if (Color) 146c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 147c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[0] = out[0]; 148c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[1] = out[1]; 149c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[2] = out[2]; 150c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 151c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 152c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley if (Alpha) 153c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 154c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[3] = out[3]; 155c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 156c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 157c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 158c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley void Clamp(SWR_FORMAT format, Value* src[4]) 159c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 160c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley const SWR_FORMAT_INFO& info = GetFormatInfo(format); 161c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_TYPE type = info.type[0]; 162c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 163c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley switch (type) 164c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 165c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case SWR_TYPE_FLOAT: 166c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 167c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 168c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case SWR_TYPE_UNORM: 169c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley src[0] = VMINPS(VMAXPS(src[0], VIMMED1(0.0f)), VIMMED1(1.0f)); 170c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley src[1] = VMINPS(VMAXPS(src[1], VIMMED1(0.0f)), VIMMED1(1.0f)); 171c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley src[2] = VMINPS(VMAXPS(src[2], VIMMED1(0.0f)), VIMMED1(1.0f)); 172c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley src[3] = VMINPS(VMAXPS(src[3], VIMMED1(0.0f)), VIMMED1(1.0f)); 173c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 174c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 175c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case SWR_TYPE_SNORM: 176c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley src[0] = VMINPS(VMAXPS(src[0], VIMMED1(-1.0f)), VIMMED1(1.0f)); 177c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley src[1] = VMINPS(VMAXPS(src[1], VIMMED1(-1.0f)), VIMMED1(1.0f)); 178c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley src[2] = VMINPS(VMAXPS(src[2], VIMMED1(-1.0f)), VIMMED1(1.0f)); 179c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley src[3] = VMINPS(VMAXPS(src[3], VIMMED1(-1.0f)), VIMMED1(1.0f)); 180c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 181c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 182c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley default: SWR_ASSERT(false, "Unsupport format type: %d", type); 183c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 184c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 185c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 186c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley void ApplyDefaults(SWR_FORMAT format, Value* src[4]) 187c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 188c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley const SWR_FORMAT_INFO& info = GetFormatInfo(format); 189c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 190c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley bool valid[] = { false, false, false, false }; 191c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley for (uint32_t c = 0; c < info.numComps; ++c) 192c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 193c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley valid[info.swizzle[c]] = true; 194c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 195c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 196c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley for (uint32_t c = 0; c < 4; ++c) 197c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 198c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley if (!valid[c]) 199c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 200c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley src[c] = BITCAST(VIMMED1((int)info.defaults[c]), mSimdFP32Ty); 201c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 202c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 203c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 204c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 205c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley void ApplyUnusedDefaults(SWR_FORMAT format, Value* src[4]) 206c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 207c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley const SWR_FORMAT_INFO& info = GetFormatInfo(format); 208c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 209c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley for (uint32_t c = 0; c < info.numComps; ++c) 210c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 211c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley if (info.type[c] == SWR_TYPE_UNUSED) 212c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 213c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley src[info.swizzle[c]] = BITCAST(VIMMED1((int)info.defaults[info.swizzle[c]]), mSimdFP32Ty); 214c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 215c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 216c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 217c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 218c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley void Quantize(SWR_FORMAT format, Value* src[4]) 219c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 220c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley const SWR_FORMAT_INFO& info = GetFormatInfo(format); 221c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley for (uint32_t c = 0; c < info.numComps; ++c) 222c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 223b53a33feef2595fc71fadafa5857d5482514a724Ilia Mirkin if (info.bpc[c] <= QUANTIZE_THRESHOLD && info.type[c] != SWR_TYPE_UNUSED) 224c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 225c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t swizComp = info.swizzle[c]; 226c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley float factor = (float)((1 << info.bpc[c]) - 1); 227c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley switch (info.type[c]) 228c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 229c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case SWR_TYPE_UNORM: 230c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley src[swizComp] = FADD(FMUL(src[swizComp], VIMMED1(factor)), VIMMED1(0.5f)); 231c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley src[swizComp] = VROUND(src[swizComp], C(_MM_FROUND_TO_ZERO)); 232c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley src[swizComp] = FMUL(src[swizComp], VIMMED1(1.0f /factor)); 233c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 234c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley default: SWR_ASSERT(false, "Unsupported format type: %d", info.type[c]); 235c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 236c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 237c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 238c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 239c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 240c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley template<bool Color, bool Alpha> 241c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley void BlendFunc(SWR_BLEND_OP blendOp, Value* src[4], Value* srcFactor[4], Value* dst[4], Value* dstFactor[4], Value* result[4]) 242c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 243c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley Value* out[4]; 244c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley Value* srcBlend[4]; 245c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley Value* dstBlend[4]; 246c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley for (uint32_t i = 0; i < 4; ++i) 247c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 248c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley srcBlend[i] = FMUL(src[i], srcFactor[i]); 249c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley dstBlend[i] = FMUL(dst[i], dstFactor[i]); 250c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 251c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 252c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley switch (blendOp) 253c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 254c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case BLENDOP_ADD: 255c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[0] = FADD(srcBlend[0], dstBlend[0]); 256c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[1] = FADD(srcBlend[1], dstBlend[1]); 257c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[2] = FADD(srcBlend[2], dstBlend[2]); 258c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[3] = FADD(srcBlend[3], dstBlend[3]); 259c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 260c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 261c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case BLENDOP_SUBTRACT: 262c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[0] = FSUB(srcBlend[0], dstBlend[0]); 263c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[1] = FSUB(srcBlend[1], dstBlend[1]); 264c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[2] = FSUB(srcBlend[2], dstBlend[2]); 265c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[3] = FSUB(srcBlend[3], dstBlend[3]); 266c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 267c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 268c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case BLENDOP_REVSUBTRACT: 269c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[0] = FSUB(dstBlend[0], srcBlend[0]); 270c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[1] = FSUB(dstBlend[1], srcBlend[1]); 271c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[2] = FSUB(dstBlend[2], srcBlend[2]); 272c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[3] = FSUB(dstBlend[3], srcBlend[3]); 273c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 274c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 275c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case BLENDOP_MIN: 276c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[0] = VMINPS(src[0], dst[0]); 277c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[1] = VMINPS(src[1], dst[1]); 278c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[2] = VMINPS(src[2], dst[2]); 279c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[3] = VMINPS(src[3], dst[3]); 280c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 281c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 282c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case BLENDOP_MAX: 283c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[0] = VMAXPS(src[0], dst[0]); 284c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[1] = VMAXPS(src[1], dst[1]); 285c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[2] = VMAXPS(src[2], dst[2]); 286c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[3] = VMAXPS(src[3], dst[3]); 287c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 288c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 289c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley default: 290c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_ASSERT(false, "Unsupported blend operation: %d", blendOp); 291c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley out[0] = out[1] = out[2] = out[3] = VIMMED1(0.0f); 292c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 293c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 294c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 295c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley if (Color) 296c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 297c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[0] = out[0]; 298c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[1] = out[1]; 299c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[2] = out[2]; 300c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 301c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 302c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley if (Alpha) 303c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 304c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[3] = out[3]; 305c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 306c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 307c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 308c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley void LogicOpFunc(SWR_LOGIC_OP logicOp, Value* src[4], Value* dst[4], Value* result[4]) 309c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 310c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // Op: (s == PS output, d = RT contents) 311c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley switch(logicOp) 312c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 313c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case LOGICOP_CLEAR: 314c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[0] = VIMMED1(0); 315c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[1] = VIMMED1(0); 316c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[2] = VIMMED1(0); 317c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[3] = VIMMED1(0); 318c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 319c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 320c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case LOGICOP_NOR: 321c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // ~(s | d) 322c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[0] = XOR(OR(src[0], dst[0]), VIMMED1(0xFFFFFFFF)); 323c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[1] = XOR(OR(src[1], dst[1]), VIMMED1(0xFFFFFFFF)); 324c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[2] = XOR(OR(src[2], dst[2]), VIMMED1(0xFFFFFFFF)); 325c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[3] = XOR(OR(src[3], dst[3]), VIMMED1(0xFFFFFFFF)); 326c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 327c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 328c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case LOGICOP_AND_INVERTED: 329c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // ~s & d 330c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // todo: use avx andnot instr when I can find the intrinsic to call 331c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[0] = AND(XOR(src[0], VIMMED1(0xFFFFFFFF)), dst[0]); 332c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[1] = AND(XOR(src[1], VIMMED1(0xFFFFFFFF)), dst[1]); 333c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[2] = AND(XOR(src[2], VIMMED1(0xFFFFFFFF)), dst[2]); 334c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[3] = AND(XOR(src[3], VIMMED1(0xFFFFFFFF)), dst[3]); 335c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 336c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 337c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case LOGICOP_COPY_INVERTED: 338c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // ~s 339c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[0] = XOR(src[0], VIMMED1(0xFFFFFFFF)); 340c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[1] = XOR(src[1], VIMMED1(0xFFFFFFFF)); 341c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[2] = XOR(src[2], VIMMED1(0xFFFFFFFF)); 342c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[3] = XOR(src[3], VIMMED1(0xFFFFFFFF)); 343c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 344c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 345c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case LOGICOP_AND_REVERSE: 346c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // s & ~d 347c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // todo: use avx andnot instr when I can find the intrinsic to call 348c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[0] = AND(XOR(dst[0], VIMMED1(0xFFFFFFFF)), src[0]); 349c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[1] = AND(XOR(dst[1], VIMMED1(0xFFFFFFFF)), src[1]); 350c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[2] = AND(XOR(dst[2], VIMMED1(0xFFFFFFFF)), src[2]); 351c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[3] = AND(XOR(dst[3], VIMMED1(0xFFFFFFFF)), src[3]); 352c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 353c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 354c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case LOGICOP_INVERT: 355c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // ~d 356c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[0] = XOR(dst[0], VIMMED1(0xFFFFFFFF)); 357c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[1] = XOR(dst[1], VIMMED1(0xFFFFFFFF)); 358c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[2] = XOR(dst[2], VIMMED1(0xFFFFFFFF)); 359c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[3] = XOR(dst[3], VIMMED1(0xFFFFFFFF)); 360c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 361c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 362c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case LOGICOP_XOR: 363c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // s ^ d 364c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[0] = XOR(src[0], dst[0]); 365c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[1] = XOR(src[1], dst[1]); 366c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[2] = XOR(src[2], dst[2]); 367c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[3] = XOR(src[3], dst[3]); 368c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 369c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 370c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case LOGICOP_NAND: 371c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // ~(s & d) 372c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[0] = XOR(AND(src[0], dst[0]), VIMMED1(0xFFFFFFFF)); 373c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[1] = XOR(AND(src[1], dst[1]), VIMMED1(0xFFFFFFFF)); 374c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[2] = XOR(AND(src[2], dst[2]), VIMMED1(0xFFFFFFFF)); 375c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[3] = XOR(AND(src[3], dst[3]), VIMMED1(0xFFFFFFFF)); 376c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 377c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 378c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case LOGICOP_AND: 379c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // s & d 380c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[0] = AND(src[0], dst[0]); 381c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[1] = AND(src[1], dst[1]); 382c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[2] = AND(src[2], dst[2]); 383c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[3] = AND(src[3], dst[3]); 384c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 385c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 386c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case LOGICOP_EQUIV: 387c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // ~(s ^ d) 388c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[0] = XOR(XOR(src[0], dst[0]), VIMMED1(0xFFFFFFFF)); 389c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[1] = XOR(XOR(src[1], dst[1]), VIMMED1(0xFFFFFFFF)); 390c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[2] = XOR(XOR(src[2], dst[2]), VIMMED1(0xFFFFFFFF)); 391c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[3] = XOR(XOR(src[3], dst[3]), VIMMED1(0xFFFFFFFF)); 392c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 393c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 394c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case LOGICOP_NOOP: 395c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[0] = dst[0]; 396c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[1] = dst[1]; 397c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[2] = dst[2]; 398c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[3] = dst[3]; 399c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 400c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 401c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case LOGICOP_OR_INVERTED: 402c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // ~s | d 403c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[0] = OR(XOR(src[0], VIMMED1(0xFFFFFFFF)), dst[0]); 404c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[1] = OR(XOR(src[1], VIMMED1(0xFFFFFFFF)), dst[1]); 405c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[2] = OR(XOR(src[2], VIMMED1(0xFFFFFFFF)), dst[2]); 406c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[3] = OR(XOR(src[3], VIMMED1(0xFFFFFFFF)), dst[3]); 407c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 408c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 409c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case LOGICOP_COPY: 410c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[0] = src[0]; 411c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[1] = src[1]; 412c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[2] = src[2]; 413c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[3] = src[3]; 414c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 415c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 416c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case LOGICOP_OR_REVERSE: 417c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // s | ~d 418c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[0] = OR(XOR(dst[0], VIMMED1(0xFFFFFFFF)), src[0]); 419c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[1] = OR(XOR(dst[1], VIMMED1(0xFFFFFFFF)), src[1]); 420c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[2] = OR(XOR(dst[2], VIMMED1(0xFFFFFFFF)), src[2]); 421c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[3] = OR(XOR(dst[3], VIMMED1(0xFFFFFFFF)), src[3]); 422c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 423c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 424c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case LOGICOP_OR: 425c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // s | d 426c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[0] = OR(src[0], dst[0]); 427c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[1] = OR(src[1], dst[1]); 428c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[2] = OR(src[2], dst[2]); 429c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[3] = OR(src[3], dst[3]); 430c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 431c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 432c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case LOGICOP_SET: 433c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[0] = VIMMED1(0xFFFFFFFF); 434c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[1] = VIMMED1(0xFFFFFFFF); 435c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[2] = VIMMED1(0xFFFFFFFF); 436c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[3] = VIMMED1(0xFFFFFFFF); 437c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 438c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 439c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley default: 440c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_ASSERT(false, "Unsupported logic operation: %d", logicOp); 441c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[0] = result[1] = result[2] = result[3] = VIMMED1(0.0f); 442c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 443c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 444c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 445c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 446cee66dd2aa182ba30130bef3298444667753b051Tim Rowley void AlphaTest(const BLEND_COMPILE_STATE& state, Value* pBlendState, Value* ppAlpha, Value* ppMask) 447c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 448c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // load uint32_t reference 449c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley Value* pRef = VBROADCAST(LOAD(pBlendState, { 0, SWR_BLEND_STATE_alphaTestReference })); 450cee66dd2aa182ba30130bef3298444667753b051Tim Rowley 451cee66dd2aa182ba30130bef3298444667753b051Tim Rowley // load alpha 452cee66dd2aa182ba30130bef3298444667753b051Tim Rowley Value* pAlpha = LOAD(ppAlpha); 453c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 454c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley Value* pTest = nullptr; 455c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley if (state.alphaTestFormat == ALPHA_TEST_UNORM8) 456c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 457c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // convert float alpha to unorm8 458c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley Value* pAlphaU8 = FMUL(pAlpha, VIMMED1(256.0f)); 459c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley pAlphaU8 = FP_TO_UI(pAlphaU8, mSimdInt32Ty); 460c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 461c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // compare 462c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley switch (state.alphaTestFunction) 463c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 464c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case ZFUNC_ALWAYS: pTest = VIMMED1(true); break; 465c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case ZFUNC_NEVER: pTest = VIMMED1(false); break; 466c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case ZFUNC_LT: pTest = ICMP_ULT(pAlphaU8, pRef); break; 467c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case ZFUNC_EQ: pTest = ICMP_EQ(pAlphaU8, pRef); break; 468c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case ZFUNC_LE: pTest = ICMP_ULE(pAlphaU8, pRef); break; 469c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case ZFUNC_GT: pTest = ICMP_UGT(pAlphaU8, pRef); break; 470c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case ZFUNC_NE: pTest = ICMP_NE(pAlphaU8, pRef); break; 471c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case ZFUNC_GE: pTest = ICMP_UGE(pAlphaU8, pRef); break; 472c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley default: 473c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_ASSERT(false, "Invalid alpha test function"); 474c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 475c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 476c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 477c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley else 478c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 479c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // cast ref to float 480c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley pRef = BITCAST(pRef, mSimdFP32Ty); 481c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 482c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // compare 483c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley switch (state.alphaTestFunction) 484c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 485c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case ZFUNC_ALWAYS: pTest = VIMMED1(true); break; 486c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case ZFUNC_NEVER: pTest = VIMMED1(false); break; 487c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case ZFUNC_LT: pTest = FCMP_OLT(pAlpha, pRef); break; 488c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case ZFUNC_EQ: pTest = FCMP_OEQ(pAlpha, pRef); break; 489c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case ZFUNC_LE: pTest = FCMP_OLE(pAlpha, pRef); break; 490c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case ZFUNC_GT: pTest = FCMP_OGT(pAlpha, pRef); break; 491c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case ZFUNC_NE: pTest = FCMP_ONE(pAlpha, pRef); break; 492c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case ZFUNC_GE: pTest = FCMP_OGE(pAlpha, pRef); break; 493c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley default: 494c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_ASSERT(false, "Invalid alpha test function"); 495c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 496c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 497c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 498c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 499c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // load current mask 500c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley Value* pMask = LOAD(ppMask); 501c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 502c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // convert to int1 mask 503c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley pMask = MASK(pMask); 504c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 505c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // and with alpha test result 506c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley pMask = AND(pMask, pTest); 507c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 508c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // convert back to vector mask 509c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley pMask = VMASK(pMask); 510c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 511c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // store new mask 512c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley STORE(pMask, ppMask); 513c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 514c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 515c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley Function* Create(const BLEND_COMPILE_STATE& state) 516c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 517c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley static std::size_t jitNum = 0; 518c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 519c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley std::stringstream fnName("BlendShader", std::ios_base::in | std::ios_base::out | std::ios_base::ate); 520c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley fnName << jitNum++; 521c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 522c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // blend function signature 523c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley //typedef void(*PFN_BLEND_JIT_FUNC)(const SWR_BLEND_STATE*, simdvector&, simdvector&, uint32_t, BYTE*, simdvector&, simdscalari*, simdscalari*); 524c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 525c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley std::vector<Type*> args{ 526c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley PointerType::get(Gen_SWR_BLEND_STATE(JM()), 0), // SWR_BLEND_STATE* 527c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley PointerType::get(mSimdFP32Ty, 0), // simdvector& src 528c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley PointerType::get(mSimdFP32Ty, 0), // simdvector& src1 529cee66dd2aa182ba30130bef3298444667753b051Tim Rowley PointerType::get(mSimdFP32Ty, 0), // src0alpha 530c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley Type::getInt32Ty(JM()->mContext), // sampleNum 531c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley PointerType::get(mSimdFP32Ty, 0), // uint8_t* pDst 532c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley PointerType::get(mSimdFP32Ty, 0), // simdvector& result 533c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley PointerType::get(mSimdInt32Ty, 0), // simdscalari* oMask 534c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley PointerType::get(mSimdInt32Ty, 0), // simdscalari* pMask 535c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley }; 536c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 537c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley FunctionType* fTy = FunctionType::get(IRB()->getVoidTy(), args, false); 538c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley Function* blendFunc = Function::Create(fTy, GlobalValue::ExternalLinkage, fnName.str(), JM()->mpCurrentModule); 539c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 540c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley BasicBlock* entry = BasicBlock::Create(JM()->mContext, "entry", blendFunc); 541c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 542c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley IRB()->SetInsertPoint(entry); 543c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 544c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // arguments 545c375c448bf529952afec3df62aff5f9f506c498eTim Rowley auto argitr = blendFunc->arg_begin(); 546c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley Value* pBlendState = &*argitr++; 547c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley pBlendState->setName("pBlendState"); 548c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley Value* pSrc = &*argitr++; 549c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley pSrc->setName("src"); 550c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley Value* pSrc1 = &*argitr++; 551c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley pSrc1->setName("src1"); 552cee66dd2aa182ba30130bef3298444667753b051Tim Rowley Value* pSrc0Alpha = &*argitr++; 553cee66dd2aa182ba30130bef3298444667753b051Tim Rowley pSrc0Alpha->setName("src0alpha"); 554c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley Value* sampleNum = &*argitr++; 555c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley sampleNum->setName("sampleNum"); 556c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley Value* pDst = &*argitr++; 557c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley pDst->setName("pDst"); 558c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley Value* pResult = &*argitr++; 559c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley pResult->setName("result"); 560c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley Value* ppoMask = &*argitr++; 561c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley ppoMask->setName("ppoMask"); 562c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley Value* ppMask = &*argitr++; 563c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley ppMask->setName("pMask"); 564c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 565c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley static_assert(KNOB_COLOR_HOT_TILE_FORMAT == R32G32B32A32_FLOAT, "Unsupported hot tile format"); 566c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley Value* dst[4]; 567c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley Value* constantColor[4]; 568c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley Value* src[4]; 569c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley Value* src1[4]; 570c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley Value* result[4]; 571c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley for (uint32_t i = 0; i < 4; ++i) 572c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 573c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // load hot tile 574c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley dst[i] = LOAD(pDst, { i }); 575c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 576c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // load constant color 577c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley constantColor[i] = VBROADCAST(LOAD(pBlendState, { 0, SWR_BLEND_STATE_constantColor, i })); 578c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 579c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // load src 580c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley src[i] = LOAD(pSrc, { i }); 581c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 582c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // load src1 583c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley src1[i] = LOAD(pSrc1, { i }); 584c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 585c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley Value* currentMask = VIMMED1(-1); 586090be2e434d6023428faa9842d38f9d5c3cef67aTim Rowley if (state.desc.alphaToCoverageEnable) 587c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 588090be2e434d6023428faa9842d38f9d5c3cef67aTim Rowley Value* pClampedSrc = FCLAMP(src[3], 0.0f, 1.0f); 589090be2e434d6023428faa9842d38f9d5c3cef67aTim Rowley uint32_t bits = (1 << state.desc.numSamples) - 1; 590090be2e434d6023428faa9842d38f9d5c3cef67aTim Rowley currentMask = FMUL(pClampedSrc, VBROADCAST(C((float)bits))); 591090be2e434d6023428faa9842d38f9d5c3cef67aTim Rowley currentMask = FP_TO_SI(FADD(currentMask, VIMMED1(0.5f)), mSimdInt32Ty); 592c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 593c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 594c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // alpha test 595c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley if (state.desc.alphaTestEnable) 596c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 597cee66dd2aa182ba30130bef3298444667753b051Tim Rowley AlphaTest(state, pBlendState, pSrc0Alpha, ppMask); 598c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 599c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 600c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // color blend 601c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley if (state.blendState.blendEnable) 602c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 603c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // clamp sources 604c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley Clamp(state.format, src); 605c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley Clamp(state.format, src1); 606c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley Clamp(state.format, dst); 607c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley Clamp(state.format, constantColor); 608c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 609c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // apply defaults to hottile contents to take into account missing components 610c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley ApplyDefaults(state.format, dst); 611c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 612c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // Force defaults for unused 'X' components 613c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley ApplyUnusedDefaults(state.format, dst); 614c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 615c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // Quantize low precision components 616c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley Quantize(state.format, dst); 617c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 618c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // special case clamping for R11G11B10_float which has no sign bit 619c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley if (state.format == R11G11B10_FLOAT) 620c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 621c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley dst[0] = VMAXPS(dst[0], VIMMED1(0.0f)); 622c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley dst[1] = VMAXPS(dst[1], VIMMED1(0.0f)); 623c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley dst[2] = VMAXPS(dst[2], VIMMED1(0.0f)); 624c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley dst[3] = VMAXPS(dst[3], VIMMED1(0.0f)); 625c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 626c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 627c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley Value* srcFactor[4]; 628c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley Value* dstFactor[4]; 629c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley if (state.desc.independentAlphaBlendEnable) 630c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 631c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley GenerateBlendFactor<true, false>(state.blendState.sourceBlendFactor, constantColor, src, src1, dst, srcFactor); 632c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley GenerateBlendFactor<false, true>(state.blendState.sourceAlphaBlendFactor, constantColor, src, src1, dst, srcFactor); 633c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 634c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley GenerateBlendFactor<true, false>(state.blendState.destBlendFactor, constantColor, src, src1, dst, dstFactor); 635c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley GenerateBlendFactor<false, true>(state.blendState.destAlphaBlendFactor, constantColor, src, src1, dst, dstFactor); 636c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 637c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley BlendFunc<true, false>(state.blendState.colorBlendFunc, src, srcFactor, dst, dstFactor, result); 638c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley BlendFunc<false, true>(state.blendState.alphaBlendFunc, src, srcFactor, dst, dstFactor, result); 639c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 640c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley else 641c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 642c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley GenerateBlendFactor<true, true>(state.blendState.sourceBlendFactor, constantColor, src, src1, dst, srcFactor); 643c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley GenerateBlendFactor<true, true>(state.blendState.destBlendFactor, constantColor, src, src1, dst, dstFactor); 644c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 645c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley BlendFunc<true, true>(state.blendState.colorBlendFunc, src, srcFactor, dst, dstFactor, result); 646c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 647c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 648c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // store results out 649c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley for (uint32_t i = 0; i < 4; ++i) 650c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 651c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley STORE(result[i], pResult, { i }); 652c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 653c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 654c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 655c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley if(state.blendState.logicOpEnable) 656c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 657c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley const SWR_FORMAT_INFO& info = GetFormatInfo(state.format); 658c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley Value* vMask[4]; 659aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin float scale[4]; 660aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin 661aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin if (!state.blendState.blendEnable) 662aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin { 663aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin Clamp(state.format, src); 664aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin Clamp(state.format, dst); 665aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin } 666aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin 667c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley for(uint32_t i = 0; i < 4; i++) 668c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 669aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin if (info.type[i] == SWR_TYPE_UNUSED) 670c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 671aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin continue; 672aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin } 673aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin 674aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin if (info.bpc[i] >= 32) { 675aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin vMask[i] = VIMMED1(0xFFFFFFFF); 676aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin scale[i] = 0xFFFFFFFF; 677aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin } else { 678aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin vMask[i] = VIMMED1((1 << info.bpc[i]) - 1); 679aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin if (info.type[i] == SWR_TYPE_SNORM) 680aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin scale[i] = (1 << (info.bpc[i] - 1)) - 1; 681aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin else 682aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin scale[i] = (1 << info.bpc[i]) - 1; 683aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin } 684aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin 685aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin switch (info.type[i]) { 686c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley default: 687aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin SWR_ASSERT(0, "Unsupported type for logic op\n"); 688aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin /* fallthrough */ 689aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin case SWR_TYPE_UINT: 690aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin case SWR_TYPE_SINT: 691aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin src[i] = BITCAST(src[i], mSimdInt32Ty); 692aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin dst[i] = BITCAST(dst[i], mSimdInt32Ty); 693aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin break; 694aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin case SWR_TYPE_SNORM: 695ddf0f097e74d57858238fcdb13003ab5c974abc3Ilia Mirkin src[i] = FP_TO_SI( 696ddf0f097e74d57858238fcdb13003ab5c974abc3Ilia Mirkin FMUL(src[i], VIMMED1(scale[i])), 697ddf0f097e74d57858238fcdb13003ab5c974abc3Ilia Mirkin mSimdInt32Ty); 698ddf0f097e74d57858238fcdb13003ab5c974abc3Ilia Mirkin dst[i] = FP_TO_SI( 699ddf0f097e74d57858238fcdb13003ab5c974abc3Ilia Mirkin FMUL(dst[i], VIMMED1(scale[i])), 700ddf0f097e74d57858238fcdb13003ab5c974abc3Ilia Mirkin mSimdInt32Ty); 701ddf0f097e74d57858238fcdb13003ab5c974abc3Ilia Mirkin break; 702aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin case SWR_TYPE_UNORM: 703aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin src[i] = FP_TO_UI( 704aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin FMUL(src[i], VIMMED1(scale[i])), 705aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin mSimdInt32Ty); 706aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin dst[i] = FP_TO_UI( 707aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin FMUL(dst[i], VIMMED1(scale[i])), 708aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin mSimdInt32Ty); 709c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 710c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 711c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 712c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 713c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley LogicOpFunc(state.blendState.logicOpFunc, src, dst, result); 714c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 715c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // store results out 716c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley for(uint32_t i = 0; i < 4; ++i) 717c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 718aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin if (info.type[i] == SWR_TYPE_UNUSED) 719aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin { 720aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin continue; 721aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin } 722aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin 723c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // clear upper bits from PS output not in RT format after doing logic op 724c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley result[i] = AND(result[i], vMask[i]); 725c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 726aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin switch (info.type[i]) { 727aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin default: 728aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin SWR_ASSERT(0, "Unsupported type for logic op\n"); 729aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin /* fallthrough */ 730aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin case SWR_TYPE_UINT: 731aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin case SWR_TYPE_SINT: 732aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin result[i] = BITCAST(result[i], mSimdFP32Ty); 733aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin break; 734aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin case SWR_TYPE_SNORM: 735ddf0f097e74d57858238fcdb13003ab5c974abc3Ilia Mirkin result[i] = SHL(result[i], C(32 - info.bpc[i])); 736ddf0f097e74d57858238fcdb13003ab5c974abc3Ilia Mirkin result[i] = ASHR(result[i], C(32 - info.bpc[i])); 737ddf0f097e74d57858238fcdb13003ab5c974abc3Ilia Mirkin result[i] = FMUL(SI_TO_FP(result[i], mSimdFP32Ty), 738ddf0f097e74d57858238fcdb13003ab5c974abc3Ilia Mirkin VIMMED1(1.0f / scale[i])); 739ddf0f097e74d57858238fcdb13003ab5c974abc3Ilia Mirkin break; 740aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin case SWR_TYPE_UNORM: 741aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin result[i] = FMUL(UI_TO_FP(result[i], mSimdFP32Ty), 742aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin VIMMED1(1.0f / scale[i])); 743aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin break; 744aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin } 745aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin 746aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin STORE(result[i], pResult, {i}); 747c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 748c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 749c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 750c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley if(state.desc.oMaskEnable) 751c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 752c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley assert(!(state.desc.alphaToCoverageEnable)); 753c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // load current mask 754c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley Value* oMask = LOAD(ppoMask); 755c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley Value* sampleMasked = VBROADCAST(SHL(C(1), sampleNum)); 756c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley oMask = AND(oMask, sampleMasked); 757c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley currentMask = AND(oMask, currentMask); 758c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 759c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 760c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley if(state.desc.sampleMaskEnable) 761c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 762c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley Value* sampleMask = LOAD(pBlendState, { 0, SWR_BLEND_STATE_sampleMask}); 763c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley Value* sampleMasked = SHL(C(1), sampleNum); 764c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley sampleMask = AND(sampleMask, sampleMasked); 765c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley sampleMask = VBROADCAST(ICMP_SGT(sampleMask, C(0))); 766c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley sampleMask = S_EXT(sampleMask, mSimdInt32Ty); 767c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley currentMask = AND(sampleMask, currentMask); 768c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 769c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 770090be2e434d6023428faa9842d38f9d5c3cef67aTim Rowley if (state.desc.alphaToCoverageEnable) 771090be2e434d6023428faa9842d38f9d5c3cef67aTim Rowley { 772090be2e434d6023428faa9842d38f9d5c3cef67aTim Rowley Value* sampleMasked = SHL(C(1), sampleNum); 773090be2e434d6023428faa9842d38f9d5c3cef67aTim Rowley currentMask = AND(currentMask, VBROADCAST(sampleMasked)); 774090be2e434d6023428faa9842d38f9d5c3cef67aTim Rowley } 775090be2e434d6023428faa9842d38f9d5c3cef67aTim Rowley 776c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley if(state.desc.sampleMaskEnable || state.desc.alphaToCoverageEnable || 777c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley state.desc.oMaskEnable) 778c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 779c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // load current mask 780c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley Value* pMask = LOAD(ppMask); 781c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley currentMask = S_EXT(ICMP_SGT(currentMask, VBROADCAST(C(0))), mSimdInt32Ty); 782c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley Value* outputMask = AND(pMask, currentMask); 783c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // store new mask 784c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley STORE(outputMask, GEP(ppMask, C(0))); 785c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 786c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 787c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley RET_VOID(); 788c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 789c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley JitManager::DumpToFile(blendFunc, ""); 790c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 791d3d97f8395513bf365d2fe8e4292c8098290586fTim Rowley ::FunctionPassManager passes(JM()->mpCurrentModule); 79268314b676968e2cf0f8e94f573fa28e766e48349Tim Rowley 793c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley passes.add(createBreakCriticalEdgesPass()); 794c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley passes.add(createCFGSimplificationPass()); 795c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley passes.add(createEarlyCSEPass()); 796c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley passes.add(createPromoteMemoryToRegisterPass()); 797c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley passes.add(createCFGSimplificationPass()); 798c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley passes.add(createEarlyCSEPass()); 799c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley passes.add(createInstructionCombiningPass()); 800c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley passes.add(createInstructionSimplifierPass()); 801c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley passes.add(createConstantPropagationPass()); 802c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley passes.add(createSCCPPass()); 803c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley passes.add(createAggressiveDCEPass()); 804c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 805c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley passes.run(*blendFunc); 806c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 807c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley JitManager::DumpToFile(blendFunc, "optimized"); 808c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 809c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley return blendFunc; 810c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 811c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley}; 812c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 813c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley////////////////////////////////////////////////////////////////////////// 814c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @brief JITs from fetch shader IR 815c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param hJitMgr - JitManager handle 816c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param func - LLVM function IR 817c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @return PFN_FETCH_FUNC - pointer to fetch code 818c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim RowleyPFN_BLEND_JIT_FUNC JitBlendFunc(HANDLE hJitMgr, const HANDLE hFunc) 819c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley{ 820c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley const llvm::Function *func = (const llvm::Function*)hFunc; 821c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley JitManager* pJitMgr = reinterpret_cast<JitManager*>(hJitMgr); 822c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley PFN_BLEND_JIT_FUNC pfnBlend; 823c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley pfnBlend = (PFN_BLEND_JIT_FUNC)(pJitMgr->mpExec->getFunctionAddress(func->getName().str())); 824c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // MCJIT finalizes modules the first time you JIT code from them. After finalized, you cannot add new IR to the module 825c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley pJitMgr->mIsModuleFinalized = true; 826c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 827c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley return pfnBlend; 828c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley} 829c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 830c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley////////////////////////////////////////////////////////////////////////// 831c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @brief JIT compiles blend shader 832c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param hJitMgr - JitManager handle 833c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param state - blend state to build function from 834c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowleyextern "C" PFN_BLEND_JIT_FUNC JITCALL JitCompileBlend(HANDLE hJitMgr, const BLEND_COMPILE_STATE& state) 835c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley{ 836c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley JitManager* pJitMgr = reinterpret_cast<JitManager*>(hJitMgr); 837c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 838c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley pJitMgr->SetupNewModule(); 839c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 840c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley BlendJit theJit(pJitMgr); 841c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley HANDLE hFunc = theJit.Create(state); 842c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 843c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley return JitBlendFunc(hJitMgr, hFunc); 844c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley} 845