1c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/****************************************************************************
2c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
3c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley*
4c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* Permission is hereby granted, free of charge, to any person obtaining a
5c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* copy of this software and associated documentation files (the "Software"),
6c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* to deal in the Software without restriction, including without limitation
7c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* and/or sell copies of the Software, and to permit persons to whom the
9c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* Software is furnished to do so, subject to the following conditions:
10c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley*
11c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* The above copyright notice and this permission notice (including the next
12c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* paragraph) shall be included in all copies or substantial portions of the
13c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* Software.
14c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley*
15c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* IN THE SOFTWARE.
22c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley*
23c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* @file blend_jit.cpp
24c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley*
25c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* @brief Implementation of the blend jitter
26c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley*
27c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* Notes:
28c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley*
29c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley******************************************************************************/
30c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley#include "jit_api.h"
31c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley#include "blend_jit.h"
32c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley#include "builder.h"
33c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley#include "state_llvm.h"
34c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
35c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley#include <sstream>
36c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
// Components whose bit-width (bpc) is <= QUANTIZE_THRESHOLD, and whose type is
// not SWR_TYPE_UNUSED, are quantized by BlendJit::Quantize.
#define QUANTIZE_THRESHOLD 2
39c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
40aaeb07989ee30c589b034a551f71e23e7b8056b2Tim Rowleyusing namespace llvm;
41aaeb07989ee30c589b034a551f71e23e7b8056b2Tim Rowleyusing namespace SwrJit;
42aaeb07989ee30c589b034a551f71e23e7b8056b2Tim Rowley
43c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley//////////////////////////////////////////////////////////////////////////
44c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// Interface to Jitting a blend shader
45c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley//////////////////////////////////////////////////////////////////////////
46c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowleystruct BlendJit : public Builder
47c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley{
48c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    BlendJit(JitManager* pJitMgr) : Builder(pJitMgr){};
49c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
50c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    template<bool Color, bool Alpha>
51c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    void GenerateBlendFactor(SWR_BLEND_FACTOR factor, Value* constColor[4], Value* src[4], Value* src1[4], Value* dst[4], Value* result[4])
52c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    {
53c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        Value* out[4];
54c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
55c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        switch (factor)
56c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        {
57c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case BLENDFACTOR_ONE:
58c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[0] = out[1] = out[2] = out[3] = VIMMED1(1.0f);
59c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
60c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case BLENDFACTOR_SRC_COLOR:
61c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[0] = src[0];
62c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[1] = src[1];
63c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[2] = src[2];
64c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[3] = src[3];
65c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
66c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case BLENDFACTOR_SRC_ALPHA:
67c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[0] = out[1] = out[2] = out[3] = src[3];
68c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
69c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case BLENDFACTOR_DST_ALPHA:
70c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[0] = out[1] = out[2] = out[3] = dst[3];
71c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
72c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case BLENDFACTOR_DST_COLOR:
73c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[0] = dst[0];
74c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[1] = dst[1];
75c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[2] = dst[2];
76c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[3] = dst[3];
77c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
78c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case BLENDFACTOR_SRC_ALPHA_SATURATE:
79c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[0] = out[1] = out[2] = VMINPS(src[3], FSUB(VIMMED1(1.0f), dst[3]));
80c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[3] = VIMMED1(1.0f);
81c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
82c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case BLENDFACTOR_CONST_COLOR:
83c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[0] = constColor[0];
84c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[1] = constColor[1];
85c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[2] = constColor[2];
86c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[3] = constColor[3];
87c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
88c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case BLENDFACTOR_CONST_ALPHA:
89c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[0] = out[1] = out[2] = out[3] = constColor[3];
90c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
91c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case BLENDFACTOR_SRC1_COLOR:
92c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[0] = src1[0];
93c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[1] = src1[1];
94c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[2] = src1[2];
95c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[3] = src1[3];
96c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
97c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case BLENDFACTOR_SRC1_ALPHA:
98c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[0] = out[1] = out[2] = out[3] = src1[3];
99c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
100c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case BLENDFACTOR_ZERO:
101c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[0] = out[1] = out[2] = out[3] = VIMMED1(0.0f);
102c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
103c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case BLENDFACTOR_INV_SRC_COLOR:
104c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[0] = FSUB(VIMMED1(1.0f), src[0]);
105c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[1] = FSUB(VIMMED1(1.0f), src[1]);
106c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[2] = FSUB(VIMMED1(1.0f), src[2]);
107c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[3] = FSUB(VIMMED1(1.0f), src[3]);
108c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
109c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case BLENDFACTOR_INV_SRC_ALPHA:
110c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), src[3]);
111c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
112c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case BLENDFACTOR_INV_DST_ALPHA:
113c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), dst[3]);
114c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
115c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case BLENDFACTOR_INV_DST_COLOR:
116c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[0] = FSUB(VIMMED1(1.0f), dst[0]);
117c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[1] = FSUB(VIMMED1(1.0f), dst[1]);
118c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[2] = FSUB(VIMMED1(1.0f), dst[2]);
119c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[3] = FSUB(VIMMED1(1.0f), dst[3]);
120c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
121c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case BLENDFACTOR_INV_CONST_COLOR:
122c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[0] = FSUB(VIMMED1(1.0f), constColor[0]);
123c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[1] = FSUB(VIMMED1(1.0f), constColor[1]);
124c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[2] = FSUB(VIMMED1(1.0f), constColor[2]);
125c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[3] = FSUB(VIMMED1(1.0f), constColor[3]);
126c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
127c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case BLENDFACTOR_INV_CONST_ALPHA:
128c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), constColor[3]);
129c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
130c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case BLENDFACTOR_INV_SRC1_COLOR:
131c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[0] = FSUB(VIMMED1(1.0f), src1[0]);
132c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[1] = FSUB(VIMMED1(1.0f), src1[1]);
133c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[2] = FSUB(VIMMED1(1.0f), src1[2]);
134c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[3] = FSUB(VIMMED1(1.0f), src1[3]);
135c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
136c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case BLENDFACTOR_INV_SRC1_ALPHA:
137c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), src1[3]);
138c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
139c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        default:
140c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            SWR_ASSERT(false, "Unsupported blend factor: %d", factor);
141c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[0] = out[1] = out[2] = out[3] = VIMMED1(0.0f);
142c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
143c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        }
144c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
145c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        if (Color)
146c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        {
147c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[0] = out[0];
148c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[1] = out[1];
149c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[2] = out[2];
150c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        }
151c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
152c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        if (Alpha)
153c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        {
154c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[3] = out[3];
155c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        }
156c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    }
157c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
158c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    void Clamp(SWR_FORMAT format, Value* src[4])
159c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    {
160c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        const SWR_FORMAT_INFO& info = GetFormatInfo(format);
161c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        SWR_TYPE type = info.type[0];
162c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
163c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        switch (type)
164c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        {
165c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case SWR_TYPE_FLOAT:
166c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
167c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
168c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case SWR_TYPE_UNORM:
169c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            src[0] = VMINPS(VMAXPS(src[0], VIMMED1(0.0f)), VIMMED1(1.0f));
170c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            src[1] = VMINPS(VMAXPS(src[1], VIMMED1(0.0f)), VIMMED1(1.0f));
171c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            src[2] = VMINPS(VMAXPS(src[2], VIMMED1(0.0f)), VIMMED1(1.0f));
172c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            src[3] = VMINPS(VMAXPS(src[3], VIMMED1(0.0f)), VIMMED1(1.0f));
173c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
174c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
175c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case SWR_TYPE_SNORM:
176c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            src[0] = VMINPS(VMAXPS(src[0], VIMMED1(-1.0f)), VIMMED1(1.0f));
177c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            src[1] = VMINPS(VMAXPS(src[1], VIMMED1(-1.0f)), VIMMED1(1.0f));
178c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            src[2] = VMINPS(VMAXPS(src[2], VIMMED1(-1.0f)), VIMMED1(1.0f));
179c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            src[3] = VMINPS(VMAXPS(src[3], VIMMED1(-1.0f)), VIMMED1(1.0f));
180c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
181c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
182c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        default: SWR_ASSERT(false, "Unsupport format type: %d", type);
183c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        }
184c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    }
185c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
186c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    void ApplyDefaults(SWR_FORMAT format, Value* src[4])
187c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    {
188c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        const SWR_FORMAT_INFO& info = GetFormatInfo(format);
189c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
190c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        bool valid[] = { false, false, false, false };
191c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        for (uint32_t c = 0; c < info.numComps; ++c)
192c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        {
193c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            valid[info.swizzle[c]] = true;
194c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        }
195c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
196c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        for (uint32_t c = 0; c < 4; ++c)
197c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        {
198c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            if (!valid[c])
199c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            {
200c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley                src[c] = BITCAST(VIMMED1((int)info.defaults[c]), mSimdFP32Ty);
201c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            }
202c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        }
203c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    }
204c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
205c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    void ApplyUnusedDefaults(SWR_FORMAT format, Value* src[4])
206c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    {
207c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        const SWR_FORMAT_INFO& info = GetFormatInfo(format);
208c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
209c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        for (uint32_t c = 0; c < info.numComps; ++c)
210c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        {
211c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            if (info.type[c] == SWR_TYPE_UNUSED)
212c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            {
213c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley                src[info.swizzle[c]] = BITCAST(VIMMED1((int)info.defaults[info.swizzle[c]]), mSimdFP32Ty);
214c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            }
215c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        }
216c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    }
217c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
218c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    void Quantize(SWR_FORMAT format, Value* src[4])
219c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    {
220c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        const SWR_FORMAT_INFO& info = GetFormatInfo(format);
221c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        for (uint32_t c = 0; c < info.numComps; ++c)
222c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        {
223b53a33feef2595fc71fadafa5857d5482514a724Ilia Mirkin            if (info.bpc[c] <= QUANTIZE_THRESHOLD && info.type[c] != SWR_TYPE_UNUSED)
224c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            {
225c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley                uint32_t swizComp = info.swizzle[c];
226c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley                float factor = (float)((1 << info.bpc[c]) - 1);
227c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley                switch (info.type[c])
228c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley                {
229c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley                case SWR_TYPE_UNORM:
230c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley                    src[swizComp] = FADD(FMUL(src[swizComp], VIMMED1(factor)), VIMMED1(0.5f));
231c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley                    src[swizComp] = VROUND(src[swizComp], C(_MM_FROUND_TO_ZERO));
232c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley                    src[swizComp] = FMUL(src[swizComp], VIMMED1(1.0f /factor));
233c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley                    break;
234c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley                default: SWR_ASSERT(false, "Unsupported format type: %d", info.type[c]);
235c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley                }
236c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            }
237c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        }
238c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    }
239c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
240c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    template<bool Color, bool Alpha>
241c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    void BlendFunc(SWR_BLEND_OP blendOp, Value* src[4], Value* srcFactor[4], Value* dst[4], Value* dstFactor[4], Value* result[4])
242c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    {
243c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        Value* out[4];
244c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        Value* srcBlend[4];
245c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        Value* dstBlend[4];
246c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        for (uint32_t i = 0; i < 4; ++i)
247c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        {
248c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            srcBlend[i] = FMUL(src[i], srcFactor[i]);
249c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            dstBlend[i] = FMUL(dst[i], dstFactor[i]);
250c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        }
251c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
252c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        switch (blendOp)
253c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        {
254c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case BLENDOP_ADD:
255c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[0] = FADD(srcBlend[0], dstBlend[0]);
256c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[1] = FADD(srcBlend[1], dstBlend[1]);
257c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[2] = FADD(srcBlend[2], dstBlend[2]);
258c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[3] = FADD(srcBlend[3], dstBlend[3]);
259c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
260c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
261c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case BLENDOP_SUBTRACT:
262c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[0] = FSUB(srcBlend[0], dstBlend[0]);
263c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[1] = FSUB(srcBlend[1], dstBlend[1]);
264c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[2] = FSUB(srcBlend[2], dstBlend[2]);
265c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[3] = FSUB(srcBlend[3], dstBlend[3]);
266c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
267c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
268c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case BLENDOP_REVSUBTRACT:
269c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[0] = FSUB(dstBlend[0], srcBlend[0]);
270c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[1] = FSUB(dstBlend[1], srcBlend[1]);
271c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[2] = FSUB(dstBlend[2], srcBlend[2]);
272c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[3] = FSUB(dstBlend[3], srcBlend[3]);
273c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
274c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
275c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case BLENDOP_MIN:
276c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[0] = VMINPS(src[0], dst[0]);
277c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[1] = VMINPS(src[1], dst[1]);
278c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[2] = VMINPS(src[2], dst[2]);
279c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[3] = VMINPS(src[3], dst[3]);
280c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
281c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
282c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case BLENDOP_MAX:
283c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[0] = VMAXPS(src[0], dst[0]);
284c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[1] = VMAXPS(src[1], dst[1]);
285c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[2] = VMAXPS(src[2], dst[2]);
286c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[3] = VMAXPS(src[3], dst[3]);
287c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
288c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
289c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        default:
290c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            SWR_ASSERT(false, "Unsupported blend operation: %d", blendOp);
291c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            out[0] = out[1] = out[2] = out[3] = VIMMED1(0.0f);
292c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
293c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        }
294c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
295c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        if (Color)
296c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        {
297c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[0] = out[0];
298c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[1] = out[1];
299c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[2] = out[2];
300c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        }
301c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
302c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        if (Alpha)
303c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        {
304c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[3] = out[3];
305c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        }
306c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    }
307c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
308c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    void LogicOpFunc(SWR_LOGIC_OP logicOp, Value* src[4], Value* dst[4], Value* result[4])
309c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    {
310c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        // Op: (s == PS output, d = RT contents)
311c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        switch(logicOp)
312c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        {
313c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case LOGICOP_CLEAR:
314c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[0] = VIMMED1(0);
315c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[1] = VIMMED1(0);
316c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[2] = VIMMED1(0);
317c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[3] = VIMMED1(0);
318c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
319c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
320c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case LOGICOP_NOR:
321c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            // ~(s | d)
322c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[0] = XOR(OR(src[0], dst[0]), VIMMED1(0xFFFFFFFF));
323c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[1] = XOR(OR(src[1], dst[1]), VIMMED1(0xFFFFFFFF));
324c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[2] = XOR(OR(src[2], dst[2]), VIMMED1(0xFFFFFFFF));
325c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[3] = XOR(OR(src[3], dst[3]), VIMMED1(0xFFFFFFFF));
326c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
327c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
328c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case LOGICOP_AND_INVERTED:
329c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            // ~s & d
330c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            // todo: use avx andnot instr when I can find the intrinsic to call
331c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[0] = AND(XOR(src[0], VIMMED1(0xFFFFFFFF)), dst[0]);
332c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[1] = AND(XOR(src[1], VIMMED1(0xFFFFFFFF)), dst[1]);
333c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[2] = AND(XOR(src[2], VIMMED1(0xFFFFFFFF)), dst[2]);
334c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[3] = AND(XOR(src[3], VIMMED1(0xFFFFFFFF)), dst[3]);
335c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
336c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
337c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case LOGICOP_COPY_INVERTED:
338c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            // ~s
339c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[0] = XOR(src[0], VIMMED1(0xFFFFFFFF));
340c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[1] = XOR(src[1], VIMMED1(0xFFFFFFFF));
341c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[2] = XOR(src[2], VIMMED1(0xFFFFFFFF));
342c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[3] = XOR(src[3], VIMMED1(0xFFFFFFFF));
343c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
344c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
345c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case LOGICOP_AND_REVERSE:
346c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            // s & ~d
347c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            // todo: use avx andnot instr when I can find the intrinsic to call
348c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[0] = AND(XOR(dst[0], VIMMED1(0xFFFFFFFF)), src[0]);
349c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[1] = AND(XOR(dst[1], VIMMED1(0xFFFFFFFF)), src[1]);
350c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[2] = AND(XOR(dst[2], VIMMED1(0xFFFFFFFF)), src[2]);
351c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[3] = AND(XOR(dst[3], VIMMED1(0xFFFFFFFF)), src[3]);
352c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
353c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
354c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case LOGICOP_INVERT:
355c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            // ~d
356c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[0] = XOR(dst[0], VIMMED1(0xFFFFFFFF));
357c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[1] = XOR(dst[1], VIMMED1(0xFFFFFFFF));
358c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[2] = XOR(dst[2], VIMMED1(0xFFFFFFFF));
359c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[3] = XOR(dst[3], VIMMED1(0xFFFFFFFF));
360c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
361c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
362c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case LOGICOP_XOR:
363c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            // s ^ d
364c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[0] = XOR(src[0], dst[0]);
365c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[1] = XOR(src[1], dst[1]);
366c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[2] = XOR(src[2], dst[2]);
367c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[3] = XOR(src[3], dst[3]);
368c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
369c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
370c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case LOGICOP_NAND:
371c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            // ~(s & d)
372c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[0] = XOR(AND(src[0], dst[0]), VIMMED1(0xFFFFFFFF));
373c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[1] = XOR(AND(src[1], dst[1]), VIMMED1(0xFFFFFFFF));
374c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[2] = XOR(AND(src[2], dst[2]), VIMMED1(0xFFFFFFFF));
375c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[3] = XOR(AND(src[3], dst[3]), VIMMED1(0xFFFFFFFF));
376c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
377c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
378c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case LOGICOP_AND:
379c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            // s & d
380c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[0] = AND(src[0], dst[0]);
381c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[1] = AND(src[1], dst[1]);
382c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[2] = AND(src[2], dst[2]);
383c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[3] = AND(src[3], dst[3]);
384c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
385c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
386c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case LOGICOP_EQUIV:
387c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            // ~(s ^ d)
388c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[0] = XOR(XOR(src[0], dst[0]), VIMMED1(0xFFFFFFFF));
389c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[1] = XOR(XOR(src[1], dst[1]), VIMMED1(0xFFFFFFFF));
390c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[2] = XOR(XOR(src[2], dst[2]), VIMMED1(0xFFFFFFFF));
391c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[3] = XOR(XOR(src[3], dst[3]), VIMMED1(0xFFFFFFFF));
392c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
393c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
394c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case LOGICOP_NOOP:
395c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[0] = dst[0];
396c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[1] = dst[1];
397c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[2] = dst[2];
398c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[3] = dst[3];
399c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
400c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
401c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case LOGICOP_OR_INVERTED:
402c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            // ~s | d
403c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[0] = OR(XOR(src[0], VIMMED1(0xFFFFFFFF)), dst[0]);
404c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[1] = OR(XOR(src[1], VIMMED1(0xFFFFFFFF)), dst[1]);
405c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[2] = OR(XOR(src[2], VIMMED1(0xFFFFFFFF)), dst[2]);
406c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[3] = OR(XOR(src[3], VIMMED1(0xFFFFFFFF)), dst[3]);
407c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
408c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
409c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case LOGICOP_COPY:
410c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[0] = src[0];
411c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[1] = src[1];
412c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[2] = src[2];
413c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[3] = src[3];
414c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
415c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
416c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case LOGICOP_OR_REVERSE:
417c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            // s | ~d
418c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[0] = OR(XOR(dst[0], VIMMED1(0xFFFFFFFF)), src[0]);
419c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[1] = OR(XOR(dst[1], VIMMED1(0xFFFFFFFF)), src[1]);
420c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[2] = OR(XOR(dst[2], VIMMED1(0xFFFFFFFF)), src[2]);
421c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[3] = OR(XOR(dst[3], VIMMED1(0xFFFFFFFF)), src[3]);
422c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
423c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
424c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case LOGICOP_OR:
425c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            // s | d
426c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[0] = OR(src[0], dst[0]);
427c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[1] = OR(src[1], dst[1]);
428c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[2] = OR(src[2], dst[2]);
429c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[3] = OR(src[3], dst[3]);
430c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
431c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
432c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        case LOGICOP_SET:
433c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[0] = VIMMED1(0xFFFFFFFF);
434c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[1] = VIMMED1(0xFFFFFFFF);
435c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[2] = VIMMED1(0xFFFFFFFF);
436c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[3] = VIMMED1(0xFFFFFFFF);
437c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
438c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
439c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        default:
440c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            SWR_ASSERT(false, "Unsupported logic operation: %d", logicOp);
441c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            result[0] = result[1] = result[2] = result[3] = VIMMED1(0.0f);
442c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            break;
443c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        }
444c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    }
445c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
446cee66dd2aa182ba30130bef3298444667753b051Tim Rowley    void AlphaTest(const BLEND_COMPILE_STATE& state, Value* pBlendState, Value* ppAlpha, Value* ppMask)
447c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    {
448c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        // load uint32_t reference
449c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        Value* pRef = VBROADCAST(LOAD(pBlendState, { 0, SWR_BLEND_STATE_alphaTestReference }));
450cee66dd2aa182ba30130bef3298444667753b051Tim Rowley
451cee66dd2aa182ba30130bef3298444667753b051Tim Rowley        // load alpha
452cee66dd2aa182ba30130bef3298444667753b051Tim Rowley        Value* pAlpha = LOAD(ppAlpha);
453c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
454c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        Value* pTest = nullptr;
455c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        if (state.alphaTestFormat == ALPHA_TEST_UNORM8)
456c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        {
457c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            // convert float alpha to unorm8
458c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            Value* pAlphaU8 = FMUL(pAlpha, VIMMED1(256.0f));
459c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            pAlphaU8 = FP_TO_UI(pAlphaU8, mSimdInt32Ty);
460c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
461c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            // compare
462c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            switch (state.alphaTestFunction)
463c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            {
464c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            case ZFUNC_ALWAYS:  pTest = VIMMED1(true); break;
465c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            case ZFUNC_NEVER:   pTest = VIMMED1(false); break;
466c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            case ZFUNC_LT:      pTest = ICMP_ULT(pAlphaU8, pRef); break;
467c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            case ZFUNC_EQ:      pTest = ICMP_EQ(pAlphaU8, pRef); break;
468c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            case ZFUNC_LE:      pTest = ICMP_ULE(pAlphaU8, pRef); break;
469c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            case ZFUNC_GT:      pTest = ICMP_UGT(pAlphaU8, pRef); break;
470c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            case ZFUNC_NE:      pTest = ICMP_NE(pAlphaU8, pRef); break;
471c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            case ZFUNC_GE:      pTest = ICMP_UGE(pAlphaU8, pRef); break;
472c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            default:
473c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley                SWR_ASSERT(false, "Invalid alpha test function");
474c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley                break;
475c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            }
476c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        }
477c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        else
478c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        {
479c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            // cast ref to float
480c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            pRef = BITCAST(pRef, mSimdFP32Ty);
481c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
482c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            // compare
483c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            switch (state.alphaTestFunction)
484c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            {
485c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            case ZFUNC_ALWAYS:  pTest = VIMMED1(true); break;
486c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            case ZFUNC_NEVER:   pTest = VIMMED1(false); break;
487c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            case ZFUNC_LT:      pTest = FCMP_OLT(pAlpha, pRef); break;
488c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            case ZFUNC_EQ:      pTest = FCMP_OEQ(pAlpha, pRef); break;
489c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            case ZFUNC_LE:      pTest = FCMP_OLE(pAlpha, pRef); break;
490c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            case ZFUNC_GT:      pTest = FCMP_OGT(pAlpha, pRef); break;
491c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            case ZFUNC_NE:      pTest = FCMP_ONE(pAlpha, pRef); break;
492c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            case ZFUNC_GE:      pTest = FCMP_OGE(pAlpha, pRef); break;
493c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            default:
494c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley                SWR_ASSERT(false, "Invalid alpha test function");
495c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley                break;
496c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            }
497c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        }
498c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
499c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        // load current mask
500c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        Value* pMask = LOAD(ppMask);
501c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
502c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        // convert to int1 mask
503c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        pMask = MASK(pMask);
504c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
505c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        // and with alpha test result
506c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        pMask = AND(pMask, pTest);
507c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
508c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        // convert back to vector mask
509c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        pMask = VMASK(pMask);
510c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
511c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        // store new mask
512c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        STORE(pMask, ppMask);
513c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    }
514c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
515c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    Function* Create(const BLEND_COMPILE_STATE& state)
516c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    {
517c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        static std::size_t jitNum = 0;
518c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
519c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        std::stringstream fnName("BlendShader", std::ios_base::in | std::ios_base::out | std::ios_base::ate);
520c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        fnName << jitNum++;
521c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
522c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        // blend function signature
523c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        //typedef void(*PFN_BLEND_JIT_FUNC)(const SWR_BLEND_STATE*, simdvector&, simdvector&, uint32_t, BYTE*, simdvector&, simdscalari*, simdscalari*);
524c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
525c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        std::vector<Type*> args{
526c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            PointerType::get(Gen_SWR_BLEND_STATE(JM()), 0), // SWR_BLEND_STATE*
527c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            PointerType::get(mSimdFP32Ty, 0),               // simdvector& src
528c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            PointerType::get(mSimdFP32Ty, 0),               // simdvector& src1
529cee66dd2aa182ba30130bef3298444667753b051Tim Rowley            PointerType::get(mSimdFP32Ty, 0),               // src0alpha
530c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            Type::getInt32Ty(JM()->mContext),               // sampleNum
531c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            PointerType::get(mSimdFP32Ty, 0),               // uint8_t* pDst
532c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            PointerType::get(mSimdFP32Ty, 0),               // simdvector& result
533c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            PointerType::get(mSimdInt32Ty, 0),              // simdscalari* oMask
534c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            PointerType::get(mSimdInt32Ty, 0),              // simdscalari* pMask
535c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        };
536c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
537c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        FunctionType* fTy = FunctionType::get(IRB()->getVoidTy(), args, false);
538c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        Function* blendFunc = Function::Create(fTy, GlobalValue::ExternalLinkage, fnName.str(), JM()->mpCurrentModule);
539c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
540c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        BasicBlock* entry = BasicBlock::Create(JM()->mContext, "entry", blendFunc);
541c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
542c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        IRB()->SetInsertPoint(entry);
543c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
544c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        // arguments
545c375c448bf529952afec3df62aff5f9f506c498eTim Rowley        auto argitr = blendFunc->arg_begin();
546c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        Value* pBlendState = &*argitr++;
547c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        pBlendState->setName("pBlendState");
548c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        Value* pSrc = &*argitr++;
549c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        pSrc->setName("src");
550c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        Value* pSrc1 = &*argitr++;
551c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        pSrc1->setName("src1");
552cee66dd2aa182ba30130bef3298444667753b051Tim Rowley        Value* pSrc0Alpha = &*argitr++;
553cee66dd2aa182ba30130bef3298444667753b051Tim Rowley        pSrc0Alpha->setName("src0alpha");
554c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        Value* sampleNum = &*argitr++;
555c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        sampleNum->setName("sampleNum");
556c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        Value* pDst = &*argitr++;
557c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        pDst->setName("pDst");
558c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        Value* pResult = &*argitr++;
559c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        pResult->setName("result");
560c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        Value* ppoMask = &*argitr++;
561c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        ppoMask->setName("ppoMask");
562c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        Value* ppMask = &*argitr++;
563c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        ppMask->setName("pMask");
564c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
565c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        static_assert(KNOB_COLOR_HOT_TILE_FORMAT == R32G32B32A32_FLOAT, "Unsupported hot tile format");
566c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        Value* dst[4];
567c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        Value* constantColor[4];
568c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        Value* src[4];
569c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        Value* src1[4];
570c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        Value* result[4];
571c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        for (uint32_t i = 0; i < 4; ++i)
572c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        {
573c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            // load hot tile
574c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            dst[i] = LOAD(pDst, { i });
575c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
576c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            // load constant color
577c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            constantColor[i] = VBROADCAST(LOAD(pBlendState, { 0, SWR_BLEND_STATE_constantColor, i }));
578c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
579c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            // load src
580c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            src[i] = LOAD(pSrc, { i });
581c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
582c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            // load src1
583c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            src1[i] = LOAD(pSrc1, { i });
584c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        }
585c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        Value* currentMask = VIMMED1(-1);
586090be2e434d6023428faa9842d38f9d5c3cef67aTim Rowley        if (state.desc.alphaToCoverageEnable)
587c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        {
588090be2e434d6023428faa9842d38f9d5c3cef67aTim Rowley            Value* pClampedSrc = FCLAMP(src[3], 0.0f, 1.0f);
589090be2e434d6023428faa9842d38f9d5c3cef67aTim Rowley            uint32_t bits = (1 << state.desc.numSamples) - 1;
590090be2e434d6023428faa9842d38f9d5c3cef67aTim Rowley            currentMask = FMUL(pClampedSrc, VBROADCAST(C((float)bits)));
591090be2e434d6023428faa9842d38f9d5c3cef67aTim Rowley            currentMask = FP_TO_SI(FADD(currentMask, VIMMED1(0.5f)), mSimdInt32Ty);
592c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        }
593c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
594c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        // alpha test
595c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        if (state.desc.alphaTestEnable)
596c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        {
597cee66dd2aa182ba30130bef3298444667753b051Tim Rowley            AlphaTest(state, pBlendState, pSrc0Alpha, ppMask);
598c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        }
599c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
600c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        // color blend
601c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        if (state.blendState.blendEnable)
602c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        {
603c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            // clamp sources
604c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            Clamp(state.format, src);
605c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            Clamp(state.format, src1);
606c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            Clamp(state.format, dst);
607c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            Clamp(state.format, constantColor);
608c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
609c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            // apply defaults to hottile contents to take into account missing components
610c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            ApplyDefaults(state.format, dst);
611c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
612c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            // Force defaults for unused 'X' components
613c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            ApplyUnusedDefaults(state.format, dst);
614c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
615c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            // Quantize low precision components
616c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            Quantize(state.format, dst);
617c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
618c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            // special case clamping for R11G11B10_float which has no sign bit
619c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            if (state.format == R11G11B10_FLOAT)
620c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            {
621c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley                dst[0] = VMAXPS(dst[0], VIMMED1(0.0f));
622c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley                dst[1] = VMAXPS(dst[1], VIMMED1(0.0f));
623c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley                dst[2] = VMAXPS(dst[2], VIMMED1(0.0f));
624c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley                dst[3] = VMAXPS(dst[3], VIMMED1(0.0f));
625c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            }
626c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
627c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            Value* srcFactor[4];
628c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            Value* dstFactor[4];
629c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            if (state.desc.independentAlphaBlendEnable)
630c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            {
631c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley                GenerateBlendFactor<true, false>(state.blendState.sourceBlendFactor, constantColor, src, src1, dst, srcFactor);
632c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley                GenerateBlendFactor<false, true>(state.blendState.sourceAlphaBlendFactor, constantColor, src, src1, dst, srcFactor);
633c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
634c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley                GenerateBlendFactor<true, false>(state.blendState.destBlendFactor, constantColor, src, src1, dst, dstFactor);
635c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley                GenerateBlendFactor<false, true>(state.blendState.destAlphaBlendFactor, constantColor, src, src1, dst, dstFactor);
636c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
637c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley                BlendFunc<true, false>(state.blendState.colorBlendFunc, src, srcFactor, dst, dstFactor, result);
638c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley                BlendFunc<false, true>(state.blendState.alphaBlendFunc, src, srcFactor, dst, dstFactor, result);
639c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            }
640c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            else
641c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            {
642c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley                GenerateBlendFactor<true, true>(state.blendState.sourceBlendFactor, constantColor, src, src1, dst, srcFactor);
643c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley                GenerateBlendFactor<true, true>(state.blendState.destBlendFactor, constantColor, src, src1, dst, dstFactor);
644c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
645c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley                BlendFunc<true, true>(state.blendState.colorBlendFunc, src, srcFactor, dst, dstFactor, result);
646c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            }
647c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
648c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            // store results out
649c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            for (uint32_t i = 0; i < 4; ++i)
650c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            {
651c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley                STORE(result[i], pResult, { i });
652c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            }
653c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        }
654c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
655c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        if(state.blendState.logicOpEnable)
656c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        {
657c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            const SWR_FORMAT_INFO& info = GetFormatInfo(state.format);
658c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            Value* vMask[4];
659aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin            float scale[4];
660aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin
661aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin            if (!state.blendState.blendEnable)
662aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin            {
663aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                Clamp(state.format, src);
664aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                Clamp(state.format, dst);
665aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin            }
666aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin
667c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            for(uint32_t i = 0; i < 4; i++)
668c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            {
669aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                if (info.type[i] == SWR_TYPE_UNUSED)
670c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley                {
671aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                    continue;
672aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                }
673aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin
674aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                if (info.bpc[i] >= 32) {
675aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                    vMask[i] = VIMMED1(0xFFFFFFFF);
676aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                    scale[i] = 0xFFFFFFFF;
677aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                } else {
678aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                    vMask[i] = VIMMED1((1 << info.bpc[i]) - 1);
679aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                    if (info.type[i] == SWR_TYPE_SNORM)
680aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                        scale[i] = (1 << (info.bpc[i] - 1)) - 1;
681aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                    else
682aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                        scale[i] = (1 << info.bpc[i]) - 1;
683aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                }
684aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin
685aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                switch (info.type[i]) {
686c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley                default:
687aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                    SWR_ASSERT(0, "Unsupported type for logic op\n");
688aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                    /* fallthrough */
689aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                case SWR_TYPE_UINT:
690aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                case SWR_TYPE_SINT:
691aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                    src[i] = BITCAST(src[i], mSimdInt32Ty);
692aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                    dst[i] = BITCAST(dst[i], mSimdInt32Ty);
693aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                    break;
694aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                case SWR_TYPE_SNORM:
695ddf0f097e74d57858238fcdb13003ab5c974abc3Ilia Mirkin                    src[i] = FP_TO_SI(
696ddf0f097e74d57858238fcdb13003ab5c974abc3Ilia Mirkin                        FMUL(src[i], VIMMED1(scale[i])),
697ddf0f097e74d57858238fcdb13003ab5c974abc3Ilia Mirkin                        mSimdInt32Ty);
698ddf0f097e74d57858238fcdb13003ab5c974abc3Ilia Mirkin                    dst[i] = FP_TO_SI(
699ddf0f097e74d57858238fcdb13003ab5c974abc3Ilia Mirkin                        FMUL(dst[i], VIMMED1(scale[i])),
700ddf0f097e74d57858238fcdb13003ab5c974abc3Ilia Mirkin                        mSimdInt32Ty);
701ddf0f097e74d57858238fcdb13003ab5c974abc3Ilia Mirkin                    break;
702aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                case SWR_TYPE_UNORM:
703aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                    src[i] = FP_TO_UI(
704aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                        FMUL(src[i], VIMMED1(scale[i])),
705aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                        mSimdInt32Ty);
706aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                    dst[i] = FP_TO_UI(
707aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                        FMUL(dst[i], VIMMED1(scale[i])),
708aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                        mSimdInt32Ty);
709c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley                    break;
710c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley                }
711c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            }
712c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
713c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            LogicOpFunc(state.blendState.logicOpFunc, src, dst, result);
714c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
715c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            // store results out
716c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            for(uint32_t i = 0; i < 4; ++i)
717c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            {
718aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                if (info.type[i] == SWR_TYPE_UNUSED)
719aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                {
720aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                    continue;
721aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                }
722aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin
723c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley                // clear upper bits from PS output not in RT format after doing logic op
724c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley                result[i] = AND(result[i], vMask[i]);
725c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
726aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                switch (info.type[i]) {
727aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                default:
728aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                    SWR_ASSERT(0, "Unsupported type for logic op\n");
729aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                    /* fallthrough */
730aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                case SWR_TYPE_UINT:
731aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                case SWR_TYPE_SINT:
732aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                    result[i] = BITCAST(result[i], mSimdFP32Ty);
733aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                    break;
734aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                case SWR_TYPE_SNORM:
735ddf0f097e74d57858238fcdb13003ab5c974abc3Ilia Mirkin                    result[i] = SHL(result[i], C(32 - info.bpc[i]));
736ddf0f097e74d57858238fcdb13003ab5c974abc3Ilia Mirkin                    result[i] = ASHR(result[i], C(32 - info.bpc[i]));
737ddf0f097e74d57858238fcdb13003ab5c974abc3Ilia Mirkin                    result[i] = FMUL(SI_TO_FP(result[i], mSimdFP32Ty),
738ddf0f097e74d57858238fcdb13003ab5c974abc3Ilia Mirkin                                     VIMMED1(1.0f / scale[i]));
739ddf0f097e74d57858238fcdb13003ab5c974abc3Ilia Mirkin                    break;
740aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                case SWR_TYPE_UNORM:
741aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                    result[i] = FMUL(UI_TO_FP(result[i], mSimdFP32Ty),
742aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                                     VIMMED1(1.0f / scale[i]));
743aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                    break;
744aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                }
745aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin
746aed517f98592bd7969f893bbabdb64acf2f6623fIlia Mirkin                STORE(result[i], pResult, {i});
747c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            }
748c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        }
749c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
750c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        if(state.desc.oMaskEnable)
751c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        {
752c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            assert(!(state.desc.alphaToCoverageEnable));
753c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            // load current mask
754c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            Value* oMask = LOAD(ppoMask);
755c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            Value* sampleMasked = VBROADCAST(SHL(C(1), sampleNum));
756c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            oMask = AND(oMask, sampleMasked);
757c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            currentMask = AND(oMask, currentMask);
758c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        }
759c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
760c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        if(state.desc.sampleMaskEnable)
761c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        {
762c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            Value* sampleMask = LOAD(pBlendState, { 0, SWR_BLEND_STATE_sampleMask});
763c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            Value* sampleMasked = SHL(C(1), sampleNum);
764c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            sampleMask = AND(sampleMask, sampleMasked);
765c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            sampleMask = VBROADCAST(ICMP_SGT(sampleMask, C(0)));
766c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            sampleMask = S_EXT(sampleMask, mSimdInt32Ty);
767c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            currentMask = AND(sampleMask, currentMask);
768c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        }
769c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
770090be2e434d6023428faa9842d38f9d5c3cef67aTim Rowley        if (state.desc.alphaToCoverageEnable)
771090be2e434d6023428faa9842d38f9d5c3cef67aTim Rowley        {
772090be2e434d6023428faa9842d38f9d5c3cef67aTim Rowley            Value* sampleMasked = SHL(C(1), sampleNum);
773090be2e434d6023428faa9842d38f9d5c3cef67aTim Rowley            currentMask = AND(currentMask, VBROADCAST(sampleMasked));
774090be2e434d6023428faa9842d38f9d5c3cef67aTim Rowley        }
775090be2e434d6023428faa9842d38f9d5c3cef67aTim Rowley
776c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        if(state.desc.sampleMaskEnable || state.desc.alphaToCoverageEnable ||
777c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley           state.desc.oMaskEnable)
778c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        {
779c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            // load current mask
780c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            Value* pMask = LOAD(ppMask);
781c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            currentMask = S_EXT(ICMP_SGT(currentMask, VBROADCAST(C(0))), mSimdInt32Ty);
782c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            Value* outputMask = AND(pMask, currentMask);
783c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            // store new mask
784c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley            STORE(outputMask, GEP(ppMask, C(0)));
785c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        }
786c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
787c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        RET_VOID();
788c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
789c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        JitManager::DumpToFile(blendFunc, "");
790c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
791d3d97f8395513bf365d2fe8e4292c8098290586fTim Rowley        ::FunctionPassManager passes(JM()->mpCurrentModule);
79268314b676968e2cf0f8e94f573fa28e766e48349Tim Rowley
793c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        passes.add(createBreakCriticalEdgesPass());
794c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        passes.add(createCFGSimplificationPass());
795c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        passes.add(createEarlyCSEPass());
796c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        passes.add(createPromoteMemoryToRegisterPass());
797c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        passes.add(createCFGSimplificationPass());
798c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        passes.add(createEarlyCSEPass());
799c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        passes.add(createInstructionCombiningPass());
800c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        passes.add(createInstructionSimplifierPass());
801c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        passes.add(createConstantPropagationPass());
802c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        passes.add(createSCCPPass());
803c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        passes.add(createAggressiveDCEPass());
804c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
805c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        passes.run(*blendFunc);
806c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
807c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        JitManager::DumpToFile(blendFunc, "optimized");
808c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
809c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley        return blendFunc;
810c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    }
811c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley};
812c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
813c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley//////////////////////////////////////////////////////////////////////////
814c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @brief JITs from fetch shader IR
815c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param hJitMgr - JitManager handle
816c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param func   - LLVM function IR
817c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @return PFN_FETCH_FUNC - pointer to fetch code
818c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim RowleyPFN_BLEND_JIT_FUNC JitBlendFunc(HANDLE hJitMgr, const HANDLE hFunc)
819c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley{
820c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    const llvm::Function *func = (const llvm::Function*)hFunc;
821c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    JitManager* pJitMgr = reinterpret_cast<JitManager*>(hJitMgr);
822c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    PFN_BLEND_JIT_FUNC pfnBlend;
823c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    pfnBlend = (PFN_BLEND_JIT_FUNC)(pJitMgr->mpExec->getFunctionAddress(func->getName().str()));
824c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    // MCJIT finalizes modules the first time you JIT code from them. After finalized, you cannot add new IR to the module
825c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    pJitMgr->mIsModuleFinalized = true;
826c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
827c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    return pfnBlend;
828c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley}
829c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
830c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley//////////////////////////////////////////////////////////////////////////
831c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @brief JIT compiles blend shader
832c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param hJitMgr - JitManager handle
833c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param state   - blend state to build function from
834c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowleyextern "C" PFN_BLEND_JIT_FUNC JITCALL JitCompileBlend(HANDLE hJitMgr, const BLEND_COMPILE_STATE& state)
835c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley{
836c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    JitManager* pJitMgr = reinterpret_cast<JitManager*>(hJitMgr);
837c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
838c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    pJitMgr->SetupNewModule();
839c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
840c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    BlendJit theJit(pJitMgr);
841c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    HANDLE hFunc = theJit.Create(state);
842c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley
843c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley    return JitBlendFunc(hJitMgr, hFunc);
844c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley}
845