1f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/************************************************************************** 2f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 3f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Copyright 2009-2010 VMware, Inc. 4f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * All Rights Reserved. 5f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 6f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Permission is hereby granted, free of charge, to any person obtaining a 7f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * copy of this software and associated documentation files (the 8f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * "Software"), to deal in the Software without restriction, including 9f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * without limitation the rights to use, copy, modify, merge, publish, 10f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * distribute, sub license, and/or sell copies of the Software, and to 11f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * permit persons to whom the Software is furnished to do so, subject to 12f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * the following conditions: 13f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 14f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * The above copyright notice and this permission notice (including the 15f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * next paragraph) shall be included in all copies or substantial portions 16f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * of the Software. 17f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 18f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 26f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org **************************************************************************/ 27f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 28f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 29f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 30f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * @file 31f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Blend LLVM IR generation -- SoA layout. 32f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 33f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Blending in SoA is much faster than AoS, especially when separate rgb/alpha 34f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * factors/functions are used, since no channel masking/shuffling is necessary 35f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * and we can achieve the full throughput of the SIMD operations. Furthermore 36f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * the fragment shader output is also in SoA, so it fits nicely with the rest 37f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * of the fragment pipeline. 38f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 39f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * The drawback is that to be displayed the color buffer needs to be in AoS 40f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * layout, so we need to tile/untile the color buffer before/after rendering. 41f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * A color buffer like 42f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 43f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * R11 G11 B11 A11 R12 G12 B12 A12 R13 G13 B13 A13 R14 G14 B14 A14 ... 44f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * R21 G21 B21 A21 R22 G22 B22 A22 R23 G23 B23 A23 R24 G24 B24 A24 ... 45f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 46f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * R31 G31 B31 A31 R32 G32 B32 A32 R33 G33 B33 A33 R34 G34 B34 A34 ... 47f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * R41 G41 B41 A41 R42 G42 B42 A42 R43 G43 B43 A43 R44 G44 B44 A44 ... 48f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 49f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... 50f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 51f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * will actually be stored in memory as 52f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 53f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * R11 R12 R21 R22 R13 R14 R23 R24 ... G11 G12 G21 G22 G13 G14 G23 G24 ... B11 B12 B21 B22 B13 B14 B23 B24 ... A11 A12 A21 A22 A13 A14 A23 A24 ... 54f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * R31 R32 R41 R42 R33 R34 R43 R44 ... G31 G32 G41 G42 G33 G34 G43 G44 ... B31 B32 B41 B42 B33 B34 B43 B44 ... A31 A32 A41 A42 A33 A34 A43 A44 ... 55f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... 56f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 57f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * NOTE: Run lp_blend_test after any change to this file. 58f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 59f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * You can also run lp_blend_test to obtain AoS vs SoA benchmarks. Invoking it 60f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * as: 61f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 62f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * lp_blend_test -o blend.tsv 63f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 64f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * will generate a tab-seperated-file with the test results and performance 65f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * measurements. 66f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 67f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * @author Jose Fonseca <jfonseca@vmware.com> 68f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 69f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 70f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 71f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "pipe/p_state.h" 72f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "util/u_debug.h" 73f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 74f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "gallivm/lp_bld_type.h" 75f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "gallivm/lp_bld_arit.h" 76f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "gallivm/lp_bld_init.h" 77f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "lp_bld_blend.h" 78f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 79f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 80f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 81f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * We may use the same values several times, so we keep them here to avoid 82f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * recomputing them. Also reusing the values allows us to do simplifications 83f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * that LLVM optimization passes wouldn't normally be able to do. 84f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 85f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstruct lp_build_blend_soa_context 86f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 87f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct lp_build_context base; 88f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 89f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef src[4]; 90f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef dst[4]; 91f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef con[4]; 92f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 93f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef inv_src[4]; 94f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef inv_dst[4]; 95f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef inv_con[4]; 96f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 97f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef src_alpha_saturate; 98f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 99f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /** 100f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * We store all factors in a table in order to eliminate redundant 101f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * multiplications later. 102f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Indexes are: factor[src,dst][color,term][r,g,b,a] 103f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 104f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef factor[2][2][4]; 105f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 106f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /** 107f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Table with all terms. 108f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Indexes are: term[src,dst][r,g,b,a] 109f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 110f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef term[2][4]; 111f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}; 112f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 113f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 114f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 115f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Build a single SOA blend factor for a color channel. 116f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * \param i the color channel in [0,3] 117f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 118f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic LLVMValueRef 119f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orglp_build_blend_soa_factor(struct lp_build_blend_soa_context *bld, 120f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned factor, unsigned i) 121f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 122f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* 123f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Compute src/first term RGB 124f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 125f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (factor) { 126f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case PIPE_BLENDFACTOR_ONE: 127f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return bld->base.one; 128f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case PIPE_BLENDFACTOR_SRC_COLOR: 129f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return bld->src[i]; 130f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case PIPE_BLENDFACTOR_SRC_ALPHA: 131f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return bld->src[3]; 132f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case PIPE_BLENDFACTOR_DST_COLOR: 133f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return bld->dst[i]; 134f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case PIPE_BLENDFACTOR_DST_ALPHA: 135f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return bld->dst[3]; 136f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 137f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(i == 3) 138f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return bld->base.one; 139f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else { 140f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(!bld->inv_dst[3]) 141f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld->inv_dst[3] = lp_build_comp(&bld->base, bld->dst[3]); 142f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(!bld->src_alpha_saturate) 143f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld->src_alpha_saturate = lp_build_min(&bld->base, bld->src[3], bld->inv_dst[3]); 144f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return bld->src_alpha_saturate; 145f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 146f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case PIPE_BLENDFACTOR_CONST_COLOR: 147f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return bld->con[i]; 148f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case PIPE_BLENDFACTOR_CONST_ALPHA: 149f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return bld->con[3]; 150f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case PIPE_BLENDFACTOR_SRC1_COLOR: 151f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* TODO */ 152f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(0); 153f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return bld->base.zero; 154f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case PIPE_BLENDFACTOR_SRC1_ALPHA: 155f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* TODO */ 156f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(0); 157f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return bld->base.zero; 158f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case PIPE_BLENDFACTOR_ZERO: 159f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return bld->base.zero; 160f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case PIPE_BLENDFACTOR_INV_SRC_COLOR: 161f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(!bld->inv_src[i]) 162f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld->inv_src[i] = lp_build_comp(&bld->base, bld->src[i]); 163f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return bld->inv_src[i]; 164f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 165f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(!bld->inv_src[3]) 166f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld->inv_src[3] = lp_build_comp(&bld->base, bld->src[3]); 167f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return bld->inv_src[3]; 168f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case PIPE_BLENDFACTOR_INV_DST_COLOR: 169f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(!bld->inv_dst[i]) 170f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld->inv_dst[i] = lp_build_comp(&bld->base, bld->dst[i]); 171f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return bld->inv_dst[i]; 172f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case PIPE_BLENDFACTOR_INV_DST_ALPHA: 173f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(!bld->inv_dst[3]) 174f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld->inv_dst[3] = lp_build_comp(&bld->base, bld->dst[3]); 175f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return bld->inv_dst[3]; 176f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case PIPE_BLENDFACTOR_INV_CONST_COLOR: 177f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(!bld->inv_con[i]) 178f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld->inv_con[i] = lp_build_comp(&bld->base, bld->con[i]); 179f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return bld->inv_con[i]; 180f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 181f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(!bld->inv_con[3]) 182f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld->inv_con[3] = lp_build_comp(&bld->base, bld->con[3]); 183f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return bld->inv_con[3]; 184f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 185f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* TODO */ 186f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(0); 187f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return bld->base.zero; 188f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 189f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* TODO */ 190f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(0); 191f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return bld->base.zero; 192f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 193f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(0); 194f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return bld->base.zero; 195f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 196f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 197f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 198f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 199f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 200f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Generate blend code in SOA mode. 201f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * \param rt render target index (to index the blend / colormask state) 202f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * \param src src/fragment color 203f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * \param dst dst/framebuffer color 204f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * \param con constant blend color 205f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * \param res the result/output 206f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 207f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 208f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orglp_build_blend_soa(struct gallivm_state *gallivm, 209f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const struct pipe_blend_state *blend, 210f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct lp_type type, 211f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned rt, 212f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef src[4], 213f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef dst[4], 214f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef con[4], 215f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef res[4]) 216f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 217f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMBuilderRef builder = gallivm->builder; 218f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct lp_build_blend_soa_context bld; 219f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned i, j, k; 220f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 221f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(rt < PIPE_MAX_COLOR_BUFS); 222f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 223f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Setup build context */ 224f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org memset(&bld, 0, sizeof bld); 225f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org lp_build_context_init(&bld.base, gallivm, type); 226f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (i = 0; i < 4; ++i) { 227f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld.src[i] = src[i]; 228f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld.dst[i] = dst[i]; 229f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld.con[i] = con[i]; 230f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 231f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 232f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (i = 0; i < 4; ++i) { 233f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* only compute blending for the color channels enabled for writing */ 234f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (blend->rt[rt].colormask & (1 << i)) { 235f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (blend->logicop_enable) { 236f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(!type.floating) { 237f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org res[i] = lp_build_logicop(builder, blend->logicop_func, src[i], dst[i]); 238f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 239f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 240f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org res[i] = dst[i]; 241f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 242f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else if (blend->rt[rt].blend_enable) { 243f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned src_factor = i < 3 ? blend->rt[rt].rgb_src_factor : blend->rt[rt].alpha_src_factor; 244f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned dst_factor = i < 3 ? blend->rt[rt].rgb_dst_factor : blend->rt[rt].alpha_dst_factor; 245f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned func = i < 3 ? blend->rt[rt].rgb_func : blend->rt[rt].alpha_func; 246f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org boolean func_commutative = lp_build_blend_func_commutative(func); 247f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 248f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* 249f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Compute src/dst factors. 250f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 251f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 252f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld.factor[0][0][i] = src[i]; 253f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld.factor[0][1][i] = lp_build_blend_soa_factor(&bld, src_factor, i); 254f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld.factor[1][0][i] = dst[i]; 255f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld.factor[1][1][i] = lp_build_blend_soa_factor(&bld, dst_factor, i); 256f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 257f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* 258f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Check if lp_build_blend can perform any optimisations 259f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 260f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org res[i] = lp_build_blend(&bld.base, 261f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org func, 262f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org src_factor, 263f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dst_factor, 264f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld.factor[0][0][i], 265f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld.factor[1][0][i], 266f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld.factor[0][1][i], 267f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld.factor[1][1][i], 268f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org true, 269f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org true); 270f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 271f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (res[i]) { 272f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 273f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 274f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 275f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* 276f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Compute src/dst terms 277f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 278f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 279f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for(k = 0; k < 2; ++k) { 280f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* See if this multiplication has been previously computed */ 281f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for(j = 0; j < i; ++j) { 282f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if((bld.factor[k][0][j] == bld.factor[k][0][i] && 283f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld.factor[k][1][j] == bld.factor[k][1][i]) || 284f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org (bld.factor[k][0][j] == bld.factor[k][1][i] && 285f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld.factor[k][1][j] == bld.factor[k][0][i])) 286f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 287f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 288f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 289f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(j < i && bld.term[k][j]) 290f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld.term[k][i] = bld.term[k][j]; 291f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 292f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld.term[k][i] = lp_build_mul(&bld.base, bld.factor[k][0][i], bld.factor[k][1][i]); 293f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 294f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (src_factor == PIPE_BLENDFACTOR_ZERO && 295f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org (dst_factor == PIPE_BLENDFACTOR_DST_ALPHA || 296f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dst_factor == PIPE_BLENDFACTOR_INV_DST_ALPHA)) { 297f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* XXX special case these combos to work around an apparent 298f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * bug in LLVM. 299f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * This hack disables the check for multiplication by zero 300f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * in lp_bld_mul(). When we optimize away the 301f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * multiplication, something goes wrong during code 302f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * generation and we segfault at runtime. 303f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 304f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef zeroSave = bld.base.zero; 305f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld.base.zero = NULL; 306f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld.term[k][i] = lp_build_mul(&bld.base, bld.factor[k][0][i], 307f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld.factor[k][1][i]); 308f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld.base.zero = zeroSave; 309f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 310f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 311f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 312f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* 313f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Combine terms 314f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 315f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 316f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* See if this function has been previously applied */ 317f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for(j = 0; j < i; ++j) { 318f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned prev_func = j < 3 ? blend->rt[rt].rgb_func : blend->rt[rt].alpha_func; 319f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned func_reverse = lp_build_blend_func_reverse(func, prev_func); 320f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 321f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if((!func_reverse && 322f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld.term[0][j] == bld.term[0][i] && 323f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld.term[1][j] == bld.term[1][i]) || 324f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ((func_commutative || func_reverse) && 325f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld.term[0][j] == bld.term[1][i] && 326f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld.term[1][j] == bld.term[0][i])) 327f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 328f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 329f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 330f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(j < i) 331f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org res[i] = res[j]; 332f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 333f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org res[i] = lp_build_blend_func(&bld.base, func, bld.term[0][i], bld.term[1][i]); 334f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 335f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else { 336f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org res[i] = src[i]; 337f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 338f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 339f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else { 340f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org res[i] = dst[i]; 341f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 342f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 343f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 344