/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/


#include "pipe/p_defines.h"

#include "util/u_format.h"
#include "util/u_memory.h"
#include "util/u_string.h"

#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_conv.h"
#include "lp_bld_swizzle.h"
39bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca#include "lp_bld_gather.h" 40dcc5d7f67220bc93aa7a351658649877c7e4cf69José Fonseca#include "lp_bld_debug.h" 41866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca#include "lp_bld_format.h" 42866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca 43866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca 44866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonsecavoid 45abff4214ef870f26d5c64adac1235b9e9438a51eJosé Fonsecalp_build_format_swizzle_soa(const struct util_format_description *format_desc, 462c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca struct lp_build_context *bld, 47abff4214ef870f26d5c64adac1235b9e9438a51eJosé Fonseca const LLVMValueRef *unswizzled, 485b876241a0f9a549c247e602d2b19967cd7f2d6aBrian Paul LLVMValueRef swizzled_out[4]) 49abff4214ef870f26d5c64adac1235b9e9438a51eJosé Fonseca{ 502c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca assert(UTIL_FORMAT_SWIZZLE_0 == PIPE_SWIZZLE_ZERO); 512c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca assert(UTIL_FORMAT_SWIZZLE_1 == PIPE_SWIZZLE_ONE); 522c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca 532c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { 542c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca /* 552c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca * Return zzz1 for depth-stencil formats. 562c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca * 572c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca * XXX: Allow to control the depth swizzle with an additional parameter, 582c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca * as the caller may wish another depth swizzle, or retain the stencil 592c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca * value. 
602c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca */ 61abff4214ef870f26d5c64adac1235b9e9438a51eJosé Fonseca enum util_format_swizzle swizzle = format_desc->swizzle[0]; 622c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca LLVMValueRef depth = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle); 635b876241a0f9a549c247e602d2b19967cd7f2d6aBrian Paul swizzled_out[2] = swizzled_out[1] = swizzled_out[0] = depth; 645b876241a0f9a549c247e602d2b19967cd7f2d6aBrian Paul swizzled_out[3] = bld->one; 65abff4214ef870f26d5c64adac1235b9e9438a51eJosé Fonseca } 66abff4214ef870f26d5c64adac1235b9e9438a51eJosé Fonseca else { 67abff4214ef870f26d5c64adac1235b9e9438a51eJosé Fonseca unsigned chan; 68abff4214ef870f26d5c64adac1235b9e9438a51eJosé Fonseca for (chan = 0; chan < 4; ++chan) { 69abff4214ef870f26d5c64adac1235b9e9438a51eJosé Fonseca enum util_format_swizzle swizzle = format_desc->swizzle[chan]; 705b876241a0f9a549c247e602d2b19967cd7f2d6aBrian Paul swizzled_out[chan] = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle); 71abff4214ef870f26d5c64adac1235b9e9438a51eJosé Fonseca } 72abff4214ef870f26d5c64adac1235b9e9438a51eJosé Fonseca } 73abff4214ef870f26d5c64adac1235b9e9438a51eJosé Fonseca} 74abff4214ef870f26d5c64adac1235b9e9438a51eJosé Fonseca 75abff4214ef870f26d5c64adac1235b9e9438a51eJosé Fonseca 767e1aceaf0a1fb7b4ee44c7bc488f03b584b8b785José Fonseca/** 777e1aceaf0a1fb7b4ee44c7bc488f03b584b8b785José Fonseca * Unpack several pixels in SoA. 
787e1aceaf0a1fb7b4ee44c7bc488f03b584b8b785José Fonseca * 797e1aceaf0a1fb7b4ee44c7bc488f03b584b8b785José Fonseca * It takes a vector of packed pixels: 807e1aceaf0a1fb7b4ee44c7bc488f03b584b8b785José Fonseca * 817e1aceaf0a1fb7b4ee44c7bc488f03b584b8b785José Fonseca * packed = {P0, P1, P2, P3, ..., Pn} 827e1aceaf0a1fb7b4ee44c7bc488f03b584b8b785José Fonseca * 837e1aceaf0a1fb7b4ee44c7bc488f03b584b8b785José Fonseca * And will produce four vectors: 847e1aceaf0a1fb7b4ee44c7bc488f03b584b8b785José Fonseca * 857e1aceaf0a1fb7b4ee44c7bc488f03b584b8b785José Fonseca * red = {R0, R1, R2, R3, ..., Rn} 867e1aceaf0a1fb7b4ee44c7bc488f03b584b8b785José Fonseca * green = {G0, G1, G2, G3, ..., Gn} 877e1aceaf0a1fb7b4ee44c7bc488f03b584b8b785José Fonseca * blue = {B0, B1, B2, B3, ..., Bn} 887e1aceaf0a1fb7b4ee44c7bc488f03b584b8b785José Fonseca * alpha = {A0, A1, A2, A3, ..., An} 897e1aceaf0a1fb7b4ee44c7bc488f03b584b8b785José Fonseca * 907e1aceaf0a1fb7b4ee44c7bc488f03b584b8b785José Fonseca * It requires that a packed pixel fits into an element of the output 917e1aceaf0a1fb7b4ee44c7bc488f03b584b8b785José Fonseca * channels. The common case is when converting pixel with a depth of 32 bit or 927e1aceaf0a1fb7b4ee44c7bc488f03b584b8b785José Fonseca * less into floats. 
939a0ff95425ab291374368cc0968ab674729f66f3Brian Paul * 949a0ff95425ab291374368cc0968ab674729f66f3Brian Paul * \param format_desc the format of the 'packed' incoming pixel vector 959a0ff95425ab291374368cc0968ab674729f66f3Brian Paul * \param type the desired type for rgba_out (type.length = n, above) 969a0ff95425ab291374368cc0968ab674729f66f3Brian Paul * \param packed the incoming vector of packed pixels 979a0ff95425ab291374368cc0968ab674729f66f3Brian Paul * \param rgba_out returns the SoA R,G,B,A vectors 987e1aceaf0a1fb7b4ee44c7bc488f03b584b8b785José Fonseca */ 99abff4214ef870f26d5c64adac1235b9e9438a51eJosé Fonsecavoid 100efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paullp_build_unpack_rgba_soa(struct gallivm_state *gallivm, 101866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca const struct util_format_description *format_desc, 102b4835ea03d64261da5a892f9590c9977b06920e8José Fonseca struct lp_type type, 103866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca LLVMValueRef packed, 1045b876241a0f9a549c247e602d2b19967cd7f2d6aBrian Paul LLVMValueRef rgba_out[4]) 105866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca{ 106efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul LLVMBuilderRef builder = gallivm->builder; 1072c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca struct lp_build_context bld; 108866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca LLVMValueRef inputs[4]; 109866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca unsigned start; 110866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca unsigned chan; 111866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca 11240bac07f9b5182890719151c99e9d9035e7984e7José Fonseca assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); 113866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca assert(format_desc->block.width == 1); 114866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca assert(format_desc->block.height == 1); 1157e1aceaf0a1fb7b4ee44c7bc488f03b584b8b785José Fonseca assert(format_desc->block.bits <= 
type.width); 1167e1aceaf0a1fb7b4ee44c7bc488f03b584b8b785José Fonseca /* FIXME: Support more output types */ 1177e1aceaf0a1fb7b4ee44c7bc488f03b584b8b785José Fonseca assert(type.floating); 1187e1aceaf0a1fb7b4ee44c7bc488f03b584b8b785José Fonseca assert(type.width == 32); 119866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca 120efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul lp_build_context_init(&bld, gallivm, type); 1212c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca 122866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca /* Decode the input vector components */ 123866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca start = 0; 124213e6584ed91b4ac6c3ed27f076e1b64e51cbc06José Fonseca for (chan = 0; chan < format_desc->nr_channels; ++chan) { 1259a0ff95425ab291374368cc0968ab674729f66f3Brian Paul const unsigned width = format_desc->channel[chan].size; 1269a0ff95425ab291374368cc0968ab674729f66f3Brian Paul const unsigned stop = start + width; 127866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca LLVMValueRef input; 128866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca 129866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca input = packed; 130866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca 131866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca switch(format_desc->channel[chan].type) { 132866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca case UTIL_FORMAT_TYPE_VOID: 133efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul input = lp_build_undef(gallivm, type); 134866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca break; 135866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca 136866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca case UTIL_FORMAT_TYPE_UNSIGNED: 1378f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca /* 1388f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca * Align the LSB 1398f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca */ 1408f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca 1418f38135e283a0e81510fb40fb9e0c918436fa5f1José 
Fonseca if (start) { 142efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(gallivm, type, start), ""); 1438f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca } 1448f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca 1458f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca /* 1468f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca * Zero the MSBs 1478f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca */ 1488f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca 1498f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca if (stop < format_desc->block.bits) { 1508f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca unsigned mask = ((unsigned long long)1 << width) - 1; 151efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(gallivm, type, mask), ""); 1528f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca } 153866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca 1548f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca /* 1558f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca * Type conversion 1568f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca */ 1578f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca 1588f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca if (type.floating) { 159866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca if(format_desc->channel[chan].normalized) 160efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul input = lp_build_unsigned_norm_to_float(gallivm, width, type, input); 161866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca else 162efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul input = LLVMBuildSIToFP(builder, input, 163efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul lp_build_vec_type(gallivm, type), ""); 164866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca } 165866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca else { 166866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca /* FIXME */ 
167866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca assert(0); 168efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul input = lp_build_undef(gallivm, type); 169866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca } 1708f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca 1718f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca break; 1728f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca 1738f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca case UTIL_FORMAT_TYPE_SIGNED: 1748f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca /* 1758f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca * Align the sign bit first. 1768f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca */ 1778f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca 1788f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca if (stop < type.width) { 1798f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca unsigned bits = type.width - stop; 180efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits); 1818f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca input = LLVMBuildShl(builder, input, bits_val, ""); 1828f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca } 1838f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca 1848f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca /* 1858f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca * Align the LSB (with an arithmetic shift to preserve the sign) 1868f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca */ 1878f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca 1888f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca if (format_desc->channel[chan].size < type.width) { 1898f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca unsigned bits = type.width - format_desc->channel[chan].size; 190efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits); 1918f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca input = LLVMBuildAShr(builder, input, 
bits_val, ""); 1928f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca } 1938f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca 1948f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca /* 1958f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca * Type conversion 1968f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca */ 1978f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca 1988f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca if (type.floating) { 199efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), ""); 2008f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca if (format_desc->channel[chan].normalized) { 2018f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1); 202efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale); 203fc9a49b638c26801951c33a570178bbb2b67ec60nobled input = LLVMBuildFMul(builder, input, scale_val, ""); 2048f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca } 2058f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca } 2068f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca else { 2078f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca /* FIXME */ 2088f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca assert(0); 209efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul input = lp_build_undef(gallivm, type); 2108f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca } 2118f38135e283a0e81510fb40fb9e0c918436fa5f1José Fonseca 212866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca break; 213866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca 2147e1aceaf0a1fb7b4ee44c7bc488f03b584b8b785José Fonseca case UTIL_FORMAT_TYPE_FLOAT: 2157e1aceaf0a1fb7b4ee44c7bc488f03b584b8b785José Fonseca if (type.floating) { 2167e1aceaf0a1fb7b4ee44c7bc488f03b584b8b785José Fonseca assert(start == 0); 2177e1aceaf0a1fb7b4ee44c7bc488f03b584b8b785José Fonseca assert(stop == 32); 
2187e1aceaf0a1fb7b4ee44c7bc488f03b584b8b785José Fonseca assert(type.width == 32); 219efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul input = LLVMBuildBitCast(builder, input, lp_build_vec_type(gallivm, type), ""); 2207e1aceaf0a1fb7b4ee44c7bc488f03b584b8b785José Fonseca } 2217e1aceaf0a1fb7b4ee44c7bc488f03b584b8b785José Fonseca else { 2227e1aceaf0a1fb7b4ee44c7bc488f03b584b8b785José Fonseca /* FIXME */ 2237e1aceaf0a1fb7b4ee44c7bc488f03b584b8b785José Fonseca assert(0); 224efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul input = lp_build_undef(gallivm, type); 2257e1aceaf0a1fb7b4ee44c7bc488f03b584b8b785José Fonseca } 2267e1aceaf0a1fb7b4ee44c7bc488f03b584b8b785José Fonseca break; 2277e1aceaf0a1fb7b4ee44c7bc488f03b584b8b785José Fonseca 2287e1aceaf0a1fb7b4ee44c7bc488f03b584b8b785José Fonseca case UTIL_FORMAT_TYPE_FIXED: 22940bac07f9b5182890719151c99e9d9035e7984e7José Fonseca if (type.floating) { 23040bac07f9b5182890719151c99e9d9035e7984e7José Fonseca double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1); 231efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale); 232efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), ""); 233fc9a49b638c26801951c33a570178bbb2b67ec60nobled input = LLVMBuildFMul(builder, input, scale_val, ""); 23440bac07f9b5182890719151c99e9d9035e7984e7José Fonseca } 23540bac07f9b5182890719151c99e9d9035e7984e7José Fonseca else { 23640bac07f9b5182890719151c99e9d9035e7984e7José Fonseca /* FIXME */ 23740bac07f9b5182890719151c99e9d9035e7984e7José Fonseca assert(0); 238efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul input = lp_build_undef(gallivm, type); 23940bac07f9b5182890719151c99e9d9035e7984e7José Fonseca } 2407e1aceaf0a1fb7b4ee44c7bc488f03b584b8b785José Fonseca break; 2417e1aceaf0a1fb7b4ee44c7bc488f03b584b8b785José Fonseca 242866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca default: 
2437e1aceaf0a1fb7b4ee44c7bc488f03b584b8b785José Fonseca assert(0); 244efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul input = lp_build_undef(gallivm, type); 245866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca break; 246866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca } 247866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca 248866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca inputs[chan] = input; 249866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca 250866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca start = stop; 251866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca } 252866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca 2535b876241a0f9a549c247e602d2b19967cd7f2d6aBrian Paul lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out); 254866fbacf2bf93282f622f1f455250491d0b3b63fJosé Fonseca} 255728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca 256728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca 257eb20c57f03f7f6a43dedb9c317f3648087e6d1d7José Fonsecavoid 258efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paullp_build_rgba8_to_f32_soa(struct gallivm_state *gallivm, 259eb20c57f03f7f6a43dedb9c317f3648087e6d1d7José Fonseca struct lp_type dst_type, 260eb20c57f03f7f6a43dedb9c317f3648087e6d1d7José Fonseca LLVMValueRef packed, 261eb20c57f03f7f6a43dedb9c317f3648087e6d1d7José Fonseca LLVMValueRef *rgba) 262eb20c57f03f7f6a43dedb9c317f3648087e6d1d7José Fonseca{ 263efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul LLVMBuilderRef builder = gallivm->builder; 264efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul LLVMValueRef mask = lp_build_const_int_vec(gallivm, dst_type, 0xff); 265eb20c57f03f7f6a43dedb9c317f3648087e6d1d7José Fonseca unsigned chan; 266eb20c57f03f7f6a43dedb9c317f3648087e6d1d7José Fonseca 267eb20c57f03f7f6a43dedb9c317f3648087e6d1d7José Fonseca packed = LLVMBuildBitCast(builder, packed, 268efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul lp_build_int_vec_type(gallivm, dst_type), ""); 269eb20c57f03f7f6a43dedb9c317f3648087e6d1d7José 
Fonseca 270eb20c57f03f7f6a43dedb9c317f3648087e6d1d7José Fonseca /* Decode the input vector components */ 271eb20c57f03f7f6a43dedb9c317f3648087e6d1d7José Fonseca for (chan = 0; chan < 4; ++chan) { 272eb20c57f03f7f6a43dedb9c317f3648087e6d1d7José Fonseca unsigned start = chan*8; 273eb20c57f03f7f6a43dedb9c317f3648087e6d1d7José Fonseca unsigned stop = start + 8; 274eb20c57f03f7f6a43dedb9c317f3648087e6d1d7José Fonseca LLVMValueRef input; 275eb20c57f03f7f6a43dedb9c317f3648087e6d1d7José Fonseca 276eb20c57f03f7f6a43dedb9c317f3648087e6d1d7José Fonseca input = packed; 277eb20c57f03f7f6a43dedb9c317f3648087e6d1d7José Fonseca 278eb20c57f03f7f6a43dedb9c317f3648087e6d1d7José Fonseca if (start) 279eb20c57f03f7f6a43dedb9c317f3648087e6d1d7José Fonseca input = LLVMBuildLShr(builder, input, 280efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul lp_build_const_int_vec(gallivm, dst_type, start), ""); 281eb20c57f03f7f6a43dedb9c317f3648087e6d1d7José Fonseca 282eb20c57f03f7f6a43dedb9c317f3648087e6d1d7José Fonseca if (stop < 32) 283eb20c57f03f7f6a43dedb9c317f3648087e6d1d7José Fonseca input = LLVMBuildAnd(builder, input, mask, ""); 284eb20c57f03f7f6a43dedb9c317f3648087e6d1d7José Fonseca 285efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul input = lp_build_unsigned_norm_to_float(gallivm, 8, dst_type, input); 286eb20c57f03f7f6a43dedb9c317f3648087e6d1d7José Fonseca 287eb20c57f03f7f6a43dedb9c317f3648087e6d1d7José Fonseca rgba[chan] = input; 288eb20c57f03f7f6a43dedb9c317f3648087e6d1d7José Fonseca } 289eb20c57f03f7f6a43dedb9c317f3648087e6d1d7José Fonseca} 290eb20c57f03f7f6a43dedb9c317f3648087e6d1d7José Fonseca 291eb20c57f03f7f6a43dedb9c317f3648087e6d1d7José Fonseca 292eb20c57f03f7f6a43dedb9c317f3648087e6d1d7José Fonseca 293728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca/** 2949a0ff95425ab291374368cc0968ab674729f66f3Brian Paul * Fetch a texels from a texture, returning them in SoA layout. 
295728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca * 2969a0ff95425ab291374368cc0968ab674729f66f3Brian Paul * \param type the desired return type for 'rgba'. The vector length 2979a0ff95425ab291374368cc0968ab674729f66f3Brian Paul * is the number of texels to fetch 29892b1908db89f23ee05f8d0da5307529440bc7560Brian Paul * 29992b1908db89f23ee05f8d0da5307529440bc7560Brian Paul * \param base_ptr points to start of the texture image block. For non- 30092b1908db89f23ee05f8d0da5307529440bc7560Brian Paul * compressed formats, this simply points to the texel. 30192b1908db89f23ee05f8d0da5307529440bc7560Brian Paul * For compressed formats, it points to the start of the 30292b1908db89f23ee05f8d0da5307529440bc7560Brian Paul * compressed data block. 30392b1908db89f23ee05f8d0da5307529440bc7560Brian Paul * 30492b1908db89f23ee05f8d0da5307529440bc7560Brian Paul * \param i, j the sub-block pixel coordinates. For non-compressed formats 30592b1908db89f23ee05f8d0da5307529440bc7560Brian Paul * these will always be (0,0). For compressed formats, i will 30692b1908db89f23ee05f8d0da5307529440bc7560Brian Paul * be in [0, block_width-1] and j will be in [0, block_height-1]. 
307728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca */ 308728741a3bd27a3aaab50393665d25776ad06fcb3José Fonsecavoid 309efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paullp_build_fetch_rgba_soa(struct gallivm_state *gallivm, 310728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca const struct util_format_description *format_desc, 311728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca struct lp_type type, 312728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca LLVMValueRef base_ptr, 313728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca LLVMValueRef offset, 314728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca LLVMValueRef i, 315728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca LLVMValueRef j, 3165b876241a0f9a549c247e602d2b19967cd7f2d6aBrian Paul LLVMValueRef rgba_out[4]) 317728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca{ 318efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul LLVMBuilderRef builder = gallivm->builder; 319728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca 32040bac07f9b5182890719151c99e9d9035e7984e7José Fonseca if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN && 32140bac07f9b5182890719151c99e9d9035e7984e7José Fonseca (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB || 32240bac07f9b5182890719151c99e9d9035e7984e7José Fonseca format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) && 32340bac07f9b5182890719151c99e9d9035e7984e7José Fonseca format_desc->block.width == 1 && 324728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca format_desc->block.height == 1 && 32540bac07f9b5182890719151c99e9d9035e7984e7José Fonseca format_desc->block.bits <= type.width && 32640bac07f9b5182890719151c99e9d9035e7984e7José Fonseca (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT || 32740bac07f9b5182890719151c99e9d9035e7984e7José Fonseca format_desc->channel[0].size == 32)) 328728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca { 329728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca /* 330728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca * 
The packed pixel fits into an element of the destination format. Put 3315b876241a0f9a549c247e602d2b19967cd7f2d6aBrian Paul * the packed pixels into a vector and extract each component for all 332728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca * vector elements in parallel. 333728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca */ 334728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca 335728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca LLVMValueRef packed; 336728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca 337728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca /* 338728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca * gather the texels from the texture 3399a0ff95425ab291374368cc0968ab674729f66f3Brian Paul * Ex: packed = {BGRA, BGRA, BGRA, BGRA}. 340728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca */ 341efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul packed = lp_build_gather(gallivm, 342728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca type.length, 343728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca format_desc->block.bits, 344728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca type.width, 345728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca base_ptr, offset); 346728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca 347728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca /* 348728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca * convert texels to float rgba 349728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca */ 350efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul lp_build_unpack_rgba_soa(gallivm, 351728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca format_desc, 352728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca type, 3535b876241a0f9a549c247e602d2b19967cd7f2d6aBrian Paul packed, rgba_out); 3547071eefdb2ef2a1f644a2bbed9685847b60ff6c4José Fonseca return; 355728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca } 356728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca 3577071eefdb2ef2a1f644a2bbed9685847b60ff6c4José 
Fonseca /* 3587071eefdb2ef2a1f644a2bbed9685847b60ff6c4José Fonseca * Try calling lp_build_fetch_rgba_aos for all pixels. 3597071eefdb2ef2a1f644a2bbed9685847b60ff6c4José Fonseca */ 3607071eefdb2ef2a1f644a2bbed9685847b60ff6c4José Fonseca 3617071eefdb2ef2a1f644a2bbed9685847b60ff6c4José Fonseca if (util_format_fits_8unorm(format_desc) && 3623469715a8a171512cf9b528702e70393f01c6041José Fonseca type.floating && type.width == 32 && 3633469715a8a171512cf9b528702e70393f01c6041José Fonseca (type.length == 1 || (type.length % 4 == 0))) { 3647071eefdb2ef2a1f644a2bbed9685847b60ff6c4José Fonseca struct lp_type tmp_type; 3657071eefdb2ef2a1f644a2bbed9685847b60ff6c4José Fonseca LLVMValueRef tmp; 3667071eefdb2ef2a1f644a2bbed9685847b60ff6c4José Fonseca 3677071eefdb2ef2a1f644a2bbed9685847b60ff6c4José Fonseca memset(&tmp_type, 0, sizeof tmp_type); 3687071eefdb2ef2a1f644a2bbed9685847b60ff6c4José Fonseca tmp_type.width = 8; 3697071eefdb2ef2a1f644a2bbed9685847b60ff6c4José Fonseca tmp_type.length = type.length * 4; 3707071eefdb2ef2a1f644a2bbed9685847b60ff6c4José Fonseca tmp_type.norm = TRUE; 3717071eefdb2ef2a1f644a2bbed9685847b60ff6c4José Fonseca 372efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type, 3737071eefdb2ef2a1f644a2bbed9685847b60ff6c4José Fonseca base_ptr, offset, i, j); 3747071eefdb2ef2a1f644a2bbed9685847b60ff6c4José Fonseca 375efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul lp_build_rgba8_to_f32_soa(gallivm, 3767071eefdb2ef2a1f644a2bbed9685847b60ff6c4José Fonseca type, 3777071eefdb2ef2a1f644a2bbed9685847b60ff6c4José Fonseca tmp, 3787071eefdb2ef2a1f644a2bbed9685847b60ff6c4José Fonseca rgba_out); 3797071eefdb2ef2a1f644a2bbed9685847b60ff6c4José Fonseca 3807071eefdb2ef2a1f644a2bbed9685847b60ff6c4José Fonseca return; 3817071eefdb2ef2a1f644a2bbed9685847b60ff6c4José Fonseca } 3827071eefdb2ef2a1f644a2bbed9685847b60ff6c4José Fonseca 3837071eefdb2ef2a1f644a2bbed9685847b60ff6c4José Fonseca /* 
3847071eefdb2ef2a1f644a2bbed9685847b60ff6c4José Fonseca * Fallback to calling lp_build_fetch_rgba_aos for each pixel. 3857071eefdb2ef2a1f644a2bbed9685847b60ff6c4José Fonseca * 3867071eefdb2ef2a1f644a2bbed9685847b60ff6c4José Fonseca * This is not the most efficient way of fetching pixels, as we 3877071eefdb2ef2a1f644a2bbed9685847b60ff6c4José Fonseca * miss some opportunities to do vectorization, but this is 3887071eefdb2ef2a1f644a2bbed9685847b60ff6c4José Fonseca * convenient for formats or scenarios for which there was no 3897071eefdb2ef2a1f644a2bbed9685847b60ff6c4José Fonseca * opportunity or incentive to optimize. 3907071eefdb2ef2a1f644a2bbed9685847b60ff6c4José Fonseca */ 3917071eefdb2ef2a1f644a2bbed9685847b60ff6c4José Fonseca 3927071eefdb2ef2a1f644a2bbed9685847b60ff6c4José Fonseca { 393728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca unsigned k, chan; 3947071eefdb2ef2a1f644a2bbed9685847b60ff6c4José Fonseca struct lp_type tmp_type; 3957071eefdb2ef2a1f644a2bbed9685847b60ff6c4José Fonseca 396dcc5d7f67220bc93aa7a351658649877c7e4cf69José Fonseca if (gallivm_debug & GALLIVM_DEBUG_PERF) { 397dcc5d7f67220bc93aa7a351658649877c7e4cf69José Fonseca debug_printf("%s: scalar unpacking of %s\n", 398dcc5d7f67220bc93aa7a351658649877c7e4cf69José Fonseca __FUNCTION__, format_desc->short_name); 399dcc5d7f67220bc93aa7a351658649877c7e4cf69José Fonseca } 400dcc5d7f67220bc93aa7a351658649877c7e4cf69José Fonseca 4017071eefdb2ef2a1f644a2bbed9685847b60ff6c4José Fonseca tmp_type = type; 4027071eefdb2ef2a1f644a2bbed9685847b60ff6c4José Fonseca tmp_type.length = 4; 403728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca 404728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca for (chan = 0; chan < 4; ++chan) { 405efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul rgba_out[chan] = lp_build_undef(gallivm, type); 406728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca } 407728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca 40892b1908db89f23ee05f8d0da5307529440bc7560Brian Paul /* loop over 
number of pixels */ 409728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca for(k = 0; k < type.length; ++k) { 410efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul LLVMValueRef index = lp_build_const_int32(gallivm, k); 411728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca LLVMValueRef offset_elem; 412728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca LLVMValueRef i_elem, j_elem; 413ec8d9523d465554e3ffaa1aeef46bfff868281d3José Fonseca LLVMValueRef tmp; 414728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca 415efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul offset_elem = LLVMBuildExtractElement(builder, offset, 416efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul index, ""); 417728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca 418728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca i_elem = LLVMBuildExtractElement(builder, i, index, ""); 419728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca j_elem = LLVMBuildExtractElement(builder, j, index, ""); 420728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca 42192b1908db89f23ee05f8d0da5307529440bc7560Brian Paul /* Get a single float[4]={R,G,B,A} pixel */ 422efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type, 4237071eefdb2ef2a1f644a2bbed9685847b60ff6c4José Fonseca base_ptr, offset_elem, 4245b876241a0f9a549c247e602d2b19967cd7f2d6aBrian Paul i_elem, j_elem); 425728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca 426ec8d9523d465554e3ffaa1aeef46bfff868281d3José Fonseca /* 42792b1908db89f23ee05f8d0da5307529440bc7560Brian Paul * Insert the AoS tmp value channels into the SoA result vectors at 42892b1908db89f23ee05f8d0da5307529440bc7560Brian Paul * position = 'index'. 
429ec8d9523d465554e3ffaa1aeef46bfff868281d3José Fonseca */ 430728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca for (chan = 0; chan < 4; ++chan) { 431efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul LLVMValueRef chan_val = lp_build_const_int32(gallivm, chan), 432ec8d9523d465554e3ffaa1aeef46bfff868281d3José Fonseca tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, ""); 4335b876241a0f9a549c247e602d2b19967cd7f2d6aBrian Paul rgba_out[chan] = LLVMBuildInsertElement(builder, rgba_out[chan], 4345b876241a0f9a549c247e602d2b19967cd7f2d6aBrian Paul tmp_chan, index, ""); 435728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca } 436728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca } 437728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca } 438728741a3bd27a3aaab50393665d25776ad06fcb3José Fonseca} 439