1f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/************************************************************************** 2f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 3f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Copyright 2009 VMware, Inc. 4f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * All Rights Reserved. 5f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 6f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Permission is hereby granted, free of charge, to any person obtaining a 7f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * copy of this software and associated documentation files (the 8f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * "Software"), to deal in the Software without restriction, including 9f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * without limitation the rights to use, copy, modify, merge, publish, 10f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * distribute, sub license, and/or sell copies of the Software, and to 11f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * permit persons to whom the Software is furnished to do so, subject to 12f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * the following conditions: 13f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 14f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * The above copyright notice and this permission notice (including the 15f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * next paragraph) shall be included in all copies or substantial portions 16f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * of the Software. 17f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 18f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 26f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org **************************************************************************/ 27f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 28f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 29f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * @file 30f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * AoS pixel format manipulation. 31f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 32f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * @author Jose Fonseca <jfonseca@vmware.com> 33f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 34f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 35f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 36f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "util/u_format.h" 37f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "util/u_memory.h" 38f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "util/u_math.h" 39f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "util/u_pointer.h" 40f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "util/u_string.h" 41f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 42f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "lp_bld_arit.h" 43f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "lp_bld_init.h" 44f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "lp_bld_type.h" 45f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "lp_bld_flow.h" 46f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "lp_bld_const.h" 47f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "lp_bld_conv.h" 48f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "lp_bld_swizzle.h" 49f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "lp_bld_gather.h" 50f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "lp_bld_debug.h" 51f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "lp_bld_format.h" 52f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 53f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 54f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 55f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Basic swizzling. Rearrange the order of the unswizzled array elements 56f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * according to the format description. PIPE_SWIZZLE_ZERO/ONE are supported 57f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * too. 58f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Ex: if unswizzled[4] = {B, G, R, x}, then swizzled_out[4] = {R, G, B, 1}. 59f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 60f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgLLVMValueRef 61f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orglp_build_format_swizzle_aos(const struct util_format_description *desc, 62f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct lp_build_context *bld, 63f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef unswizzled) 64f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 65f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned char swizzles[4]; 66f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned chan; 67f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 68f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(bld->type.length % 4 == 0); 69f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 70f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (chan = 0; chan < 4; ++chan) { 71f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org enum util_format_swizzle swizzle; 72f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 73f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { 74f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* 75f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * For ZS formats do RGBA = ZZZ1 76f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 77f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (chan == 3) { 78f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org swizzle = UTIL_FORMAT_SWIZZLE_1; 79f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) { 80f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org swizzle = UTIL_FORMAT_SWIZZLE_0; 81f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 82f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org swizzle = desc->swizzle[0]; 83f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 84f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 85f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org swizzle = desc->swizzle[chan]; 86f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 87f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org swizzles[chan] = swizzle; 88f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 89f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 90f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return lp_build_swizzle_aos(bld, unswizzled, swizzles); 91f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 92f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 93f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 94f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 95f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Whether the format matches the vector type, apart of swizzles. 96f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 97f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic INLINE boolean 98f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgformat_matches_type(const struct util_format_description *desc, 99f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct lp_type type) 100f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 101f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org enum util_format_type chan_type; 102f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned chan; 103f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 104f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(type.length % 4 == 0); 105f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 106f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN || 107f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB || 108f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org desc->block.width != 1 || 109f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org desc->block.height != 1) { 110f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return FALSE; 111f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 112f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 113f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (type.floating) { 114f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org chan_type = UTIL_FORMAT_TYPE_FLOAT; 115f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else if (type.fixed) { 116f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org chan_type = UTIL_FORMAT_TYPE_FIXED; 117f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else if (type.sign) { 118f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org chan_type = UTIL_FORMAT_TYPE_SIGNED; 119f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 120f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org chan_type = UTIL_FORMAT_TYPE_UNSIGNED; 121f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 122f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 123f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (chan = 0; chan < desc->nr_channels; ++chan) { 124f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (desc->channel[chan].size != type.width) { 125f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return FALSE; 126f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 127f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 128f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID) { 129f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (desc->channel[chan].type != chan_type || 130f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org desc->channel[chan].normalized != type.norm) { 131f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return FALSE; 132f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 133f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 134f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 135f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 136f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return TRUE; 137f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 138f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 139f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 140f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 141f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Unpack a single pixel into its RGBA components. 142f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 143f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * @param desc the pixel format for the packed pixel value 144f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * @param packed integer pixel in a format such as PIPE_FORMAT_B8G8R8A8_UNORM 145f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 146f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * @return RGBA in a float[4] or ubyte[4] or ushort[4] vector. 147f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 148f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic INLINE LLVMValueRef 149f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orglp_build_unpack_arith_rgba_aos(struct gallivm_state *gallivm, 150f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const struct util_format_description *desc, 151f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef packed) 152f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 153f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMBuilderRef builder = gallivm->builder; 154f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef shifted, casted, scaled, masked; 155f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef shifts[4]; 156f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef masks[4]; 157f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef scales[4]; 158f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 159f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org boolean normalized; 160f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org boolean needs_uitofp; 161f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned shift; 162f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned i; 163f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 164f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* TODO: Support more formats */ 165f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); 166f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(desc->block.width == 1); 167f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(desc->block.height == 1); 168f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(desc->block.bits <= 32); 169f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 170f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Do the intermediate integer computations with 32bit integers since it 171f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * matches floating point size */ 172f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert (LLVMTypeOf(packed) == LLVMInt32TypeInContext(gallivm->context)); 173f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 174f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Broadcast the packed value to all four channels 175f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * before: packed = BGRA 176f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * after: packed = {BGRA, BGRA, BGRA, BGRA} 177f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 178f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org packed = LLVMBuildInsertElement(builder, 179f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMGetUndef(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)), 180f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org packed, 181f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMConstNull(LLVMInt32TypeInContext(gallivm->context)), 182f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ""); 183f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org packed = LLVMBuildShuffleVector(builder, 184f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org packed, 185f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMGetUndef(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)), 186f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMConstNull(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)), 187f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ""); 188f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 189f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Initialize vector constants */ 190f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org normalized = FALSE; 191f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org needs_uitofp = FALSE; 192f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org shift = 0; 193f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 194f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Loop over 4 color components */ 195f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (i = 0; i < 4; ++i) { 196f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned bits = desc->channel[i].size; 197f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 198f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) { 199f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); 200f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org masks[i] = LLVMConstNull(LLVMInt32TypeInContext(gallivm->context)); 201f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org scales[i] = LLVMConstNull(LLVMFloatTypeInContext(gallivm->context)); 202f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 203f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else { 204f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned long long mask = (1ULL << bits) - 1; 205f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 206f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED); 207f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 208f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (bits == 32) { 209f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org needs_uitofp = TRUE; 210f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 211f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 212f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org shifts[i] = lp_build_const_int32(gallivm, shift); 213f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org masks[i] = lp_build_const_int32(gallivm, mask); 214f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 215f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (desc->channel[i].normalized) { 216f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org scales[i] = lp_build_const_float(gallivm, 1.0 / mask); 217f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org normalized = TRUE; 218f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 219f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 220f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org scales[i] = lp_build_const_float(gallivm, 1.0); 221f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 222f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 223f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org shift += bits; 224f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 225f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 226f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Ex: convert packed = {BGRA, BGRA, BGRA, BGRA} 227f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * into masked = {B, G, R, A} 228f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 229f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), ""); 230f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), ""); 231f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 232f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 233f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!needs_uitofp) { 234f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* UIToFP can't be expressed in SSE2 */ 235f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), ""); 236f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 237f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org casted = LLVMBuildUIToFP(builder, masked, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), ""); 238f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 239f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 240f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* At this point 'casted' may be a vector of floats such as 241f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * {255.0, 255.0, 255.0, 255.0}. Next, if the pixel values are normalized 242f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * we'll scale this to {1.0, 1.0, 1.0, 1.0}. 243f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 244f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 245f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (normalized) 246f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org scaled = LLVMBuildFMul(builder, casted, LLVMConstVector(scales, 4), ""); 247f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 248f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org scaled = casted; 249f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 250f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return scaled; 251f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 252f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 253f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 254f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 255f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Pack a single pixel. 256f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 257f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * @param rgba 4 float vector with the unpacked components. 258f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 259f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * XXX: This is mostly for reference and testing -- operating a single pixel at 260f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * a time is rarely if ever needed. 261f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 262f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgLLVMValueRef 263f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orglp_build_pack_rgba_aos(struct gallivm_state *gallivm, 264f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const struct util_format_description *desc, 265f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef rgba) 266f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 267f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMBuilderRef builder = gallivm->builder; 268f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMTypeRef type; 269f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef packed = NULL; 270f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef swizzles[4]; 271f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef shifted, casted, scaled, unswizzled; 272f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef shifts[4]; 273f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef scales[4]; 274f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org boolean normalized; 275f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned shift; 276f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned i, j; 277f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 278f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); 279f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(desc->block.width == 1); 280f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(desc->block.height == 1); 281f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 282f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org type = LLVMIntTypeInContext(gallivm->context, desc->block.bits); 283f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 284f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Unswizzle the color components into the source vector. */ 285f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (i = 0; i < 4; ++i) { 286f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (j = 0; j < 4; ++j) { 287f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (desc->swizzle[j] == i) 288f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 289f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 290f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (j < 4) 291f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org swizzles[i] = lp_build_const_int32(gallivm, j); 292f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 293f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org swizzles[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); 294f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 295f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 296f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unswizzled = LLVMBuildShuffleVector(builder, rgba, 297f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4)), 298f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMConstVector(swizzles, 4), ""); 299f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 300f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org normalized = FALSE; 301f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org shift = 0; 302f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (i = 0; i < 4; ++i) { 303f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned bits = desc->channel[i].size; 304f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 305f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) { 306f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); 307f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org scales[i] = LLVMGetUndef(LLVMFloatTypeInContext(gallivm->context)); 308f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 309f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else { 310f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned mask = (1 << bits) - 1; 311f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 312f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED); 313f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(bits < 32); 314f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 315f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org shifts[i] = lp_build_const_int32(gallivm, shift); 316f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 317f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (desc->channel[i].normalized) { 318f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org scales[i] = lp_build_const_float(gallivm, mask); 319f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org normalized = TRUE; 320f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 321f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 322f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org scales[i] = lp_build_const_float(gallivm, 1.0); 323f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 324f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 325f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org shift += bits; 326f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 327f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 328f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (normalized) 329f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org scaled = LLVMBuildFMul(builder, unswizzled, LLVMConstVector(scales, 4), ""); 330f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 331f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org scaled = unswizzled; 332f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 333f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), ""); 334f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 335f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), ""); 336f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 337f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Bitwise or all components */ 338f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (i = 0; i < 4; ++i) { 339f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) { 340f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef component = LLVMBuildExtractElement(builder, shifted, 341f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org lp_build_const_int32(gallivm, i), ""); 342f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (packed) 343f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org packed = LLVMBuildOr(builder, packed, component, ""); 344f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 345f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org packed = component; 346f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 347f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 348f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 349f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!packed) 350f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org packed = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); 351f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 352f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (desc->block.bits < 32) 353f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org packed = LLVMBuildTrunc(builder, packed, type, ""); 354f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 355f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return packed; 356f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 357f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 358f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 359f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 360f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 361f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 362f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Fetch a pixel into a 4 float AoS. 363f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 364f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * \param format_desc describes format of the image we're fetching from 365f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * \param ptr address of the pixel block (or the texel if uncompressed) 366f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * \param i, j the sub-block pixel coordinates. For non-compressed formats 367f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * these will always be (0, 0). 368f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * \return a 4 element vector with the pixel's RGBA values. 369f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 370f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgLLVMValueRef 371f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orglp_build_fetch_rgba_aos(struct gallivm_state *gallivm, 372f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const struct util_format_description *format_desc, 373f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct lp_type type, 374f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef base_ptr, 375f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef offset, 376f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef i, 377f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef j) 378f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 379f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMBuilderRef builder = gallivm->builder; 380f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned num_pixels = type.length / 4; 381f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct lp_build_context bld; 382f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 383f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(type.length <= LP_MAX_VECTOR_LENGTH); 384f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(type.length % 4 == 0); 385f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 386f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org lp_build_context_init(&bld, gallivm, type); 387f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 388f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* 389f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Trivial case 390f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 391f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * The format matches the type (apart of a swizzle) so no need for 392f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * scaling or converting. 393f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 394f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 395f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (format_matches_type(format_desc, type) && 396f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org format_desc->block.bits <= type.width * 4 && 397f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org util_is_power_of_two(format_desc->block.bits)) { 398f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef packed; 399f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 400f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* 401f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * The format matches the type (apart of a swizzle) so no need for 402f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * scaling or converting. 403f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 404f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 405f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org packed = lp_build_gather(gallivm, type.length/4, 406f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org format_desc->block.bits, type.width*4, 407f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org base_ptr, offset); 408f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 409f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(format_desc->block.bits <= type.width * type.length); 410f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 411f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org packed = LLVMBuildBitCast(gallivm->builder, packed, 412f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org lp_build_vec_type(gallivm, type), ""); 413f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 414f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return lp_build_format_swizzle_aos(format_desc, &bld, packed); 415f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 416f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 417f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* 418f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Bit arithmetic 419f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 420f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 421f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN && 422f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB || 423f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) && 424f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org format_desc->block.width == 1 && 425f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org format_desc->block.height == 1 && 426f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org util_is_power_of_two(format_desc->block.bits) && 427f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org format_desc->block.bits <= 32 && 428f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org format_desc->is_bitmask && 429f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org !format_desc->is_mixed && 430f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED || 431f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED)) { 432f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 433f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4]; 434f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef res; 435f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned k; 436f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 437f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* 438f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Unpack a pixel at a time into a <4 x float> RGBA vector 439f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 440f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 441f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (k = 0; k < num_pixels; ++k) { 442f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef packed; 443f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 444f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org packed = lp_build_gather_elem(gallivm, num_pixels, 445f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org format_desc->block.bits, 32, 446f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org base_ptr, offset, k); 447f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 448f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org tmps[k] = lp_build_unpack_arith_rgba_aos(gallivm, 449f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org format_desc, 450f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org packed); 451f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 452f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 453f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* 454f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Type conversion. 455f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 456f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * TODO: We could avoid floating conversion for integer to 457f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * integer conversions. 458f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 459f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 460f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (gallivm_debug & GALLIVM_DEBUG_PERF && !type.floating) { 461f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org debug_printf("%s: unpacking %s with floating point\n", 462f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org __FUNCTION__, format_desc->short_name); 463f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 464f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 465f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org lp_build_conv(gallivm, 466f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org lp_float32_vec4_type(), 467f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org type, 468f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org tmps, num_pixels, &res, 1); 469f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 470f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return lp_build_format_swizzle_aos(format_desc, &bld, res); 471f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 472f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 473f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* If all channels are of same type and we are not using half-floats */ 474f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (util_format_is_array(format_desc)) { 475f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return lp_build_fetch_rgba_aos_array(gallivm, format_desc, type, base_ptr, offset); 476f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 477f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 478f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* 479f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * YUV / subsampled formats 480f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 481f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 482f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { 483f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct lp_type tmp_type; 484f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef tmp; 485f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 486f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org memset(&tmp_type, 0, sizeof tmp_type); 487f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org tmp_type.width = 8; 488f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org tmp_type.length = num_pixels * 4; 489f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org tmp_type.norm = TRUE; 490f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 491f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org tmp = lp_build_fetch_subsampled_rgba_aos(gallivm, 492f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org format_desc, 493f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org num_pixels, 494f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org base_ptr, 495f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org offset, 496f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i, j); 497f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 498f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org lp_build_conv(gallivm, 499f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org tmp_type, type, 500f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org &tmp, 1, &tmp, 1); 501f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 502f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return tmp; 503f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 504f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 505f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* 506f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Fallback to util_format_description::fetch_rgba_8unorm(). 507f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 508f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 509f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (format_desc->fetch_rgba_8unorm && 510f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org !type.floating && type.width == 8 && !type.sign && type.norm) { 511f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* 512f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Fallback to calling util_format_description::fetch_rgba_8unorm. 513f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 514f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * This is definitely not the most efficient way of fetching pixels, as 515f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * we miss the opportunity to do vectorization, but this it is a 516f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * convenient for formats or scenarios for which there was no opportunity 517f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * or incentive to optimize. 518f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 519f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 520f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context); 521f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMTypeRef pi8t = LLVMPointerType(i8t, 0); 522f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); 523f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef function; 524f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef tmp_ptr; 525f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef tmp; 526f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef res; 527f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned k; 528f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 529f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (gallivm_debug & GALLIVM_DEBUG_PERF) { 530f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org debug_printf("%s: falling back to util_format_%s_fetch_rgba_8unorm\n", 531f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org __FUNCTION__, format_desc->short_name); 532f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 533f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 534f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* 535f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Declare and bind format_desc->fetch_rgba_8unorm(). 536f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 537f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 538f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org { 539f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* 540f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Function to call looks like: 541f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) 542f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 543f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMTypeRef ret_type; 544f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMTypeRef arg_types[4]; 545f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMTypeRef function_type; 546f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 547f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ret_type = LLVMVoidTypeInContext(gallivm->context); 548f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org arg_types[0] = pi8t; 549f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org arg_types[1] = pi8t; 550f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org arg_types[2] = i32t; 551f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org arg_types[3] = i32t; 552f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org function_type = LLVMFunctionType(ret_type, arg_types, 553f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Elements(arg_types), 0); 554f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 555f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* make const pointer for the C fetch_rgba_8unorm function */ 556f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org function = lp_build_const_int_pointer(gallivm, 557f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org func_to_pointer((func_pointer) format_desc->fetch_rgba_8unorm)); 558f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 559f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* cast the callee pointer to the function's type */ 560f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org function = LLVMBuildBitCast(builder, function, 561f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMPointerType(function_type, 0), 562f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org "cast callee"); 563f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 564f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 565f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org tmp_ptr = lp_build_alloca(gallivm, i32t, ""); 566f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 567f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org res = LLVMGetUndef(LLVMVectorType(i32t, num_pixels)); 568f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 569f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* 570f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Invoke format_desc->fetch_rgba_8unorm() for each pixel and insert the result 571f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * in the SoA vectors. 572f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 573f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 574f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (k = 0; k < num_pixels; ++k) { 575f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef index = lp_build_const_int32(gallivm, k); 576f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef args[4]; 577f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 578f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[0] = LLVMBuildBitCast(builder, tmp_ptr, pi8t, ""); 579f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels, 580f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org base_ptr, offset, k); 581f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 582f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (num_pixels == 1) { 583f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[2] = i; 584f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[3] = j; 585f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 586f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else { 587f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[2] = LLVMBuildExtractElement(builder, i, index, ""); 588f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[3] = LLVMBuildExtractElement(builder, j, index, ""); 589f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 590f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 591f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMBuildCall(builder, function, args, Elements(args), ""); 592f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 593f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org tmp = LLVMBuildLoad(builder, tmp_ptr, ""); 594f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 595f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (num_pixels == 1) { 596f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org res = tmp; 597f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 598f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else { 599f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org res = LLVMBuildInsertElement(builder, res, tmp, index, ""); 600f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 601f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 602f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 603f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Bitcast from <n x i32> to <4n x i8> */ 604f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org res = LLVMBuildBitCast(builder, res, bld.vec_type, ""); 605f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 606f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return res; 607f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 608f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 609f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* 610f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Fallback to util_format_description::fetch_rgba_float(). 611f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 612f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 613f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (format_desc->fetch_rgba_float) { 614f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* 615f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Fallback to calling util_format_description::fetch_rgba_float. 616f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 617f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * This is definitely not the most efficient way of fetching pixels, as 618f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * we miss the opportunity to do vectorization, but this it is a 619f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * convenient for formats or scenarios for which there was no opportunity 620f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * or incentive to optimize. 621f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 622f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 623f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMTypeRef f32t = LLVMFloatTypeInContext(gallivm->context); 624f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMTypeRef f32x4t = LLVMVectorType(f32t, 4); 625f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMTypeRef pf32t = LLVMPointerType(f32t, 0); 626f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMTypeRef pi8t = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); 627f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); 628f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef function; 629f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef tmp_ptr; 630f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4]; 631f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef res; 632f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned k; 633f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 634f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (gallivm_debug & GALLIVM_DEBUG_PERF) { 635f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org debug_printf("%s: falling back to util_format_%s_fetch_rgba_float\n", 636f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org __FUNCTION__, format_desc->short_name); 637f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 638f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 639f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* 640f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Declare and bind format_desc->fetch_rgba_float(). 641f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 642f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 643f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org { 644f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* 645f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Function to call looks like: 646f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * fetch(float *dst, const uint8_t *src, unsigned i, unsigned j) 647f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 648f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMTypeRef ret_type; 649f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMTypeRef arg_types[4]; 650f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 651f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ret_type = LLVMVoidTypeInContext(gallivm->context); 652f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org arg_types[0] = pf32t; 653f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org arg_types[1] = pi8t; 654f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org arg_types[2] = i32t; 655f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org arg_types[3] = i32t; 656f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 657f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org function = lp_build_const_func_pointer(gallivm, 658f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org func_to_pointer((func_pointer) format_desc->fetch_rgba_float), 659f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ret_type, 660f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org arg_types, Elements(arg_types), 661f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org format_desc->short_name); 662f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 663f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 664f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org tmp_ptr = lp_build_alloca(gallivm, f32x4t, ""); 665f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 666f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* 667f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result 668f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * in the SoA vectors. 669f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 670f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 671f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (k = 0; k < num_pixels; ++k) { 672f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef args[4]; 673f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 674f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[0] = LLVMBuildBitCast(builder, tmp_ptr, pf32t, ""); 675f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels, 676f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org base_ptr, offset, k); 677f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 678f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (num_pixels == 1) { 679f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[2] = i; 680f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[3] = j; 681f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 682f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else { 683f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef index = lp_build_const_int32(gallivm, k); 684f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[2] = LLVMBuildExtractElement(builder, i, index, ""); 685f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[3] = LLVMBuildExtractElement(builder, j, index, ""); 686f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 687f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 688f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMBuildCall(builder, function, args, Elements(args), ""); 689f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 690f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org tmps[k] = LLVMBuildLoad(builder, tmp_ptr, ""); 691f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 692f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 693f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org lp_build_conv(gallivm, 694f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org lp_float32_vec4_type(), 695f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org type, 696f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org tmps, num_pixels, &res, 1); 697f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 698f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return res; 699f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 700f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 701f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(0); 702f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return lp_build_undef(gallivm, type); 703f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 704