1bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca/************************************************************************** 2bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * 3bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * Copyright 2010 VMware, Inc. 4bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * All Rights Reserved. 5bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * 6bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * Permission is hereby granted, free of charge, to any person obtaining a 7bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * copy of this software and associated documentation files (the 8bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * "Software"), to deal in the Software without restriction, including 9bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * without limitation the rights to use, copy, modify, merge, publish, 10bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * distribute, sub license, and/or sell copies of the Software, and to 11bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * permit persons to whom the Software is furnished to do so, subject to 12bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * the following conditions: 13bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * 14bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 17bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 18bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 19bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 20bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * USE OR OTHER DEALINGS IN THE SOFTWARE. 21bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * 22bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * The above copyright notice and this permission notice (including the 23bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * next paragraph) shall be included in all copies or substantial portions 24bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * of the Software. 25bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * 26bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca **************************************************************************/ 27bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca 28bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca 29bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca#include "util/u_debug.h" 30bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca#include "lp_bld_debug.h" 31bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca#include "lp_bld_const.h" 32bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca#include "lp_bld_format.h" 33bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca#include "lp_bld_gather.h" 34efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul#include "lp_bld_init.h" 35bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca 36bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca 37bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca/** 38bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * Get the pointer to one element from scatter positions in memory. 39bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * 40bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * @sa lp_build_gather() 41bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca */ 42bb1546f55be3b243b71d39e5fb7457c5b21e32c9José FonsecaLLVMValueRef 43efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paullp_build_gather_elem_ptr(struct gallivm_state *gallivm, 44bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca unsigned length, 45bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca LLVMValueRef base_ptr, 46bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca LLVMValueRef offsets, 47bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca unsigned i) 48bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca{ 49bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca LLVMValueRef offset; 50bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca LLVMValueRef ptr; 51bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca 52efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0)); 53bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca 54bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca if (length == 1) { 55bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca assert(i == 0); 56bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca offset = offsets; 57bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca } else { 58efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul LLVMValueRef index = lp_build_const_int32(gallivm, i); 59efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul offset = LLVMBuildExtractElement(gallivm->builder, offsets, index, ""); 60bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca } 61bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca 62efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul ptr = LLVMBuildGEP(gallivm->builder, base_ptr, &offset, 1, ""); 63bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca 64bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca return ptr; 65bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca} 66bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca 67bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca 68bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca/** 69bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * Gather one element from scatter positions in memory. 70bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * 71bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * @sa lp_build_gather() 72bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca */ 73bb1546f55be3b243b71d39e5fb7457c5b21e32c9José FonsecaLLVMValueRef 74efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paullp_build_gather_elem(struct gallivm_state *gallivm, 75bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca unsigned length, 76bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca unsigned src_width, 77bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca unsigned dst_width, 78bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca LLVMValueRef base_ptr, 79bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca LLVMValueRef offsets, 80bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca unsigned i) 81bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca{ 82efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul LLVMTypeRef src_type = LLVMIntTypeInContext(gallivm->context, src_width); 83bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0); 84efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul LLVMTypeRef dst_elem_type = LLVMIntTypeInContext(gallivm->context, dst_width); 85bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca LLVMValueRef ptr; 86bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca LLVMValueRef res; 87bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca 88efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0)); 89bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca 90efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul ptr = lp_build_gather_elem_ptr(gallivm, length, base_ptr, offsets, i); 91efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul ptr = LLVMBuildBitCast(gallivm->builder, ptr, src_ptr_type, ""); 92efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul res = LLVMBuildLoad(gallivm->builder, ptr, ""); 93bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca 94bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca assert(src_width <= dst_width); 95bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca if (src_width > dst_width) 96efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul res = LLVMBuildTrunc(gallivm->builder, res, dst_elem_type, ""); 97bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca if (src_width < dst_width) 98efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul res = LLVMBuildZExt(gallivm->builder, res, dst_elem_type, ""); 99bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca 100bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca return res; 101bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca} 102bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca 103bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca 104bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca/** 105bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * Gather elements from scatter positions in memory into a single vector. 106bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * Use for fetching texels from a texture. 107bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * For SSE, typical values are length=4, src_width=32, dst_width=32. 108bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * 109bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * @param length length of the offsets 110bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * @param src_width src element width in bits 111bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * @param dst_width result element width in bits (src will be expanded to fit) 112bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * @param base_ptr base pointer, should be a i8 pointer type. 113bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * @param offsets vector with offsets 114bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca */ 115bb1546f55be3b243b71d39e5fb7457c5b21e32c9José FonsecaLLVMValueRef 116efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paullp_build_gather(struct gallivm_state *gallivm, 117bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca unsigned length, 118bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca unsigned src_width, 119bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca unsigned dst_width, 120bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca LLVMValueRef base_ptr, 121bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca LLVMValueRef offsets) 122bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca{ 123bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca LLVMValueRef res; 124bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca 125bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca if (length == 1) { 126bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca /* Scalar */ 127efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul return lp_build_gather_elem(gallivm, length, 128bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca src_width, dst_width, 129bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca base_ptr, offsets, 0); 130bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca } else { 131bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca /* Vector */ 132bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca 133efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul LLVMTypeRef dst_elem_type = LLVMIntTypeInContext(gallivm->context, dst_width); 134bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length); 135bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca unsigned i; 136bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca 137bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca res = LLVMGetUndef(dst_vec_type); 138bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca for (i = 0; i < length; ++i) { 139efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul LLVMValueRef index = lp_build_const_int32(gallivm, i); 140bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca LLVMValueRef elem; 141efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul elem = lp_build_gather_elem(gallivm, length, 142bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca src_width, dst_width, 143bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca base_ptr, offsets, i); 144efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul res = LLVMBuildInsertElement(gallivm->builder, res, elem, index, ""); 145bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca } 146bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca } 147bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca 148bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca return res; 149bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca} 15052049744620854487012151a7ac26ca978905411Tom Stellard 15152049744620854487012151a7ac26ca978905411Tom StellardLLVMValueRef 15252049744620854487012151a7ac26ca978905411Tom Stellardlp_build_gather_values(struct gallivm_state * gallivm, 15352049744620854487012151a7ac26ca978905411Tom Stellard LLVMValueRef * values, 15452049744620854487012151a7ac26ca978905411Tom Stellard unsigned value_count) 15552049744620854487012151a7ac26ca978905411Tom Stellard{ 15652049744620854487012151a7ac26ca978905411Tom Stellard LLVMTypeRef vec_type = LLVMVectorType(LLVMTypeOf(values[0]), value_count); 15752049744620854487012151a7ac26ca978905411Tom Stellard LLVMBuilderRef builder = gallivm->builder; 15852049744620854487012151a7ac26ca978905411Tom Stellard LLVMValueRef vec = LLVMGetUndef(vec_type); 15952049744620854487012151a7ac26ca978905411Tom Stellard unsigned i; 16052049744620854487012151a7ac26ca978905411Tom Stellard 16152049744620854487012151a7ac26ca978905411Tom Stellard for (i = 0; i < value_count; i++) { 16252049744620854487012151a7ac26ca978905411Tom Stellard LLVMValueRef index = lp_build_const_int32(gallivm, i); 16352049744620854487012151a7ac26ca978905411Tom Stellard vec = LLVMBuildInsertElement(builder, vec, values[i], index, ""); 16452049744620854487012151a7ac26ca978905411Tom Stellard } 16552049744620854487012151a7ac26ca978905411Tom Stellard return vec; 16652049744620854487012151a7ac26ca978905411Tom Stellard} 167