1bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca/**************************************************************************
2bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca *
3bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * Copyright 2010 VMware, Inc.
4bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * All Rights Reserved.
5bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca *
6bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * Permission is hereby granted, free of charge, to any person obtaining a
7bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * copy of this software and associated documentation files (the
8bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * "Software"), to deal in the Software without restriction, including
9bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * without limitation the rights to use, copy, modify, merge, publish,
10bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * distribute, sub license, and/or sell copies of the Software, and to
11bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * permit persons to whom the Software is furnished to do so, subject to
12bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * the following conditions:
13bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca *
14bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * USE OR OTHER DEALINGS IN THE SOFTWARE.
21bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca *
22bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * The above copyright notice and this permission notice (including the
23bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * next paragraph) shall be included in all copies or substantial portions
24bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * of the Software.
25bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca *
26bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca **************************************************************************/
27bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca
28bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca
29bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca#include "util/u_debug.h"
30bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca#include "lp_bld_debug.h"
31bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca#include "lp_bld_const.h"
32bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca#include "lp_bld_format.h"
33bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca#include "lp_bld_gather.h"
34efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul#include "lp_bld_init.h"
35bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca
36bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca
37bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca/**
38bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * Get the pointer to one element from scatter positions in memory.
39bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca *
40bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * @sa lp_build_gather()
41bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca */
42bb1546f55be3b243b71d39e5fb7457c5b21e32c9José FonsecaLLVMValueRef
43efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paullp_build_gather_elem_ptr(struct gallivm_state *gallivm,
44bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca                         unsigned length,
45bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca                         LLVMValueRef base_ptr,
46bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca                         LLVMValueRef offsets,
47bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca                         unsigned i)
48bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca{
49bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca   LLVMValueRef offset;
50bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca   LLVMValueRef ptr;
51bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca
52efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul   assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0));
53bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca
54bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca   if (length == 1) {
55bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca      assert(i == 0);
56bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca      offset = offsets;
57bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca   } else {
58efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul      LLVMValueRef index = lp_build_const_int32(gallivm, i);
59efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul      offset = LLVMBuildExtractElement(gallivm->builder, offsets, index, "");
60bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca   }
61bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca
62efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul   ptr = LLVMBuildGEP(gallivm->builder, base_ptr, &offset, 1, "");
63bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca
64bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca   return ptr;
65bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca}
66bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca
67bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca
68bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca/**
69bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * Gather one element from scatter positions in memory.
70bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca *
71bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * @sa lp_build_gather()
72bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca */
73bb1546f55be3b243b71d39e5fb7457c5b21e32c9José FonsecaLLVMValueRef
74efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paullp_build_gather_elem(struct gallivm_state *gallivm,
75bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca                     unsigned length,
76bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca                     unsigned src_width,
77bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca                     unsigned dst_width,
78bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca                     LLVMValueRef base_ptr,
79bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca                     LLVMValueRef offsets,
80bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca                     unsigned i)
81bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca{
82efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul   LLVMTypeRef src_type = LLVMIntTypeInContext(gallivm->context, src_width);
83bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca   LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
84efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul   LLVMTypeRef dst_elem_type = LLVMIntTypeInContext(gallivm->context, dst_width);
85bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca   LLVMValueRef ptr;
86bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca   LLVMValueRef res;
87bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca
88efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul   assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0));
89bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca
90efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul   ptr = lp_build_gather_elem_ptr(gallivm, length, base_ptr, offsets, i);
91efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul   ptr = LLVMBuildBitCast(gallivm->builder, ptr, src_ptr_type, "");
92efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul   res = LLVMBuildLoad(gallivm->builder, ptr, "");
93bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca
94bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca   assert(src_width <= dst_width);
95bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca   if (src_width > dst_width)
96efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul      res = LLVMBuildTrunc(gallivm->builder, res, dst_elem_type, "");
97bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca   if (src_width < dst_width)
98efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul      res = LLVMBuildZExt(gallivm->builder, res, dst_elem_type, "");
99bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca
100bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca   return res;
101bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca}
102bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca
103bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca
104bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca/**
105bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * Gather elements from scatter positions in memory into a single vector.
106bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * Use for fetching texels from a texture.
107bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * For SSE, typical values are length=4, src_width=32, dst_width=32.
108bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca *
109bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * @param length length of the offsets
110bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * @param src_width src element width in bits
111bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * @param dst_width result element width in bits (src will be expanded to fit)
112bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * @param base_ptr base pointer, should be a i8 pointer type.
113bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca * @param offsets vector with offsets
114bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca */
115bb1546f55be3b243b71d39e5fb7457c5b21e32c9José FonsecaLLVMValueRef
116efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paullp_build_gather(struct gallivm_state *gallivm,
117bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca                unsigned length,
118bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca                unsigned src_width,
119bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca                unsigned dst_width,
120bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca                LLVMValueRef base_ptr,
121bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca                LLVMValueRef offsets)
122bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca{
123bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca   LLVMValueRef res;
124bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca
125bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca   if (length == 1) {
126bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca      /* Scalar */
127efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul      return lp_build_gather_elem(gallivm, length,
128bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca                                  src_width, dst_width,
129bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca                                  base_ptr, offsets, 0);
130bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca   } else {
131bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca      /* Vector */
132bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca
133efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul      LLVMTypeRef dst_elem_type = LLVMIntTypeInContext(gallivm->context, dst_width);
134bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca      LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length);
135bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca      unsigned i;
136bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca
137bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca      res = LLVMGetUndef(dst_vec_type);
138bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca      for (i = 0; i < length; ++i) {
139efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul         LLVMValueRef index = lp_build_const_int32(gallivm, i);
140bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca         LLVMValueRef elem;
141efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul         elem = lp_build_gather_elem(gallivm, length,
142bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca                                     src_width, dst_width,
143bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca                                     base_ptr, offsets, i);
144efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul         res = LLVMBuildInsertElement(gallivm->builder, res, elem, index, "");
145bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca      }
146bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca   }
147bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca
148bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca   return res;
149bb1546f55be3b243b71d39e5fb7457c5b21e32c9José Fonseca}
15052049744620854487012151a7ac26ca978905411Tom Stellard
15152049744620854487012151a7ac26ca978905411Tom StellardLLVMValueRef
15252049744620854487012151a7ac26ca978905411Tom Stellardlp_build_gather_values(struct gallivm_state * gallivm,
15352049744620854487012151a7ac26ca978905411Tom Stellard                       LLVMValueRef * values,
15452049744620854487012151a7ac26ca978905411Tom Stellard                       unsigned value_count)
15552049744620854487012151a7ac26ca978905411Tom Stellard{
15652049744620854487012151a7ac26ca978905411Tom Stellard   LLVMTypeRef vec_type = LLVMVectorType(LLVMTypeOf(values[0]), value_count);
15752049744620854487012151a7ac26ca978905411Tom Stellard   LLVMBuilderRef builder = gallivm->builder;
15852049744620854487012151a7ac26ca978905411Tom Stellard   LLVMValueRef vec = LLVMGetUndef(vec_type);
15952049744620854487012151a7ac26ca978905411Tom Stellard   unsigned i;
16052049744620854487012151a7ac26ca978905411Tom Stellard
16152049744620854487012151a7ac26ca978905411Tom Stellard   for (i = 0; i < value_count; i++) {
16252049744620854487012151a7ac26ca978905411Tom Stellard      LLVMValueRef index = lp_build_const_int32(gallivm, i);
16352049744620854487012151a7ac26ca978905411Tom Stellard      vec = LLVMBuildInsertElement(builder, vec, values[i], index, "");
16452049744620854487012151a7ac26ca978905411Tom Stellard   }
16552049744620854487012151a7ac26ca978905411Tom Stellard   return vec;
16652049744620854487012151a7ac26ca978905411Tom Stellard}
167