1/**************************************************************************
2 *
3 * Copyright 2015 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28#include "lp_bld_format.h"
29#include "lp_bld_type.h"
30#include "lp_bld_struct.h"
31#include "lp_bld_const.h"
32#include "lp_bld_flow.h"
33#include "lp_bld_swizzle.h"
34
35#include "util/u_math.h"
36
37
38/**
39 * @file
40 * Complex block-compression based formats are handled here by using a cache,
41 * so re-decoding of every pixel is not required.
42 * Especially for bilinear filtering, texel reuse is very high hence even
43 * a small cache helps.
44 * The elements in the cache are the decoded blocks - currently things
45 * are restricted to formats which are 4x4 block based, and the decoded
46 * texels must fit into 4x8 bits.
47 * The cache is direct mapped so hitrates aren't all that great and cache
48 * thrashing could happen.
49 *
50 * @author Roland Scheidegger <sroland@vmware.com>
51 */
52
53
54#if LP_BUILD_FORMAT_CACHE_DEBUG
55static void
56update_cache_access(struct gallivm_state *gallivm,
57                    LLVMValueRef ptr,
58                    unsigned count,
59                    unsigned index)
60{
61   LLVMBuilderRef builder = gallivm->builder;
62   LLVMValueRef member_ptr, cache_access;
63
64   assert(index == LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL ||
65          index == LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS);
66
67   member_ptr = lp_build_struct_get_ptr(gallivm, ptr, index, "");
68   cache_access = LLVMBuildLoad(builder, member_ptr, "cache_access");
69   cache_access = LLVMBuildAdd(builder, cache_access,
70                               LLVMConstInt(LLVMInt64TypeInContext(gallivm->context),
71                                                                   count, 0), "");
72   LLVMBuildStore(builder, cache_access, member_ptr);
73}
74#endif
75
76
77static void
78store_cached_block(struct gallivm_state *gallivm,
79                   LLVMValueRef *col,
80                   LLVMValueRef tag_value,
81                   LLVMValueRef hash_index,
82                   LLVMValueRef cache)
83{
84   LLVMBuilderRef builder = gallivm->builder;
85   LLVMValueRef ptr, indices[3];
86   LLVMTypeRef type_ptr4x32;
87   unsigned count;
88
89   type_ptr4x32 = LLVMPointerType(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), 0);
90   indices[0] = lp_build_const_int32(gallivm, 0);
91   indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_TAGS);
92   indices[2] = hash_index;
93   ptr = LLVMBuildGEP(builder, cache, indices, ARRAY_SIZE(indices), "");
94   LLVMBuildStore(builder, tag_value, ptr);
95
96   indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_DATA);
97   hash_index = LLVMBuildMul(builder, hash_index,
98                             lp_build_const_int32(gallivm, 16), "");
99   for (count = 0; count < 4; count++) {
100      indices[2] = hash_index;
101      ptr = LLVMBuildGEP(builder, cache, indices, ARRAY_SIZE(indices), "");
102      ptr = LLVMBuildBitCast(builder, ptr, type_ptr4x32, "");
103      LLVMBuildStore(builder, col[count], ptr);
104      hash_index = LLVMBuildAdd(builder, hash_index,
105                                lp_build_const_int32(gallivm, 4), "");
106   }
107}
108
109
110static LLVMValueRef
111lookup_cached_pixel(struct gallivm_state *gallivm,
112                    LLVMValueRef ptr,
113                    LLVMValueRef index)
114{
115   LLVMBuilderRef builder = gallivm->builder;
116   LLVMValueRef member_ptr, indices[3];
117
118   indices[0] = lp_build_const_int32(gallivm, 0);
119   indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_DATA);
120   indices[2] = index;
121   member_ptr = LLVMBuildGEP(builder, ptr, indices, ARRAY_SIZE(indices), "");
122   return LLVMBuildLoad(builder, member_ptr, "cache_data");
123}
124
125
126static LLVMValueRef
127lookup_tag_data(struct gallivm_state *gallivm,
128                LLVMValueRef ptr,
129                LLVMValueRef index)
130{
131   LLVMBuilderRef builder = gallivm->builder;
132   LLVMValueRef member_ptr, indices[3];
133
134   indices[0] = lp_build_const_int32(gallivm, 0);
135   indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_TAGS);
136   indices[2] = index;
137   member_ptr = LLVMBuildGEP(builder, ptr, indices, ARRAY_SIZE(indices), "");
138   return LLVMBuildLoad(builder, member_ptr, "tag_data");
139}
140
141
142static void
143update_cached_block(struct gallivm_state *gallivm,
144                    const struct util_format_description *format_desc,
145                    LLVMValueRef ptr_addr,
146                    LLVMValueRef hash_index,
147                    LLVMValueRef cache)
148
149{
150   LLVMBuilderRef builder = gallivm->builder;
151   LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
152   LLVMTypeRef pi8t = LLVMPointerType(i8t, 0);
153   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
154   LLVMTypeRef i32x4 = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4);
155   LLVMValueRef function;
156   LLVMValueRef tag_value, tmp_ptr;
157   LLVMValueRef col[4];
158   unsigned i, j;
159
160   /*
161    * Use format_desc->fetch_rgba_8unorm() for each pixel in the block.
162    * This doesn't actually make any sense whatsoever, someone would need
163    * to write a function doing this for all pixels in a block (either as
164    * an external c function or with generated code). Don't ask.
165    */
166
167   {
168      /*
169       * Function to call looks like:
170       *   fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
171       */
172      LLVMTypeRef ret_type;
173      LLVMTypeRef arg_types[4];
174      LLVMTypeRef function_type;
175
176      assert(format_desc->fetch_rgba_8unorm);
177
178      ret_type = LLVMVoidTypeInContext(gallivm->context);
179      arg_types[0] = pi8t;
180      arg_types[1] = pi8t;
181      arg_types[2] = i32t;
182      arg_types[3] = i32t;
183      function_type = LLVMFunctionType(ret_type, arg_types,
184                                       ARRAY_SIZE(arg_types), 0);
185
186      /* make const pointer for the C fetch_rgba_8unorm function */
187      function = lp_build_const_int_pointer(gallivm,
188         func_to_pointer((func_pointer) format_desc->fetch_rgba_8unorm));
189
190      /* cast the callee pointer to the function's type */
191      function = LLVMBuildBitCast(builder, function,
192                                  LLVMPointerType(function_type, 0),
193                                  "cast callee");
194   }
195
196   tmp_ptr = lp_build_array_alloca(gallivm, i32x4,
197                                   lp_build_const_int32(gallivm, 16),
198                                   "tmp_decode_store");
199   tmp_ptr = LLVMBuildBitCast(builder, tmp_ptr, pi8t, "");
200
201   /*
202    * Invoke format_desc->fetch_rgba_8unorm() for each pixel.
203    * This is going to be really really slow.
204    * Note: the block store format is actually
205    * x0y0x0y1x0y2x0y3 x1y0x1y1x1y2x1y3 ...
206    */
207   for (i = 0; i < 4; ++i) {
208      for (j = 0; j < 4; ++j) {
209         LLVMValueRef args[4];
210         LLVMValueRef dst_offset = lp_build_const_int32(gallivm, (i * 4 + j) * 4);
211
212         /*
213          * Note we actually supply a pointer to the start of the block,
214          * not the start of the texture.
215          */
216         args[0] = LLVMBuildGEP(gallivm->builder, tmp_ptr, &dst_offset, 1, "");
217         args[1] = ptr_addr;
218         args[2] = LLVMConstInt(i32t, i, 0);
219         args[3] = LLVMConstInt(i32t, j, 0);
220         LLVMBuildCall(builder, function, args, ARRAY_SIZE(args), "");
221      }
222   }
223
224   /* Finally store the block - pointless mem copy + update tag. */
225   tmp_ptr = LLVMBuildBitCast(builder, tmp_ptr, LLVMPointerType(i32x4, 0), "");
226   for (i = 0; i < 4; ++i) {
227      LLVMValueRef tmp_offset = lp_build_const_int32(gallivm, i);
228      LLVMValueRef ptr = LLVMBuildGEP(gallivm->builder, tmp_ptr, &tmp_offset, 1, "");
229      col[i] = LLVMBuildLoad(builder, ptr, "");
230   }
231
232   tag_value = LLVMBuildPtrToInt(gallivm->builder, ptr_addr,
233                                 LLVMInt64TypeInContext(gallivm->context), "");
234   store_cached_block(gallivm, col, tag_value, hash_index, cache);
235}
236
237
/**
 * Do a cached lookup.
 *
 * Emits IR that, for each of the n requested texels, checks whether the
 * block containing it is present in the direct mapped cache (by comparing
 * the stored 64-bit tag against the block address), decodes and stores the
 * block on a miss, and then loads the texel from the cache.
 *
 * @param gallivm      code generation state
 * @param format_desc  format description; block width and height must be 4
 * @param n            number of texels to fetch (used as the vector length)
 * @param base_ptr     pointer that the block offsets are relative to
 * @param offset       32-bit block offset(s) added to base_ptr, one per texel
 * @param i            first in-block coordinate(s); presumably x in [0,3],
 *                     verify against callers
 * @param j            second in-block coordinate(s); presumably y in [0,3],
 *                     verify against callers
 * @param cache        pointer to the format cache struct
 *
 * Returns (vectors of) 4x8 rgba aos value
 */
LLVMValueRef
lp_build_fetch_cached_texels(struct gallivm_state *gallivm,
                             const struct util_format_description *format_desc,
                             unsigned n,
                             LLVMValueRef base_ptr,
                             LLVMValueRef offset,
                             LLVMValueRef i,
                             LLVMValueRef j,
                             LLVMValueRef cache)

{
   LLVMBuilderRef builder = gallivm->builder;
   unsigned count, low_bit, log2size;
   LLVMValueRef color, offset_stored, addr, ptr_addrtrunc, tmp;
   LLVMValueRef ij_index, hash_index, hash_mask, block_index;
   LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
   LLVMTypeRef i64t = LLVMInt64TypeInContext(gallivm->context);
   struct lp_type type;
   struct lp_build_context bld32;
   /* 32-bit type with n elements; all other lp_type fields are zeroed */
   memset(&type, 0, sizeof type);
   type.width = 32;
   type.length = n;

   assert(format_desc->block.width == 4);
   assert(format_desc->block.height == 4);

   lp_build_context_init(&bld32, gallivm, type);

   /*
    * compute hash - we use direct mapped cache, the hash function could
    *                be better but it needs to be simple
    * per-element:
    *    compare offset with offset stored at tag (hash)
    *    if not equal decode/store block, update tag
    *    extract color from cache
    *    assemble result vector
    */

   /* TODO: not ideal with 32bit pointers... */

   /* log2 of the block size in bytes: these address bits are dropped below */
   low_bit = util_logbase2(format_desc->block.bits / 8);
   log2size = util_logbase2(LP_BUILD_FORMAT_CACHE_SIZE);
   /* full 64-bit base address, used for the tag compare */
   addr = LLVMBuildPtrToInt(builder, base_ptr, i64t, "");
   /* 32-bit base address, used only for hashing */
   ptr_addrtrunc = LLVMBuildPtrToInt(builder, base_ptr, i32t, "");
   ptr_addrtrunc = lp_build_broadcast_scalar(&bld32, ptr_addrtrunc);
   /* For the hash function, first shift out the unused lowest bits. Then just
      do some xor with address bits - only use lower 32bits */
   ptr_addrtrunc = LLVMBuildAdd(builder, offset, ptr_addrtrunc, "");
   ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc,
                                 lp_build_const_int_vec(gallivm, type, low_bit), "");
   /* This only really makes sense for size 64,128,256 */
   /* hash = a ^ (a >> 2*log2size), then hash ^= hash >> log2size */
   hash_index = ptr_addrtrunc;
   ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc,
                                 lp_build_const_int_vec(gallivm, type, 2*log2size), "");
   hash_index = LLVMBuildXor(builder, ptr_addrtrunc, hash_index, "");
   tmp = LLVMBuildLShr(builder, hash_index,
                       lp_build_const_int_vec(gallivm, type, log2size), "");
   hash_index = LLVMBuildXor(builder, hash_index, tmp, "");

   /* reduce the hash to a valid cache entry index */
   hash_mask = lp_build_const_int_vec(gallivm, type, LP_BUILD_FORMAT_CACHE_SIZE - 1);
   hash_index = LLVMBuildAnd(builder, hash_index, hash_mask, "");
   /* texel position inside the block: i * 4 + j */
   ij_index = LLVMBuildShl(builder, i, lp_build_const_int_vec(gallivm, type, 2), "");
   ij_index = LLVMBuildAdd(builder, ij_index, j, "");
   /* index into the cache data: 16 texels per entry plus in-block position */
   block_index = LLVMBuildShl(builder, hash_index,
                              lp_build_const_int_vec(gallivm, type, 4), "");
   block_index = LLVMBuildAdd(builder, ij_index, block_index, "");

   if (n > 1) {
      /* Vector case: handle each element separately, then reassemble. */
      color = LLVMGetUndef(LLVMVectorType(i32t, n));
      for (count = 0; count < n; count++) {
         LLVMValueRef index, cond, colorx;
         LLVMValueRef block_indexx, hash_indexx, addrx, offsetx, ptr_addrx;
         struct lp_build_if_state if_ctx;

         index = lp_build_const_int32(gallivm, count);
         /* 64-bit address of this element's block: base + zext(offset) */
         offsetx = LLVMBuildExtractElement(builder, offset, index, "");
         addrx = LLVMBuildZExt(builder, offsetx, i64t, "");
         addrx = LLVMBuildAdd(builder, addrx, addr, "");
         block_indexx = LLVMBuildExtractElement(builder, block_index, index, "");
         /*
          * Recover the entry index from the data index by dropping the low
          * 4 bits (this relies on i * 4 + j fitting into those 4 bits).
          */
         hash_indexx = LLVMBuildLShr(builder, block_indexx,
                                     lp_build_const_int32(gallivm, 4), "");
         offset_stored = lookup_tag_data(gallivm, cache, hash_indexx);
         /* miss if the stored tag differs from the block address */
         cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addrx, "");

         lp_build_if(&if_ctx, gallivm, cond);
         {
            ptr_addrx = LLVMBuildIntToPtr(builder, addrx,
                                          LLVMPointerType(i8t, 0), "");
            update_cached_block(gallivm, format_desc, ptr_addrx, hash_indexx, cache);
#if LP_BUILD_FORMAT_CACHE_DEBUG
            update_cache_access(gallivm, cache, 1,
                                LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS);
#endif
         }
         lp_build_endif(&if_ctx);

         /* the entry now holds the right block, so fetch the texel from it */
         colorx = lookup_cached_pixel(gallivm, cache, block_indexx);

         color = LLVMBuildInsertElement(builder, color, colorx,
                                        lp_build_const_int32(gallivm, count), "");
      }
   }
   else {
      /*
       * Single texel case: offset, hash_index and block_index are used
       * directly without element extraction (presumably the helpers above
       * produce scalars for a length 1 type - confirm).
       */
      LLVMValueRef cond;
      struct lp_build_if_state if_ctx;

      tmp = LLVMBuildZExt(builder, offset, i64t, "");
      addr = LLVMBuildAdd(builder, tmp, addr, "");
      offset_stored = lookup_tag_data(gallivm, cache, hash_index);
      /* miss if the stored tag differs from the block address */
      cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addr, "");

      lp_build_if(&if_ctx, gallivm, cond);
      {
         tmp = LLVMBuildIntToPtr(builder, addr, LLVMPointerType(i8t, 0), "");
         update_cached_block(gallivm, format_desc, tmp, hash_index, cache);
#if LP_BUILD_FORMAT_CACHE_DEBUG
         update_cache_access(gallivm, cache, 1,
                             LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS);
#endif
      }
      lp_build_endif(&if_ctx);

      color = lookup_cached_pixel(gallivm, cache, block_index);
   }
#if LP_BUILD_FORMAT_CACHE_DEBUG
   /* count every requested texel as one access */
   update_cache_access(gallivm, cache, n,
                       LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL);
#endif
   /* reinterpret the result as n * 4 bytes */
   return LLVMBuildBitCast(builder, color, LLVMVectorType(i8t, n * 4), "");
}
374
375