lp_bld_sample_aos.c revision ba9c1773d77afc71adc7bad3d8c326b104c5e094
1/**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * @file
30 * Texture sampling -- AoS.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 * @author Brian Paul <brianp@vmware.com>
34 */
35
36#include "pipe/p_defines.h"
37#include "pipe/p_state.h"
38#include "util/u_debug.h"
39#include "util/u_dump.h"
40#include "util/u_memory.h"
41#include "util/u_math.h"
42#include "util/u_format.h"
43#include "lp_bld_debug.h"
44#include "lp_bld_type.h"
45#include "lp_bld_const.h"
46#include "lp_bld_conv.h"
47#include "lp_bld_arit.h"
48#include "lp_bld_bitarit.h"
49#include "lp_bld_logic.h"
50#include "lp_bld_swizzle.h"
51#include "lp_bld_pack.h"
52#include "lp_bld_flow.h"
53#include "lp_bld_gather.h"
54#include "lp_bld_format.h"
55#include "lp_bld_init.h"
56#include "lp_bld_sample.h"
57#include "lp_bld_sample_aos.h"
58#include "lp_bld_quad.h"
59
60
61/**
62 * Build LLVM code for texture coord wrapping, for nearest filtering,
63 * for scaled integer texcoords.
64 * \param block_length  is the length of the pixel block along the
65 *                      coordinate axis
66 * \param coord  the incoming texcoord (s,t,r or q) scaled to the texture size
67 * \param length  the texture size along one dimension
68 * \param stride  pixel stride along the coordinate axis (in bytes)
69 * \param is_pot  if TRUE, length is a power of two
70 * \param wrap_mode  one of PIPE_TEX_WRAP_x
71 * \param out_offset  byte offset for the wrapped coordinate
72 * \param out_i  resulting sub-block pixel coordinate for coord0
73 */
74static void
75lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
76                                 unsigned block_length,
77                                 LLVMValueRef coord,
78                                 LLVMValueRef length,
79                                 LLVMValueRef stride,
80                                 boolean is_pot,
81                                 unsigned wrap_mode,
82                                 LLVMValueRef *out_offset,
83                                 LLVMValueRef *out_i)
84{
85   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
86   LLVMBuilderRef builder = bld->gallivm->builder;
87   LLVMValueRef length_minus_one;
88
89   length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
90
91   switch(wrap_mode) {
92   case PIPE_TEX_WRAP_REPEAT:
93      if(is_pot)
94         coord = LLVMBuildAnd(builder, coord, length_minus_one, "");
95      else {
96         /* Add a bias to the texcoord to handle negative coords */
97         LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024);
98         coord = LLVMBuildAdd(builder, coord, bias, "");
99         coord = LLVMBuildURem(builder, coord, length, "");
100      }
101      break;
102
103   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
104      coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
105      coord = lp_build_min(int_coord_bld, coord, length_minus_one);
106      break;
107
108   case PIPE_TEX_WRAP_CLAMP:
109   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
110   case PIPE_TEX_WRAP_MIRROR_REPEAT:
111   case PIPE_TEX_WRAP_MIRROR_CLAMP:
112   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
113   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
114   default:
115      assert(0);
116   }
117
118   lp_build_sample_partial_offset(int_coord_bld, block_length, coord, stride,
119                                  out_offset, out_i);
120}
121
122
123/**
124 * Build LLVM code for texture coord wrapping, for linear filtering,
125 * for scaled integer texcoords.
126 * \param block_length  is the length of the pixel block along the
127 *                      coordinate axis
128 * \param coord0  the incoming texcoord (s,t,r or q) scaled to the texture size
129 * \param length  the texture size along one dimension
130 * \param stride  pixel stride along the coordinate axis (in bytes)
131 * \param is_pot  if TRUE, length is a power of two
132 * \param wrap_mode  one of PIPE_TEX_WRAP_x
133 * \param offset0  resulting relative offset for coord0
134 * \param offset1  resulting relative offset for coord0 + 1
135 * \param i0  resulting sub-block pixel coordinate for coord0
136 * \param i1  resulting sub-block pixel coordinate for coord0 + 1
137 */
138static void
139lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
140                                unsigned block_length,
141                                LLVMValueRef coord0,
142                                LLVMValueRef length,
143                                LLVMValueRef stride,
144                                boolean is_pot,
145                                unsigned wrap_mode,
146                                LLVMValueRef *offset0,
147                                LLVMValueRef *offset1,
148                                LLVMValueRef *i0,
149                                LLVMValueRef *i1)
150{
151   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
152   LLVMBuilderRef builder = bld->gallivm->builder;
153   LLVMValueRef length_minus_one;
154   LLVMValueRef lmask, umask, mask;
155
156   if (block_length != 1) {
157      /*
158       * If the pixel block covers more than one pixel then there is no easy
159       * way to calculate offset1 relative to offset0. Instead, compute them
160       * independently.
161       */
162
163      LLVMValueRef coord1;
164
165      lp_build_sample_wrap_nearest_int(bld,
166                                       block_length,
167                                       coord0,
168                                       length,
169                                       stride,
170                                       is_pot,
171                                       wrap_mode,
172                                       offset0, i0);
173
174      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
175
176      lp_build_sample_wrap_nearest_int(bld,
177                                       block_length,
178                                       coord1,
179                                       length,
180                                       stride,
181                                       is_pot,
182                                       wrap_mode,
183                                       offset1, i1);
184
185      return;
186   }
187
188   /*
189    * Scalar pixels -- try to compute offset0 and offset1 with a single stride
190    * multiplication.
191    */
192
193   *i0 = int_coord_bld->zero;
194   *i1 = int_coord_bld->zero;
195
196   length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
197
198   switch(wrap_mode) {
199   case PIPE_TEX_WRAP_REPEAT:
200      if (is_pot) {
201         coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
202      }
203      else {
204         /* Add a bias to the texcoord to handle negative coords */
205         LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024);
206         coord0 = LLVMBuildAdd(builder, coord0, bias, "");
207         coord0 = LLVMBuildURem(builder, coord0, length, "");
208      }
209
210      mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
211                              PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
212
213      *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
214      *offset1 = LLVMBuildAnd(builder,
215                              lp_build_add(int_coord_bld, *offset0, stride),
216                              mask, "");
217      break;
218
219   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
220      lmask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
221                               PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
222      umask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
223                               PIPE_FUNC_LESS, coord0, length_minus_one);
224
225      coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
226      coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);
227
228      mask = LLVMBuildAnd(builder, lmask, umask, "");
229
230      *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
231      *offset1 = lp_build_add(int_coord_bld,
232                              *offset0,
233                              LLVMBuildAnd(builder, stride, mask, ""));
234      break;
235
236   case PIPE_TEX_WRAP_CLAMP:
237   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
238   case PIPE_TEX_WRAP_MIRROR_REPEAT:
239   case PIPE_TEX_WRAP_MIRROR_CLAMP:
240   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
241   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
242   default:
243      assert(0);
244      *offset0 = int_coord_bld->zero;
245      *offset1 = int_coord_bld->zero;
246      break;
247   }
248}
249
250
251/**
252 * Sample a single texture image with nearest sampling.
253 * If sampling a cube texture, r = cube face in [0,5].
254 * Return filtered color as two vectors of 16-bit fixed point values.
255 */
256static void
257lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
258                              LLVMValueRef int_size,
259                              LLVMValueRef row_stride_vec,
260                              LLVMValueRef img_stride_vec,
261                              LLVMValueRef data_ptr,
262                              LLVMValueRef s,
263                              LLVMValueRef t,
264                              LLVMValueRef r,
265                              LLVMValueRef *colors_lo,
266                              LLVMValueRef *colors_hi)
267{
268   const unsigned dims = bld->dims;
269   LLVMBuilderRef builder = bld->gallivm->builder;
270   struct lp_build_context i32, h16, u8n;
271   LLVMTypeRef i32_vec_type, u8n_vec_type;
272   LLVMValueRef i32_c8;
273   LLVMValueRef width_vec, height_vec, depth_vec;
274   LLVMValueRef s_ipart, t_ipart = NULL, r_ipart = NULL;
275   LLVMValueRef x_stride;
276   LLVMValueRef x_offset, offset;
277   LLVMValueRef x_subcoord, y_subcoord, z_subcoord;
278
279   lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32));
280   lp_build_context_init(&h16, bld->gallivm, lp_type_ufixed(16));
281   lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8));
282
283   i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type);
284   u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);
285
286   lp_build_extract_image_sizes(bld,
287                                bld->int_size_type,
288                                bld->int_coord_type,
289                                int_size,
290                                &width_vec,
291                                &height_vec,
292                                &depth_vec);
293
294   if (bld->static_state->normalized_coords) {
295      LLVMValueRef scaled_size;
296      LLVMValueRef flt_size;
297
298      /* scale size by 256 (8 fractional bits) */
299      scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);
300
301      flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);
302
303      lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
304   }
305   else {
306      /* scale coords by 256 (8 fractional bits) */
307      s = lp_build_mul_imm(&bld->coord_bld, s, 256);
308      if (dims >= 2)
309         t = lp_build_mul_imm(&bld->coord_bld, t, 256);
310      if (dims >= 3)
311         r = lp_build_mul_imm(&bld->coord_bld, r, 256);
312   }
313
314   /* convert float to int */
315   s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
316   if (dims >= 2)
317      t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
318   if (dims >= 3)
319      r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");
320
321   /* compute floor (shift right 8) */
322   i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8);
323   s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
324   if (dims >= 2)
325      t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
326   if (dims >= 3)
327      r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
328
329   /* get pixel, row, image strides */
330   x_stride = lp_build_const_vec(bld->gallivm,
331                                 bld->int_coord_bld.type,
332                                 bld->format_desc->block.bits/8);
333
334   /* Do texcoord wrapping, compute texel offset */
335   lp_build_sample_wrap_nearest_int(bld,
336                                    bld->format_desc->block.width,
337                                    s_ipart, width_vec, x_stride,
338                                    bld->static_state->pot_width,
339                                    bld->static_state->wrap_s,
340                                    &x_offset, &x_subcoord);
341   offset = x_offset;
342   if (dims >= 2) {
343      LLVMValueRef y_offset;
344      lp_build_sample_wrap_nearest_int(bld,
345                                       bld->format_desc->block.height,
346                                       t_ipart, height_vec, row_stride_vec,
347                                       bld->static_state->pot_height,
348                                       bld->static_state->wrap_t,
349                                       &y_offset, &y_subcoord);
350      offset = lp_build_add(&bld->int_coord_bld, offset, y_offset);
351      if (dims >= 3) {
352         LLVMValueRef z_offset;
353         lp_build_sample_wrap_nearest_int(bld,
354                                          1, /* block length (depth) */
355                                          r_ipart, depth_vec, img_stride_vec,
356                                          bld->static_state->pot_depth,
357                                          bld->static_state->wrap_r,
358                                          &z_offset, &z_subcoord);
359         offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
360      }
361      else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
362         LLVMValueRef z_offset;
363         /* The r coord is the cube face in [0,5] */
364         z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
365         offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
366      }
367   }
368
369   /*
370    * Fetch the pixels as 4 x 32bit (rgba order might differ):
371    *
372    *   rgba0 rgba1 rgba2 rgba3
373    *
374    * bit cast them into 16 x u8
375    *
376    *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
377    *
378    * unpack them into two 8 x i16:
379    *
380    *   r0 g0 b0 a0 r1 g1 b1 a1
381    *   r2 g2 b2 a2 r3 g3 b3 a3
382    *
383    * The higher 8 bits of the resulting elements will be zero.
384    */
385   {
386      LLVMValueRef rgba8;
387
388      if (util_format_is_rgba8_variant(bld->format_desc)) {
389         /*
390          * Given the format is a rgba8, just read the pixels as is,
391          * without any swizzling. Swizzling will be done later.
392          */
393         rgba8 = lp_build_gather(bld->gallivm,
394                                 bld->texel_type.length,
395                                 bld->format_desc->block.bits,
396                                 bld->texel_type.width,
397                                 data_ptr, offset);
398
399         rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
400      }
401      else {
402         rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
403                                         bld->format_desc,
404                                         u8n.type,
405                                         data_ptr, offset,
406                                         x_subcoord,
407                                         y_subcoord);
408      }
409
410      /* Expand one 4*rgba8 to two 2*rgba16 */
411      lp_build_unpack2(bld->gallivm, u8n.type, h16.type,
412                       rgba8,
413                       colors_lo, colors_hi);
414   }
415}
416
417
418/**
419 * Sample a single texture image with (bi-)(tri-)linear sampling.
420 * Return filtered color as two vectors of 16-bit fixed point values.
421 */
422static void
423lp_build_sample_image_linear(struct lp_build_sample_context *bld,
424                             LLVMValueRef int_size,
425                             LLVMValueRef row_stride_vec,
426                             LLVMValueRef img_stride_vec,
427                             LLVMValueRef data_ptr,
428                             LLVMValueRef s,
429                             LLVMValueRef t,
430                             LLVMValueRef r,
431                             LLVMValueRef *colors_lo,
432                             LLVMValueRef *colors_hi)
433{
434   const unsigned dims = bld->dims;
435   LLVMBuilderRef builder = bld->gallivm->builder;
436   struct lp_build_context i32, h16, u8n;
437   LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
438   LLVMValueRef i32_c8, i32_c128, i32_c255;
439   LLVMValueRef width_vec, height_vec, depth_vec;
440   LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
441   LLVMValueRef t_ipart = NULL, t_fpart = NULL, t_fpart_lo = NULL, t_fpart_hi = NULL;
442   LLVMValueRef r_ipart = NULL, r_fpart = NULL, r_fpart_lo = NULL, r_fpart_hi = NULL;
443   LLVMValueRef x_stride, y_stride, z_stride;
444   LLVMValueRef x_offset0, x_offset1;
445   LLVMValueRef y_offset0, y_offset1;
446   LLVMValueRef z_offset0, z_offset1;
447   LLVMValueRef offset[2][2][2]; /* [z][y][x] */
448   LLVMValueRef x_subcoord[2], y_subcoord[2], z_subcoord[2];
449   LLVMValueRef neighbors_lo[2][2][2]; /* [z][y][x] */
450   LLVMValueRef neighbors_hi[2][2][2]; /* [z][y][x] */
451   LLVMValueRef packed_lo, packed_hi;
452   unsigned x, y, z;
453   unsigned i, j, k;
454   unsigned numj, numk;
455
456   lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32));
457   lp_build_context_init(&h16, bld->gallivm, lp_type_ufixed(16));
458   lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8));
459
460   i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type);
461   h16_vec_type = lp_build_vec_type(bld->gallivm, h16.type);
462   u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);
463
464   lp_build_extract_image_sizes(bld,
465                                bld->int_size_type,
466                                bld->int_coord_type,
467                                int_size,
468                                &width_vec,
469                                &height_vec,
470                                &depth_vec);
471
472   if (bld->static_state->normalized_coords) {
473      LLVMValueRef scaled_size;
474      LLVMValueRef flt_size;
475
476      /* scale size by 256 (8 fractional bits) */
477      scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);
478
479      flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);
480
481      lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
482   }
483   else {
484      /* scale coords by 256 (8 fractional bits) */
485      s = lp_build_mul_imm(&bld->coord_bld, s, 256);
486      if (dims >= 2)
487         t = lp_build_mul_imm(&bld->coord_bld, t, 256);
488      if (dims >= 3)
489         r = lp_build_mul_imm(&bld->coord_bld, r, 256);
490   }
491
492   /* convert float to int */
493   s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
494   if (dims >= 2)
495      t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
496   if (dims >= 3)
497      r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");
498
499   /* subtract 0.5 (add -128) */
500   i32_c128 = lp_build_const_int_vec(bld->gallivm, i32.type, -128);
501   if (!bld->static_state->force_nearest_s) {
502      s = LLVMBuildAdd(builder, s, i32_c128, "");
503   }
504   if (dims >= 2 && !bld->static_state->force_nearest_t) {
505      t = LLVMBuildAdd(builder, t, i32_c128, "");
506   }
507   if (dims >= 3) {
508      r = LLVMBuildAdd(builder, r, i32_c128, "");
509   }
510
511   /* compute floor (shift right 8) */
512   i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8);
513   s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
514   if (dims >= 2)
515      t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
516   if (dims >= 3)
517      r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
518
519   /* compute fractional part (AND with 0xff) */
520   i32_c255 = lp_build_const_int_vec(bld->gallivm, i32.type, 255);
521   s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
522   if (dims >= 2)
523      t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
524   if (dims >= 3)
525      r_fpart = LLVMBuildAnd(builder, r, i32_c255, "");
526
527   /* get pixel, row and image strides */
528   x_stride = lp_build_const_vec(bld->gallivm, bld->int_coord_bld.type,
529                                 bld->format_desc->block.bits/8);
530   y_stride = row_stride_vec;
531   z_stride = img_stride_vec;
532
533   /* do texcoord wrapping and compute texel offsets */
534   lp_build_sample_wrap_linear_int(bld,
535                                   bld->format_desc->block.width,
536                                   s_ipart, width_vec, x_stride,
537                                   bld->static_state->pot_width,
538                                   bld->static_state->wrap_s,
539                                   &x_offset0, &x_offset1,
540                                   &x_subcoord[0], &x_subcoord[1]);
541   for (z = 0; z < 2; z++) {
542      for (y = 0; y < 2; y++) {
543         offset[z][y][0] = x_offset0;
544         offset[z][y][1] = x_offset1;
545      }
546   }
547
548   if (dims >= 2) {
549      lp_build_sample_wrap_linear_int(bld,
550                                      bld->format_desc->block.height,
551                                      t_ipart, height_vec, y_stride,
552                                      bld->static_state->pot_height,
553                                      bld->static_state->wrap_t,
554                                      &y_offset0, &y_offset1,
555                                      &y_subcoord[0], &y_subcoord[1]);
556
557      for (z = 0; z < 2; z++) {
558         for (x = 0; x < 2; x++) {
559            offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
560                                           offset[z][0][x], y_offset0);
561            offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
562                                           offset[z][1][x], y_offset1);
563         }
564      }
565   }
566
567   if (dims >= 3) {
568      lp_build_sample_wrap_linear_int(bld,
569                                      bld->format_desc->block.height,
570                                      r_ipart, depth_vec, z_stride,
571                                      bld->static_state->pot_depth,
572                                      bld->static_state->wrap_r,
573                                      &z_offset0, &z_offset1,
574                                      &z_subcoord[0], &z_subcoord[1]);
575      for (y = 0; y < 2; y++) {
576         for (x = 0; x < 2; x++) {
577            offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
578                                           offset[0][y][x], z_offset0);
579            offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
580                                           offset[1][y][x], z_offset1);
581         }
582      }
583   }
584   else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
585      LLVMValueRef z_offset;
586      z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
587      for (y = 0; y < 2; y++) {
588         for (x = 0; x < 2; x++) {
589            /* The r coord is the cube face in [0,5] */
590            offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
591                                           offset[0][y][x], z_offset);
592         }
593      }
594   }
595
596   /*
597    * Transform 4 x i32 in
598    *
599    *   s_fpart = {s0, s1, s2, s3}
600    *
601    * into 8 x i16
602    *
603    *   s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
604    *
605    * into two 8 x i16
606    *
607    *   s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
608    *   s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
609    *
610    * and likewise for t_fpart. There is no risk of loosing precision here
611    * since the fractional parts only use the lower 8bits.
612    */
613   s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
614   if (dims >= 2)
615      t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
616   if (dims >= 3)
617      r_fpart = LLVMBuildBitCast(builder, r_fpart, h16_vec_type, "");
618
619   {
620      LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
621      LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
622      LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
623      LLVMValueRef shuffle_lo;
624      LLVMValueRef shuffle_hi;
625
626      for (j = 0; j < h16.type.length; j += 4) {
627#ifdef PIPE_ARCH_LITTLE_ENDIAN
628         unsigned subindex = 0;
629#else
630         unsigned subindex = 1;
631#endif
632         LLVMValueRef index;
633
634         index = LLVMConstInt(elem_type, j/2 + subindex, 0);
635         for (i = 0; i < 4; ++i)
636            shuffles_lo[j + i] = index;
637
638         index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
639         for (i = 0; i < 4; ++i)
640            shuffles_hi[j + i] = index;
641      }
642
643      shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
644      shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);
645
646      s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
647                                          shuffle_lo, "");
648      s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
649                                          shuffle_hi, "");
650      if (dims >= 2) {
651         t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
652                                             shuffle_lo, "");
653         t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
654                                             shuffle_hi, "");
655      }
656      if (dims >= 3) {
657         r_fpart_lo = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
658                                             shuffle_lo, "");
659         r_fpart_hi = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
660                                             shuffle_hi, "");
661      }
662   }
663
664   /*
665    * Fetch the pixels as 4 x 32bit (rgba order might differ):
666    *
667    *   rgba0 rgba1 rgba2 rgba3
668    *
669    * bit cast them into 16 x u8
670    *
671    *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
672    *
673    * unpack them into two 8 x i16:
674    *
675    *   r0 g0 b0 a0 r1 g1 b1 a1
676    *   r2 g2 b2 a2 r3 g3 b3 a3
677    *
678    * The higher 8 bits of the resulting elements will be zero.
679    */
680   numj = 1 + (dims >= 2);
681   numk = 1 + (dims >= 3);
682
683   for (k = 0; k < numk; k++) {
684      for (j = 0; j < numj; j++) {
685         for (i = 0; i < 2; i++) {
686            LLVMValueRef rgba8;
687
688            if (util_format_is_rgba8_variant(bld->format_desc)) {
689               /*
690                * Given the format is a rgba8, just read the pixels as is,
691                * without any swizzling. Swizzling will be done later.
692                */
693               rgba8 = lp_build_gather(bld->gallivm,
694                                       bld->texel_type.length,
695                                       bld->format_desc->block.bits,
696                                       bld->texel_type.width,
697                                       data_ptr, offset[k][j][i]);
698
699               rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
700            }
701            else {
702               rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
703                                               bld->format_desc,
704                                               u8n.type,
705                                               data_ptr, offset[k][j][i],
706                                               x_subcoord[i],
707                                               y_subcoord[j]);
708            }
709
710            /* Expand one 4*rgba8 to two 2*rgba16 */
711            lp_build_unpack2(bld->gallivm, u8n.type, h16.type,
712                             rgba8,
713                             &neighbors_lo[k][j][i], &neighbors_hi[k][j][i]);
714         }
715      }
716   }
717
718   /*
719    * Linear interpolation with 8.8 fixed point.
720    */
721   if (bld->static_state->force_nearest_s) {
722      /* special case 1-D lerp */
723      packed_lo = lp_build_lerp(&h16,
724                                t_fpart_lo,
725                                neighbors_lo[0][0][0],
726                                neighbors_lo[0][0][1]);
727
728      packed_hi = lp_build_lerp(&h16,
729                                t_fpart_hi,
730                                neighbors_hi[0][1][0],
731                                neighbors_hi[0][1][0]);
732   }
733   else if (bld->static_state->force_nearest_t) {
734      /* special case 1-D lerp */
735      packed_lo = lp_build_lerp(&h16,
736                                s_fpart_lo,
737                                neighbors_lo[0][0][0],
738                                neighbors_lo[0][0][1]);
739
740      packed_hi = lp_build_lerp(&h16,
741                                s_fpart_hi,
742                                neighbors_hi[0][0][0],
743                                neighbors_hi[0][0][1]);
744   }
745   else {
746      /* general 1/2/3-D lerping */
747      if (dims == 1) {
748         packed_lo = lp_build_lerp(&h16,
749                                   s_fpart_lo,
750                                   neighbors_lo[0][0][0],
751                                   neighbors_lo[0][0][1]);
752
753         packed_hi = lp_build_lerp(&h16,
754                                   s_fpart_hi,
755                                   neighbors_hi[0][0][0],
756                                   neighbors_hi[0][0][1]);
757      }
758      else {
759         /* 2-D lerp */
760         packed_lo = lp_build_lerp_2d(&h16,
761                                      s_fpart_lo, t_fpart_lo,
762                                      neighbors_lo[0][0][0],
763                                      neighbors_lo[0][0][1],
764                                      neighbors_lo[0][1][0],
765                                      neighbors_lo[0][1][1]);
766
767         packed_hi = lp_build_lerp_2d(&h16,
768                                      s_fpart_hi, t_fpart_hi,
769                                      neighbors_hi[0][0][0],
770                                      neighbors_hi[0][0][1],
771                                      neighbors_hi[0][1][0],
772                                      neighbors_hi[0][1][1]);
773
774         if (dims >= 3) {
775            LLVMValueRef packed_lo2, packed_hi2;
776
777            /* lerp in the second z slice */
778            packed_lo2 = lp_build_lerp_2d(&h16,
779                                          s_fpart_lo, t_fpart_lo,
780                                          neighbors_lo[1][0][0],
781                                          neighbors_lo[1][0][1],
782                                          neighbors_lo[1][1][0],
783                                          neighbors_lo[1][1][1]);
784
785            packed_hi2 = lp_build_lerp_2d(&h16,
786                                          s_fpart_hi, t_fpart_hi,
787                                          neighbors_hi[1][0][0],
788                                          neighbors_hi[1][0][1],
789                                          neighbors_hi[1][1][0],
790                                          neighbors_hi[1][1][1]);
791            /* interp between two z slices */
792            packed_lo = lp_build_lerp(&h16, r_fpart_lo,
793                                      packed_lo, packed_lo2);
794            packed_hi = lp_build_lerp(&h16, r_fpart_hi,
795                                      packed_hi, packed_hi2);
796         }
797      }
798   }
799
800   *colors_lo = packed_lo;
801   *colors_hi = packed_hi;
802}
803
804
805/**
806 * Sample the texture/mipmap using given image filter and mip filter.
807 * data0_ptr and data1_ptr point to the two mipmap levels to sample
808 * from.  width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
809 * If we're using nearest miplevel sampling the '1' values will be null/unused.
810 */
811static void
812lp_build_sample_mipmap(struct lp_build_sample_context *bld,
813                       unsigned img_filter,
814                       unsigned mip_filter,
815                       LLVMValueRef s,
816                       LLVMValueRef t,
817                       LLVMValueRef r,
818                       LLVMValueRef ilevel0,
819                       LLVMValueRef ilevel1,
820                       LLVMValueRef lod_fpart,
821                       LLVMValueRef colors_lo_var,
822                       LLVMValueRef colors_hi_var)
823{
824   LLVMBuilderRef builder = bld->gallivm->builder;
825   LLVMValueRef size0;
826   LLVMValueRef size1;
827   LLVMValueRef row_stride0_vec;
828   LLVMValueRef row_stride1_vec;
829   LLVMValueRef img_stride0_vec;
830   LLVMValueRef img_stride1_vec;
831   LLVMValueRef data_ptr0;
832   LLVMValueRef data_ptr1;
833   LLVMValueRef colors0_lo, colors0_hi;
834   LLVMValueRef colors1_lo, colors1_hi;
835
836   /* sample the first mipmap level */
837   lp_build_mipmap_level_sizes(bld, ilevel0,
838                               &size0,
839                               &row_stride0_vec, &img_stride0_vec);
840   data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
841   if (img_filter == PIPE_TEX_FILTER_NEAREST) {
842      lp_build_sample_image_nearest(bld,
843                                    size0,
844                                    row_stride0_vec, img_stride0_vec,
845                                    data_ptr0, s, t, r,
846                                    &colors0_lo, &colors0_hi);
847   }
848   else {
849      assert(img_filter == PIPE_TEX_FILTER_LINEAR);
850      lp_build_sample_image_linear(bld,
851                                   size0,
852                                   row_stride0_vec, img_stride0_vec,
853                                   data_ptr0, s, t, r,
854                                   &colors0_lo, &colors0_hi);
855   }
856
857   /* Store the first level's colors in the output variables */
858   LLVMBuildStore(builder, colors0_lo, colors_lo_var);
859   LLVMBuildStore(builder, colors0_hi, colors_hi_var);
860
861   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
862      LLVMValueRef h16_scale = lp_build_const_float(bld->gallivm, 256.0);
863      LLVMTypeRef i32_type = LLVMIntTypeInContext(bld->gallivm->context, 32);
864      struct lp_build_if_state if_ctx;
865      LLVMValueRef need_lerp;
866
867      lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16_scale, "");
868      lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32_type, "lod_fpart.fixed16");
869
870      /* need_lerp = lod_fpart > 0 */
871      need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
872                                lod_fpart, LLVMConstNull(i32_type),
873                                "need_lerp");
874
875      lp_build_if(&if_ctx, bld->gallivm, need_lerp);
876      {
877         struct lp_build_context h16_bld;
878
879         lp_build_context_init(&h16_bld, bld->gallivm, lp_type_ufixed(16));
880
881         /* sample the second mipmap level */
882         lp_build_mipmap_level_sizes(bld, ilevel1,
883                                     &size1,
884                                     &row_stride1_vec, &img_stride1_vec);
885         data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
886         if (img_filter == PIPE_TEX_FILTER_NEAREST) {
887            lp_build_sample_image_nearest(bld,
888                                          size1,
889                                          row_stride1_vec, img_stride1_vec,
890                                          data_ptr1, s, t, r,
891                                          &colors1_lo, &colors1_hi);
892         }
893         else {
894            lp_build_sample_image_linear(bld,
895                                         size1,
896                                         row_stride1_vec, img_stride1_vec,
897                                         data_ptr1, s, t, r,
898                                         &colors1_lo, &colors1_hi);
899         }
900
901         /* interpolate samples from the two mipmap levels */
902
903         lod_fpart = LLVMBuildTrunc(builder, lod_fpart, h16_bld.elem_type, "");
904         lod_fpart = lp_build_broadcast_scalar(&h16_bld, lod_fpart);
905
906#if HAVE_LLVM == 0x208
907         /* This is a work-around for a bug in LLVM 2.8.
908          * Evidently, something goes wrong in the construction of the
909          * lod_fpart short[8] vector.  Adding this no-effect shuffle seems
910          * to force the vector to be properly constructed.
911          * Tested with mesa-demos/src/tests/mipmap_limits.c (press t, f).
912          */
913         {
914            LLVMValueRef shuffles[8], shuffle;
915            int i;
916            assert(h16_bld.type.length <= Elements(shuffles));
917            for (i = 0; i < h16_bld.type.length; i++)
918               shuffles[i] = lp_build_const_int32(bld->gallivm, 2 * (i & 1));
919            shuffle = LLVMConstVector(shuffles, h16_bld.type.length);
920            lod_fpart = LLVMBuildShuffleVector(builder,
921                                               lod_fpart, lod_fpart,
922                                               shuffle, "");
923         }
924#endif
925
926         colors0_lo = lp_build_lerp(&h16_bld, lod_fpart,
927                                    colors0_lo, colors1_lo);
928         colors0_hi = lp_build_lerp(&h16_bld, lod_fpart,
929                                    colors0_hi, colors1_hi);
930
931         LLVMBuildStore(builder, colors0_lo, colors_lo_var);
932         LLVMBuildStore(builder, colors0_hi, colors_hi_var);
933      }
934      lp_build_endif(&if_ctx);
935   }
936}
937
938
939
940/**
941 * Texture sampling in AoS format.  Used when sampling common 32-bit/texel
942 * formats.  1D/2D/3D/cube texture supported.  All mipmap sampling modes
943 * but only limited texture coord wrap modes.
944 */
945void
946lp_build_sample_aos(struct lp_build_sample_context *bld,
947                    unsigned unit,
948                    LLVMValueRef s,
949                    LLVMValueRef t,
950                    LLVMValueRef r,
951                    const LLVMValueRef *ddx,
952                    const LLVMValueRef *ddy,
953                    LLVMValueRef lod_bias, /* optional */
954                    LLVMValueRef explicit_lod, /* optional */
955                    LLVMValueRef texel_out[4])
956{
957   struct lp_build_context *int_bld = &bld->int_bld;
958   LLVMBuilderRef builder = bld->gallivm->builder;
959   const unsigned mip_filter = bld->static_state->min_mip_filter;
960   const unsigned min_filter = bld->static_state->min_img_filter;
961   const unsigned mag_filter = bld->static_state->mag_img_filter;
962   const unsigned dims = bld->dims;
963   LLVMValueRef lod_ipart = NULL, lod_fpart = NULL;
964   LLVMValueRef ilevel0, ilevel1 = NULL;
965   LLVMValueRef packed, packed_lo, packed_hi;
966   LLVMValueRef unswizzled[4];
967   LLVMValueRef face_ddx[4], face_ddy[4];
968   struct lp_build_context h16_bld;
969   LLVMValueRef first_level;
970   LLVMValueRef i32t_zero = lp_build_const_int32(bld->gallivm, 0);
971
972   /* we only support the common/simple wrap modes at this time */
973   assert(lp_is_simple_wrap_mode(bld->static_state->wrap_s));
974   if (dims >= 2)
975      assert(lp_is_simple_wrap_mode(bld->static_state->wrap_t));
976   if (dims >= 3)
977      assert(lp_is_simple_wrap_mode(bld->static_state->wrap_r));
978
979
980   /* make 16-bit fixed-pt builder context */
981   lp_build_context_init(&h16_bld, bld->gallivm, lp_type_ufixed(16));
982
983   /* cube face selection, compute pre-face coords, etc. */
984   if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
985      LLVMValueRef face, face_s, face_t;
986      lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
987      s = face_s; /* vec */
988      t = face_t; /* vec */
989      /* use 'r' to indicate cube face */
990      r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */
991
992      /* recompute ddx, ddy using the new (s,t) face texcoords */
993      face_ddx[0] = lp_build_scalar_ddx(&bld->coord_bld, s);
994      face_ddx[1] = lp_build_scalar_ddx(&bld->coord_bld, t);
995      face_ddx[2] = NULL;
996      face_ddx[3] = NULL;
997      face_ddy[0] = lp_build_scalar_ddy(&bld->coord_bld, s);
998      face_ddy[1] = lp_build_scalar_ddy(&bld->coord_bld, t);
999      face_ddy[2] = NULL;
1000      face_ddy[3] = NULL;
1001      ddx = face_ddx;
1002      ddy = face_ddy;
1003   }
1004
1005   /*
1006    * Compute the level of detail (float).
1007    */
1008   if (min_filter != mag_filter ||
1009       mip_filter != PIPE_TEX_MIPFILTER_NONE) {
1010      /* Need to compute lod either to choose mipmap levels or to
1011       * distinguish between minification/magnification with one mipmap level.
1012       */
1013      lp_build_lod_selector(bld, unit, ddx, ddy,
1014                            lod_bias, explicit_lod,
1015                            mip_filter,
1016                            &lod_ipart, &lod_fpart);
1017   } else {
1018      lod_ipart = i32t_zero;
1019   }
1020
1021   /*
1022    * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
1023    */
1024   switch (mip_filter) {
1025   default:
1026      assert(0 && "bad mip_filter value in lp_build_sample_aos()");
1027      /* fall-through */
1028   case PIPE_TEX_MIPFILTER_NONE:
1029      /* always use mip level 0 */
1030      if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
1031         /* XXX this is a work-around for an apparent bug in LLVM 2.7.
1032          * We should be able to set ilevel0 = const(0) but that causes
1033          * bad x86 code to be emitted.
1034          */
1035         assert(lod_ipart);
1036         lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
1037      }
1038      else {
1039         first_level = bld->dynamic_state->first_level(bld->dynamic_state,
1040                                                       bld->gallivm, unit);
1041         ilevel0 = first_level;
1042      }
1043      break;
1044   case PIPE_TEX_MIPFILTER_NEAREST:
1045      assert(lod_ipart);
1046      lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
1047      break;
1048   case PIPE_TEX_MIPFILTER_LINEAR:
1049      assert(lod_ipart);
1050      assert(lod_fpart);
1051      lp_build_linear_mip_levels(bld, unit,
1052                                 lod_ipart, &lod_fpart,
1053                                 &ilevel0, &ilevel1);
1054      break;
1055   }
1056
1057   /*
1058    * Get/interpolate texture colors.
1059    */
1060
1061   packed_lo = lp_build_alloca(bld->gallivm, h16_bld.vec_type, "packed_lo");
1062   packed_hi = lp_build_alloca(bld->gallivm, h16_bld.vec_type, "packed_hi");
1063
1064   if (min_filter == mag_filter) {
1065      /* no need to distinquish between minification and magnification */
1066      lp_build_sample_mipmap(bld,
1067                             min_filter, mip_filter,
1068                             s, t, r,
1069                             ilevel0, ilevel1, lod_fpart,
1070                             packed_lo, packed_hi);
1071   }
1072   else {
1073      /* Emit conditional to choose min image filter or mag image filter
1074       * depending on the lod being > 0 or <= 0, respectively.
1075       */
1076      struct lp_build_if_state if_ctx;
1077      LLVMValueRef minify;
1078
1079      /* minify = lod >= 0.0 */
1080      minify = LLVMBuildICmp(builder, LLVMIntSGE,
1081                             lod_ipart, int_bld->zero, "");
1082
1083      lp_build_if(&if_ctx, bld->gallivm, minify);
1084      {
1085         /* Use the minification filter */
1086         lp_build_sample_mipmap(bld,
1087                                min_filter, mip_filter,
1088                                s, t, r,
1089                                ilevel0, ilevel1, lod_fpart,
1090                                packed_lo, packed_hi);
1091      }
1092      lp_build_else(&if_ctx);
1093      {
1094         /* Use the magnification filter */
1095         lp_build_sample_mipmap(bld,
1096                                mag_filter, PIPE_TEX_MIPFILTER_NONE,
1097                                s, t, r,
1098                                ilevel0, NULL, NULL,
1099                                packed_lo, packed_hi);
1100      }
1101      lp_build_endif(&if_ctx);
1102   }
1103
1104   /*
1105    * combine the values stored in 'packed_lo' and 'packed_hi' variables
1106    * into 'packed'
1107    */
1108   packed = lp_build_pack2(bld->gallivm,
1109                           h16_bld.type, lp_type_unorm(8),
1110                           LLVMBuildLoad(builder, packed_lo, ""),
1111                           LLVMBuildLoad(builder, packed_hi, ""));
1112
1113   /*
1114    * Convert to SoA and swizzle.
1115    */
1116   lp_build_rgba8_to_f32_soa(bld->gallivm,
1117                             bld->texel_type,
1118                             packed, unswizzled);
1119
1120   if (util_format_is_rgba8_variant(bld->format_desc)) {
1121      lp_build_format_swizzle_soa(bld->format_desc,
1122                                  &bld->texel_bld,
1123                                  unswizzled, texel_out);
1124   }
1125   else {
1126      texel_out[0] = unswizzled[0];
1127      texel_out[1] = unswizzled[1];
1128      texel_out[2] = unswizzled[2];
1129      texel_out[3] = unswizzled[3];
1130   }
1131}
1132