lp_bld_sample_soa.c revision 2ccae040a458ad0f95ee46916e2ea467d5cf9d02
1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * @file
30 * Texture sampling -- SoA.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 */
34
35#include "pipe/p_defines.h"
36#include "pipe/p_state.h"
37#include "util/u_debug.h"
38#include "util/u_dump.h"
39#include "util/u_memory.h"
40#include "util/u_math.h"
41#include "util/u_format.h"
42#include "util/u_cpu_detect.h"
43#include "lp_bld_debug.h"
44#include "lp_bld_type.h"
45#include "lp_bld_const.h"
46#include "lp_bld_conv.h"
47#include "lp_bld_arit.h"
48#include "lp_bld_logic.h"
49#include "lp_bld_swizzle.h"
50#include "lp_bld_pack.h"
51#include "lp_bld_format.h"
52#include "lp_bld_sample.h"
53
54
55/**
56 * Keep all information for sampling code generation in a single place.
57 */
58struct lp_build_sample_context
59{
60   LLVMBuilderRef builder;
61
62   const struct lp_sampler_static_state *static_state;
63
64   struct lp_sampler_dynamic_state *dynamic_state;
65
66   const struct util_format_description *format_desc;
67
68   /** regular scalar float type */
69   struct lp_type float_type;
70   struct lp_build_context float_bld;
71
72   /** regular scalar float type */
73   struct lp_type int_type;
74   struct lp_build_context int_bld;
75
76   /** Incoming coordinates type and build context */
77   struct lp_type coord_type;
78   struct lp_build_context coord_bld;
79
80   /** Unsigned integer coordinates */
81   struct lp_type uint_coord_type;
82   struct lp_build_context uint_coord_bld;
83
84   /** Signed integer coordinates */
85   struct lp_type int_coord_type;
86   struct lp_build_context int_coord_bld;
87
88   /** Output texels type and build context */
89   struct lp_type texel_type;
90   struct lp_build_context texel_bld;
91};
92
93
94/**
95 * Does the given texture wrap mode allow sampling the texture border color?
96 * XXX maybe move this into gallium util code.
97 */
98static boolean
99wrap_mode_uses_border_color(unsigned mode)
100{
101   switch (mode) {
102   case PIPE_TEX_WRAP_REPEAT:
103   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
104   case PIPE_TEX_WRAP_MIRROR_REPEAT:
105   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
106      return FALSE;
107   case PIPE_TEX_WRAP_CLAMP:
108   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
109   case PIPE_TEX_WRAP_MIRROR_CLAMP:
110   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
111      return TRUE;
112   default:
113      assert(0 && "unexpected wrap mode");
114      return FALSE;
115   }
116}
117
118
119static LLVMValueRef
120lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
121                          LLVMValueRef data_array, LLVMValueRef level)
122{
123   LLVMValueRef indexes[2], data_ptr;
124   indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
125   indexes[1] = level;
126   data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, "");
127   data_ptr = LLVMBuildLoad(bld->builder, data_ptr, "");
128   return data_ptr;
129}
130
131
132static LLVMValueRef
133lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
134                                LLVMValueRef data_array, int level)
135{
136   LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
137   return lp_build_get_mipmap_level(bld, data_array, lvl);
138}
139
140
141/**
142 * Gen code to fetch a texel from a texture at int coords (x, y).
143 * The result, texel, will be:
144 *   texel[0] = red values
145 *   texel[1] = green values
146 *   texel[2] = blue values
147 *   texel[3] = alpha values
148 */
149static void
150lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
151                          LLVMValueRef width,
152                          LLVMValueRef height,
153                          LLVMValueRef x,
154                          LLVMValueRef y,
155                          LLVMValueRef y_stride,
156                          LLVMValueRef data_ptr,
157                          LLVMValueRef *texel)
158{
159   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
160   LLVMValueRef offset;
161   LLVMValueRef packed;
162   LLVMValueRef use_border = NULL;
163
164   /* use_border = x < 0 || x >= width || y < 0 || y >= height */
165   if (wrap_mode_uses_border_color(bld->static_state->wrap_s)) {
166      LLVMValueRef b1, b2;
167      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
168      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
169      use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
170   }
171
172   if (wrap_mode_uses_border_color(bld->static_state->wrap_t)) {
173      LLVMValueRef b1, b2;
174      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
175      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
176      if (use_border) {
177         use_border = LLVMBuildOr(bld->builder, use_border, b1, "ub_or_b1");
178         use_border = LLVMBuildOr(bld->builder, use_border, b2, "ub_or_b2");
179      }
180      else {
181         use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
182      }
183   }
184
185   /*
186    * Note: if we find an app which frequently samples the texture border
187    * we might want to implement a true conditional here to avoid sampling
188    * the texture whenever possible (since that's quite a bit of code).
189    * Ex:
190    *   if (use_border) {
191    *      texel = border_color;
192    *   }
193    *   else {
194    *      texel = sample_texture(coord);
195    *   }
196    * As it is now, we always sample the texture, then selectively replace
197    * the texel color results with the border color.
198    */
199
200   /* convert x,y coords to linear offset from start of texture, in bytes */
201   offset = lp_build_sample_offset(&bld->uint_coord_bld,
202                                   bld->format_desc,
203                                   x, y, y_stride);
204
205   assert(bld->format_desc->block.width == 1);
206   assert(bld->format_desc->block.height == 1);
207   assert(bld->format_desc->block.bits <= bld->texel_type.width);
208
209   /* gather the texels from the texture */
210   packed = lp_build_gather(bld->builder,
211                            bld->texel_type.length,
212                            bld->format_desc->block.bits,
213                            bld->texel_type.width,
214                            data_ptr, offset);
215
216   /* convert texels to float rgba */
217   lp_build_unpack_rgba_soa(bld->builder,
218                            bld->format_desc,
219                            bld->texel_type,
220                            packed, texel);
221
222   if (use_border) {
223      /* select texel color or border color depending on use_border */
224      int chan;
225      for (chan = 0; chan < 4; chan++) {
226         LLVMValueRef border_chan =
227            lp_build_const_scalar(bld->texel_type,
228                                  bld->static_state->border_color[chan]);
229         texel[chan] = lp_build_select(&bld->texel_bld, use_border,
230                                       border_chan, texel[chan]);
231      }
232   }
233}
234
235
236static LLVMValueRef
237lp_build_sample_packed(struct lp_build_sample_context *bld,
238                       LLVMValueRef x,
239                       LLVMValueRef y,
240                       LLVMValueRef y_stride,
241                       LLVMValueRef data_array)
242{
243   LLVMValueRef offset;
244   LLVMValueRef data_ptr;
245
246   offset = lp_build_sample_offset(&bld->uint_coord_bld,
247                                   bld->format_desc,
248                                   x, y, y_stride);
249
250   assert(bld->format_desc->block.width == 1);
251   assert(bld->format_desc->block.height == 1);
252   assert(bld->format_desc->block.bits <= bld->texel_type.width);
253
254   /* get pointer to mipmap level 0 data */
255   data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
256
257   return lp_build_gather(bld->builder,
258                          bld->texel_type.length,
259                          bld->format_desc->block.bits,
260                          bld->texel_type.width,
261                          data_ptr, offset);
262}
263
264
265/**
266 * Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes.
267 */
268static LLVMValueRef
269lp_build_coord_mirror(struct lp_build_sample_context *bld,
270                      LLVMValueRef coord)
271{
272   struct lp_build_context *coord_bld = &bld->coord_bld;
273   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
274   LLVMValueRef fract, flr, isOdd;
275
276   /* fract = coord - floor(coord) */
277   fract = lp_build_sub(coord_bld, coord, lp_build_floor(coord_bld, coord));
278
279   /* flr = ifloor(coord); */
280   flr = lp_build_ifloor(coord_bld, coord);
281
282   /* isOdd = flr & 1 */
283   isOdd = LLVMBuildAnd(bld->builder, flr, int_coord_bld->one, "");
284
285   /* make coord positive or negative depending on isOdd */
286   coord = lp_build_set_sign(coord_bld, fract, isOdd);
287
288   /* convert isOdd to float */
289   isOdd = lp_build_int_to_float(coord_bld, isOdd);
290
291   /* add isOdd to coord */
292   coord = lp_build_add(coord_bld, coord, isOdd);
293
294   return coord;
295}
296
297
298/**
299 * We only support a few wrap modes in lp_build_sample_wrap_int() at this time.
300 * Return whether the given mode is supported by that function.
301 */
302static boolean
303is_simple_wrap_mode(unsigned mode)
304{
305   switch (mode) {
306   case PIPE_TEX_WRAP_REPEAT:
307   case PIPE_TEX_WRAP_CLAMP:
308   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
309      return TRUE;
310   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
311   default:
312      return FALSE;
313   }
314}
315
316
317/**
318 * Build LLVM code for texture wrap mode, for scaled integer texcoords.
319 * \param coord  the incoming texcoord (s,t,r or q) scaled to the texture size
320 * \param length  the texture size along one dimension
321 * \param is_pot  if TRUE, length is a power of two
322 * \param wrap_mode  one of PIPE_TEX_WRAP_x
323 */
324static LLVMValueRef
325lp_build_sample_wrap_int(struct lp_build_sample_context *bld,
326                         LLVMValueRef coord,
327                         LLVMValueRef length,
328                         boolean is_pot,
329                         unsigned wrap_mode)
330{
331   struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
332   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
333   LLVMValueRef length_minus_one;
334
335   length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
336
337   switch(wrap_mode) {
338   case PIPE_TEX_WRAP_REPEAT:
339      if(is_pot)
340         coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, "");
341      else
342         /* Signed remainder won't give the right results for negative
343          * dividends but unsigned remainder does.*/
344         coord = LLVMBuildURem(bld->builder, coord, length, "");
345      break;
346
347   case PIPE_TEX_WRAP_CLAMP:
348   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
349   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
350      coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
351      coord = lp_build_min(int_coord_bld, coord, length_minus_one);
352      break;
353
354   case PIPE_TEX_WRAP_MIRROR_REPEAT:
355   case PIPE_TEX_WRAP_MIRROR_CLAMP:
356   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
357   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
358      /* FIXME */
359      _debug_printf("llvmpipe: failed to translate texture wrap mode %s\n",
360                    util_dump_tex_wrap(wrap_mode, TRUE));
361      coord = lp_build_max(uint_coord_bld, coord, uint_coord_bld->zero);
362      coord = lp_build_min(uint_coord_bld, coord, length_minus_one);
363      break;
364
365   default:
366      assert(0);
367   }
368
369   return coord;
370}
371
372
373/**
374 * Build LLVM code for texture wrap mode for linear filtering.
375 * \param x0_out  returns first integer texcoord
376 * \param x1_out  returns second integer texcoord
377 * \param weight_out  returns linear interpolation weight
378 */
379static void
380lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
381                            LLVMValueRef coord,
382                            LLVMValueRef length,
383                            boolean is_pot,
384                            unsigned wrap_mode,
385                            LLVMValueRef *x0_out,
386                            LLVMValueRef *x1_out,
387                            LLVMValueRef *weight_out)
388{
389   struct lp_build_context *coord_bld = &bld->coord_bld;
390   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
391   struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
392   LLVMValueRef two = lp_build_const_scalar(coord_bld->type, 2.0);
393   LLVMValueRef half = lp_build_const_scalar(coord_bld->type, 0.5);
394   LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
395   LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
396   LLVMValueRef length_f_minus_one = lp_build_sub(coord_bld, length_f, coord_bld->one);
397   LLVMValueRef coord0, coord1, weight;
398
399   switch(wrap_mode) {
400   case PIPE_TEX_WRAP_REPEAT:
401      /* mul by size and subtract 0.5 */
402      coord = lp_build_mul(coord_bld, coord, length_f);
403      coord = lp_build_sub(coord_bld, coord, half);
404      /* convert to int */
405      coord0 = lp_build_ifloor(coord_bld, coord);
406      coord1 = lp_build_add(uint_coord_bld, coord0, uint_coord_bld->one);
407      /* compute lerp weight */
408      weight = lp_build_fract(coord_bld, coord);
409      /* repeat wrap */
410      if (is_pot) {
411         coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
412         coord1 = LLVMBuildAnd(bld->builder, coord1, length_minus_one, "");
413      }
414      else {
415         /* Signed remainder won't give the right results for negative
416          * dividends but unsigned remainder does.*/
417         coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
418         coord1 = LLVMBuildURem(bld->builder, coord1, length, "");
419      }
420      break;
421
422   case PIPE_TEX_WRAP_CLAMP:
423      if (bld->static_state->normalized_coords) {
424         coord = lp_build_mul(coord_bld, coord, length_f);
425      }
426      weight = lp_build_fract(coord_bld, coord);
427      coord0 = lp_build_clamp(coord_bld, coord, coord_bld->zero,
428                              length_f_minus_one);
429      coord1 = lp_build_add(coord_bld, coord, coord_bld->one);
430      coord1 = lp_build_clamp(coord_bld, coord1, coord_bld->zero,
431                              length_f_minus_one);
432      coord0 = lp_build_ifloor(coord_bld, coord0);
433      coord1 = lp_build_ifloor(coord_bld, coord1);
434      break;
435
436   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
437      if (bld->static_state->normalized_coords) {
438         /* clamp to [0,1] */
439         coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, coord_bld->one);
440         /* mul by tex size and subtract 0.5 */
441         coord = lp_build_mul(coord_bld, coord, length_f);
442         coord = lp_build_sub(coord_bld, coord, half);
443      }
444      else {
445         LLVMValueRef min, max;
446         /* clamp to [0.5, length - 0.5] */
447         min = lp_build_const_scalar(coord_bld->type, 0.5F);
448         max = lp_build_sub(coord_bld, length_f, min);
449         coord = lp_build_clamp(coord_bld, coord, min, max);
450      }
451      /* compute lerp weight */
452      weight = lp_build_fract(coord_bld, coord);
453      /* coord0 = floor(coord); */
454      coord0 = lp_build_ifloor(coord_bld, coord);
455      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
456      /* coord0 = max(coord0, 0) */
457      coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
458      /* coord1 = min(coord1, length-1) */
459      coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
460      break;
461
462   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
463      {
464         LLVMValueRef min, max;
465         if (bld->static_state->normalized_coords) {
466            /* min = -1.0 / (2 * length) = -0.5 / length */
467            min = lp_build_mul(coord_bld,
468                               lp_build_const_scalar(coord_bld->type, -0.5F),
469                               lp_build_rcp(coord_bld, length_f));
470            /* max = 1.0 - min */
471            max = lp_build_sub(coord_bld, coord_bld->one, min);
472            /* coord = clamp(coord, min, max) */
473            coord = lp_build_clamp(coord_bld, coord, min, max);
474            /* scale coord to length (and sub 0.5?) */
475            coord = lp_build_mul(coord_bld, coord, length_f);
476            coord = lp_build_sub(coord_bld, coord, half);
477         }
478         else {
479            /* clamp to [-0.5, length + 0.5] */
480            min = lp_build_const_scalar(coord_bld->type, -0.5F);
481            max = lp_build_sub(coord_bld, length_f, min);
482            coord = lp_build_clamp(coord_bld, coord, min, max);
483            coord = lp_build_sub(coord_bld, coord, half);
484         }
485         /* compute lerp weight */
486         weight = lp_build_fract(coord_bld, coord);
487         /* convert to int */
488         coord0 = lp_build_ifloor(coord_bld, coord);
489         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
490      }
491      break;
492
493   case PIPE_TEX_WRAP_MIRROR_REPEAT:
494      /* compute mirror function */
495      coord = lp_build_coord_mirror(bld, coord);
496
497      /* scale coord to length */
498      coord = lp_build_mul(coord_bld, coord, length_f);
499      coord = lp_build_sub(coord_bld, coord, half);
500
501      /* compute lerp weight */
502      weight = lp_build_fract(coord_bld, coord);
503
504      /* convert to int coords */
505      coord0 = lp_build_ifloor(coord_bld, coord);
506      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
507
508      /* coord0 = max(coord0, 0) */
509      coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
510      /* coord1 = min(coord1, length-1) */
511      coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
512      break;
513
514   case PIPE_TEX_WRAP_MIRROR_CLAMP:
515      {
516         LLVMValueRef min, max;
517         /* min = 1.0 / (2 * length) */
518         min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
519         /* max = 1.0 - min */
520         max = lp_build_sub(coord_bld, coord_bld->one, min);
521
522         coord = lp_build_abs(coord_bld, coord);
523         coord = lp_build_clamp(coord_bld, coord, min, max);
524         coord = lp_build_mul(coord_bld, coord, length_f);
525         if(0)coord = lp_build_sub(coord_bld, coord, half);
526         weight = lp_build_fract(coord_bld, coord);
527         coord0 = lp_build_ifloor(coord_bld, coord);
528         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
529      }
530      break;
531
532   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
533      {
534         LLVMValueRef min, max;
535         /* min = 1.0 / (2 * length) */
536         min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
537         /* max = 1.0 - min */
538         max = lp_build_sub(coord_bld, coord_bld->one, min);
539
540         coord = lp_build_abs(coord_bld, coord);
541         coord = lp_build_clamp(coord_bld, coord, min, max);
542         coord = lp_build_mul(coord_bld, coord, length_f);
543         coord = lp_build_sub(coord_bld, coord, half);
544         weight = lp_build_fract(coord_bld, coord);
545         coord0 = lp_build_ifloor(coord_bld, coord);
546         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
547      }
548      break;
549
550   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
551      {
552         LLVMValueRef min, max;
553         /* min = -1.0 / (2 * length) = -0.5 / length */
554         min = lp_build_mul(coord_bld,
555                            lp_build_const_scalar(coord_bld->type, -0.5F),
556                            lp_build_rcp(coord_bld, length_f));
557         /* max = 1.0 - min */
558         max = lp_build_sub(coord_bld, coord_bld->one, min);
559
560         coord = lp_build_abs(coord_bld, coord);
561         coord = lp_build_clamp(coord_bld, coord, min, max);
562         coord = lp_build_mul(coord_bld, coord, length_f);
563         coord = lp_build_sub(coord_bld, coord, half);
564         weight = lp_build_fract(coord_bld, coord);
565         coord0 = lp_build_ifloor(coord_bld, coord);
566         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
567      }
568      break;
569
570   default:
571      assert(0);
572      coord0 = NULL;
573      coord1 = NULL;
574      weight = NULL;
575   }
576
577   *x0_out = coord0;
578   *x1_out = coord1;
579   *weight_out = weight;
580}
581
582
583/**
584 * Build LLVM code for texture wrap mode for nearest filtering.
585 * \param coord  the incoming texcoord (nominally in [0,1])
586 * \param length  the texture size along one dimension, as int
587 * \param is_pot  if TRUE, length is a power of two
588 * \param wrap_mode  one of PIPE_TEX_WRAP_x
589 */
590static LLVMValueRef
591lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
592                             LLVMValueRef coord,
593                             LLVMValueRef length,
594                             boolean is_pot,
595                             unsigned wrap_mode)
596{
597   struct lp_build_context *coord_bld = &bld->coord_bld;
598   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
599   struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
600   LLVMValueRef two = lp_build_const_scalar(coord_bld->type, 2.0);
601   LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
602   LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
603   LLVMValueRef length_f_minus_one = lp_build_sub(coord_bld, length_f, coord_bld->one);
604   LLVMValueRef icoord;
605
606   switch(wrap_mode) {
607   case PIPE_TEX_WRAP_REPEAT:
608      coord = lp_build_mul(coord_bld, coord, length_f);
609      icoord = lp_build_ifloor(coord_bld, coord);
610      if (is_pot)
611         icoord = LLVMBuildAnd(bld->builder, icoord, length_minus_one, "");
612      else
613         /* Signed remainder won't give the right results for negative
614          * dividends but unsigned remainder does.*/
615         icoord = LLVMBuildURem(bld->builder, icoord, length, "");
616      break;
617
618   case PIPE_TEX_WRAP_CLAMP:
619      /* mul by size */
620      if (bld->static_state->normalized_coords) {
621         coord = lp_build_mul(coord_bld, coord, length_f);
622      }
623      /* floor */
624      icoord = lp_build_ifloor(coord_bld, coord);
625      /* clamp to [0, size-1].  Note: int coord builder type */
626      icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
627                              length_minus_one);
628      break;
629
630   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
631      {
632         LLVMValueRef min, max;
633         if (bld->static_state->normalized_coords) {
634            /* min = 1.0 / (2 * length) */
635            min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
636            /* max = length - min */
637            max = lp_build_sub(coord_bld, length_f, min);
638            /* scale coord to length */
639            coord = lp_build_mul(coord_bld, coord, length_f);
640         }
641         else {
642            /* clamp to [0.5, length - 0.5] */
643            min = lp_build_const_scalar(coord_bld->type, 0.5F);
644            max = lp_build_sub(coord_bld, length_f, min);
645         }
646         /* coord = clamp(coord, min, max) */
647         coord = lp_build_clamp(coord_bld, coord, min, max);
648         icoord = lp_build_ifloor(coord_bld, coord);
649      }
650      break;
651
652   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
653      /* Note: this is the same as CLAMP_TO_EDGE, except min = -min */
654      {
655         LLVMValueRef min, max;
656         if (bld->static_state->normalized_coords) {
657            /* min = -1.0 / (2 * length) = -0.5 / length */
658            min = lp_build_mul(coord_bld,
659                               lp_build_const_scalar(coord_bld->type, -0.5F),
660                               lp_build_rcp(coord_bld, length_f));
661            /* max = length - min */
662            max = lp_build_sub(coord_bld, length_f, min);
663            /* scale coord to length */
664            coord = lp_build_mul(coord_bld, coord, length_f);
665         }
666         else {
667            /* clamp to [-0.5, length + 0.5] */
668            min = lp_build_const_scalar(coord_bld->type, -0.5F);
669            max = lp_build_sub(coord_bld, length_f, min);
670         }
671         /* coord = clamp(coord, min, max) */
672         coord = lp_build_clamp(coord_bld, coord, min, max);
673         icoord = lp_build_ifloor(coord_bld, coord);
674      }
675      break;
676
677   case PIPE_TEX_WRAP_MIRROR_REPEAT:
678      {
679         LLVMValueRef min, max;
680         /* min = 1.0 / (2 * length) */
681         min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
682         /* max = length - min */
683         max = lp_build_sub(coord_bld, length_f, min);
684
685         /* compute mirror function */
686         coord = lp_build_coord_mirror(bld, coord);
687
688         /* scale coord to length */
689         coord = lp_build_mul(coord_bld, coord, length_f);
690
691         /* coord = clamp(coord, min, max) */
692         coord = lp_build_clamp(coord_bld, coord, min, max);
693         icoord = lp_build_ifloor(coord_bld, coord);
694      }
695      break;
696
697   case PIPE_TEX_WRAP_MIRROR_CLAMP:
698      coord = lp_build_abs(coord_bld, coord);
699      coord = lp_build_mul(coord_bld, coord, length_f);
700      coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f_minus_one);
701      icoord = lp_build_ifloor(coord_bld, coord);
702      break;
703
704   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
705      {
706         LLVMValueRef min, max;
707         /* min = 1.0 / (2 * length) */
708         min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
709         /* max = length - min */
710         max = lp_build_sub(coord_bld, length_f, min);
711
712         coord = lp_build_abs(coord_bld, coord);
713         coord = lp_build_mul(coord_bld, coord, length_f);
714         coord = lp_build_clamp(coord_bld, coord, min, max);
715         icoord = lp_build_ifloor(coord_bld, coord);
716      }
717      break;
718
719   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
720      {
721         LLVMValueRef min, max;
722         /* min = 1.0 / (2 * length) */
723         min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
724         min = lp_build_negate(coord_bld, min);
725         /* max = length - min */
726         max = lp_build_sub(coord_bld, length_f, min);
727
728         coord = lp_build_abs(coord_bld, coord);
729         coord = lp_build_mul(coord_bld, coord, length_f);
730         coord = lp_build_clamp(coord_bld, coord, min, max);
731         icoord = lp_build_ifloor(coord_bld, coord);
732      }
733      break;
734
735   default:
736      assert(0);
737      icoord = NULL;
738   }
739
740   return icoord;
741}
742
743
744/**
745 * Codegen equivalent for u_minify().
746 * Return max(1, base_size >> level);
747 */
748static LLVMValueRef
749lp_build_minify(struct lp_build_sample_context *bld,
750                LLVMValueRef base_size,
751                LLVMValueRef level)
752{
753   LLVMValueRef size = LLVMBuildAShr(bld->builder, base_size, level, "minify");
754   size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one);
755   return size;
756}
757
758
759static int
760texture_dims(enum pipe_texture_target tex)
761{
762   switch (tex) {
763   case PIPE_TEXTURE_1D:
764      return 1;
765   case PIPE_TEXTURE_2D:
766   case PIPE_TEXTURE_CUBE:
767      return 2;
768   case PIPE_TEXTURE_3D:
769      return 3;
770   default:
771      assert(0 && "bad texture target in texture_dims()");
772      return 2;
773   }
774}
775
776
777/**
778 * Generate code to compute texture level of detail (lambda).
779 * \param s  vector of texcoord s values
780 * \param t  vector of texcoord t values
781 * \param r  vector of texcoord r values
782 * \param width  scalar int texture width
783 * \param height  scalar int texture height
784 * \param depth  scalar int texture depth
785 */
786static LLVMValueRef
787lp_build_lod_selector(struct lp_build_sample_context *bld,
788                      LLVMValueRef s,
789                      LLVMValueRef t,
790                      LLVMValueRef r,
791                      LLVMValueRef width,
792                      LLVMValueRef height,
793                      LLVMValueRef depth)
794
795{
796   const int dims = texture_dims(bld->static_state->target);
797   struct lp_build_context *coord_bld = &bld->coord_bld;
798   struct lp_build_context *float_bld = &bld->float_bld;
799   LLVMValueRef lod_bias = LLVMConstReal(LLVMFloatType(), bld->static_state->lod_bias);
800   LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod);
801   LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(), bld->static_state->max_lod);
802
803   LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
804   LLVMValueRef index1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
805   LLVMValueRef index2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
806
807   LLVMValueRef s0, s1, s2;
808   LLVMValueRef t0, t1, t2;
809   LLVMValueRef r0, r1, r2;
810   LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy;
811   LLVMValueRef rho, lod;
812
813   /*
814    * dsdx = abs(s[1] - s[0]);
815    * dsdy = abs(s[2] - s[0]);
816    * dtdx = abs(t[1] - t[0]);
817    * dtdy = abs(t[2] - t[0]);
818    * drdx = abs(r[1] - r[0]);
819    * drdy = abs(r[2] - r[0]);
820    * XXX we're assuming a four-element quad in 2x2 layout here.
821    */
822   s0 = LLVMBuildExtractElement(bld->builder, s, index0, "s0");
823   s1 = LLVMBuildExtractElement(bld->builder, s, index1, "s1");
824   s2 = LLVMBuildExtractElement(bld->builder, s, index2, "s2");
825   dsdx = LLVMBuildSub(bld->builder, s1, s0, "");
826   dsdx = lp_build_abs(float_bld, dsdx);
827   dsdy = LLVMBuildSub(bld->builder, s2, s0, "");
828   dsdy = lp_build_abs(float_bld, dsdy);
829   if (dims > 1) {
830      t0 = LLVMBuildExtractElement(bld->builder, t, index0, "t0");
831      t1 = LLVMBuildExtractElement(bld->builder, t, index1, "t1");
832      t2 = LLVMBuildExtractElement(bld->builder, t, index2, "t2");
833      dtdx = LLVMBuildSub(bld->builder, t1, t0, "");
834      dtdx = lp_build_abs(float_bld, dtdx);
835      dtdy = LLVMBuildSub(bld->builder, t2, t0, "");
836      dtdy = lp_build_abs(float_bld, dtdy);
837      if (dims > 2) {
838         r0 = LLVMBuildExtractElement(bld->builder, r, index0, "r0");
839         r1 = LLVMBuildExtractElement(bld->builder, r, index1, "r1");
840         r2 = LLVMBuildExtractElement(bld->builder, r, index2, "r2");
841         drdx = LLVMBuildSub(bld->builder, r1, r0, "");
842         drdx = lp_build_abs(float_bld, drdx);
843         drdy = LLVMBuildSub(bld->builder, r2, r0, "");
844         drdy = lp_build_abs(float_bld, drdy);
845      }
846   }
847
848   /* Compute rho = max of all partial derivatives scaled by texture size.
849    * XXX this could be vectorized somewhat
850    */
851   rho = LLVMBuildMul(bld->builder,
852                      lp_build_max(float_bld, dsdx, dsdy),
853                      lp_build_int_to_float(float_bld, width), "");
854   if (dims > 1) {
855      LLVMValueRef max;
856      max = LLVMBuildMul(bld->builder,
857                         lp_build_max(float_bld, dtdx, dtdy),
858                         lp_build_int_to_float(float_bld, height), "");
859      rho = lp_build_max(float_bld, rho, max);
860      if (dims > 2) {
861         max = LLVMBuildMul(bld->builder,
862                            lp_build_max(float_bld, drdx, drdy),
863                            lp_build_int_to_float(float_bld, depth), "");
864         rho = lp_build_max(float_bld, rho, max);
865      }
866   }
867
868   /* compute lod = log2(rho) */
869   lod = lp_build_log2(float_bld, rho);
870
871   /* add lod bias */
872   lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "LOD bias");
873
874   /* clamp lod */
875   lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
876
877   return lod;
878}
879
880
881/**
882 * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer
883 * mipmap level index.
884 * Note: this is all scalar code.
885 * \param lod  scalar float texture level of detail
886 * \param level_out  returns integer
887 */
888static void
889lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
890                           unsigned unit,
891                           LLVMValueRef lod,
892                           LLVMValueRef *level_out)
893{
894   struct lp_build_context *float_bld = &bld->float_bld;
895   struct lp_build_context *int_bld = &bld->int_bld;
896   LLVMValueRef last_level, level;
897
898   LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0);
899
900   last_level = bld->dynamic_state->last_level(bld->dynamic_state,
901                                               bld->builder, unit);
902
903   /* convert float lod to integer */
904   level = lp_build_iround(float_bld, lod);
905
906   /* clamp level to legal range of levels */
907   *level_out = lp_build_clamp(int_bld, level, zero, last_level);
908}
909
910
911/**
912 * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to integer to
913 * two (adjacent) mipmap level indexes.  Later, we'll sample from those
914 * two mipmap levels and interpolate between them.
915 */
916static void
917lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
918                           unsigned unit,
919                           LLVMValueRef lod,
920                           LLVMValueRef *level0_out,
921                           LLVMValueRef *level1_out,
922                           LLVMValueRef *weight_out)
923{
924   struct lp_build_context *coord_bld = &bld->coord_bld;
925   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
926   LLVMValueRef last_level, level;
927
928   last_level = bld->dynamic_state->last_level(bld->dynamic_state,
929                                               bld->builder, unit);
930
931   /* convert float lod to integer */
932   level = lp_build_ifloor(coord_bld, lod);
933
934   /* compute level 0 and clamp to legal range of levels */
935   *level0_out = lp_build_clamp(int_coord_bld, level,
936                                int_coord_bld->zero,
937                                last_level);
938   /* compute level 1 and clamp to legal range of levels */
939   *level1_out = lp_build_add(int_coord_bld, *level0_out, int_coord_bld->one);
940   *level1_out = lp_build_min(int_coord_bld, *level1_out, int_coord_bld->zero);
941
942   *weight_out = lp_build_fract(coord_bld, lod);
943}
944
945
946
947/**
948 * Sample 2D texture with nearest filtering, no mipmapping.
949 */
950static void
951lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld,
952                               LLVMValueRef s,
953                               LLVMValueRef t,
954                               LLVMValueRef width,
955                               LLVMValueRef height,
956                               LLVMValueRef stride,
957                               LLVMValueRef data_array,
958                               LLVMValueRef *texel)
959{
960   LLVMValueRef x, y;
961   LLVMValueRef data_ptr;
962
963   x = lp_build_sample_wrap_nearest(bld, s, width,
964                                    bld->static_state->pot_width,
965                                    bld->static_state->wrap_s);
966   y = lp_build_sample_wrap_nearest(bld, t, height,
967                                    bld->static_state->pot_height,
968                                    bld->static_state->wrap_t);
969
970   lp_build_name(x, "tex.x.wrapped");
971   lp_build_name(y, "tex.y.wrapped");
972
973   /* get pointer to mipmap level 0 data */
974   data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
975
976   lp_build_sample_texel_soa(bld, width, height, x, y, stride, data_ptr, texel);
977}
978
979
980/**
981 * Sample 2D texture with nearest filtering, nearest mipmap.
982 */
983static void
984lp_build_sample_2d_nearest_mip_nearest_soa(struct lp_build_sample_context *bld,
985                                           unsigned unit,
986                                           LLVMValueRef s,
987                                           LLVMValueRef t,
988                                           LLVMValueRef width,
989                                           LLVMValueRef height,
990                                           LLVMValueRef width_vec,
991                                           LLVMValueRef height_vec,
992                                           LLVMValueRef stride,
993                                           LLVMValueRef data_array,
994                                           LLVMValueRef *texel)
995{
996   LLVMValueRef x, y;
997   LLVMValueRef lod, ilevel, ilevel_vec;
998   LLVMValueRef data_ptr;
999
1000   /* compute float LOD */
1001   lod = lp_build_lod_selector(bld, s, t, NULL, width, height, NULL);
1002
1003   /* convert LOD to int */
1004   lp_build_nearest_mip_level(bld, unit, lod, &ilevel);
1005
1006   ilevel_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel);
1007
1008   /* compute width_vec, height at mipmap level 'ilevel' */
1009   width_vec = lp_build_minify(bld, width_vec, ilevel_vec);
1010   height_vec = lp_build_minify(bld, height_vec, ilevel_vec);
1011   stride = lp_build_minify(bld, stride, ilevel_vec);
1012
1013   x = lp_build_sample_wrap_nearest(bld, s, width_vec,
1014                                    bld->static_state->pot_width,
1015                                    bld->static_state->wrap_s);
1016   y = lp_build_sample_wrap_nearest(bld, t, height_vec,
1017                                    bld->static_state->pot_height,
1018                                    bld->static_state->wrap_t);
1019
1020   lp_build_name(x, "tex.x.wrapped");
1021   lp_build_name(y, "tex.y.wrapped");
1022
1023   /* get pointer to mipmap level [ilevel] data */
1024   if (0)
1025      data_ptr = lp_build_get_mipmap_level(bld, data_array, ilevel);
1026   else
1027      data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
1028
1029   lp_build_sample_texel_soa(bld, width_vec, height_vec, x, y, stride, data_ptr, texel);
1030}
1031
1032
1033/**
1034 * Sample 2D texture with bilinear filtering.
1035 */
1036static void
1037lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld,
1038                              LLVMValueRef s,
1039                              LLVMValueRef t,
1040                              LLVMValueRef width,
1041                              LLVMValueRef height,
1042                              LLVMValueRef stride,
1043                              LLVMValueRef data_array,
1044                              LLVMValueRef *texel)
1045{
1046   LLVMValueRef s_fpart;
1047   LLVMValueRef t_fpart;
1048   LLVMValueRef x0, x1;
1049   LLVMValueRef y0, y1;
1050   LLVMValueRef neighbors[2][2][4];
1051   LLVMValueRef data_ptr;
1052   unsigned chan;
1053
1054   lp_build_sample_wrap_linear(bld, s, width, bld->static_state->pot_width,
1055                               bld->static_state->wrap_s, &x0, &x1, &s_fpart);
1056   lp_build_sample_wrap_linear(bld, t, height, bld->static_state->pot_height,
1057                               bld->static_state->wrap_t, &y0, &y1, &t_fpart);
1058
1059   /* get pointer to mipmap level 0 data */
1060   data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
1061
1062   lp_build_sample_texel_soa(bld, width, height, x0, y0, stride, data_ptr, neighbors[0][0]);
1063   lp_build_sample_texel_soa(bld, width, height, x1, y0, stride, data_ptr, neighbors[0][1]);
1064   lp_build_sample_texel_soa(bld, width, height, x0, y1, stride, data_ptr, neighbors[1][0]);
1065   lp_build_sample_texel_soa(bld, width, height, x1, y1, stride, data_ptr, neighbors[1][1]);
1066
1067   /* TODO: Don't interpolate missing channels */
1068   for(chan = 0; chan < 4; ++chan) {
1069      texel[chan] = lp_build_lerp_2d(&bld->texel_bld,
1070                                     s_fpart, t_fpart,
1071                                     neighbors[0][0][chan],
1072                                     neighbors[0][1][chan],
1073                                     neighbors[1][0][chan],
1074                                     neighbors[1][1][chan]);
1075   }
1076}
1077
1078
1079static void
1080lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder,
1081                          struct lp_type dst_type,
1082                          LLVMValueRef packed,
1083                          LLVMValueRef *rgba)
1084{
1085   LLVMValueRef mask = lp_build_int_const_scalar(dst_type, 0xff);
1086   unsigned chan;
1087
1088   /* Decode the input vector components */
1089   for (chan = 0; chan < 4; ++chan) {
1090      unsigned start = chan*8;
1091      unsigned stop = start + 8;
1092      LLVMValueRef input;
1093
1094      input = packed;
1095
1096      if(start)
1097         input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(dst_type, start), "");
1098
1099      if(stop < 32)
1100         input = LLVMBuildAnd(builder, input, mask, "");
1101
1102      input = lp_build_unsigned_norm_to_float(builder, 8, dst_type, input);
1103
1104      rgba[chan] = input;
1105   }
1106}
1107
1108
1109static void
1110lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
1111                              LLVMValueRef s,
1112                              LLVMValueRef t,
1113                              LLVMValueRef width,
1114                              LLVMValueRef height,
1115                              LLVMValueRef stride,
1116                              LLVMValueRef data_array,
1117                              LLVMValueRef *texel)
1118{
1119   LLVMBuilderRef builder = bld->builder;
1120   struct lp_build_context i32, h16, u8n;
1121   LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
1122   LLVMValueRef i32_c8, i32_c128, i32_c255;
1123   LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
1124   LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
1125   LLVMValueRef x0, x1;
1126   LLVMValueRef y0, y1;
1127   LLVMValueRef neighbors[2][2];
1128   LLVMValueRef neighbors_lo[2][2];
1129   LLVMValueRef neighbors_hi[2][2];
1130   LLVMValueRef packed, packed_lo, packed_hi;
1131   LLVMValueRef unswizzled[4];
1132
1133   lp_build_context_init(&i32, builder, lp_type_int_vec(32));
1134   lp_build_context_init(&h16, builder, lp_type_ufixed(16));
1135   lp_build_context_init(&u8n, builder, lp_type_unorm(8));
1136
1137   i32_vec_type = lp_build_vec_type(i32.type);
1138   h16_vec_type = lp_build_vec_type(h16.type);
1139   u8n_vec_type = lp_build_vec_type(u8n.type);
1140
1141   if (bld->static_state->normalized_coords) {
1142      LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
1143      LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width, coord_vec_type, "");
1144      LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height, coord_vec_type, "");
1145      s = lp_build_mul(&bld->coord_bld, s, fp_width);
1146      t = lp_build_mul(&bld->coord_bld, t, fp_height);
1147   }
1148
1149   /* scale coords by 256 (8 fractional bits) */
1150   s = lp_build_mul_imm(&bld->coord_bld, s, 256);
1151   t = lp_build_mul_imm(&bld->coord_bld, t, 256);
1152
1153   /* convert float to int */
1154   s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
1155   t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
1156
1157   /* subtract 0.5 (add -128) */
1158   i32_c128 = lp_build_int_const_scalar(i32.type, -128);
1159   s = LLVMBuildAdd(builder, s, i32_c128, "");
1160   t = LLVMBuildAdd(builder, t, i32_c128, "");
1161
1162   /* compute floor (shift right 8) */
1163   i32_c8 = lp_build_int_const_scalar(i32.type, 8);
1164   s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
1165   t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
1166
1167   /* compute fractional part (AND with 0xff) */
1168   i32_c255 = lp_build_int_const_scalar(i32.type, 255);
1169   s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
1170   t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
1171
1172   x0 = s_ipart;
1173   y0 = t_ipart;
1174
1175   x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one);
1176   y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one);
1177
1178   x0 = lp_build_sample_wrap_int(bld, x0, width,  bld->static_state->pot_width,
1179                                 bld->static_state->wrap_s);
1180   y0 = lp_build_sample_wrap_int(bld, y0, height, bld->static_state->pot_height,
1181                                 bld->static_state->wrap_t);
1182
1183   x1 = lp_build_sample_wrap_int(bld, x1, width,  bld->static_state->pot_width,
1184                                 bld->static_state->wrap_s);
1185   y1 = lp_build_sample_wrap_int(bld, y1, height, bld->static_state->pot_height,
1186                                 bld->static_state->wrap_t);
1187
1188   /*
1189    * Transform 4 x i32 in
1190    *
1191    *   s_fpart = {s0, s1, s2, s3}
1192    *
1193    * into 8 x i16
1194    *
1195    *   s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
1196    *
1197    * into two 8 x i16
1198    *
1199    *   s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
1200    *   s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
1201    *
1202    * and likewise for t_fpart. There is no risk of loosing precision here
1203    * since the fractional parts only use the lower 8bits.
1204    */
1205
1206   s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
1207   t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
1208
1209   {
1210      LLVMTypeRef elem_type = LLVMInt32Type();
1211      LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
1212      LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
1213      LLVMValueRef shuffle_lo;
1214      LLVMValueRef shuffle_hi;
1215      unsigned i, j;
1216
1217      for(j = 0; j < h16.type.length; j += 4) {
1218         unsigned subindex = util_cpu_caps.little_endian ? 0 : 1;
1219         LLVMValueRef index;
1220
1221         index = LLVMConstInt(elem_type, j/2 + subindex, 0);
1222         for(i = 0; i < 4; ++i)
1223            shuffles_lo[j + i] = index;
1224
1225         index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
1226         for(i = 0; i < 4; ++i)
1227            shuffles_hi[j + i] = index;
1228      }
1229
1230      shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
1231      shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);
1232
1233      s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_lo, "");
1234      t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_lo, "");
1235      s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_hi, "");
1236      t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, "");
1237   }
1238
1239   /*
1240    * Fetch the pixels as 4 x 32bit (rgba order might differ):
1241    *
1242    *   rgba0 rgba1 rgba2 rgba3
1243    *
1244    * bit cast them into 16 x u8
1245    *
1246    *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
1247    *
1248    * unpack them into two 8 x i16:
1249    *
1250    *   r0 g0 b0 a0 r1 g1 b1 a1
1251    *   r2 g2 b2 a2 r3 g3 b3 a3
1252    *
1253    * The higher 8 bits of the resulting elements will be zero.
1254    */
1255
1256   neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_array);
1257   neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_array);
1258   neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_array);
1259   neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_array);
1260
1261   neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, "");
1262   neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, "");
1263   neighbors[1][0] = LLVMBuildBitCast(builder, neighbors[1][0], u8n_vec_type, "");
1264   neighbors[1][1] = LLVMBuildBitCast(builder, neighbors[1][1], u8n_vec_type, "");
1265
1266   lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][0], &neighbors_lo[0][0], &neighbors_hi[0][0]);
1267   lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][1], &neighbors_lo[0][1], &neighbors_hi[0][1]);
1268   lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][0], &neighbors_lo[1][0], &neighbors_hi[1][0]);
1269   lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][1], &neighbors_lo[1][1], &neighbors_hi[1][1]);
1270
1271   /*
1272    * Linear interpolate with 8.8 fixed point.
1273    */
1274
1275   packed_lo = lp_build_lerp_2d(&h16,
1276                                s_fpart_lo, t_fpart_lo,
1277                                neighbors_lo[0][0],
1278                                neighbors_lo[0][1],
1279                                neighbors_lo[1][0],
1280                                neighbors_lo[1][1]);
1281
1282   packed_hi = lp_build_lerp_2d(&h16,
1283                                s_fpart_hi, t_fpart_hi,
1284                                neighbors_hi[0][0],
1285                                neighbors_hi[0][1],
1286                                neighbors_hi[1][0],
1287                                neighbors_hi[1][1]);
1288
1289   packed = lp_build_pack2(builder, h16.type, u8n.type, packed_lo, packed_hi);
1290
1291   /*
1292    * Convert to SoA and swizzle.
1293    */
1294
1295   packed = LLVMBuildBitCast(builder, packed, i32_vec_type, "");
1296
1297   lp_build_rgba8_to_f32_soa(bld->builder,
1298                             bld->texel_type,
1299                             packed, unswizzled);
1300
1301   lp_build_format_swizzle_soa(bld->format_desc,
1302                               bld->texel_type, unswizzled,
1303                               texel);
1304}
1305
1306
1307static void
1308lp_build_sample_compare(struct lp_build_sample_context *bld,
1309                        LLVMValueRef p,
1310                        LLVMValueRef *texel)
1311{
1312   struct lp_build_context *texel_bld = &bld->texel_bld;
1313   LLVMValueRef res;
1314   unsigned chan;
1315
1316   if(bld->static_state->compare_mode == PIPE_TEX_COMPARE_NONE)
1317      return;
1318
1319   /* TODO: Compare before swizzling, to avoid redundant computations */
1320   res = NULL;
1321   for(chan = 0; chan < 4; ++chan) {
1322      LLVMValueRef cmp;
1323      cmp = lp_build_cmp(texel_bld, bld->static_state->compare_func, p, texel[chan]);
1324      cmp = lp_build_select(texel_bld, cmp, texel_bld->one, texel_bld->zero);
1325
1326      if(res)
1327         res = lp_build_add(texel_bld, res, cmp);
1328      else
1329         res = cmp;
1330   }
1331
1332   assert(res);
1333   res = lp_build_mul(texel_bld, res, lp_build_const_scalar(texel_bld->type, 0.25));
1334
1335   /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
1336   for(chan = 0; chan < 3; ++chan)
1337      texel[chan] = res;
1338   texel[3] = texel_bld->one;
1339}
1340
1341
1342/**
1343 * Build texture sampling code.
1344 * 'texel' will return a vector of four LLVMValueRefs corresponding to
1345 * R, G, B, A.
1346 * \param type  vector float type to use for coords, etc.
1347 */
1348void
1349lp_build_sample_soa(LLVMBuilderRef builder,
1350                    const struct lp_sampler_static_state *static_state,
1351                    struct lp_sampler_dynamic_state *dynamic_state,
1352                    struct lp_type type,
1353                    unsigned unit,
1354                    unsigned num_coords,
1355                    const LLVMValueRef *coords,
1356                    LLVMValueRef lodbias,
1357                    LLVMValueRef *texel)
1358{
1359   struct lp_build_sample_context bld;
1360   LLVMValueRef width, width_vec;
1361   LLVMValueRef height, height_vec;
1362   LLVMValueRef stride, stride_vec;
1363   LLVMValueRef data_array;
1364   LLVMValueRef s;
1365   LLVMValueRef t;
1366   LLVMValueRef r;
1367   boolean done = FALSE;
1368
1369   (void) lp_build_lod_selector;   /* temporary to silence warning */
1370   (void) lp_build_nearest_mip_level;
1371   (void) lp_build_linear_mip_levels;
1372   (void) lp_build_minify;
1373
1374   /* Setup our build context */
1375   memset(&bld, 0, sizeof bld);
1376   bld.builder = builder;
1377   bld.static_state = static_state;
1378   bld.dynamic_state = dynamic_state;
1379   bld.format_desc = util_format_description(static_state->format);
1380
1381   bld.float_type = lp_type_float(32);
1382   bld.int_type = lp_type_int(32);
1383   bld.coord_type = type;
1384   bld.uint_coord_type = lp_uint_type(type);
1385   bld.int_coord_type = lp_int_type(type);
1386   bld.texel_type = type;
1387
1388   lp_build_context_init(&bld.float_bld, builder, bld.float_type);
1389   lp_build_context_init(&bld.int_bld, builder, bld.int_type);
1390   lp_build_context_init(&bld.coord_bld, builder, bld.coord_type);
1391   lp_build_context_init(&bld.uint_coord_bld, builder, bld.uint_coord_type);
1392   lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type);
1393   lp_build_context_init(&bld.texel_bld, builder, bld.texel_type);
1394
1395   /* Get the dynamic state */
1396   width = dynamic_state->width(dynamic_state, builder, unit);
1397   height = dynamic_state->height(dynamic_state, builder, unit);
1398   stride = dynamic_state->stride(dynamic_state, builder, unit);
1399   data_array = dynamic_state->data_ptr(dynamic_state, builder, unit);
1400   /* Note that data_array is an array[level] of pointers to texture images */
1401
1402   s = coords[0];
1403   t = coords[1];
1404   r = coords[2];
1405
1406   width_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, width);
1407   height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height);
1408   stride_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, stride);
1409
1410   if(static_state->target == PIPE_TEXTURE_1D)
1411      t = bld.coord_bld.zero;
1412
1413   switch (static_state->min_mip_filter) {
1414   case PIPE_TEX_MIPFILTER_NONE:
1415      break;
1416   case PIPE_TEX_MIPFILTER_NEAREST:
1417
1418      switch (static_state->min_img_filter) {
1419      case PIPE_TEX_FILTER_NEAREST:
1420         lp_build_sample_2d_nearest_mip_nearest_soa(&bld, unit,
1421                                                    s, t,
1422                                                    width, height,
1423                                                    width_vec, height_vec,
1424                                                    stride_vec,
1425                                                    data_array, texel);
1426         done = TRUE;
1427         break;
1428      }
1429
1430      break;
1431   case PIPE_TEX_MIPFILTER_LINEAR:
1432      break;
1433   default:
1434      assert(0 && "invalid mip filter");
1435   }
1436
1437   if (!done) {
1438      switch (static_state->min_img_filter) {
1439      case PIPE_TEX_FILTER_NEAREST:
1440         lp_build_sample_2d_nearest_soa(&bld, s, t, width_vec, height_vec,
1441                                        stride_vec, data_array, texel);
1442         break;
1443      case PIPE_TEX_FILTER_LINEAR:
1444         if(lp_format_is_rgba8(bld.format_desc) &&
1445            is_simple_wrap_mode(static_state->wrap_s) &&
1446            is_simple_wrap_mode(static_state->wrap_t))
1447            lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec,
1448                                          stride_vec, data_array, texel);
1449         else
1450            lp_build_sample_2d_linear_soa(&bld, s, t, width_vec, height_vec,
1451                                          stride_vec, data_array, texel);
1452         break;
1453      default:
1454         assert(0);
1455      }
1456   }
1457
1458   /* FIXME: respect static_state->min_mip_filter */;
1459   /* FIXME: respect static_state->mag_img_filter */;
1460
1461   lp_build_sample_compare(&bld, r, texel);
1462}
1463