lp_bld_sample_soa.c revision 67a2f98be79b368c316ebe6731112734d306b3f6
1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * @file
30 * Texture sampling -- SoA.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 */
34
35#include "pipe/p_defines.h"
36#include "pipe/p_state.h"
37#include "util/u_debug.h"
38#include "util/u_dump.h"
39#include "util/u_memory.h"
40#include "util/u_math.h"
41#include "util/u_format.h"
42#include "util/u_cpu_detect.h"
43#include "lp_bld_debug.h"
44#include "lp_bld_type.h"
45#include "lp_bld_const.h"
46#include "lp_bld_conv.h"
47#include "lp_bld_arit.h"
48#include "lp_bld_logic.h"
49#include "lp_bld_swizzle.h"
50#include "lp_bld_pack.h"
51#include "lp_bld_format.h"
52#include "lp_bld_sample.h"
53
54
55/**
56 * Keep all information for sampling code generation in a single place.
57 */
58struct lp_build_sample_context
59{
60   LLVMBuilderRef builder;
61
62   const struct lp_sampler_static_state *static_state;
63
64   struct lp_sampler_dynamic_state *dynamic_state;
65
66   const struct util_format_description *format_desc;
67
68   /** regular scalar float type */
69   struct lp_type float_type;
70   struct lp_build_context float_bld;
71
72   /** regular scalar float type */
73   struct lp_type int_type;
74   struct lp_build_context int_bld;
75
76   /** Incoming coordinates type and build context */
77   struct lp_type coord_type;
78   struct lp_build_context coord_bld;
79
80   /** Unsigned integer coordinates */
81   struct lp_type uint_coord_type;
82   struct lp_build_context uint_coord_bld;
83
84   /** Signed integer coordinates */
85   struct lp_type int_coord_type;
86   struct lp_build_context int_coord_bld;
87
88   /** Output texels type and build context */
89   struct lp_type texel_type;
90   struct lp_build_context texel_bld;
91};
92
93
94/**
95 * Does the given texture wrap mode allow sampling the texture border color?
96 * XXX maybe move this into gallium util code.
97 */
98static boolean
99wrap_mode_uses_border_color(unsigned mode)
100{
101   switch (mode) {
102   case PIPE_TEX_WRAP_REPEAT:
103   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
104   case PIPE_TEX_WRAP_MIRROR_REPEAT:
105   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
106      return FALSE;
107   case PIPE_TEX_WRAP_CLAMP:
108   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
109   case PIPE_TEX_WRAP_MIRROR_CLAMP:
110   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
111      return TRUE;
112   default:
113      assert(0 && "unexpected wrap mode");
114      return FALSE;
115   }
116}
117
118
119static LLVMValueRef
120lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
121                          LLVMValueRef data_array, LLVMValueRef level)
122{
123   LLVMValueRef indexes[2], data_ptr;
124   indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
125   indexes[1] = level;
126   data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, "");
127   data_ptr = LLVMBuildLoad(bld->builder, data_ptr, "");
128   return data_ptr;
129}
130
131
132static LLVMValueRef
133lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
134                                LLVMValueRef data_array, int level)
135{
136   LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
137   return lp_build_get_mipmap_level(bld, data_array, lvl);
138}
139
140
141static int
142texture_dims(enum pipe_texture_target tex)
143{
144   switch (tex) {
145   case PIPE_TEXTURE_1D:
146      return 1;
147   case PIPE_TEXTURE_2D:
148   case PIPE_TEXTURE_CUBE:
149      return 2;
150   case PIPE_TEXTURE_3D:
151      return 3;
152   default:
153      assert(0 && "bad texture target in texture_dims()");
154      return 2;
155   }
156}
157
158
159
160/**
161 * Generate code to fetch a texel from a texture at int coords (x, y, z).
162 * The computation depends on whether the texture is 1D, 2D or 3D.
163 * The result, texel, will be:
164 *   texel[0] = red values
165 *   texel[1] = green values
166 *   texel[2] = blue values
167 *   texel[3] = alpha values
168 */
169static void
170lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
171                          LLVMValueRef width,
172                          LLVMValueRef height,
173                          LLVMValueRef depth,
174                          LLVMValueRef x,
175                          LLVMValueRef y,
176                          LLVMValueRef z,
177                          LLVMValueRef y_stride,
178                          LLVMValueRef z_stride,
179                          LLVMValueRef data_ptr,
180                          LLVMValueRef *texel)
181{
182   const int dims = texture_dims(bld->static_state->target);
183   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
184   LLVMValueRef offset;
185   LLVMValueRef packed;
186   LLVMValueRef use_border = NULL;
187
188   /* use_border = x < 0 || x >= width || y < 0 || y >= height */
189   if (wrap_mode_uses_border_color(bld->static_state->wrap_s)) {
190      LLVMValueRef b1, b2;
191      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
192      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
193      use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
194   }
195
196   if (dims >= 2 && wrap_mode_uses_border_color(bld->static_state->wrap_t)) {
197      LLVMValueRef b1, b2;
198      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
199      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
200      if (use_border) {
201         use_border = LLVMBuildOr(bld->builder, use_border, b1, "ub_or_b1");
202         use_border = LLVMBuildOr(bld->builder, use_border, b2, "ub_or_b2");
203      }
204      else {
205         use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
206      }
207   }
208
209   if (dims == 3 && wrap_mode_uses_border_color(bld->static_state->wrap_r)) {
210      LLVMValueRef b1, b2;
211      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
212      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
213      if (use_border) {
214         use_border = LLVMBuildOr(bld->builder, use_border, b1, "ub_or_b1");
215         use_border = LLVMBuildOr(bld->builder, use_border, b2, "ub_or_b2");
216      }
217      else {
218         use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
219      }
220   }
221
222   /*
223    * Note: if we find an app which frequently samples the texture border
224    * we might want to implement a true conditional here to avoid sampling
225    * the texture whenever possible (since that's quite a bit of code).
226    * Ex:
227    *   if (use_border) {
228    *      texel = border_color;
229    *   }
230    *   else {
231    *      texel = sample_texture(coord);
232    *   }
233    * As it is now, we always sample the texture, then selectively replace
234    * the texel color results with the border color.
235    */
236
237   /* convert x,y,z coords to linear offset from start of texture, in bytes */
238   offset = lp_build_sample_offset(&bld->uint_coord_bld,
239                                   bld->format_desc,
240                                   x, y, z, y_stride, z_stride);
241
242   assert(bld->format_desc->block.width == 1);
243   assert(bld->format_desc->block.height == 1);
244   assert(bld->format_desc->block.bits <= bld->texel_type.width);
245
246   /* gather the texels from the texture */
247   packed = lp_build_gather(bld->builder,
248                            bld->texel_type.length,
249                            bld->format_desc->block.bits,
250                            bld->texel_type.width,
251                            data_ptr, offset);
252
253   texel[0] = texel[1] = texel[2] = texel[3] = NULL;
254
255   /* convert texels to float rgba */
256   lp_build_unpack_rgba_soa(bld->builder,
257                            bld->format_desc,
258                            bld->texel_type,
259                            packed, texel);
260
261   if (use_border) {
262      /* select texel color or border color depending on use_border */
263      int chan;
264      for (chan = 0; chan < 4; chan++) {
265         LLVMValueRef border_chan =
266            lp_build_const_scalar(bld->texel_type,
267                                  bld->static_state->border_color[chan]);
268         texel[chan] = lp_build_select(&bld->texel_bld, use_border,
269                                       border_chan, texel[chan]);
270      }
271   }
272}
273
274
275static LLVMValueRef
276lp_build_sample_packed(struct lp_build_sample_context *bld,
277                       LLVMValueRef x,
278                       LLVMValueRef y,
279                       LLVMValueRef y_stride,
280                       LLVMValueRef data_array)
281{
282   LLVMValueRef offset;
283   LLVMValueRef data_ptr;
284
285   offset = lp_build_sample_offset(&bld->uint_coord_bld,
286                                   bld->format_desc,
287                                   x, y, NULL, y_stride, NULL);
288
289   assert(bld->format_desc->block.width == 1);
290   assert(bld->format_desc->block.height == 1);
291   assert(bld->format_desc->block.bits <= bld->texel_type.width);
292
293   /* get pointer to mipmap level 0 data */
294   data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
295
296   return lp_build_gather(bld->builder,
297                          bld->texel_type.length,
298                          bld->format_desc->block.bits,
299                          bld->texel_type.width,
300                          data_ptr, offset);
301}
302
303
304/**
305 * Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes.
306 */
307static LLVMValueRef
308lp_build_coord_mirror(struct lp_build_sample_context *bld,
309                      LLVMValueRef coord)
310{
311   struct lp_build_context *coord_bld = &bld->coord_bld;
312   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
313   LLVMValueRef fract, flr, isOdd;
314
315   /* fract = coord - floor(coord) */
316   fract = lp_build_sub(coord_bld, coord, lp_build_floor(coord_bld, coord));
317
318   /* flr = ifloor(coord); */
319   flr = lp_build_ifloor(coord_bld, coord);
320
321   /* isOdd = flr & 1 */
322   isOdd = LLVMBuildAnd(bld->builder, flr, int_coord_bld->one, "");
323
324   /* make coord positive or negative depending on isOdd */
325   coord = lp_build_set_sign(coord_bld, fract, isOdd);
326
327   /* convert isOdd to float */
328   isOdd = lp_build_int_to_float(coord_bld, isOdd);
329
330   /* add isOdd to coord */
331   coord = lp_build_add(coord_bld, coord, isOdd);
332
333   return coord;
334}
335
336
337/**
338 * We only support a few wrap modes in lp_build_sample_wrap_int() at this time.
339 * Return whether the given mode is supported by that function.
340 */
341static boolean
342is_simple_wrap_mode(unsigned mode)
343{
344   switch (mode) {
345   case PIPE_TEX_WRAP_REPEAT:
346   case PIPE_TEX_WRAP_CLAMP:
347   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
348      return TRUE;
349   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
350   default:
351      return FALSE;
352   }
353}
354
355
356/**
357 * Build LLVM code for texture wrap mode, for scaled integer texcoords.
358 * \param coord  the incoming texcoord (s,t,r or q) scaled to the texture size
359 * \param length  the texture size along one dimension
360 * \param is_pot  if TRUE, length is a power of two
361 * \param wrap_mode  one of PIPE_TEX_WRAP_x
362 */
363static LLVMValueRef
364lp_build_sample_wrap_int(struct lp_build_sample_context *bld,
365                         LLVMValueRef coord,
366                         LLVMValueRef length,
367                         boolean is_pot,
368                         unsigned wrap_mode)
369{
370   struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
371   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
372   LLVMValueRef length_minus_one;
373
374   length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
375
376   switch(wrap_mode) {
377   case PIPE_TEX_WRAP_REPEAT:
378      if(is_pot)
379         coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, "");
380      else
381         /* Signed remainder won't give the right results for negative
382          * dividends but unsigned remainder does.*/
383         coord = LLVMBuildURem(bld->builder, coord, length, "");
384      break;
385
386   case PIPE_TEX_WRAP_CLAMP:
387   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
388   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
389      coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
390      coord = lp_build_min(int_coord_bld, coord, length_minus_one);
391      break;
392
393   case PIPE_TEX_WRAP_MIRROR_REPEAT:
394   case PIPE_TEX_WRAP_MIRROR_CLAMP:
395   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
396   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
397      /* FIXME */
398      _debug_printf("llvmpipe: failed to translate texture wrap mode %s\n",
399                    util_dump_tex_wrap(wrap_mode, TRUE));
400      coord = lp_build_max(uint_coord_bld, coord, uint_coord_bld->zero);
401      coord = lp_build_min(uint_coord_bld, coord, length_minus_one);
402      break;
403
404   default:
405      assert(0);
406   }
407
408   return coord;
409}
410
411
412/**
413 * Build LLVM code for texture wrap mode for linear filtering.
414 * \param x0_out  returns first integer texcoord
415 * \param x1_out  returns second integer texcoord
416 * \param weight_out  returns linear interpolation weight
417 */
418static void
419lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
420                            LLVMValueRef coord,
421                            LLVMValueRef length,
422                            boolean is_pot,
423                            unsigned wrap_mode,
424                            LLVMValueRef *x0_out,
425                            LLVMValueRef *x1_out,
426                            LLVMValueRef *weight_out)
427{
428   struct lp_build_context *coord_bld = &bld->coord_bld;
429   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
430   struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
431   LLVMValueRef two = lp_build_const_scalar(coord_bld->type, 2.0);
432   LLVMValueRef half = lp_build_const_scalar(coord_bld->type, 0.5);
433   LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
434   LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
435   LLVMValueRef length_f_minus_one = lp_build_sub(coord_bld, length_f, coord_bld->one);
436   LLVMValueRef coord0, coord1, weight;
437
438   switch(wrap_mode) {
439   case PIPE_TEX_WRAP_REPEAT:
440      /* mul by size and subtract 0.5 */
441      coord = lp_build_mul(coord_bld, coord, length_f);
442      coord = lp_build_sub(coord_bld, coord, half);
443      /* convert to int */
444      coord0 = lp_build_ifloor(coord_bld, coord);
445      coord1 = lp_build_add(uint_coord_bld, coord0, uint_coord_bld->one);
446      /* compute lerp weight */
447      weight = lp_build_fract(coord_bld, coord);
448      /* repeat wrap */
449      if (is_pot) {
450         coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
451         coord1 = LLVMBuildAnd(bld->builder, coord1, length_minus_one, "");
452      }
453      else {
454         /* Signed remainder won't give the right results for negative
455          * dividends but unsigned remainder does.*/
456         coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
457         coord1 = LLVMBuildURem(bld->builder, coord1, length, "");
458      }
459      break;
460
461   case PIPE_TEX_WRAP_CLAMP:
462      if (bld->static_state->normalized_coords) {
463         coord = lp_build_mul(coord_bld, coord, length_f);
464      }
465      weight = lp_build_fract(coord_bld, coord);
466      coord0 = lp_build_clamp(coord_bld, coord, coord_bld->zero,
467                              length_f_minus_one);
468      coord1 = lp_build_add(coord_bld, coord, coord_bld->one);
469      coord1 = lp_build_clamp(coord_bld, coord1, coord_bld->zero,
470                              length_f_minus_one);
471      coord0 = lp_build_ifloor(coord_bld, coord0);
472      coord1 = lp_build_ifloor(coord_bld, coord1);
473      break;
474
475   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
476      if (bld->static_state->normalized_coords) {
477         /* clamp to [0,1] */
478         coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, coord_bld->one);
479         /* mul by tex size and subtract 0.5 */
480         coord = lp_build_mul(coord_bld, coord, length_f);
481         coord = lp_build_sub(coord_bld, coord, half);
482      }
483      else {
484         LLVMValueRef min, max;
485         /* clamp to [0.5, length - 0.5] */
486         min = lp_build_const_scalar(coord_bld->type, 0.5F);
487         max = lp_build_sub(coord_bld, length_f, min);
488         coord = lp_build_clamp(coord_bld, coord, min, max);
489      }
490      /* compute lerp weight */
491      weight = lp_build_fract(coord_bld, coord);
492      /* coord0 = floor(coord); */
493      coord0 = lp_build_ifloor(coord_bld, coord);
494      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
495      /* coord0 = max(coord0, 0) */
496      coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
497      /* coord1 = min(coord1, length-1) */
498      coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
499      break;
500
501   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
502      {
503         LLVMValueRef min, max;
504         if (bld->static_state->normalized_coords) {
505            /* min = -1.0 / (2 * length) = -0.5 / length */
506            min = lp_build_mul(coord_bld,
507                               lp_build_const_scalar(coord_bld->type, -0.5F),
508                               lp_build_rcp(coord_bld, length_f));
509            /* max = 1.0 - min */
510            max = lp_build_sub(coord_bld, coord_bld->one, min);
511            /* coord = clamp(coord, min, max) */
512            coord = lp_build_clamp(coord_bld, coord, min, max);
513            /* scale coord to length (and sub 0.5?) */
514            coord = lp_build_mul(coord_bld, coord, length_f);
515            coord = lp_build_sub(coord_bld, coord, half);
516         }
517         else {
518            /* clamp to [-0.5, length + 0.5] */
519            min = lp_build_const_scalar(coord_bld->type, -0.5F);
520            max = lp_build_sub(coord_bld, length_f, min);
521            coord = lp_build_clamp(coord_bld, coord, min, max);
522            coord = lp_build_sub(coord_bld, coord, half);
523         }
524         /* compute lerp weight */
525         weight = lp_build_fract(coord_bld, coord);
526         /* convert to int */
527         coord0 = lp_build_ifloor(coord_bld, coord);
528         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
529      }
530      break;
531
532   case PIPE_TEX_WRAP_MIRROR_REPEAT:
533      /* compute mirror function */
534      coord = lp_build_coord_mirror(bld, coord);
535
536      /* scale coord to length */
537      coord = lp_build_mul(coord_bld, coord, length_f);
538      coord = lp_build_sub(coord_bld, coord, half);
539
540      /* compute lerp weight */
541      weight = lp_build_fract(coord_bld, coord);
542
543      /* convert to int coords */
544      coord0 = lp_build_ifloor(coord_bld, coord);
545      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
546
547      /* coord0 = max(coord0, 0) */
548      coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
549      /* coord1 = min(coord1, length-1) */
550      coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
551      break;
552
553   case PIPE_TEX_WRAP_MIRROR_CLAMP:
554      {
555         LLVMValueRef min, max;
556         /* min = 1.0 / (2 * length) */
557         min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
558         /* max = 1.0 - min */
559         max = lp_build_sub(coord_bld, coord_bld->one, min);
560
561         coord = lp_build_abs(coord_bld, coord);
562         coord = lp_build_clamp(coord_bld, coord, min, max);
563         coord = lp_build_mul(coord_bld, coord, length_f);
564         if(0)coord = lp_build_sub(coord_bld, coord, half);
565         weight = lp_build_fract(coord_bld, coord);
566         coord0 = lp_build_ifloor(coord_bld, coord);
567         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
568      }
569      break;
570
571   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
572      {
573         LLVMValueRef min, max;
574         /* min = 1.0 / (2 * length) */
575         min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
576         /* max = 1.0 - min */
577         max = lp_build_sub(coord_bld, coord_bld->one, min);
578
579         coord = lp_build_abs(coord_bld, coord);
580         coord = lp_build_clamp(coord_bld, coord, min, max);
581         coord = lp_build_mul(coord_bld, coord, length_f);
582         coord = lp_build_sub(coord_bld, coord, half);
583         weight = lp_build_fract(coord_bld, coord);
584         coord0 = lp_build_ifloor(coord_bld, coord);
585         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
586      }
587      break;
588
589   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
590      {
591         LLVMValueRef min, max;
592         /* min = -1.0 / (2 * length) = -0.5 / length */
593         min = lp_build_mul(coord_bld,
594                            lp_build_const_scalar(coord_bld->type, -0.5F),
595                            lp_build_rcp(coord_bld, length_f));
596         /* max = 1.0 - min */
597         max = lp_build_sub(coord_bld, coord_bld->one, min);
598
599         coord = lp_build_abs(coord_bld, coord);
600         coord = lp_build_clamp(coord_bld, coord, min, max);
601         coord = lp_build_mul(coord_bld, coord, length_f);
602         coord = lp_build_sub(coord_bld, coord, half);
603         weight = lp_build_fract(coord_bld, coord);
604         coord0 = lp_build_ifloor(coord_bld, coord);
605         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
606      }
607      break;
608
609   default:
610      assert(0);
611      coord0 = NULL;
612      coord1 = NULL;
613      weight = NULL;
614   }
615
616   *x0_out = coord0;
617   *x1_out = coord1;
618   *weight_out = weight;
619}
620
621
622/**
623 * Build LLVM code for texture wrap mode for nearest filtering.
624 * \param coord  the incoming texcoord (nominally in [0,1])
625 * \param length  the texture size along one dimension, as int
626 * \param is_pot  if TRUE, length is a power of two
627 * \param wrap_mode  one of PIPE_TEX_WRAP_x
628 */
629static LLVMValueRef
630lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
631                             LLVMValueRef coord,
632                             LLVMValueRef length,
633                             boolean is_pot,
634                             unsigned wrap_mode)
635{
636   struct lp_build_context *coord_bld = &bld->coord_bld;
637   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
638   struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
639   LLVMValueRef two = lp_build_const_scalar(coord_bld->type, 2.0);
640   LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
641   LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
642   LLVMValueRef length_f_minus_one = lp_build_sub(coord_bld, length_f, coord_bld->one);
643   LLVMValueRef icoord;
644
645   switch(wrap_mode) {
646   case PIPE_TEX_WRAP_REPEAT:
647      coord = lp_build_mul(coord_bld, coord, length_f);
648      icoord = lp_build_ifloor(coord_bld, coord);
649      if (is_pot)
650         icoord = LLVMBuildAnd(bld->builder, icoord, length_minus_one, "");
651      else
652         /* Signed remainder won't give the right results for negative
653          * dividends but unsigned remainder does.*/
654         icoord = LLVMBuildURem(bld->builder, icoord, length, "");
655      break;
656
657   case PIPE_TEX_WRAP_CLAMP:
658      /* mul by size */
659      if (bld->static_state->normalized_coords) {
660         coord = lp_build_mul(coord_bld, coord, length_f);
661      }
662      /* floor */
663      icoord = lp_build_ifloor(coord_bld, coord);
664      /* clamp to [0, size-1].  Note: int coord builder type */
665      icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
666                              length_minus_one);
667      break;
668
669   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
670      {
671         LLVMValueRef min, max;
672         if (bld->static_state->normalized_coords) {
673            /* min = 1.0 / (2 * length) */
674            min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
675            /* max = length - min */
676            max = lp_build_sub(coord_bld, length_f, min);
677            /* scale coord to length */
678            coord = lp_build_mul(coord_bld, coord, length_f);
679         }
680         else {
681            /* clamp to [0.5, length - 0.5] */
682            min = lp_build_const_scalar(coord_bld->type, 0.5F);
683            max = lp_build_sub(coord_bld, length_f, min);
684         }
685         /* coord = clamp(coord, min, max) */
686         coord = lp_build_clamp(coord_bld, coord, min, max);
687         icoord = lp_build_ifloor(coord_bld, coord);
688      }
689      break;
690
691   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
692      /* Note: this is the same as CLAMP_TO_EDGE, except min = -min */
693      {
694         LLVMValueRef min, max;
695         if (bld->static_state->normalized_coords) {
696            /* min = -1.0 / (2 * length) = -0.5 / length */
697            min = lp_build_mul(coord_bld,
698                               lp_build_const_scalar(coord_bld->type, -0.5F),
699                               lp_build_rcp(coord_bld, length_f));
700            /* max = length - min */
701            max = lp_build_sub(coord_bld, length_f, min);
702            /* scale coord to length */
703            coord = lp_build_mul(coord_bld, coord, length_f);
704         }
705         else {
706            /* clamp to [-0.5, length + 0.5] */
707            min = lp_build_const_scalar(coord_bld->type, -0.5F);
708            max = lp_build_sub(coord_bld, length_f, min);
709         }
710         /* coord = clamp(coord, min, max) */
711         coord = lp_build_clamp(coord_bld, coord, min, max);
712         icoord = lp_build_ifloor(coord_bld, coord);
713      }
714      break;
715
716   case PIPE_TEX_WRAP_MIRROR_REPEAT:
717      {
718         LLVMValueRef min, max;
719         /* min = 1.0 / (2 * length) */
720         min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
721         /* max = length - min */
722         max = lp_build_sub(coord_bld, length_f, min);
723
724         /* compute mirror function */
725         coord = lp_build_coord_mirror(bld, coord);
726
727         /* scale coord to length */
728         coord = lp_build_mul(coord_bld, coord, length_f);
729
730         /* coord = clamp(coord, min, max) */
731         coord = lp_build_clamp(coord_bld, coord, min, max);
732         icoord = lp_build_ifloor(coord_bld, coord);
733      }
734      break;
735
736   case PIPE_TEX_WRAP_MIRROR_CLAMP:
737      coord = lp_build_abs(coord_bld, coord);
738      coord = lp_build_mul(coord_bld, coord, length_f);
739      coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f_minus_one);
740      icoord = lp_build_ifloor(coord_bld, coord);
741      break;
742
743   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
744      {
745         LLVMValueRef min, max;
746         /* min = 1.0 / (2 * length) */
747         min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
748         /* max = length - min */
749         max = lp_build_sub(coord_bld, length_f, min);
750
751         coord = lp_build_abs(coord_bld, coord);
752         coord = lp_build_mul(coord_bld, coord, length_f);
753         coord = lp_build_clamp(coord_bld, coord, min, max);
754         icoord = lp_build_ifloor(coord_bld, coord);
755      }
756      break;
757
758   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
759      {
760         LLVMValueRef min, max;
761         /* min = 1.0 / (2 * length) */
762         min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
763         min = lp_build_negate(coord_bld, min);
764         /* max = length - min */
765         max = lp_build_sub(coord_bld, length_f, min);
766
767         coord = lp_build_abs(coord_bld, coord);
768         coord = lp_build_mul(coord_bld, coord, length_f);
769         coord = lp_build_clamp(coord_bld, coord, min, max);
770         icoord = lp_build_ifloor(coord_bld, coord);
771      }
772      break;
773
774   default:
775      assert(0);
776      icoord = NULL;
777   }
778
779   return icoord;
780}
781
782
783/**
784 * Codegen equivalent for u_minify().
785 * Return max(1, base_size >> level);
786 */
787static LLVMValueRef
788lp_build_minify(struct lp_build_sample_context *bld,
789                LLVMValueRef base_size,
790                LLVMValueRef level)
791{
792   LLVMValueRef size = LLVMBuildAShr(bld->builder, base_size, level, "minify");
793   size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one);
794   return size;
795}
796
797
798/**
799 * Generate code to compute texture level of detail (lambda).
800 * \param s  vector of texcoord s values
801 * \param t  vector of texcoord t values
802 * \param r  vector of texcoord r values
803 * \param width  scalar int texture width
804 * \param height  scalar int texture height
805 * \param depth  scalar int texture depth
806 */
807static LLVMValueRef
808lp_build_lod_selector(struct lp_build_sample_context *bld,
809                      LLVMValueRef s,
810                      LLVMValueRef t,
811                      LLVMValueRef r,
812                      LLVMValueRef width,
813                      LLVMValueRef height,
814                      LLVMValueRef depth)
815
816{
817   const int dims = texture_dims(bld->static_state->target);
818   struct lp_build_context *float_bld = &bld->float_bld;
819   LLVMValueRef lod_bias = LLVMConstReal(LLVMFloatType(), bld->static_state->lod_bias);
820   LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod);
821   LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(), bld->static_state->max_lod);
822
823   LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
824   LLVMValueRef index1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
825   LLVMValueRef index2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
826
827   LLVMValueRef s0, s1, s2;
828   LLVMValueRef t0, t1, t2;
829   LLVMValueRef r0, r1, r2;
830   LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy;
831   LLVMValueRef rho, lod;
832
833   /*
834    * dsdx = abs(s[1] - s[0]);
835    * dsdy = abs(s[2] - s[0]);
836    * dtdx = abs(t[1] - t[0]);
837    * dtdy = abs(t[2] - t[0]);
838    * drdx = abs(r[1] - r[0]);
839    * drdy = abs(r[2] - r[0]);
840    * XXX we're assuming a four-element quad in 2x2 layout here.
841    */
842   s0 = LLVMBuildExtractElement(bld->builder, s, index0, "s0");
843   s1 = LLVMBuildExtractElement(bld->builder, s, index1, "s1");
844   s2 = LLVMBuildExtractElement(bld->builder, s, index2, "s2");
845   dsdx = LLVMBuildSub(bld->builder, s1, s0, "");
846   dsdx = lp_build_abs(float_bld, dsdx);
847   dsdy = LLVMBuildSub(bld->builder, s2, s0, "");
848   dsdy = lp_build_abs(float_bld, dsdy);
849   if (dims > 1) {
850      t0 = LLVMBuildExtractElement(bld->builder, t, index0, "t0");
851      t1 = LLVMBuildExtractElement(bld->builder, t, index1, "t1");
852      t2 = LLVMBuildExtractElement(bld->builder, t, index2, "t2");
853      dtdx = LLVMBuildSub(bld->builder, t1, t0, "");
854      dtdx = lp_build_abs(float_bld, dtdx);
855      dtdy = LLVMBuildSub(bld->builder, t2, t0, "");
856      dtdy = lp_build_abs(float_bld, dtdy);
857      if (dims > 2) {
858         r0 = LLVMBuildExtractElement(bld->builder, r, index0, "r0");
859         r1 = LLVMBuildExtractElement(bld->builder, r, index1, "r1");
860         r2 = LLVMBuildExtractElement(bld->builder, r, index2, "r2");
861         drdx = LLVMBuildSub(bld->builder, r1, r0, "");
862         drdx = lp_build_abs(float_bld, drdx);
863         drdy = LLVMBuildSub(bld->builder, r2, r0, "");
864         drdy = lp_build_abs(float_bld, drdy);
865      }
866   }
867
868   /* Compute rho = max of all partial derivatives scaled by texture size.
869    * XXX this could be vectorized somewhat
870    */
871   rho = LLVMBuildMul(bld->builder,
872                      lp_build_max(float_bld, dsdx, dsdy),
873                      lp_build_int_to_float(float_bld, width), "");
874   if (dims > 1) {
875      LLVMValueRef max;
876      max = LLVMBuildMul(bld->builder,
877                         lp_build_max(float_bld, dtdx, dtdy),
878                         lp_build_int_to_float(float_bld, height), "");
879      rho = lp_build_max(float_bld, rho, max);
880      if (dims > 2) {
881         max = LLVMBuildMul(bld->builder,
882                            lp_build_max(float_bld, drdx, drdy),
883                            lp_build_int_to_float(float_bld, depth), "");
884         rho = lp_build_max(float_bld, rho, max);
885      }
886   }
887
888   /* compute lod = log2(rho) */
889   lod = lp_build_log2(float_bld, rho);
890
891   /* add lod bias */
892   lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "LOD bias");
893
894   /* clamp lod */
895   lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
896
897   return lod;
898}
899
900
901/**
902 * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer
903 * mipmap level index.
904 * Note: this is all scalar code.
905 * \param lod  scalar float texture level of detail
906 * \param level_out  returns integer
907 */
908static void
909lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
910                           unsigned unit,
911                           LLVMValueRef lod,
912                           LLVMValueRef *level_out)
913{
914   struct lp_build_context *float_bld = &bld->float_bld;
915   struct lp_build_context *int_bld = &bld->int_bld;
916   LLVMValueRef last_level, level;
917
918   LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0);
919
920   last_level = bld->dynamic_state->last_level(bld->dynamic_state,
921                                               bld->builder, unit);
922
923   /* convert float lod to integer */
924   level = lp_build_iround(float_bld, lod);
925
926   /* clamp level to legal range of levels */
927   *level_out = lp_build_clamp(int_bld, level, zero, last_level);
928}
929
930
931/**
932 * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to integer to
933 * two (adjacent) mipmap level indexes.  Later, we'll sample from those
934 * two mipmap levels and interpolate between them.
935 */
936static void
937lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
938                           unsigned unit,
939                           LLVMValueRef lod,
940                           LLVMValueRef *level0_out,
941                           LLVMValueRef *level1_out,
942                           LLVMValueRef *weight_out)
943{
944   struct lp_build_context *float_bld = &bld->float_bld;
945   struct lp_build_context *int_bld = &bld->int_bld;
946   LLVMValueRef last_level, level;
947
948   last_level = bld->dynamic_state->last_level(bld->dynamic_state,
949                                               bld->builder, unit);
950
951   /* convert float lod to integer */
952   level = lp_build_ifloor(float_bld, lod);
953
954   /* compute level 0 and clamp to legal range of levels */
955   *level0_out = lp_build_clamp(int_bld, level,
956                                int_bld->zero,
957                                last_level);
958   /* compute level 1 and clamp to legal range of levels */
959   *level1_out = lp_build_add(int_bld, *level0_out, int_bld->one);
960   *level1_out = lp_build_min(int_bld, *level1_out, int_bld->zero);
961
962   *weight_out = lp_build_fract(float_bld, lod);
963}
964
965
966/**
967 * Generate code to sample a mipmap level with nearest filtering.
968 */
969static void
970lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
971                              LLVMValueRef width_vec,
972                              LLVMValueRef height_vec,
973                              LLVMValueRef depth_vec,
974                              LLVMValueRef row_stride_vec,
975                              LLVMValueRef img_stride_vec,
976                              LLVMValueRef data_ptr,
977                              LLVMValueRef s,
978                              LLVMValueRef t,
979                              LLVMValueRef r,
980                              LLVMValueRef colors_out[4])
981{
982   const int dims = texture_dims(bld->static_state->target);
983   LLVMValueRef x, y, z;
984
985   /*
986    * Compute integer texcoords.
987    */
988   x = lp_build_sample_wrap_nearest(bld, s, width_vec,
989                                    bld->static_state->pot_width,
990                                    bld->static_state->wrap_s);
991   lp_build_name(x, "tex.x.wrapped");
992
993   if (dims >= 2) {
994      y = lp_build_sample_wrap_nearest(bld, t, height_vec,
995                                       bld->static_state->pot_height,
996                                       bld->static_state->wrap_t);
997      lp_build_name(y, "tex.y.wrapped");
998
999      if (dims == 3) {
1000         z = lp_build_sample_wrap_nearest(bld, r, depth_vec,
1001                                          bld->static_state->pot_height,
1002                                          bld->static_state->wrap_r);
1003         lp_build_name(z, "tex.z.wrapped");
1004      }
1005      else {
1006         z = NULL;
1007      }
1008   }
1009   else {
1010      y = NULL;
1011   }
1012
1013   /*
1014    * Get texture colors.
1015    */
1016   lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1017                             x, y, z,
1018                             row_stride_vec, img_stride_vec,
1019                             data_ptr, colors_out);
1020}
1021
1022
1023/**
1024 * Generate code to sample a mipmap level with linear filtering.
1025 * 1D, 2D and 3D images are suppored.
1026 */
1027static void
1028lp_build_sample_image_linear(struct lp_build_sample_context *bld,
1029                             LLVMValueRef width_vec,
1030                             LLVMValueRef height_vec,
1031                             LLVMValueRef depth_vec,
1032                             LLVMValueRef row_stride_vec,
1033                             LLVMValueRef img_stride_vec,
1034                             LLVMValueRef data_ptr,
1035                             LLVMValueRef s,
1036                             LLVMValueRef t,
1037                             LLVMValueRef r,
1038                             LLVMValueRef colors_out[4])
1039{
1040   const int dims = texture_dims(bld->static_state->target);
1041   LLVMValueRef x0, y0, z0, x1, y1, z1;
1042   LLVMValueRef s_fpart, t_fpart, r_fpart;
1043   LLVMValueRef neighbors[2][2][4];
1044   int chan;
1045
1046   /*
1047    * Compute integer texcoords.
1048    */
1049   lp_build_sample_wrap_linear(bld, s, width_vec,
1050                               bld->static_state->pot_width,
1051                               bld->static_state->wrap_s,
1052                               &x0, &x1, &s_fpart);
1053   lp_build_name(x0, "tex.x0.wrapped");
1054   lp_build_name(x1, "tex.x1.wrapped");
1055
1056   if (dims >= 2) {
1057      lp_build_sample_wrap_linear(bld, t, height_vec,
1058                                  bld->static_state->pot_height,
1059                                  bld->static_state->wrap_t,
1060                                  &y0, &y1, &t_fpart);
1061      lp_build_name(y0, "tex.y0.wrapped");
1062      lp_build_name(y1, "tex.y1.wrapped");
1063
1064      if (dims == 3) {
1065         lp_build_sample_wrap_linear(bld, r, depth_vec,
1066                                     bld->static_state->pot_depth,
1067                                     bld->static_state->wrap_r,
1068                                     &z0, &z1, &r_fpart);
1069         lp_build_name(z0, "tex.z0.wrapped");
1070         lp_build_name(z1, "tex.z1.wrapped");
1071      }
1072      else {
1073         z0 = z1 = r_fpart = NULL;
1074      }
1075   }
1076   else {
1077      y0 = y1 = t_fpart = NULL;
1078   }
1079
1080   /*
1081    * Get texture colors.
1082    */
1083   /* get x0/x1 texels */
1084   lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1085                             x0, y0, z0,
1086                             row_stride_vec, img_stride_vec,
1087                             data_ptr, neighbors[0][0]);
1088   lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1089                             x1, y0, z0,
1090                             row_stride_vec, img_stride_vec,
1091                             data_ptr, neighbors[0][1]);
1092
1093   if (dims == 1) {
1094      /* Interpolate two samples from 1D image to produce one color */
1095      colors_out[chan] = lp_build_lerp(&bld->texel_bld, s_fpart,
1096                                       neighbors[0][0][chan],
1097                                       neighbors[0][1][chan]);
1098   }
1099   else {
1100      /* 2D/3D texture */
1101      LLVMValueRef colors0[4];
1102
1103      /* get x0/x1 texels at y1 */
1104      lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1105                                x0, y1, z0,
1106                                row_stride_vec, img_stride_vec,
1107                                data_ptr, neighbors[1][0]);
1108      lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1109                                x1, y1, z0,
1110                                row_stride_vec, img_stride_vec,
1111                                data_ptr, neighbors[1][1]);
1112
1113      /* Bilinear interpolate the four samples from the 2D image / 3D slice */
1114      for (chan = 0; chan < 4; chan++) {
1115         colors0[chan] = lp_build_lerp_2d(&bld->texel_bld,
1116                                          s_fpart, t_fpart,
1117                                          neighbors[0][0][chan],
1118                                          neighbors[0][1][chan],
1119                                          neighbors[1][0][chan],
1120                                          neighbors[1][1][chan]);
1121      }
1122
1123      if (dims == 3) {
1124         LLVMValueRef neighbors1[2][2][4];
1125         LLVMValueRef colors1[4];
1126
1127         /* get x0/x1/y0/y1 texels at z1 */
1128         lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1129                                   x0, y0, z1,
1130                                   row_stride_vec, img_stride_vec,
1131                                   data_ptr, neighbors1[0][0]);
1132         lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1133                                   x1, y0, z1,
1134                                   row_stride_vec, img_stride_vec,
1135                                   data_ptr, neighbors1[0][1]);
1136         lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1137                                   x0, y1, z1,
1138                                   row_stride_vec, img_stride_vec,
1139                                   data_ptr, neighbors1[1][0]);
1140         lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1141                                   x1, y1, z1,
1142                                   row_stride_vec, img_stride_vec,
1143                                   data_ptr, neighbors1[1][1]);
1144
1145         /* Bilinear interpolate the four samples from the second Z slice */
1146         for (chan = 0; chan < 4; chan++) {
1147            colors1[chan] = lp_build_lerp_2d(&bld->texel_bld,
1148                                             s_fpart, t_fpart,
1149                                             neighbors1[0][0][chan],
1150                                             neighbors1[0][1][chan],
1151                                             neighbors1[1][0][chan],
1152                                             neighbors1[1][1][chan]);
1153         }
1154
1155         /* Linearly interpolate the two samples from the two 3D slices */
1156         for (chan = 0; chan < 4; chan++) {
1157            colors_out[chan] = lp_build_lerp(&bld->texel_bld,
1158                                             r_fpart,
1159                                             colors0[chan], colors1[chan]);
1160         }
1161      }
1162      else {
1163         /* 2D tex */
1164         for (chan = 0; chan < 4; chan++) {
1165            colors_out[chan] = colors0[chan];
1166         }
1167      }
1168   }
1169}
1170
1171
1172
1173/**
1174 * General texture sampling codegen.
1175 * This function handles texture sampling for all texture targets (1D,
1176 * 2D, 3D, cube) and all filtering modes.
1177 */
1178static void
1179lp_build_sample_general(struct lp_build_sample_context *bld,
1180                        unsigned unit,
1181                        LLVMValueRef s,
1182                        LLVMValueRef t,
1183                        LLVMValueRef r,
1184                        LLVMValueRef width,
1185                        LLVMValueRef height,
1186                        LLVMValueRef depth,
1187                        LLVMValueRef width_vec,
1188                        LLVMValueRef height_vec,
1189                        LLVMValueRef depth_vec,
1190                        LLVMValueRef row_stride_vec,
1191                        LLVMValueRef img_stride_vec,
1192                        LLVMValueRef data_array,
1193                        LLVMValueRef *colors_out)
1194{
1195   const unsigned mip_filter = bld->static_state->min_mip_filter;
1196   const unsigned min_filter = bld->static_state->min_img_filter;
1197   const unsigned mag_filter = bld->static_state->mag_img_filter;
1198   const int dims = texture_dims(bld->static_state->target);
1199   LLVMValueRef lod, lod_fpart;
1200   LLVMValueRef ilevel0, ilevel1, ilevel0_vec, ilevel1_vec;
1201   LLVMValueRef width0_vec, height0_vec, depth0_vec;
1202   LLVMValueRef width1_vec, height1_vec, depth1_vec;
1203   LLVMValueRef row_stride0_vec, row_stride1_vec;
1204   LLVMValueRef img_stride0_vec, img_stride1_vec;
1205   LLVMValueRef data_ptr0, data_ptr1;
1206   int chan;
1207
1208   /*
1209   printf("%s mip %d  min %d  mag %d\n", __FUNCTION__,
1210          mip_filter, min_filter, mag_filter);
1211   */
1212
1213   /*
1214    * Compute the level of detail (mipmap level index(es)).
1215    */
1216   if (mip_filter == PIPE_TEX_MIPFILTER_NONE) {
1217      /* always use mip level 0 */
1218      ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
1219   }
1220   else {
1221      /* compute float LOD */
1222      lod = lp_build_lod_selector(bld, s, t, r, width, height, depth);
1223
1224      if (mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
1225         lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
1226      }
1227      else {
1228         assert(mip_filter == PIPE_TEX_MIPFILTER_LINEAR);
1229         lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1,
1230                                    &lod_fpart);
1231         lod_fpart = lp_build_broadcast_scalar(&bld->coord_bld, lod_fpart);
1232      }
1233   }
1234
1235   /*
1236    * Convert scalar integer mipmap levels into vectors.
1237    */
1238   ilevel0_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel0);
1239   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
1240      ilevel1_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel1);
1241
1242   /*
1243    * Compute width, height at mipmap level 'ilevel0'
1244    */
1245   width0_vec = lp_build_minify(bld, width_vec, ilevel0_vec);
1246   if (dims >= 2) {
1247      height0_vec = lp_build_minify(bld, height_vec, ilevel0_vec);
1248      row_stride0_vec = lp_build_minify(bld, row_stride_vec, ilevel0_vec);
1249      if (dims == 3) {
1250         depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec);
1251      }
1252   }
1253   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1254      /* compute width, height, depth for second mipmap level at ilevel1 */
1255      width1_vec = lp_build_minify(bld, width_vec, ilevel1_vec);
1256      if (dims >= 2) {
1257         height1_vec = lp_build_minify(bld, height_vec, ilevel1_vec);
1258         row_stride1_vec = lp_build_minify(bld, row_stride_vec, ilevel1_vec);
1259         if (dims == 3) {
1260            depth1_vec = lp_build_minify(bld, depth_vec, ilevel1_vec);
1261         }
1262      }
1263   }
1264
1265   /*
1266    * Choose cube face, recompute texcoords.
1267    */
1268   if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
1269
1270   }
1271
1272   /*
1273    * Get pointer(s) to image data for mipmap level(s).
1274    */
1275   data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0);
1276   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1277      data_ptr1 = lp_build_get_mipmap_level(bld, data_array, ilevel1);
1278   }
1279
1280   /*
1281    * Get/interpolate texture colors.
1282    */
1283   /* XXX temporarily force this path: */
1284   if (1 /*min_filter == mag_filter*/) {
1285      /* same filter for minification or magnification */
1286      LLVMValueRef colors0[4], colors1[4];
1287
1288      if (min_filter == PIPE_TEX_FILTER_NEAREST) {
1289         lp_build_sample_image_nearest(bld,
1290                                       width0_vec, height0_vec, depth0_vec,
1291                                       row_stride0_vec, img_stride0_vec,
1292                                       data_ptr0, s, t, r, colors0);
1293
1294         if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1295            /* sample the second mipmap level, and interp */
1296            lp_build_sample_image_nearest(bld,
1297                                          width1_vec, height1_vec, depth1_vec,
1298                                          row_stride1_vec, img_stride1_vec,
1299                                          data_ptr1, s, t, r, colors1);
1300         }
1301      }
1302      else {
1303         assert(min_filter == PIPE_TEX_FILTER_LINEAR);
1304
1305         lp_build_sample_image_linear(bld,
1306                                      width0_vec, height0_vec, depth0_vec,
1307                                      row_stride0_vec, img_stride0_vec,
1308                                      data_ptr0, s, t, r, colors0);
1309
1310
1311         if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1312            /* sample the second mipmap level, and interp */
1313            lp_build_sample_image_linear(bld,
1314                                         width1_vec, height1_vec, depth1_vec,
1315                                         row_stride1_vec, img_stride1_vec,
1316                                         data_ptr1, s, t, r, colors1);
1317         }
1318      }
1319
1320      if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1321         /* interpolate samples from the two mipmap levels */
1322         for (chan = 0; chan < 4; chan++) {
1323            colors_out[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
1324                                             colors0[chan], colors1[chan]);
1325         }
1326      }
1327      else {
1328         /* use first/only level's colors */
1329         for (chan = 0; chan < 4; chan++) {
1330            colors_out[chan] = colors0[chan];
1331         }
1332      }
1333   }
1334   else {
1335      /* emit conditional to choose min image filter or mag image filter
1336       * depending on the lod being >0 or <= 0, respectively.
1337       */
1338      abort();
1339   }
1340}
1341
1342
1343
1344static void
1345lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder,
1346                          struct lp_type dst_type,
1347                          LLVMValueRef packed,
1348                          LLVMValueRef *rgba)
1349{
1350   LLVMValueRef mask = lp_build_int_const_scalar(dst_type, 0xff);
1351   unsigned chan;
1352
1353   /* Decode the input vector components */
1354   for (chan = 0; chan < 4; ++chan) {
1355      unsigned start = chan*8;
1356      unsigned stop = start + 8;
1357      LLVMValueRef input;
1358
1359      input = packed;
1360
1361      if(start)
1362         input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(dst_type, start), "");
1363
1364      if(stop < 32)
1365         input = LLVMBuildAnd(builder, input, mask, "");
1366
1367      input = lp_build_unsigned_norm_to_float(builder, 8, dst_type, input);
1368
1369      rgba[chan] = input;
1370   }
1371}
1372
1373
1374static void
1375lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
1376                              LLVMValueRef s,
1377                              LLVMValueRef t,
1378                              LLVMValueRef width,
1379                              LLVMValueRef height,
1380                              LLVMValueRef stride,
1381                              LLVMValueRef data_array,
1382                              LLVMValueRef *texel)
1383{
1384   LLVMBuilderRef builder = bld->builder;
1385   struct lp_build_context i32, h16, u8n;
1386   LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
1387   LLVMValueRef i32_c8, i32_c128, i32_c255;
1388   LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
1389   LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
1390   LLVMValueRef x0, x1;
1391   LLVMValueRef y0, y1;
1392   LLVMValueRef neighbors[2][2];
1393   LLVMValueRef neighbors_lo[2][2];
1394   LLVMValueRef neighbors_hi[2][2];
1395   LLVMValueRef packed, packed_lo, packed_hi;
1396   LLVMValueRef unswizzled[4];
1397
1398   lp_build_context_init(&i32, builder, lp_type_int_vec(32));
1399   lp_build_context_init(&h16, builder, lp_type_ufixed(16));
1400   lp_build_context_init(&u8n, builder, lp_type_unorm(8));
1401
1402   i32_vec_type = lp_build_vec_type(i32.type);
1403   h16_vec_type = lp_build_vec_type(h16.type);
1404   u8n_vec_type = lp_build_vec_type(u8n.type);
1405
1406   if (bld->static_state->normalized_coords) {
1407      LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
1408      LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width, coord_vec_type, "");
1409      LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height, coord_vec_type, "");
1410      s = lp_build_mul(&bld->coord_bld, s, fp_width);
1411      t = lp_build_mul(&bld->coord_bld, t, fp_height);
1412   }
1413
1414   /* scale coords by 256 (8 fractional bits) */
1415   s = lp_build_mul_imm(&bld->coord_bld, s, 256);
1416   t = lp_build_mul_imm(&bld->coord_bld, t, 256);
1417
1418   /* convert float to int */
1419   s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
1420   t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
1421
1422   /* subtract 0.5 (add -128) */
1423   i32_c128 = lp_build_int_const_scalar(i32.type, -128);
1424   s = LLVMBuildAdd(builder, s, i32_c128, "");
1425   t = LLVMBuildAdd(builder, t, i32_c128, "");
1426
1427   /* compute floor (shift right 8) */
1428   i32_c8 = lp_build_int_const_scalar(i32.type, 8);
1429   s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
1430   t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
1431
1432   /* compute fractional part (AND with 0xff) */
1433   i32_c255 = lp_build_int_const_scalar(i32.type, 255);
1434   s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
1435   t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
1436
1437   x0 = s_ipart;
1438   y0 = t_ipart;
1439
1440   x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one);
1441   y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one);
1442
1443   x0 = lp_build_sample_wrap_int(bld, x0, width,  bld->static_state->pot_width,
1444                                 bld->static_state->wrap_s);
1445   y0 = lp_build_sample_wrap_int(bld, y0, height, bld->static_state->pot_height,
1446                                 bld->static_state->wrap_t);
1447
1448   x1 = lp_build_sample_wrap_int(bld, x1, width,  bld->static_state->pot_width,
1449                                 bld->static_state->wrap_s);
1450   y1 = lp_build_sample_wrap_int(bld, y1, height, bld->static_state->pot_height,
1451                                 bld->static_state->wrap_t);
1452
1453   /*
1454    * Transform 4 x i32 in
1455    *
1456    *   s_fpart = {s0, s1, s2, s3}
1457    *
1458    * into 8 x i16
1459    *
1460    *   s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
1461    *
1462    * into two 8 x i16
1463    *
1464    *   s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
1465    *   s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
1466    *
1467    * and likewise for t_fpart. There is no risk of loosing precision here
1468    * since the fractional parts only use the lower 8bits.
1469    */
1470
1471   s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
1472   t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
1473
1474   {
1475      LLVMTypeRef elem_type = LLVMInt32Type();
1476      LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
1477      LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
1478      LLVMValueRef shuffle_lo;
1479      LLVMValueRef shuffle_hi;
1480      unsigned i, j;
1481
1482      for(j = 0; j < h16.type.length; j += 4) {
1483         unsigned subindex = util_cpu_caps.little_endian ? 0 : 1;
1484         LLVMValueRef index;
1485
1486         index = LLVMConstInt(elem_type, j/2 + subindex, 0);
1487         for(i = 0; i < 4; ++i)
1488            shuffles_lo[j + i] = index;
1489
1490         index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
1491         for(i = 0; i < 4; ++i)
1492            shuffles_hi[j + i] = index;
1493      }
1494
1495      shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
1496      shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);
1497
1498      s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_lo, "");
1499      t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_lo, "");
1500      s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_hi, "");
1501      t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, "");
1502   }
1503
1504   /*
1505    * Fetch the pixels as 4 x 32bit (rgba order might differ):
1506    *
1507    *   rgba0 rgba1 rgba2 rgba3
1508    *
1509    * bit cast them into 16 x u8
1510    *
1511    *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
1512    *
1513    * unpack them into two 8 x i16:
1514    *
1515    *   r0 g0 b0 a0 r1 g1 b1 a1
1516    *   r2 g2 b2 a2 r3 g3 b3 a3
1517    *
1518    * The higher 8 bits of the resulting elements will be zero.
1519    */
1520
1521   neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_array);
1522   neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_array);
1523   neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_array);
1524   neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_array);
1525
1526   neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, "");
1527   neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, "");
1528   neighbors[1][0] = LLVMBuildBitCast(builder, neighbors[1][0], u8n_vec_type, "");
1529   neighbors[1][1] = LLVMBuildBitCast(builder, neighbors[1][1], u8n_vec_type, "");
1530
1531   lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][0], &neighbors_lo[0][0], &neighbors_hi[0][0]);
1532   lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][1], &neighbors_lo[0][1], &neighbors_hi[0][1]);
1533   lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][0], &neighbors_lo[1][0], &neighbors_hi[1][0]);
1534   lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][1], &neighbors_lo[1][1], &neighbors_hi[1][1]);
1535
1536   /*
1537    * Linear interpolate with 8.8 fixed point.
1538    */
1539
1540   packed_lo = lp_build_lerp_2d(&h16,
1541                                s_fpart_lo, t_fpart_lo,
1542                                neighbors_lo[0][0],
1543                                neighbors_lo[0][1],
1544                                neighbors_lo[1][0],
1545                                neighbors_lo[1][1]);
1546
1547   packed_hi = lp_build_lerp_2d(&h16,
1548                                s_fpart_hi, t_fpart_hi,
1549                                neighbors_hi[0][0],
1550                                neighbors_hi[0][1],
1551                                neighbors_hi[1][0],
1552                                neighbors_hi[1][1]);
1553
1554   packed = lp_build_pack2(builder, h16.type, u8n.type, packed_lo, packed_hi);
1555
1556   /*
1557    * Convert to SoA and swizzle.
1558    */
1559
1560   packed = LLVMBuildBitCast(builder, packed, i32_vec_type, "");
1561
1562   lp_build_rgba8_to_f32_soa(bld->builder,
1563                             bld->texel_type,
1564                             packed, unswizzled);
1565
1566   lp_build_format_swizzle_soa(bld->format_desc,
1567                               bld->texel_type, unswizzled,
1568                               texel);
1569}
1570
1571
1572static void
1573lp_build_sample_compare(struct lp_build_sample_context *bld,
1574                        LLVMValueRef p,
1575                        LLVMValueRef *texel)
1576{
1577   struct lp_build_context *texel_bld = &bld->texel_bld;
1578   LLVMValueRef res;
1579   unsigned chan;
1580
1581   if(bld->static_state->compare_mode == PIPE_TEX_COMPARE_NONE)
1582      return;
1583
1584   /* TODO: Compare before swizzling, to avoid redundant computations */
1585   res = NULL;
1586   for(chan = 0; chan < 4; ++chan) {
1587      LLVMValueRef cmp;
1588      cmp = lp_build_cmp(texel_bld, bld->static_state->compare_func, p, texel[chan]);
1589      cmp = lp_build_select(texel_bld, cmp, texel_bld->one, texel_bld->zero);
1590
1591      if(res)
1592         res = lp_build_add(texel_bld, res, cmp);
1593      else
1594         res = cmp;
1595   }
1596
1597   assert(res);
1598   res = lp_build_mul(texel_bld, res, lp_build_const_scalar(texel_bld->type, 0.25));
1599
1600   /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
1601   for(chan = 0; chan < 3; ++chan)
1602      texel[chan] = res;
1603   texel[3] = texel_bld->one;
1604}
1605
1606
1607/**
1608 * Build texture sampling code.
1609 * 'texel' will return a vector of four LLVMValueRefs corresponding to
1610 * R, G, B, A.
1611 * \param type  vector float type to use for coords, etc.
1612 */
1613void
1614lp_build_sample_soa(LLVMBuilderRef builder,
1615                    const struct lp_sampler_static_state *static_state,
1616                    struct lp_sampler_dynamic_state *dynamic_state,
1617                    struct lp_type type,
1618                    unsigned unit,
1619                    unsigned num_coords,
1620                    const LLVMValueRef *coords,
1621                    LLVMValueRef lodbias,
1622                    LLVMValueRef *texel)
1623{
1624   struct lp_build_sample_context bld;
1625   LLVMValueRef width, width_vec;
1626   LLVMValueRef height, height_vec;
1627   LLVMValueRef depth, depth_vec;
1628   LLVMValueRef stride, stride_vec;
1629   LLVMValueRef data_array;
1630   LLVMValueRef s;
1631   LLVMValueRef t;
1632   LLVMValueRef r;
1633
1634   (void) lp_build_lod_selector;   /* temporary to silence warning */
1635   (void) lp_build_nearest_mip_level;
1636   (void) lp_build_linear_mip_levels;
1637   (void) lp_build_minify;
1638
1639   /* Setup our build context */
1640   memset(&bld, 0, sizeof bld);
1641   bld.builder = builder;
1642   bld.static_state = static_state;
1643   bld.dynamic_state = dynamic_state;
1644   bld.format_desc = util_format_description(static_state->format);
1645
1646   bld.float_type = lp_type_float(32);
1647   bld.int_type = lp_type_int(32);
1648   bld.coord_type = type;
1649   bld.uint_coord_type = lp_uint_type(type);
1650   bld.int_coord_type = lp_int_type(type);
1651   bld.texel_type = type;
1652
1653   lp_build_context_init(&bld.float_bld, builder, bld.float_type);
1654   lp_build_context_init(&bld.int_bld, builder, bld.int_type);
1655   lp_build_context_init(&bld.coord_bld, builder, bld.coord_type);
1656   lp_build_context_init(&bld.uint_coord_bld, builder, bld.uint_coord_type);
1657   lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type);
1658   lp_build_context_init(&bld.texel_bld, builder, bld.texel_type);
1659
1660   /* Get the dynamic state */
1661   width = dynamic_state->width(dynamic_state, builder, unit);
1662   height = dynamic_state->height(dynamic_state, builder, unit);
1663   depth = dynamic_state->depth(dynamic_state, builder, unit);
1664   stride = dynamic_state->stride(dynamic_state, builder, unit);
1665   data_array = dynamic_state->data_ptr(dynamic_state, builder, unit);
1666   /* Note that data_array is an array[level] of pointers to texture images */
1667
1668   s = coords[0];
1669   t = coords[1];
1670   r = coords[2];
1671
1672   width_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, width);
1673   height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height);
1674   depth_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, depth);
1675   stride_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, stride);
1676
1677   if (lp_format_is_rgba8(bld.format_desc) &&
1678       static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
1679       static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR &&
1680       static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE &&
1681       is_simple_wrap_mode(static_state->wrap_s) &&
1682       is_simple_wrap_mode(static_state->wrap_t)) {
1683      /* special case */
1684      lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec,
1685                                    stride_vec, data_array, texel);
1686   }
1687   else {
1688      lp_build_sample_general(&bld, unit, s, t, r,
1689                              width, height, depth,
1690                              width_vec, height_vec, depth_vec,
1691                              stride_vec, NULL, data_array,
1692                              texel);
1693   }
1694
1695   lp_build_sample_compare(&bld, r, texel);
1696}
1697