lp_bld_sample_soa.c revision 923256626931c057d1a7c20d8900768b0c1faea9
1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * @file
30 * Texture sampling -- SoA.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 * @author Brian Paul <brianp@vmware.com>
34 */
35
36#include "pipe/p_defines.h"
37#include "pipe/p_state.h"
38#include "util/u_debug.h"
39#include "util/u_dump.h"
40#include "util/u_memory.h"
41#include "util/u_math.h"
42#include "util/u_format.h"
43#include "util/u_cpu_detect.h"
44#include "lp_bld_debug.h"
45#include "lp_bld_type.h"
46#include "lp_bld_const.h"
47#include "lp_bld_conv.h"
48#include "lp_bld_arit.h"
49#include "lp_bld_logic.h"
50#include "lp_bld_swizzle.h"
51#include "lp_bld_pack.h"
52#include "lp_bld_flow.h"
53#include "lp_bld_gather.h"
54#include "lp_bld_format.h"
55#include "lp_bld_sample.h"
56#include "lp_bld_quad.h"
57
58
/**
 * Keep all information for sampling code generation in a single place.
 *
 * Most members come in pairs: an lp_type describing a value type and the
 * lp_build_context that is passed to the lp_build_*() helpers when emitting
 * code for values of that type.
 */
struct lp_build_sample_context
{
   /** LLVM builder that the sampling instructions are emitted through */
   LLVMBuilderRef builder;

   /**
    * Sampler/texture state read while generating code (texture target,
    * wrap modes, swizzles, border color, normalized_coords, lod settings).
    */
   const struct lp_sampler_static_state *static_state;

   /* NOTE(review): not referenced in the visible part of this file;
    * presumably supplies state that is only known at run time -- confirm.
    */
   struct lp_sampler_dynamic_state *dynamic_state;

   /** Description of the texture format, used for offset and fetch code */
   const struct util_format_description *format_desc;

   /** regular scalar float type */
   struct lp_type float_type;
   struct lp_build_context float_bld;

   /** regular int type (presumably scalar, mirroring float_type) */
   struct lp_type int_type;
   struct lp_build_context int_bld;

   /** Incoming coordinates type and build context */
   struct lp_type coord_type;
   struct lp_build_context coord_bld;

   /** Unsigned integer coordinates */
   struct lp_type uint_coord_type;
   struct lp_build_context uint_coord_bld;

   /** Signed integer coordinates */
   struct lp_type int_coord_type;
   struct lp_build_context int_coord_bld;

   /** Output texels type and build context */
   struct lp_type texel_type;
   struct lp_build_context texel_bld;
};
96
97
98/**
99 * Does the given texture wrap mode allow sampling the texture border color?
100 * XXX maybe move this into gallium util code.
101 */
102static boolean
103wrap_mode_uses_border_color(unsigned mode)
104{
105   switch (mode) {
106   case PIPE_TEX_WRAP_REPEAT:
107   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
108   case PIPE_TEX_WRAP_MIRROR_REPEAT:
109   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
110      return FALSE;
111   case PIPE_TEX_WRAP_CLAMP:
112   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
113   case PIPE_TEX_WRAP_MIRROR_CLAMP:
114   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
115      return TRUE;
116   default:
117      assert(0 && "unexpected wrap mode");
118      return FALSE;
119   }
120}
121
122
123static LLVMValueRef
124lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
125                          LLVMValueRef data_array, LLVMValueRef level)
126{
127   LLVMValueRef indexes[2], data_ptr;
128   indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
129   indexes[1] = level;
130   data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, "");
131   data_ptr = LLVMBuildLoad(bld->builder, data_ptr, "");
132   return data_ptr;
133}
134
135
136static LLVMValueRef
137lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
138                                LLVMValueRef data_array, int level)
139{
140   LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
141   return lp_build_get_mipmap_level(bld, data_array, lvl);
142}
143
144
145/**
146 * Dereference stride_array[mipmap_level] array to get a stride.
147 * Return stride as a vector.
148 */
149static LLVMValueRef
150lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
151                              LLVMValueRef stride_array, LLVMValueRef level)
152{
153   LLVMValueRef indexes[2], stride;
154   indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
155   indexes[1] = level;
156   stride = LLVMBuildGEP(bld->builder, stride_array, indexes, 2, "");
157   stride = LLVMBuildLoad(bld->builder, stride, "");
158   stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride);
159   return stride;
160}
161
162
163/** Dereference stride_array[0] array to get a stride (as vector). */
164static LLVMValueRef
165lp_build_get_const_level_stride_vec(struct lp_build_sample_context *bld,
166                                    LLVMValueRef stride_array, int level)
167{
168   LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
169   return lp_build_get_level_stride_vec(bld, stride_array, lvl);
170}
171
172
173static int
174texture_dims(enum pipe_texture_target tex)
175{
176   switch (tex) {
177   case PIPE_TEXTURE_1D:
178      return 1;
179   case PIPE_TEXTURE_2D:
180   case PIPE_TEXTURE_CUBE:
181      return 2;
182   case PIPE_TEXTURE_3D:
183      return 3;
184   default:
185      assert(0 && "bad texture target in texture_dims()");
186      return 2;
187   }
188}
189
190
191static void
192apply_sampler_swizzle(struct lp_build_sample_context *bld,
193                      LLVMValueRef *texel)
194{
195   unsigned char swizzles[4];
196
197   swizzles[0] = bld->static_state->swizzle_r;
198   swizzles[1] = bld->static_state->swizzle_g;
199   swizzles[2] = bld->static_state->swizzle_b;
200   swizzles[3] = bld->static_state->swizzle_a;
201
202   lp_build_swizzle_soa_inplace(&bld->texel_bld, texel, swizzles);
203}
204
205
206
207/**
208 * Generate code to fetch a texel from a texture at int coords (x, y, z).
209 * The computation depends on whether the texture is 1D, 2D or 3D.
210 * The result, texel, will be:
211 *   texel[0] = red values
212 *   texel[1] = green values
213 *   texel[2] = blue values
214 *   texel[3] = alpha values
215 */
216static void
217lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
218                          LLVMValueRef width,
219                          LLVMValueRef height,
220                          LLVMValueRef depth,
221                          LLVMValueRef x,
222                          LLVMValueRef y,
223                          LLVMValueRef z,
224                          LLVMValueRef y_stride,
225                          LLVMValueRef z_stride,
226                          LLVMValueRef data_ptr,
227                          LLVMValueRef texel_out[4])
228{
229   const int dims = texture_dims(bld->static_state->target);
230   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
231   LLVMValueRef offset;
232   LLVMValueRef i, j;
233   LLVMValueRef use_border = NULL;
234
235   /* use_border = x < 0 || x >= width || y < 0 || y >= height */
236   if (wrap_mode_uses_border_color(bld->static_state->wrap_s)) {
237      LLVMValueRef b1, b2;
238      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
239      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
240      use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
241   }
242
243   if (dims >= 2 && wrap_mode_uses_border_color(bld->static_state->wrap_t)) {
244      LLVMValueRef b1, b2;
245      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
246      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
247      if (use_border) {
248         use_border = LLVMBuildOr(bld->builder, use_border, b1, "ub_or_b1");
249         use_border = LLVMBuildOr(bld->builder, use_border, b2, "ub_or_b2");
250      }
251      else {
252         use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
253      }
254   }
255
256   if (dims == 3 && wrap_mode_uses_border_color(bld->static_state->wrap_r)) {
257      LLVMValueRef b1, b2;
258      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
259      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
260      if (use_border) {
261         use_border = LLVMBuildOr(bld->builder, use_border, b1, "ub_or_b1");
262         use_border = LLVMBuildOr(bld->builder, use_border, b2, "ub_or_b2");
263      }
264      else {
265         use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
266      }
267   }
268
269   /* convert x,y,z coords to linear offset from start of texture, in bytes */
270   lp_build_sample_offset(&bld->uint_coord_bld,
271                          bld->format_desc,
272                          x, y, z, y_stride, z_stride,
273                          &offset, &i, &j);
274
275   if (use_border) {
276      /* If we can sample the border color, it means that texcoords may
277       * lie outside the bounds of the texture image.  We need to do
278       * something to prevent reading out of bounds and causing a segfault.
279       *
280       * Simply AND the texture coords with !use_border.  This will cause
281       * coords which are out of bounds to become zero.  Zero's guaranteed
282       * to be inside the texture image.
283       */
284      offset = lp_build_andc(&bld->uint_coord_bld, offset, use_border);
285   }
286
287   lp_build_fetch_rgba_soa(bld->builder,
288                           bld->format_desc,
289                           bld->texel_type,
290                           data_ptr, offset,
291                           i, j,
292                           texel_out);
293
294   apply_sampler_swizzle(bld, texel_out);
295
296   /*
297    * Note: if we find an app which frequently samples the texture border
298    * we might want to implement a true conditional here to avoid sampling
299    * the texture whenever possible (since that's quite a bit of code).
300    * Ex:
301    *   if (use_border) {
302    *      texel = border_color;
303    *   }
304    *   else {
305    *      texel = sample_texture(coord);
306    *   }
307    * As it is now, we always sample the texture, then selectively replace
308    * the texel color results with the border color.
309    */
310
311   if (use_border) {
312      /* select texel color or border color depending on use_border */
313      int chan;
314      for (chan = 0; chan < 4; chan++) {
315         LLVMValueRef border_chan =
316            lp_build_const_vec(bld->texel_type,
317                                  bld->static_state->border_color[chan]);
318         texel_out[chan] = lp_build_select(&bld->texel_bld, use_border,
319                                           border_chan, texel_out[chan]);
320      }
321   }
322}
323
324
325/**
326 * Fetch the texels as <4n x i8> in AoS form.
327 */
328static LLVMValueRef
329lp_build_sample_packed(struct lp_build_sample_context *bld,
330                       LLVMValueRef x,
331                       LLVMValueRef y,
332                       LLVMValueRef y_stride,
333                       LLVMValueRef data_array)
334{
335   LLVMValueRef offset, i, j;
336   LLVMValueRef data_ptr;
337   LLVMValueRef res;
338
339   /* convert x,y,z coords to linear offset from start of texture, in bytes */
340   lp_build_sample_offset(&bld->uint_coord_bld,
341                          bld->format_desc,
342                          x, y, NULL, y_stride, NULL,
343                          &offset, &i, &j);
344
345   /* get pointer to mipmap level 0 data */
346   data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
347
348   if (util_format_is_rgba8_variant(bld->format_desc)) {
349      /* Just fetch the data directly without swizzling */
350      assert(bld->format_desc->block.width == 1);
351      assert(bld->format_desc->block.height == 1);
352      assert(bld->format_desc->block.bits <= bld->texel_type.width);
353
354      res = lp_build_gather(bld->builder,
355                            bld->texel_type.length,
356                            bld->format_desc->block.bits,
357                            bld->texel_type.width,
358                            data_ptr, offset);
359   }
360   else {
361      struct lp_type type;
362
363      assert(bld->texel_type.width == 32);
364
365      memset(&type, 0, sizeof type);
366      type.width = 8;
367      type.length = bld->texel_type.length*4;
368      type.norm = TRUE;
369
370      res = lp_build_fetch_rgba_aos(bld->builder, bld->format_desc, type,
371                                    data_ptr, offset, i, j);
372   }
373
374   return res;
375}
376
377
378/**
379 * Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes.
380 */
381static LLVMValueRef
382lp_build_coord_mirror(struct lp_build_sample_context *bld,
383                      LLVMValueRef coord)
384{
385   struct lp_build_context *coord_bld = &bld->coord_bld;
386   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
387   LLVMValueRef fract, flr, isOdd;
388
389   /* fract = coord - floor(coord) */
390   fract = lp_build_sub(coord_bld, coord, lp_build_floor(coord_bld, coord));
391
392   /* flr = ifloor(coord); */
393   flr = lp_build_ifloor(coord_bld, coord);
394
395   /* isOdd = flr & 1 */
396   isOdd = LLVMBuildAnd(bld->builder, flr, int_coord_bld->one, "");
397
398   /* make coord positive or negative depending on isOdd */
399   coord = lp_build_set_sign(coord_bld, fract, isOdd);
400
401   /* convert isOdd to float */
402   isOdd = lp_build_int_to_float(coord_bld, isOdd);
403
404   /* add isOdd to coord */
405   coord = lp_build_add(coord_bld, coord, isOdd);
406
407   return coord;
408}
409
410
411/**
412 * We only support a few wrap modes in lp_build_sample_wrap_int() at this time.
413 * Return whether the given mode is supported by that function.
414 */
415static boolean
416is_simple_wrap_mode(unsigned mode)
417{
418   switch (mode) {
419   case PIPE_TEX_WRAP_REPEAT:
420   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
421      return TRUE;
422   default:
423      return FALSE;
424   }
425}
426
427
428/**
429 * Build LLVM code for texture wrap mode, for scaled integer texcoords.
430 * \param coord  the incoming texcoord (s,t,r or q) scaled to the texture size
431 * \param length  the texture size along one dimension
432 * \param is_pot  if TRUE, length is a power of two
433 * \param wrap_mode  one of PIPE_TEX_WRAP_x
434 */
435static LLVMValueRef
436lp_build_sample_wrap_int(struct lp_build_sample_context *bld,
437                         LLVMValueRef coord,
438                         LLVMValueRef length,
439                         boolean is_pot,
440                         unsigned wrap_mode)
441{
442   struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
443   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
444   LLVMValueRef length_minus_one;
445
446   length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
447
448   switch(wrap_mode) {
449   case PIPE_TEX_WRAP_REPEAT:
450      if(is_pot)
451         coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, "");
452      else
453         /* Signed remainder won't give the right results for negative
454          * dividends but unsigned remainder does.*/
455         coord = LLVMBuildURem(bld->builder, coord, length, "");
456      break;
457
458   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
459      coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
460      coord = lp_build_min(int_coord_bld, coord, length_minus_one);
461      break;
462
463   case PIPE_TEX_WRAP_CLAMP:
464   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
465   case PIPE_TEX_WRAP_MIRROR_REPEAT:
466   case PIPE_TEX_WRAP_MIRROR_CLAMP:
467   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
468   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
469   default:
470      assert(0);
471   }
472
473   return coord;
474}
475
476
/**
 * Build LLVM code for texture wrap mode for linear filtering.
 *
 * For every wrap mode the float texcoord is turned into two neighbouring
 * integer texel coords plus the fractional weight between them.
 *
 * \param coord  the incoming float texcoord
 * \param length  the texture size along one dimension, as int
 * \param is_pot  if TRUE, length is a power of two (only consulted for
 *                PIPE_TEX_WRAP_REPEAT)
 * \param wrap_mode  one of PIPE_TEX_WRAP_x
 * \param x0_out  returns first integer texcoord
 * \param x1_out  returns second integer texcoord
 * \param weight_out  returns linear interpolation weight
 *
 * An unknown wrap mode trips an assertion and returns NULL in all three
 * outputs.
 */
static void
lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
                            LLVMValueRef coord,
                            LLVMValueRef length,
                            boolean is_pot,
                            unsigned wrap_mode,
                            LLVMValueRef *x0_out,
                            LLVMValueRef *x1_out,
                            LLVMValueRef *weight_out)
{
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
   LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5);
   LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
   LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
   LLVMValueRef coord0, coord1, weight;

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      /* mul by size and subtract 0.5
       * NOTE(review): the scale is applied without checking
       * normalized_coords, unlike most other cases -- confirm intended.
       */
      coord = lp_build_mul(coord_bld, coord, length_f);
      coord = lp_build_sub(coord_bld, coord, half);
      /* convert to int */
      coord0 = lp_build_ifloor(coord_bld, coord);
      /* note: this add goes through the unsigned coord context, the
       * other cases use the signed one */
      coord1 = lp_build_add(uint_coord_bld, coord0, uint_coord_bld->one);
      /* compute lerp weight */
      weight = lp_build_fract(coord_bld, coord);
      /* repeat wrap */
      if (is_pot) {
         /* power of two size: wrapping is a bit mask with length-1 */
         coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
         coord1 = LLVMBuildAnd(bld->builder, coord1, length_minus_one, "");
      }
      else {
         /* Signed remainder won't give the right results for negative
          * dividends but unsigned remainder does.*/
         coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
         coord1 = LLVMBuildURem(bld->builder, coord1, length, "");
      }
      break;

   case PIPE_TEX_WRAP_CLAMP:
      if (bld->static_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }

      /* clamp to [0, length] */
      coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f);

      /* shift by half a texel; coord0/coord1 are not clamped afterwards,
       * so they can be -1 or length */
      coord = lp_build_sub(coord_bld, coord, half);

      weight = lp_build_fract(coord_bld, coord);
      coord0 = lp_build_ifloor(coord_bld, coord);
      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      if (bld->static_state->normalized_coords) {
         /* clamp to [0,1] */
         coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, coord_bld->one);
         /* mul by tex size and subtract 0.5 */
         coord = lp_build_mul(coord_bld, coord, length_f);
         coord = lp_build_sub(coord_bld, coord, half);
      }
      else {
         LLVMValueRef min, max;
         /* clamp to [0.5, length - 0.5]
          * NOTE(review): unlike the normalized path above, no 0.5 is
          * subtracted on this path before taking floor/fract -- confirm
          * this is intended.
          */
         min = half;
         max = lp_build_sub(coord_bld, length_f, min);
         coord = lp_build_clamp(coord_bld, coord, min, max);
      }
      /* compute lerp weight */
      weight = lp_build_fract(coord_bld, coord);
      /* coord0 = floor(coord); */
      coord0 = lp_build_ifloor(coord_bld, coord);
      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      /* coord0 = max(coord0, 0) */
      coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
      /* coord1 = min(coord1, length-1) */
      coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
      {
         LLVMValueRef min, max;
         if (bld->static_state->normalized_coords) {
            /* scale coord to length */
            coord = lp_build_mul(coord_bld, coord, length_f);
         }
         /* clamp to [-0.5, length + 0.5]
          * (max = length - min = length + 0.5 since min is negative) */
         min = lp_build_const_vec(coord_bld->type, -0.5F);
         max = lp_build_sub(coord_bld, length_f, min);
         coord = lp_build_clamp(coord_bld, coord, min, max);
         coord = lp_build_sub(coord_bld, coord, half);
         /* compute lerp weight */
         weight = lp_build_fract(coord_bld, coord);
         /* convert to int */
         coord0 = lp_build_ifloor(coord_bld, coord);
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      }
      break;

   case PIPE_TEX_WRAP_MIRROR_REPEAT:
      /* compute mirror function */
      coord = lp_build_coord_mirror(bld, coord);

      /* scale coord to length
       * NOTE(review): applied unconditionally; normalized_coords is
       * neither checked nor asserted here -- confirm.
       */
      coord = lp_build_mul(coord_bld, coord, length_f);
      coord = lp_build_sub(coord_bld, coord, half);

      /* compute lerp weight */
      weight = lp_build_fract(coord_bld, coord);

      /* convert to int coords */
      coord0 = lp_build_ifloor(coord_bld, coord);
      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);

      /* coord0 = max(coord0, 0) */
      coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
      /* coord1 = min(coord1, length-1) */
      coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP:
      /* mirror around zero */
      coord = lp_build_abs(coord_bld, coord);

      if (bld->static_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }

      /* clamp to [0, length]; only the upper bound is applied since
       * coord is the result of lp_build_abs() */
      coord = lp_build_min(coord_bld, coord, length_f);

      coord = lp_build_sub(coord_bld, coord, half);

      weight = lp_build_fract(coord_bld, coord);
      coord0 = lp_build_ifloor(coord_bld, coord);
      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
      {
         LLVMValueRef min, max;

         /* mirror around zero */
         coord = lp_build_abs(coord_bld, coord);

         if (bld->static_state->normalized_coords) {
            /* scale coord to length */
            coord = lp_build_mul(coord_bld, coord, length_f);
         }

         /* clamp to [0.5, length - 0.5] */
         min = half;
         max = lp_build_sub(coord_bld, length_f, min);
         coord = lp_build_clamp(coord_bld, coord, min, max);

         /* after this shift the range is [0, length - 1] */
         coord = lp_build_sub(coord_bld, coord, half);

         weight = lp_build_fract(coord_bld, coord);
         coord0 = lp_build_ifloor(coord_bld, coord);
         /* NOTE(review): coord1 is not clamped here, so it can equal
          * length when coord is at its maximum -- confirm callers cope. */
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      }
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
      {
         LLVMValueRef min, max;

         /* mirror around zero */
         coord = lp_build_abs(coord_bld, coord);

         if (bld->static_state->normalized_coords) {
            /* scale coord to length */
            coord = lp_build_mul(coord_bld, coord, length_f);
         }

         /* clamp to [-0.5, length + 0.5] */
         min = lp_build_negate(coord_bld, half);
         max = lp_build_sub(coord_bld, length_f, min);
         coord = lp_build_clamp(coord_bld, coord, min, max);

         coord = lp_build_sub(coord_bld, coord, half);

         weight = lp_build_fract(coord_bld, coord);
         coord0 = lp_build_ifloor(coord_bld, coord);
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      }
      break;

   default:
      /* unknown wrap mode: hand back NULLs in non-assert builds */
      assert(0);
      coord0 = NULL;
      coord1 = NULL;
      weight = NULL;
   }

   *x0_out = coord0;
   *x1_out = coord1;
   *weight_out = weight;
}
684
685
686/**
687 * Build LLVM code for texture wrap mode for nearest filtering.
688 * \param coord  the incoming texcoord (nominally in [0,1])
689 * \param length  the texture size along one dimension, as int
690 * \param is_pot  if TRUE, length is a power of two
691 * \param wrap_mode  one of PIPE_TEX_WRAP_x
692 */
693static LLVMValueRef
694lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
695                             LLVMValueRef coord,
696                             LLVMValueRef length,
697                             boolean is_pot,
698                             unsigned wrap_mode)
699{
700   struct lp_build_context *coord_bld = &bld->coord_bld;
701   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
702   struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
703   LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
704   LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
705   LLVMValueRef icoord;
706
707   switch(wrap_mode) {
708   case PIPE_TEX_WRAP_REPEAT:
709      coord = lp_build_mul(coord_bld, coord, length_f);
710      icoord = lp_build_ifloor(coord_bld, coord);
711      if (is_pot)
712         icoord = LLVMBuildAnd(bld->builder, icoord, length_minus_one, "");
713      else
714         /* Signed remainder won't give the right results for negative
715          * dividends but unsigned remainder does.*/
716         icoord = LLVMBuildURem(bld->builder, icoord, length, "");
717      break;
718
719   case PIPE_TEX_WRAP_CLAMP:
720   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
721      if (bld->static_state->normalized_coords) {
722         /* scale coord to length */
723         coord = lp_build_mul(coord_bld, coord, length_f);
724      }
725
726      /* floor */
727      icoord = lp_build_ifloor(coord_bld, coord);
728
729      /* clamp to [0, length - 1]. */
730      icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
731                              length_minus_one);
732      break;
733
734   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
735      /* Note: this is the same as CLAMP_TO_EDGE, except min = -min */
736      {
737         LLVMValueRef min, max;
738
739         if (bld->static_state->normalized_coords) {
740            /* scale coord to length */
741            coord = lp_build_mul(coord_bld, coord, length_f);
742         }
743
744         icoord = lp_build_ifloor(coord_bld, coord);
745
746         /* clamp to [-1, length] */
747         min = lp_build_negate(int_coord_bld, int_coord_bld->one);
748         max = length;
749         icoord = lp_build_clamp(int_coord_bld, icoord, min, max);
750      }
751      break;
752
753   case PIPE_TEX_WRAP_MIRROR_REPEAT:
754      /* compute mirror function */
755      coord = lp_build_coord_mirror(bld, coord);
756
757      /* scale coord to length */
758      assert(bld->static_state->normalized_coords);
759      coord = lp_build_mul(coord_bld, coord, length_f);
760
761      icoord = lp_build_ifloor(coord_bld, coord);
762
763      /* clamp to [0, length - 1] */
764      icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
765      break;
766
767   case PIPE_TEX_WRAP_MIRROR_CLAMP:
768   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
769      coord = lp_build_abs(coord_bld, coord);
770
771      if (bld->static_state->normalized_coords) {
772         /* scale coord to length */
773         coord = lp_build_mul(coord_bld, coord, length_f);
774      }
775
776      icoord = lp_build_ifloor(coord_bld, coord);
777
778      /* clamp to [0, length - 1] */
779      icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
780      break;
781
782   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
783      coord = lp_build_abs(coord_bld, coord);
784
785      if (bld->static_state->normalized_coords) {
786         /* scale coord to length */
787         coord = lp_build_mul(coord_bld, coord, length_f);
788      }
789
790      icoord = lp_build_ifloor(coord_bld, coord);
791
792      /* clamp to [0, length] */
793      icoord = lp_build_min(int_coord_bld, icoord, length);
794      break;
795
796   default:
797      assert(0);
798      icoord = NULL;
799   }
800
801   return icoord;
802}
803
804
805/**
806 * Codegen equivalent for u_minify().
807 * Return max(1, base_size >> level);
808 */
809static LLVMValueRef
810lp_build_minify(struct lp_build_sample_context *bld,
811                LLVMValueRef base_size,
812                LLVMValueRef level)
813{
814   LLVMValueRef size = LLVMBuildLShr(bld->builder, base_size, level, "minify");
815   size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one);
816   return size;
817}
818
819
820/**
821 * Generate code to compute texture level of detail (lambda).
822 * \param ddx  partial derivatives of (s, t, r, q) with respect to X
823 * \param ddy  partial derivatives of (s, t, r, q) with respect to Y
824 * \param lod_bias  optional float vector with the shader lod bias
825 * \param explicit_lod  optional float vector with the explicit lod
826 * \param width  scalar int texture width
827 * \param height  scalar int texture height
828 * \param depth  scalar int texture depth
829 *
830 * XXX: The resulting lod is scalar, so ignore all but the first element of
831 * derivatives, lod_bias, etc that are passed by the shader.
832 */
833static LLVMValueRef
834lp_build_lod_selector(struct lp_build_sample_context *bld,
835                      const LLVMValueRef ddx[4],
836                      const LLVMValueRef ddy[4],
837                      LLVMValueRef lod_bias, /* optional */
838                      LLVMValueRef explicit_lod, /* optional */
839                      LLVMValueRef width,
840                      LLVMValueRef height,
841                      LLVMValueRef depth)
842
843{
844   if (bld->static_state->min_lod == bld->static_state->max_lod) {
845      /* User is forcing sampling from a particular mipmap level.
846       * This is hit during mipmap generation.
847       */
848      return LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod);
849   }
850   else {
851      struct lp_build_context *float_bld = &bld->float_bld;
852      LLVMValueRef sampler_lod_bias = LLVMConstReal(LLVMFloatType(),
853                                                    bld->static_state->lod_bias);
854      LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(),
855                                           bld->static_state->min_lod);
856      LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(),
857                                           bld->static_state->max_lod);
858      LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
859      LLVMValueRef lod;
860
861      if (explicit_lod) {
862         lod = LLVMBuildExtractElement(bld->builder, explicit_lod,
863                                       index0, "");
864      }
865      else {
866         const int dims = texture_dims(bld->static_state->target);
867         LLVMValueRef dsdx, dsdy;
868         LLVMValueRef dtdx = NULL, dtdy = NULL, drdx = NULL, drdy = NULL;
869         LLVMValueRef rho;
870
871         dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx");
872         dsdx = lp_build_abs(float_bld, dsdx);
873         dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy");
874         dsdy = lp_build_abs(float_bld, dsdy);
875         if (dims > 1) {
876            dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx");
877            dtdx = lp_build_abs(float_bld, dtdx);
878            dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy");
879            dtdy = lp_build_abs(float_bld, dtdy);
880            if (dims > 2) {
881               drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx");
882               drdx = lp_build_abs(float_bld, drdx);
883               drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy");
884               drdy = lp_build_abs(float_bld, drdy);
885            }
886         }
887
888         /* Compute rho = max of all partial derivatives scaled by texture size.
889          * XXX this could be vectorized somewhat
890          */
891         rho = LLVMBuildFMul(bld->builder,
892                            lp_build_max(float_bld, dsdx, dsdy),
893                            lp_build_int_to_float(float_bld, width), "");
894         if (dims > 1) {
895            LLVMValueRef max;
896            max = LLVMBuildFMul(bld->builder,
897                               lp_build_max(float_bld, dtdx, dtdy),
898                               lp_build_int_to_float(float_bld, height), "");
899            rho = lp_build_max(float_bld, rho, max);
900            if (dims > 2) {
901               max = LLVMBuildFMul(bld->builder,
902                                  lp_build_max(float_bld, drdx, drdy),
903                                  lp_build_int_to_float(float_bld, depth), "");
904               rho = lp_build_max(float_bld, rho, max);
905            }
906         }
907
908         /* compute lod = log2(rho) */
909         lod = lp_build_log2(float_bld, rho);
910
911         /* add shader lod bias */
912         if (lod_bias) {
913            lod_bias = LLVMBuildExtractElement(bld->builder, lod_bias,
914                                               index0, "");
915            lod = LLVMBuildFAdd(bld->builder, lod, lod_bias, "shader_lod_bias");
916         }
917      }
918
919      /* add sampler lod bias */
920      lod = LLVMBuildFAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias");
921
922      /* clamp lod */
923      lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
924
925      return lod;
926   }
927}
928
929
930/**
931 * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer
932 * mipmap level index.
933 * Note: this is all scalar code.
934 * \param lod  scalar float texture level of detail
935 * \param level_out  returns integer
936 */
937static void
938lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
939                           unsigned unit,
940                           LLVMValueRef lod,
941                           LLVMValueRef *level_out)
942{
943   struct lp_build_context *float_bld = &bld->float_bld;
944   struct lp_build_context *int_bld = &bld->int_bld;
945   LLVMValueRef last_level, level;
946
947   LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0);
948
949   last_level = bld->dynamic_state->last_level(bld->dynamic_state,
950                                               bld->builder, unit);
951
952   /* convert float lod to integer */
953   level = lp_build_iround(float_bld, lod);
954
955   /* clamp level to legal range of levels */
956   *level_out = lp_build_clamp(int_bld, level, zero, last_level);
957}
958
959
960/**
961 * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to integer to
962 * two (adjacent) mipmap level indexes.  Later, we'll sample from those
963 * two mipmap levels and interpolate between them.
964 */
965static void
966lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
967                           unsigned unit,
968                           LLVMValueRef lod,
969                           LLVMValueRef *level0_out,
970                           LLVMValueRef *level1_out,
971                           LLVMValueRef *weight_out)
972{
973   struct lp_build_context *float_bld = &bld->float_bld;
974   struct lp_build_context *int_bld = &bld->int_bld;
975   LLVMValueRef last_level, level;
976
977   last_level = bld->dynamic_state->last_level(bld->dynamic_state,
978                                               bld->builder, unit);
979
980   /* convert float lod to integer */
981   level = lp_build_ifloor(float_bld, lod);
982
983   /* compute level 0 and clamp to legal range of levels */
984   *level0_out = lp_build_clamp(int_bld, level,
985                                int_bld->zero,
986                                last_level);
987   /* compute level 1 and clamp to legal range of levels */
988   level = lp_build_add(int_bld, level, int_bld->one);
989   *level1_out = lp_build_clamp(int_bld, level,
990                                int_bld->zero,
991                                last_level);
992
993   *weight_out = lp_build_fract(float_bld, lod);
994}
995
996
997/**
998 * Generate code to sample a mipmap level with nearest filtering.
999 * If sampling a cube texture, r = cube face in [0,5].
1000 */
1001static void
1002lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
1003                              LLVMValueRef width_vec,
1004                              LLVMValueRef height_vec,
1005                              LLVMValueRef depth_vec,
1006                              LLVMValueRef row_stride_vec,
1007                              LLVMValueRef img_stride_vec,
1008                              LLVMValueRef data_ptr,
1009                              LLVMValueRef s,
1010                              LLVMValueRef t,
1011                              LLVMValueRef r,
1012                              LLVMValueRef colors_out[4])
1013{
1014   const int dims = texture_dims(bld->static_state->target);
1015   LLVMValueRef x, y, z;
1016
1017   /*
1018    * Compute integer texcoords.
1019    */
1020   x = lp_build_sample_wrap_nearest(bld, s, width_vec,
1021                                    bld->static_state->pot_width,
1022                                    bld->static_state->wrap_s);
1023   lp_build_name(x, "tex.x.wrapped");
1024
1025   if (dims >= 2) {
1026      y = lp_build_sample_wrap_nearest(bld, t, height_vec,
1027                                       bld->static_state->pot_height,
1028                                       bld->static_state->wrap_t);
1029      lp_build_name(y, "tex.y.wrapped");
1030
1031      if (dims == 3) {
1032         z = lp_build_sample_wrap_nearest(bld, r, depth_vec,
1033                                          bld->static_state->pot_height,
1034                                          bld->static_state->wrap_r);
1035         lp_build_name(z, "tex.z.wrapped");
1036      }
1037      else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
1038         z = r;
1039      }
1040      else {
1041         z = NULL;
1042      }
1043   }
1044   else {
1045      y = z = NULL;
1046   }
1047
1048   /*
1049    * Get texture colors.
1050    */
1051   lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1052                             x, y, z,
1053                             row_stride_vec, img_stride_vec,
1054                             data_ptr, colors_out);
1055}
1056
1057
1058/**
1059 * Generate code to sample a mipmap level with linear filtering.
1060 * If sampling a cube texture, r = cube face in [0,5].
1061 */
1062static void
1063lp_build_sample_image_linear(struct lp_build_sample_context *bld,
1064                             LLVMValueRef width_vec,
1065                             LLVMValueRef height_vec,
1066                             LLVMValueRef depth_vec,
1067                             LLVMValueRef row_stride_vec,
1068                             LLVMValueRef img_stride_vec,
1069                             LLVMValueRef data_ptr,
1070                             LLVMValueRef s,
1071                             LLVMValueRef t,
1072                             LLVMValueRef r,
1073                             LLVMValueRef colors_out[4])
1074{
1075   const int dims = texture_dims(bld->static_state->target);
1076   LLVMValueRef x0, y0, z0, x1, y1, z1;
1077   LLVMValueRef s_fpart, t_fpart, r_fpart;
1078   LLVMValueRef neighbors[2][2][4];
1079   int chan;
1080
1081   /*
1082    * Compute integer texcoords.
1083    */
1084   lp_build_sample_wrap_linear(bld, s, width_vec,
1085                               bld->static_state->pot_width,
1086                               bld->static_state->wrap_s,
1087                               &x0, &x1, &s_fpart);
1088   lp_build_name(x0, "tex.x0.wrapped");
1089   lp_build_name(x1, "tex.x1.wrapped");
1090
1091   if (dims >= 2) {
1092      lp_build_sample_wrap_linear(bld, t, height_vec,
1093                                  bld->static_state->pot_height,
1094                                  bld->static_state->wrap_t,
1095                                  &y0, &y1, &t_fpart);
1096      lp_build_name(y0, "tex.y0.wrapped");
1097      lp_build_name(y1, "tex.y1.wrapped");
1098
1099      if (dims == 3) {
1100         lp_build_sample_wrap_linear(bld, r, depth_vec,
1101                                     bld->static_state->pot_depth,
1102                                     bld->static_state->wrap_r,
1103                                     &z0, &z1, &r_fpart);
1104         lp_build_name(z0, "tex.z0.wrapped");
1105         lp_build_name(z1, "tex.z1.wrapped");
1106      }
1107      else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
1108         z0 = z1 = r;  /* cube face */
1109         r_fpart = NULL;
1110      }
1111      else {
1112         z0 = z1 = NULL;
1113         r_fpart = NULL;
1114      }
1115   }
1116   else {
1117      y0 = y1 = t_fpart = NULL;
1118      z0 = z1 = r_fpart = NULL;
1119   }
1120
1121   /*
1122    * Get texture colors.
1123    */
1124   /* get x0/x1 texels */
1125   lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1126                             x0, y0, z0,
1127                             row_stride_vec, img_stride_vec,
1128                             data_ptr, neighbors[0][0]);
1129   lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1130                             x1, y0, z0,
1131                             row_stride_vec, img_stride_vec,
1132                             data_ptr, neighbors[0][1]);
1133
1134   if (dims == 1) {
1135      /* Interpolate two samples from 1D image to produce one color */
1136      for (chan = 0; chan < 4; chan++) {
1137         colors_out[chan] = lp_build_lerp(&bld->texel_bld, s_fpart,
1138                                          neighbors[0][0][chan],
1139                                          neighbors[0][1][chan]);
1140      }
1141   }
1142   else {
1143      /* 2D/3D texture */
1144      LLVMValueRef colors0[4];
1145
1146      /* get x0/x1 texels at y1 */
1147      lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1148                                x0, y1, z0,
1149                                row_stride_vec, img_stride_vec,
1150                                data_ptr, neighbors[1][0]);
1151      lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1152                                x1, y1, z0,
1153                                row_stride_vec, img_stride_vec,
1154                                data_ptr, neighbors[1][1]);
1155
1156      /* Bilinear interpolate the four samples from the 2D image / 3D slice */
1157      for (chan = 0; chan < 4; chan++) {
1158         colors0[chan] = lp_build_lerp_2d(&bld->texel_bld,
1159                                          s_fpart, t_fpart,
1160                                          neighbors[0][0][chan],
1161                                          neighbors[0][1][chan],
1162                                          neighbors[1][0][chan],
1163                                          neighbors[1][1][chan]);
1164      }
1165
1166      if (dims == 3) {
1167         LLVMValueRef neighbors1[2][2][4];
1168         LLVMValueRef colors1[4];
1169
1170         /* get x0/x1/y0/y1 texels at z1 */
1171         lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1172                                   x0, y0, z1,
1173                                   row_stride_vec, img_stride_vec,
1174                                   data_ptr, neighbors1[0][0]);
1175         lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1176                                   x1, y0, z1,
1177                                   row_stride_vec, img_stride_vec,
1178                                   data_ptr, neighbors1[0][1]);
1179         lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1180                                   x0, y1, z1,
1181                                   row_stride_vec, img_stride_vec,
1182                                   data_ptr, neighbors1[1][0]);
1183         lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1184                                   x1, y1, z1,
1185                                   row_stride_vec, img_stride_vec,
1186                                   data_ptr, neighbors1[1][1]);
1187
1188         /* Bilinear interpolate the four samples from the second Z slice */
1189         for (chan = 0; chan < 4; chan++) {
1190            colors1[chan] = lp_build_lerp_2d(&bld->texel_bld,
1191                                             s_fpart, t_fpart,
1192                                             neighbors1[0][0][chan],
1193                                             neighbors1[0][1][chan],
1194                                             neighbors1[1][0][chan],
1195                                             neighbors1[1][1][chan]);
1196         }
1197
1198         /* Linearly interpolate the two samples from the two 3D slices */
1199         for (chan = 0; chan < 4; chan++) {
1200            colors_out[chan] = lp_build_lerp(&bld->texel_bld,
1201                                             r_fpart,
1202                                             colors0[chan], colors1[chan]);
1203         }
1204      }
1205      else {
1206         /* 2D tex */
1207         for (chan = 0; chan < 4; chan++) {
1208            colors_out[chan] = colors0[chan];
1209         }
1210      }
1211   }
1212}
1213
1214
1215/** Helper used by lp_build_cube_lookup() */
1216static LLVMValueRef
1217lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord)
1218{
1219   /* ima = -0.5 / abs(coord); */
1220   LLVMValueRef negHalf = lp_build_const_vec(coord_bld->type, -0.5);
1221   LLVMValueRef absCoord = lp_build_abs(coord_bld, coord);
1222   LLVMValueRef ima = lp_build_div(coord_bld, negHalf, absCoord);
1223   return ima;
1224}
1225
1226
1227/**
1228 * Helper used by lp_build_cube_lookup()
1229 * \param sign  scalar +1 or -1
1230 * \param coord  float vector
1231 * \param ima  float vector
1232 */
1233static LLVMValueRef
1234lp_build_cube_coord(struct lp_build_context *coord_bld,
1235                    LLVMValueRef sign, int negate_coord,
1236                    LLVMValueRef coord, LLVMValueRef ima)
1237{
1238   /* return negate(coord) * ima * sign + 0.5; */
1239   LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5);
1240   LLVMValueRef res;
1241
1242   assert(negate_coord == +1 || negate_coord == -1);
1243
1244   if (negate_coord == -1) {
1245      coord = lp_build_negate(coord_bld, coord);
1246   }
1247
1248   res = lp_build_mul(coord_bld, coord, ima);
1249   if (sign) {
1250      sign = lp_build_broadcast_scalar(coord_bld, sign);
1251      res = lp_build_mul(coord_bld, res, sign);
1252   }
1253   res = lp_build_add(coord_bld, res, half);
1254
1255   return res;
1256}
1257
1258
1259/** Helper used by lp_build_cube_lookup()
1260 * Return (major_coord >= 0) ? pos_face : neg_face;
1261 */
1262static LLVMValueRef
1263lp_build_cube_face(struct lp_build_sample_context *bld,
1264                   LLVMValueRef major_coord,
1265                   unsigned pos_face, unsigned neg_face)
1266{
1267   LLVMValueRef cmp = LLVMBuildFCmp(bld->builder, LLVMRealUGE,
1268                                    major_coord,
1269                                    bld->float_bld.zero, "");
1270   LLVMValueRef pos = LLVMConstInt(LLVMInt32Type(), pos_face, 0);
1271   LLVMValueRef neg = LLVMConstInt(LLVMInt32Type(), neg_face, 0);
1272   LLVMValueRef res = LLVMBuildSelect(bld->builder, cmp, pos, neg, "");
1273   return res;
1274}
1275
1276
1277
1278/**
1279 * Generate code to do cube face selection and compute per-face texcoords.
1280 */
1281static void
1282lp_build_cube_lookup(struct lp_build_sample_context *bld,
1283                     LLVMValueRef s,
1284                     LLVMValueRef t,
1285                     LLVMValueRef r,
1286                     LLVMValueRef *face,
1287                     LLVMValueRef *face_s,
1288                     LLVMValueRef *face_t)
1289{
1290   struct lp_build_context *float_bld = &bld->float_bld;
1291   struct lp_build_context *coord_bld = &bld->coord_bld;
1292   LLVMValueRef rx, ry, rz;
1293   LLVMValueRef arx, ary, arz;
1294   LLVMValueRef c25 = LLVMConstReal(LLVMFloatType(), 0.25);
1295   LLVMValueRef arx_ge_ary, arx_ge_arz;
1296   LLVMValueRef ary_ge_arx, ary_ge_arz;
1297   LLVMValueRef arx_ge_ary_arz, ary_ge_arx_arz;
1298   LLVMValueRef rx_pos, ry_pos, rz_pos;
1299
1300   assert(bld->coord_bld.type.length == 4);
1301
1302   /*
1303    * Use the average of the four pixel's texcoords to choose the face.
1304    */
1305   rx = lp_build_mul(float_bld, c25,
1306                     lp_build_sum_vector(&bld->coord_bld, s));
1307   ry = lp_build_mul(float_bld, c25,
1308                     lp_build_sum_vector(&bld->coord_bld, t));
1309   rz = lp_build_mul(float_bld, c25,
1310                     lp_build_sum_vector(&bld->coord_bld, r));
1311
1312   arx = lp_build_abs(float_bld, rx);
1313   ary = lp_build_abs(float_bld, ry);
1314   arz = lp_build_abs(float_bld, rz);
1315
1316   /*
1317    * Compare sign/magnitude of rx,ry,rz to determine face
1318    */
1319   arx_ge_ary = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, ary, "");
1320   arx_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, arz, "");
1321   ary_ge_arx = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arx, "");
1322   ary_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arz, "");
1323
1324   arx_ge_ary_arz = LLVMBuildAnd(bld->builder, arx_ge_ary, arx_ge_arz, "");
1325   ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, "");
1326
1327   rx_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rx, float_bld->zero, "");
1328   ry_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ry, float_bld->zero, "");
1329   rz_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rz, float_bld->zero, "");
1330
1331   {
1332      struct lp_build_flow_context *flow_ctx;
1333      struct lp_build_if_state if_ctx;
1334
1335      flow_ctx = lp_build_flow_create(bld->builder);
1336      lp_build_flow_scope_begin(flow_ctx);
1337
1338      *face_s = bld->coord_bld.undef;
1339      *face_t = bld->coord_bld.undef;
1340      *face = bld->int_bld.undef;
1341
1342      lp_build_name(*face_s, "face_s");
1343      lp_build_name(*face_t, "face_t");
1344      lp_build_name(*face, "face");
1345
1346      lp_build_flow_scope_declare(flow_ctx, face_s);
1347      lp_build_flow_scope_declare(flow_ctx, face_t);
1348      lp_build_flow_scope_declare(flow_ctx, face);
1349
1350      lp_build_if(&if_ctx, flow_ctx, bld->builder, arx_ge_ary_arz);
1351      {
1352         /* +/- X face */
1353         LLVMValueRef sign = lp_build_sgn(float_bld, rx);
1354         LLVMValueRef ima = lp_build_cube_ima(coord_bld, s);
1355         *face_s = lp_build_cube_coord(coord_bld, sign, +1, r, ima);
1356         *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
1357         *face = lp_build_cube_face(bld, rx,
1358                                    PIPE_TEX_FACE_POS_X,
1359                                    PIPE_TEX_FACE_NEG_X);
1360      }
1361      lp_build_else(&if_ctx);
1362      {
1363         struct lp_build_flow_context *flow_ctx2;
1364         struct lp_build_if_state if_ctx2;
1365
1366         LLVMValueRef face_s2 = bld->coord_bld.undef;
1367         LLVMValueRef face_t2 = bld->coord_bld.undef;
1368         LLVMValueRef face2 = bld->int_bld.undef;
1369
1370         flow_ctx2 = lp_build_flow_create(bld->builder);
1371         lp_build_flow_scope_begin(flow_ctx2);
1372         lp_build_flow_scope_declare(flow_ctx2, &face_s2);
1373         lp_build_flow_scope_declare(flow_ctx2, &face_t2);
1374         lp_build_flow_scope_declare(flow_ctx2, &face2);
1375
1376         ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, "");
1377
1378         lp_build_if(&if_ctx2, flow_ctx2, bld->builder, ary_ge_arx_arz);
1379         {
1380            /* +/- Y face */
1381            LLVMValueRef sign = lp_build_sgn(float_bld, ry);
1382            LLVMValueRef ima = lp_build_cube_ima(coord_bld, t);
1383            face_s2 = lp_build_cube_coord(coord_bld, NULL, -1, s, ima);
1384            face_t2 = lp_build_cube_coord(coord_bld, sign, -1, r, ima);
1385            face2 = lp_build_cube_face(bld, ry,
1386                                       PIPE_TEX_FACE_POS_Y,
1387                                       PIPE_TEX_FACE_NEG_Y);
1388         }
1389         lp_build_else(&if_ctx2);
1390         {
1391            /* +/- Z face */
1392            LLVMValueRef sign = lp_build_sgn(float_bld, rz);
1393            LLVMValueRef ima = lp_build_cube_ima(coord_bld, r);
1394            face_s2 = lp_build_cube_coord(coord_bld, sign, -1, s, ima);
1395            face_t2 = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
1396            face2 = lp_build_cube_face(bld, rz,
1397                                       PIPE_TEX_FACE_POS_Z,
1398                                       PIPE_TEX_FACE_NEG_Z);
1399         }
1400         lp_build_endif(&if_ctx2);
1401         lp_build_flow_scope_end(flow_ctx2);
1402         lp_build_flow_destroy(flow_ctx2);
1403         *face_s = face_s2;
1404         *face_t = face_t2;
1405         *face = face2;
1406      }
1407
1408      lp_build_endif(&if_ctx);
1409      lp_build_flow_scope_end(flow_ctx);
1410      lp_build_flow_destroy(flow_ctx);
1411   }
1412}
1413
1414
1415
1416/**
1417 * Sample the texture/mipmap using given image filter and mip filter.
1418 * data0_ptr and data1_ptr point to the two mipmap levels to sample
1419 * from.  width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
1420 * If we're using nearest miplevel sampling the '1' values will be null/unused.
1421 */
1422static void
1423lp_build_sample_mipmap(struct lp_build_sample_context *bld,
1424                       unsigned img_filter,
1425                       unsigned mip_filter,
1426                       LLVMValueRef s,
1427                       LLVMValueRef t,
1428                       LLVMValueRef r,
1429                       LLVMValueRef lod_fpart,
1430                       LLVMValueRef width0_vec,
1431                       LLVMValueRef width1_vec,
1432                       LLVMValueRef height0_vec,
1433                       LLVMValueRef height1_vec,
1434                       LLVMValueRef depth0_vec,
1435                       LLVMValueRef depth1_vec,
1436                       LLVMValueRef row_stride0_vec,
1437                       LLVMValueRef row_stride1_vec,
1438                       LLVMValueRef img_stride0_vec,
1439                       LLVMValueRef img_stride1_vec,
1440                       LLVMValueRef data_ptr0,
1441                       LLVMValueRef data_ptr1,
1442                       LLVMValueRef *colors_out)
1443{
1444   LLVMValueRef colors0[4], colors1[4];
1445   int chan;
1446
1447   if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1448      /* sample the first mipmap level */
1449      lp_build_sample_image_nearest(bld,
1450                                    width0_vec, height0_vec, depth0_vec,
1451                                    row_stride0_vec, img_stride0_vec,
1452                                    data_ptr0, s, t, r, colors0);
1453
1454      if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1455         /* sample the second mipmap level */
1456         lp_build_sample_image_nearest(bld,
1457                                       width1_vec, height1_vec, depth1_vec,
1458                                       row_stride1_vec, img_stride1_vec,
1459                                       data_ptr1, s, t, r, colors1);
1460      }
1461   }
1462   else {
1463      assert(img_filter == PIPE_TEX_FILTER_LINEAR);
1464
1465      /* sample the first mipmap level */
1466      lp_build_sample_image_linear(bld,
1467                                   width0_vec, height0_vec, depth0_vec,
1468                                   row_stride0_vec, img_stride0_vec,
1469                                   data_ptr0, s, t, r, colors0);
1470
1471      if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1472         /* sample the second mipmap level */
1473         lp_build_sample_image_linear(bld,
1474                                      width1_vec, height1_vec, depth1_vec,
1475                                      row_stride1_vec, img_stride1_vec,
1476                                      data_ptr1, s, t, r, colors1);
1477      }
1478   }
1479
1480   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1481      /* interpolate samples from the two mipmap levels */
1482      for (chan = 0; chan < 4; chan++) {
1483         colors_out[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
1484                                          colors0[chan], colors1[chan]);
1485      }
1486   }
1487   else {
1488      /* use first/only level's colors */
1489      for (chan = 0; chan < 4; chan++) {
1490         colors_out[chan] = colors0[chan];
1491      }
1492   }
1493}
1494
1495
1496
1497/**
1498 * General texture sampling codegen.
1499 * This function handles texture sampling for all texture targets (1D,
1500 * 2D, 3D, cube) and all filtering modes.
1501 */
1502static void
1503lp_build_sample_general(struct lp_build_sample_context *bld,
1504                        unsigned unit,
1505                        LLVMValueRef s,
1506                        LLVMValueRef t,
1507                        LLVMValueRef r,
1508                        const LLVMValueRef *ddx,
1509                        const LLVMValueRef *ddy,
1510                        LLVMValueRef lod_bias, /* optional */
1511                        LLVMValueRef explicit_lod, /* optional */
1512                        LLVMValueRef width,
1513                        LLVMValueRef height,
1514                        LLVMValueRef depth,
1515                        LLVMValueRef width_vec,
1516                        LLVMValueRef height_vec,
1517                        LLVMValueRef depth_vec,
1518                        LLVMValueRef row_stride_array,
1519                        LLVMValueRef img_stride_array,
1520                        LLVMValueRef data_array,
1521                        LLVMValueRef *colors_out)
1522{
1523   struct lp_build_context *float_bld = &bld->float_bld;
1524   const unsigned mip_filter = bld->static_state->min_mip_filter;
1525   const unsigned min_filter = bld->static_state->min_img_filter;
1526   const unsigned mag_filter = bld->static_state->mag_img_filter;
1527   const int dims = texture_dims(bld->static_state->target);
1528   LLVMValueRef lod = NULL, lod_fpart = NULL;
1529   LLVMValueRef ilevel0, ilevel1 = NULL, ilevel0_vec, ilevel1_vec = NULL;
1530   LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL;
1531   LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL;
1532   LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL;
1533   LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL;
1534   LLVMValueRef data_ptr0, data_ptr1 = NULL;
1535   LLVMValueRef face_ddx[4], face_ddy[4];
1536
1537   /*
1538   printf("%s mip %d  min %d  mag %d\n", __FUNCTION__,
1539          mip_filter, min_filter, mag_filter);
1540   */
1541
1542   /*
1543    * Choose cube face, recompute texcoords and derivatives for the chosen face.
1544    */
1545   if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
1546      LLVMValueRef face, face_s, face_t;
1547      lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
1548      s = face_s; /* vec */
1549      t = face_t; /* vec */
1550      /* use 'r' to indicate cube face */
1551      r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */
1552
1553      /* recompute ddx, ddy using the new (s,t) face texcoords */
1554      face_ddx[0] = lp_build_ddx(&bld->coord_bld, s);
1555      face_ddx[1] = lp_build_ddx(&bld->coord_bld, t);
1556      face_ddx[2] = NULL;
1557      face_ddx[3] = NULL;
1558      face_ddy[0] = lp_build_ddy(&bld->coord_bld, s);
1559      face_ddy[1] = lp_build_ddy(&bld->coord_bld, t);
1560      face_ddy[2] = NULL;
1561      face_ddy[3] = NULL;
1562      ddx = face_ddx;
1563      ddy = face_ddy;
1564   }
1565
1566   /*
1567    * Compute the level of detail (float).
1568    */
1569   if (min_filter != mag_filter ||
1570       mip_filter != PIPE_TEX_MIPFILTER_NONE) {
1571      /* Need to compute lod either to choose mipmap levels or to
1572       * distinguish between minification/magnification with one mipmap level.
1573       */
1574      lod = lp_build_lod_selector(bld, ddx, ddy,
1575                                  lod_bias, explicit_lod,
1576                                  width, height, depth);
1577   }
1578
1579   /*
1580    * Compute integer mipmap level(s) to fetch texels from.
1581    */
1582   if (mip_filter == PIPE_TEX_MIPFILTER_NONE) {
1583      /* always use mip level 0 */
1584      if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
1585         /* XXX this is a work-around for an apparent bug in LLVM 2.7.
1586          * We should be able to set ilevel0 = const(0) but that causes
1587          * bad x86 code to be emitted.
1588          */
1589         lod = lp_build_const_elem(bld->coord_bld.type, 0.0);
1590         lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
1591      }
1592      else {
1593         ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
1594      }
1595   }
1596   else {
1597      assert(lod);
1598      if (mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
1599         lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
1600      }
1601      else {
1602         assert(mip_filter == PIPE_TEX_MIPFILTER_LINEAR);
1603         lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1,
1604                                    &lod_fpart);
1605         lod_fpart = lp_build_broadcast_scalar(&bld->coord_bld, lod_fpart);
1606      }
1607   }
1608
1609   /*
1610    * Convert scalar integer mipmap levels into vectors.
1611    */
1612   ilevel0_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel0);
1613   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
1614      ilevel1_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel1);
1615
1616   /*
1617    * Compute width, height at mipmap level 'ilevel0'
1618    */
1619   width0_vec = lp_build_minify(bld, width_vec, ilevel0_vec);
1620   if (dims >= 2) {
1621      height0_vec = lp_build_minify(bld, height_vec, ilevel0_vec);
1622      row_stride0_vec = lp_build_get_level_stride_vec(bld, row_stride_array,
1623                                                      ilevel0);
1624      if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
1625         img_stride0_vec = lp_build_get_level_stride_vec(bld,
1626                                                         img_stride_array,
1627                                                         ilevel0);
1628         if (dims == 3) {
1629            depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec);
1630         }
1631      }
1632   }
1633   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1634      /* compute width, height, depth for second mipmap level at 'ilevel1' */
1635      width1_vec = lp_build_minify(bld, width_vec, ilevel1_vec);
1636      if (dims >= 2) {
1637         height1_vec = lp_build_minify(bld, height_vec, ilevel1_vec);
1638         row_stride1_vec = lp_build_get_level_stride_vec(bld, row_stride_array,
1639                                                         ilevel1);
1640         if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
1641            img_stride1_vec = lp_build_get_level_stride_vec(bld,
1642                                                            img_stride_array,
1643                                                            ilevel1);
1644            if (dims ==3) {
1645               depth1_vec = lp_build_minify(bld, depth_vec, ilevel1_vec);
1646            }
1647         }
1648      }
1649   }
1650
1651   /*
1652    * Get pointer(s) to image data for mipmap level(s).
1653    */
1654   data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0);
1655   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1656      data_ptr1 = lp_build_get_mipmap_level(bld, data_array, ilevel1);
1657   }
1658
1659   /*
1660    * Get/interpolate texture colors.
1661    */
1662   if (min_filter == mag_filter) {
1663      /* no need to distinquish between minification and magnification */
1664      lp_build_sample_mipmap(bld, min_filter, mip_filter, s, t, r, lod_fpart,
1665                             width0_vec, width1_vec,
1666                             height0_vec, height1_vec,
1667                             depth0_vec, depth1_vec,
1668                             row_stride0_vec, row_stride1_vec,
1669                             img_stride0_vec, img_stride1_vec,
1670                             data_ptr0, data_ptr1,
1671                             colors_out);
1672   }
1673   else {
1674      /* Emit conditional to choose min image filter or mag image filter
1675       * depending on the lod being >0 or <= 0, respectively.
1676       */
1677      struct lp_build_flow_context *flow_ctx;
1678      struct lp_build_if_state if_ctx;
1679      LLVMValueRef minify;
1680
1681      flow_ctx = lp_build_flow_create(bld->builder);
1682      lp_build_flow_scope_begin(flow_ctx);
1683
1684      lp_build_flow_scope_declare(flow_ctx, &colors_out[0]);
1685      lp_build_flow_scope_declare(flow_ctx, &colors_out[1]);
1686      lp_build_flow_scope_declare(flow_ctx, &colors_out[2]);
1687      lp_build_flow_scope_declare(flow_ctx, &colors_out[3]);
1688
1689      /* minify = lod > 0.0 */
1690      minify = LLVMBuildFCmp(bld->builder, LLVMRealUGE,
1691                             lod, float_bld->zero, "");
1692
1693      lp_build_if(&if_ctx, flow_ctx, bld->builder, minify);
1694      {
1695         /* Use the minification filter */
1696         lp_build_sample_mipmap(bld, min_filter, mip_filter,
1697                                s, t, r, lod_fpart,
1698                                width0_vec, width1_vec,
1699                                height0_vec, height1_vec,
1700                                depth0_vec, depth1_vec,
1701                                row_stride0_vec, row_stride1_vec,
1702                                img_stride0_vec, img_stride1_vec,
1703                                data_ptr0, data_ptr1,
1704                                colors_out);
1705      }
1706      lp_build_else(&if_ctx);
1707      {
1708         /* Use the magnification filter */
1709         lp_build_sample_mipmap(bld, mag_filter, mip_filter,
1710                                s, t, r, lod_fpart,
1711                                width0_vec, width1_vec,
1712                                height0_vec, height1_vec,
1713                                depth0_vec, depth1_vec,
1714                                row_stride0_vec, row_stride1_vec,
1715                                img_stride0_vec, img_stride1_vec,
1716                                data_ptr0, data_ptr1,
1717                                colors_out);
1718      }
1719      lp_build_endif(&if_ctx);
1720
1721      lp_build_flow_scope_end(flow_ctx);
1722      lp_build_flow_destroy(flow_ctx);
1723   }
1724}
1725
1726
1727
1728static void
1729lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
1730                              LLVMValueRef s,
1731                              LLVMValueRef t,
1732                              LLVMValueRef width,
1733                              LLVMValueRef height,
1734                              LLVMValueRef stride_array,
1735                              LLVMValueRef data_array,
1736                              LLVMValueRef texel_out[4])
1737{
1738   LLVMBuilderRef builder = bld->builder;
1739   struct lp_build_context i32, h16, u8n;
1740   LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
1741   LLVMValueRef i32_c8, i32_c128, i32_c255;
1742   LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
1743   LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
1744   LLVMValueRef x0, x1;
1745   LLVMValueRef y0, y1;
1746   LLVMValueRef neighbors[2][2];
1747   LLVMValueRef neighbors_lo[2][2];
1748   LLVMValueRef neighbors_hi[2][2];
1749   LLVMValueRef packed, packed_lo, packed_hi;
1750   LLVMValueRef unswizzled[4];
1751   LLVMValueRef stride;
1752
1753   assert(bld->static_state->target == PIPE_TEXTURE_2D);
1754   assert(bld->static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR);
1755   assert(bld->static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR);
1756   assert(bld->static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE);
1757
1758   lp_build_context_init(&i32, builder, lp_type_int_vec(32));
1759   lp_build_context_init(&h16, builder, lp_type_ufixed(16));
1760   lp_build_context_init(&u8n, builder, lp_type_unorm(8));
1761
1762   i32_vec_type = lp_build_vec_type(i32.type);
1763   h16_vec_type = lp_build_vec_type(h16.type);
1764   u8n_vec_type = lp_build_vec_type(u8n.type);
1765
1766   if (bld->static_state->normalized_coords) {
1767      LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
1768      LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width, coord_vec_type, "");
1769      LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height, coord_vec_type, "");
1770      s = lp_build_mul(&bld->coord_bld, s, fp_width);
1771      t = lp_build_mul(&bld->coord_bld, t, fp_height);
1772   }
1773
1774   /* scale coords by 256 (8 fractional bits) */
1775   s = lp_build_mul_imm(&bld->coord_bld, s, 256);
1776   t = lp_build_mul_imm(&bld->coord_bld, t, 256);
1777
1778   /* convert float to int */
1779   s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
1780   t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
1781
1782   /* subtract 0.5 (add -128) */
1783   i32_c128 = lp_build_const_int_vec(i32.type, -128);
1784   s = LLVMBuildAdd(builder, s, i32_c128, "");
1785   t = LLVMBuildAdd(builder, t, i32_c128, "");
1786
1787   /* compute floor (shift right 8) */
1788   i32_c8 = lp_build_const_int_vec(i32.type, 8);
1789   s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
1790   t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
1791
1792   /* compute fractional part (AND with 0xff) */
1793   i32_c255 = lp_build_const_int_vec(i32.type, 255);
1794   s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
1795   t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
1796
1797   x0 = s_ipart;
1798   y0 = t_ipart;
1799
1800   x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one);
1801   y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one);
1802
1803   x0 = lp_build_sample_wrap_int(bld, x0, width,  bld->static_state->pot_width,
1804                                 bld->static_state->wrap_s);
1805   y0 = lp_build_sample_wrap_int(bld, y0, height, bld->static_state->pot_height,
1806                                 bld->static_state->wrap_t);
1807
1808   x1 = lp_build_sample_wrap_int(bld, x1, width,  bld->static_state->pot_width,
1809                                 bld->static_state->wrap_s);
1810   y1 = lp_build_sample_wrap_int(bld, y1, height, bld->static_state->pot_height,
1811                                 bld->static_state->wrap_t);
1812
1813   /*
1814    * Transform 4 x i32 in
1815    *
1816    *   s_fpart = {s0, s1, s2, s3}
1817    *
1818    * into 8 x i16
1819    *
1820    *   s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
1821    *
1822    * into two 8 x i16
1823    *
1824    *   s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
1825    *   s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
1826    *
1827    * and likewise for t_fpart. There is no risk of loosing precision here
1828    * since the fractional parts only use the lower 8bits.
1829    */
1830
1831   s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
1832   t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
1833
1834   {
1835      LLVMTypeRef elem_type = LLVMInt32Type();
1836      LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
1837      LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
1838      LLVMValueRef shuffle_lo;
1839      LLVMValueRef shuffle_hi;
1840      unsigned i, j;
1841
1842      for(j = 0; j < h16.type.length; j += 4) {
1843#ifdef PIPE_ARCH_LITTLE_ENDIAN
1844         unsigned subindex = 0;
1845#else
1846         unsigned subindex = 1;
1847#endif
1848         LLVMValueRef index;
1849
1850         index = LLVMConstInt(elem_type, j/2 + subindex, 0);
1851         for(i = 0; i < 4; ++i)
1852            shuffles_lo[j + i] = index;
1853
1854         index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
1855         for(i = 0; i < 4; ++i)
1856            shuffles_hi[j + i] = index;
1857      }
1858
1859      shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
1860      shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);
1861
1862      s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_lo, "");
1863      t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_lo, "");
1864      s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_hi, "");
1865      t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, "");
1866   }
1867
1868   stride = lp_build_get_const_level_stride_vec(bld, stride_array, 0);
1869
1870   /*
1871    * Fetch the pixels as 4 x 32bit (rgba order might differ):
1872    *
1873    *   rgba0 rgba1 rgba2 rgba3
1874    *
1875    * bit cast them into 16 x u8
1876    *
1877    *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
1878    *
1879    * unpack them into two 8 x i16:
1880    *
1881    *   r0 g0 b0 a0 r1 g1 b1 a1
1882    *   r2 g2 b2 a2 r3 g3 b3 a3
1883    *
1884    * The higher 8 bits of the resulting elements will be zero.
1885    */
1886
1887   neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_array);
1888   neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_array);
1889   neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_array);
1890   neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_array);
1891
1892   neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, "");
1893   neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, "");
1894   neighbors[1][0] = LLVMBuildBitCast(builder, neighbors[1][0], u8n_vec_type, "");
1895   neighbors[1][1] = LLVMBuildBitCast(builder, neighbors[1][1], u8n_vec_type, "");
1896
1897   lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][0], &neighbors_lo[0][0], &neighbors_hi[0][0]);
1898   lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][1], &neighbors_lo[0][1], &neighbors_hi[0][1]);
1899   lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][0], &neighbors_lo[1][0], &neighbors_hi[1][0]);
1900   lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][1], &neighbors_lo[1][1], &neighbors_hi[1][1]);
1901
1902   /*
1903    * Linear interpolate with 8.8 fixed point.
1904    */
1905
1906   packed_lo = lp_build_lerp_2d(&h16,
1907                                s_fpart_lo, t_fpart_lo,
1908                                neighbors_lo[0][0],
1909                                neighbors_lo[0][1],
1910                                neighbors_lo[1][0],
1911                                neighbors_lo[1][1]);
1912
1913   packed_hi = lp_build_lerp_2d(&h16,
1914                                s_fpart_hi, t_fpart_hi,
1915                                neighbors_hi[0][0],
1916                                neighbors_hi[0][1],
1917                                neighbors_hi[1][0],
1918                                neighbors_hi[1][1]);
1919
1920   packed = lp_build_pack2(builder, h16.type, u8n.type, packed_lo, packed_hi);
1921
1922   /*
1923    * Convert to SoA and swizzle.
1924    */
1925
1926   lp_build_rgba8_to_f32_soa(bld->builder,
1927                             bld->texel_type,
1928                             packed, unswizzled);
1929
1930   if (util_format_is_rgba8_variant(bld->format_desc)) {
1931      lp_build_format_swizzle_soa(bld->format_desc,
1932                                  &bld->texel_bld,
1933                                  unswizzled, texel_out);
1934   } else {
1935      texel_out[0] = unswizzled[0];
1936      texel_out[1] = unswizzled[1];
1937      texel_out[2] = unswizzled[2];
1938      texel_out[3] = unswizzled[3];
1939   }
1940
1941   apply_sampler_swizzle(bld, texel_out);
1942}
1943
1944
1945static void
1946lp_build_sample_compare(struct lp_build_sample_context *bld,
1947                        LLVMValueRef p,
1948                        LLVMValueRef texel[4])
1949{
1950   struct lp_build_context *texel_bld = &bld->texel_bld;
1951   LLVMValueRef res;
1952   unsigned chan;
1953
1954   if(bld->static_state->compare_mode == PIPE_TEX_COMPARE_NONE)
1955      return;
1956
1957   /* TODO: Compare before swizzling, to avoid redundant computations */
1958   res = NULL;
1959   for(chan = 0; chan < 4; ++chan) {
1960      LLVMValueRef cmp;
1961      cmp = lp_build_cmp(texel_bld, bld->static_state->compare_func, p, texel[chan]);
1962      cmp = lp_build_select(texel_bld, cmp, texel_bld->one, texel_bld->zero);
1963
1964      if(res)
1965         res = lp_build_add(texel_bld, res, cmp);
1966      else
1967         res = cmp;
1968   }
1969
1970   assert(res);
1971   res = lp_build_mul(texel_bld, res, lp_build_const_vec(texel_bld->type, 0.25));
1972
1973   /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
1974   for(chan = 0; chan < 3; ++chan)
1975      texel[chan] = res;
1976   texel[3] = texel_bld->one;
1977}
1978
1979
1980/**
1981 * Just set texels to white instead of actually sampling the texture.
1982 * For debugging.
1983 */
1984static void
1985lp_build_sample_nop(struct lp_build_sample_context *bld,
1986                    LLVMValueRef texel_out[4])
1987{
1988   struct lp_build_context *texel_bld = &bld->texel_bld;
1989   unsigned chan;
1990
1991   for (chan = 0; chan < 4; chan++) {
1992      /*lp_bld_mov(texel_bld, texel, texel_bld->one);*/
1993      texel_out[chan] = texel_bld->one;
1994   }
1995}
1996
1997
1998/**
1999 * Build texture sampling code.
2000 * 'texel' will return a vector of four LLVMValueRefs corresponding to
2001 * R, G, B, A.
2002 * \param type  vector float type to use for coords, etc.
2003 * \param ddx  partial derivatives of (s,t,r,q) with respect to x
2004 * \param ddy  partial derivatives of (s,t,r,q) with respect to y
2005 */
2006void
2007lp_build_sample_soa(LLVMBuilderRef builder,
2008                    const struct lp_sampler_static_state *static_state,
2009                    struct lp_sampler_dynamic_state *dynamic_state,
2010                    struct lp_type type,
2011                    unsigned unit,
2012                    unsigned num_coords,
2013                    const LLVMValueRef *coords,
2014                    const LLVMValueRef ddx[4],
2015                    const LLVMValueRef ddy[4],
2016                    LLVMValueRef lod_bias, /* optional */
2017                    LLVMValueRef explicit_lod, /* optional */
2018                    LLVMValueRef texel_out[4])
2019{
2020   struct lp_build_sample_context bld;
2021   LLVMValueRef width, width_vec;
2022   LLVMValueRef height, height_vec;
2023   LLVMValueRef depth, depth_vec;
2024   LLVMValueRef row_stride_array, img_stride_array;
2025   LLVMValueRef data_array;
2026   LLVMValueRef s;
2027   LLVMValueRef t;
2028   LLVMValueRef r;
2029
2030   if (0) {
2031      enum pipe_format fmt = static_state->format;
2032      debug_printf("Sample from %s\n", util_format_name(fmt));
2033   }
2034
2035   assert(type.floating);
2036
2037   /* Setup our build context */
2038   memset(&bld, 0, sizeof bld);
2039   bld.builder = builder;
2040   bld.static_state = static_state;
2041   bld.dynamic_state = dynamic_state;
2042   bld.format_desc = util_format_description(static_state->format);
2043
2044   bld.float_type = lp_type_float(32);
2045   bld.int_type = lp_type_int(32);
2046   bld.coord_type = type;
2047   bld.uint_coord_type = lp_uint_type(type);
2048   bld.int_coord_type = lp_int_type(type);
2049   bld.texel_type = type;
2050
2051   lp_build_context_init(&bld.float_bld, builder, bld.float_type);
2052   lp_build_context_init(&bld.int_bld, builder, bld.int_type);
2053   lp_build_context_init(&bld.coord_bld, builder, bld.coord_type);
2054   lp_build_context_init(&bld.uint_coord_bld, builder, bld.uint_coord_type);
2055   lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type);
2056   lp_build_context_init(&bld.texel_bld, builder, bld.texel_type);
2057
2058   /* Get the dynamic state */
2059   width = dynamic_state->width(dynamic_state, builder, unit);
2060   height = dynamic_state->height(dynamic_state, builder, unit);
2061   depth = dynamic_state->depth(dynamic_state, builder, unit);
2062   row_stride_array = dynamic_state->row_stride(dynamic_state, builder, unit);
2063   img_stride_array = dynamic_state->img_stride(dynamic_state, builder, unit);
2064   data_array = dynamic_state->data_ptr(dynamic_state, builder, unit);
2065   /* Note that data_array is an array[level] of pointers to texture images */
2066
2067   s = coords[0];
2068   t = coords[1];
2069   r = coords[2];
2070
2071   width_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, width);
2072   height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height);
2073   depth_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, depth);
2074
2075   if (0) {
2076      /* For debug: no-op texture sampling */
2077      lp_build_sample_nop(&bld, texel_out);
2078   }
2079   else if (util_format_fits_8unorm(bld.format_desc) &&
2080            bld.format_desc->nr_channels > 1 &&
2081            static_state->target == PIPE_TEXTURE_2D &&
2082            static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
2083            static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR &&
2084            static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE &&
2085            is_simple_wrap_mode(static_state->wrap_s) &&
2086            is_simple_wrap_mode(static_state->wrap_t)) {
2087      /* special case */
2088      lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec,
2089                                    row_stride_array, data_array, texel_out);
2090   }
2091   else {
2092      lp_build_sample_general(&bld, unit, s, t, r, ddx, ddy,
2093                              lod_bias, explicit_lod,
2094                              width, height, depth,
2095                              width_vec, height_vec, depth_vec,
2096                              row_stride_array, img_stride_array,
2097                              data_array,
2098                              texel_out);
2099   }
2100
2101   lp_build_sample_compare(&bld, r, texel_out);
2102}
2103