sp_tex_sample.c revision e2329f2795d48d11131e9ac105e7aa3fd2c229c1
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 * Copyright 2008 VMware, Inc.  All rights reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29/**
30 * Texture sampling
31 *
32 * Authors:
33 *   Brian Paul
34 *   Keith Whitwell
35 */
36
37#include "pipe/p_context.h"
38#include "pipe/p_defines.h"
39#include "pipe/p_shader_tokens.h"
40#include "util/u_math.h"
41#include "util/u_memory.h"
42#include "sp_quad.h"   /* only for #define QUAD_* tokens */
43#include "sp_tex_sample.h"
44#include "sp_tex_tile_cache.h"
45
46
47
/*
 * FRAC(f) subtracts util_ifloor(f) from f.
 *
 * Note, this has to work perfectly.  Otherwise you'll sometimes see
 * 1-pixel bands of improperly weighted linear-filtered textures.
 * The tests/texwrap.c demo is a good test.
 *
 * For f < 0 the result is not the true fractional part of f; it is
 * 1 - true_frac(f) instead.
 *
 * The argument is expanded twice, so it must be free of side effects.
 */
#define FRAC(f) \
   ((f) - util_ifloor(f))
56
57
58/**
59 * Linear interpolation macro
60 */
61static INLINE float
62lerp(float a, float v0, float v1)
63{
64   return v0 + a * (v1 - v0);
65}
66
67
68/**
69 * Do 2D/biliner interpolation of float values.
70 * v00, v10, v01 and v11 are typically four texture samples in a square/box.
71 * a and b are the horizontal and vertical interpolants.
72 * It's important that this function is inlined when compiled with
73 * optimization!  If we find that's not true on some systems, convert
74 * to a macro.
75 */
76static INLINE float
77lerp_2d(float a, float b,
78        float v00, float v10, float v01, float v11)
79{
80   const float temp0 = lerp(a, v00, v10);
81   const float temp1 = lerp(a, v01, v11);
82   return lerp(b, temp0, temp1);
83}
84
85
86/**
87 * As above, but 3D interpolation of 8 values.
88 */
89static INLINE float
90lerp_3d(float a, float b, float c,
91        float v000, float v100, float v010, float v110,
92        float v001, float v101, float v011, float v111)
93{
94   const float temp0 = lerp_2d(a, b, v000, v100, v010, v110);
95   const float temp1 = lerp_2d(a, b, v001, v101, v011, v111);
96   return lerp(c, temp0, temp1);
97}
98
99
100
/**
 * Wrap a signed texel index into [0, size-1] for texture repeat.
 *
 * A plain A % B is wrong for A < 0, and casting A to unsigned first only
 * works when B is a power of two (UINT_MAX + 1 is then a multiple of B).
 * For other sizes the cast gives the wrong texel: with 32-bit unsigned,
 * A = -1, B = 3 yields 4294967295 % 3 = 0 instead of 2.  Negative
 * indexes are therefore folded explicitly.
 *
 * \param coord  signed texel index, may be negative
 * \param size   texture dimension in texels, must be non-zero
 * \return  coord modulo size, always in [0, size-1]
 */
static unsigned
wrap_repeat_index(int coord, unsigned size)
{
   if (coord < 0) {
      /* -(coord + 1) cannot overflow, unlike -coord for INT_MIN */
      const unsigned past = (unsigned) -(coord + 1);
      return (size - 1) - (past % size);
   }
   return (unsigned) coord % size;
}

/**
 * If A is a signed integer, A % B doesn't give the right value for A < 0
 * (in terms of texture repeat).  REMAINDER(A, B) does, for any non-zero B,
 * and evaluates each argument exactly once.
 */
#define REMAINDER(A, B) wrap_repeat_index((int) (A), (unsigned) (B))
106
107
/**
 * Apply the repeat wrap mode and return integer texture indexes
 * for a vector of four texcoords (S or T or P), for nearest sampling.
 * \param s  the incoming texcoords
 * \param size  the texture image size
 * \param icoord  returns the integer texcoords
 */
static void
wrap_nearest_repeat(const float s[4], unsigned size, int icoord[4])
{
   unsigned k;

   for (k = 0; k < 4; k++) {
      /* scale to texel space, then wrap the index back into the image */
      const int texel = util_ifloor(s[k] * size);
      icoord[k] = REMAINDER(texel, size);
   }
}
128
129
/**
 * Nearest-sampling texel indexes for the clamp wrap mode.
 * Texcoords <= 0 map to texel 0 and texcoords >= 1 map to texel size-1.
 * \param s  the incoming texcoords
 * \param size  the texture image size
 * \param icoord  returns the integer texcoords
 */
static void
wrap_nearest_clamp(const float s[4], unsigned size, int icoord[4])
{
   const int last = size - 1;
   unsigned k;

   for (k = 0; k < 4; k++) {
      const float coord = s[k];

      if (coord <= 0.0F) {
         icoord[k] = 0;
      }
      else if (coord >= 1.0F) {
         icoord[k] = last;
      }
      else {
         icoord[k] = util_ifloor(coord * size);
      }
   }
}
145
146
/**
 * Nearest-sampling texel indexes for the clamp-to-edge wrap mode.
 * Texcoords below half a texel map to texel 0; texcoords above
 * 1 - half a texel map to texel size-1.
 * \param s  the incoming texcoords
 * \param size  the texture image size
 * \param icoord  returns the integer texcoords
 */
static void
wrap_nearest_clamp_to_edge(const float s[4], unsigned size, int icoord[4])
{
   /* half a texel, in normalized coordinates */
   const float lo = 1.0F / (2.0F * size);
   const float hi = 1.0F - lo;
   const int last = size - 1;
   unsigned k;

   for (k = 0; k < 4; k++) {
      const float coord = s[k];

      icoord[k] = (coord < lo) ? 0
                : (coord > hi) ? last
                : util_ifloor(coord * size);
   }
}
164
165
/**
 * Nearest-sampling texel indexes for the clamp-to-border wrap mode.
 * Texcoords at or beyond half a texel outside [0,1] yield the
 * out-of-image indexes -1 or size.
 * \param s  the incoming texcoords
 * \param size  the texture image size
 * \param icoord  returns the integer texcoords
 */
static void
wrap_nearest_clamp_to_border(const float s[4], unsigned size, int icoord[4])
{
   /* half a texel before 0 and half a texel past 1 */
   const float lo = -1.0F / (2.0F * size);
   const float hi = 1.0F - lo;
   unsigned k;

   for (k = 0; k < 4; k++) {
      const float coord = s[k];

      icoord[k] = (coord <= lo) ? -1
                : (coord >= hi) ? (int) size
                : util_ifloor(coord * size);
   }
}
183
184
/**
 * Nearest-sampling texel indexes for the mirror-repeat wrap mode.
 * The texcoord is reduced to its offset within the current period,
 * flipped when the period number is odd, and then clamped to the
 * texel range [0, size-1].
 * \param s  the incoming texcoords
 * \param size  the texture image size
 * \param icoord  returns the integer texcoords
 */
static void
wrap_nearest_mirror_repeat(const float s[4], unsigned size, int icoord[4])
{
   const float lo = 1.0F / (2.0F * size);
   const float hi = 1.0F - lo;
   const int last = size - 1;
   unsigned k;

   for (k = 0; k < 4; k++) {
      const int whole = util_ifloor(s[k]);
      /* odd periods run backwards */
      const float u = (whole & 1) ? 1.0F - (s[k] - (float) whole)
                                  : s[k] - (float) whole;

      if (u < lo) {
         icoord[k] = 0;
      }
      else if (u > hi) {
         icoord[k] = last;
      }
      else {
         icoord[k] = util_ifloor(u * size);
      }
   }
}
206
207
/**
 * Nearest-sampling texel indexes for the mirror-clamp wrap mode.
 * Works on |s|: magnitudes >= 1 map to texel size-1.
 * \param s  the incoming texcoords
 * \param size  the texture image size
 * \param icoord  returns the integer texcoords
 */
static void
wrap_nearest_mirror_clamp(const float s[4], unsigned size, int icoord[4])
{
   const int last = size - 1;
   unsigned k;

   for (k = 0; k < 4; k++) {
      /* mirroring about zero == taking the absolute value */
      const float mag = fabsf(s[k]);

      icoord[k] = (mag <= 0.0F) ? 0
                : (mag >= 1.0F) ? last
                : util_ifloor(mag * size);
   }
}
224
225
/**
 * Nearest-sampling texel indexes for the mirror-clamp-to-edge wrap mode.
 * Works on |s|, clamped so that the result stays within [0, size-1].
 * \param s  the incoming texcoords
 * \param size  the texture image size
 * \param icoord  returns the integer texcoords
 */
static void
wrap_nearest_mirror_clamp_to_edge(const float s[4], unsigned size,
                                  int icoord[4])
{
   /* half a texel, in normalized coordinates */
   const float lo = 1.0F / (2.0F * size);
   const float hi = 1.0F - lo;
   const int last = size - 1;
   unsigned k;

   for (k = 0; k < 4; k++) {
      const float mag = fabsf(s[k]);

      if (mag < lo) {
         icoord[k] = 0;
      }
      else if (mag > hi) {
         icoord[k] = last;
      }
      else {
         icoord[k] = util_ifloor(mag * size);
      }
   }
}
245
246
247static void
248wrap_nearest_mirror_clamp_to_border(const float s[4], unsigned size,
249                                    int icoord[4])
250{
251   uint ch;
252   /* s limited to [min,max] */
253   /* i limited to [0, size-1] */
254   const float min = -1.0F / (2.0F * size);
255   const float max = 1.0F - min;
256   for (ch = 0; ch < 4; ch++) {
257      const float u = fabsf(s[ch]);
258      if (u < min)
259         icoord[ch] = -1;
260      else if (u > max)
261         icoord[ch] = size;
262      else
263         icoord[ch] = util_ifloor(u * size);
264   }
265}
266
267
/**
 * Used to compute texel locations for linear sampling for four texcoords,
 * with the repeat wrap mode.
 * \param s  the texcoords
 * \param size  the texture image size
 * \param icoord0  returns first texture indexes
 * \param icoord1  returns second texture indexes (icoord0 + 1, wrapped)
 * \param w  returns blend factor/weight between texture indexes
 */
static void
wrap_linear_repeat(const float s[4], unsigned size,
                   int icoord0[4], int icoord1[4], float w[4])
{
   unsigned k;

   for (k = 0; k < 4; k++) {
      /* shift by half a texel so texel centers land on integers */
      const float u = s[k] * size - 0.5F;
      const int first = REMAINDER(util_ifloor(u), size);

      icoord0[k] = first;
      icoord1[k] = REMAINDER(first + 1, size);
      w[k] = FRAC(u);
   }
}
290
291
/**
 * Linear-sampling texel indexes and weights for the clamp wrap mode.
 * The texcoord is clamped to [0,1] before scaling; the resulting
 * indexes are not clamped, so they may fall just outside the image.
 * \param s  the texcoords
 * \param size  the texture image size
 * \param icoord0  returns first texture indexes
 * \param icoord1  returns second texture indexes (icoord0 + 1)
 * \param w  returns blend factor/weight between texture indexes
 */
static void
wrap_linear_clamp(const float s[4], unsigned size,
                  int icoord0[4], int icoord1[4], float w[4])
{
   unsigned k;

   for (k = 0; k < 4; k++) {
      float u = CLAMP(s[k], 0.0F, 1.0F);
      int first;

      u = u * size - 0.5f;
      first = util_ifloor(u);

      icoord0[k] = first;
      icoord1[k] = first + 1;
      w[k] = FRAC(u);
   }
}
305
306
/**
 * Linear-sampling texel indexes and weights for the clamp-to-edge wrap mode.
 * Both indexes are forced into [0, size-1] so only image texels are used.
 * \param s  the texcoords
 * \param size  the texture image size
 * \param icoord0  returns first texture indexes
 * \param icoord1  returns second texture indexes
 * \param w  returns blend factor/weight between texture indexes
 */
static void
wrap_linear_clamp_to_edge(const float s[4], unsigned size,
                          int icoord0[4], int icoord1[4], float w[4])
{
   unsigned k;

   for (k = 0; k < 4; k++) {
      float u = CLAMP(s[k], 0.0F, 1.0F);
      int first, second;

      u = u * size - 0.5f;
      first = util_ifloor(u);
      /* the neighbour is derived from the unclamped first index */
      second = first + 1;

      icoord0[k] = (first < 0) ? 0 : first;
      icoord1[k] = (second >= (int) size) ? (int) (size - 1) : second;
      w[k] = FRAC(u);
   }
}
324
325
/**
 * Linear-sampling texel indexes and weights for the clamp-to-border
 * wrap mode.  The texcoord is clamped to half a texel outside [0,1];
 * the indexes are left unclamped so they can address border texels.
 * \param s  the texcoords
 * \param size  the texture image size
 * \param icoord0  returns first texture indexes
 * \param icoord1  returns second texture indexes (icoord0 + 1)
 * \param w  returns blend factor/weight between texture indexes
 */
static void
wrap_linear_clamp_to_border(const float s[4], unsigned size,
                            int icoord0[4], int icoord1[4], float w[4])
{
   const float lo = -1.0F / (2.0F * size);
   const float hi = 1.0F - lo;
   unsigned k;

   for (k = 0; k < 4; k++) {
      float u = CLAMP(s[k], lo, hi);
      int first;

      u = u * size - 0.5f;
      first = util_ifloor(u);

      icoord0[k] = first;
      icoord1[k] = first + 1;
      w[k] = FRAC(u);
   }
}
341
342
/**
 * Linear-sampling texel indexes and weights for the mirror-repeat
 * wrap mode.  The texcoord is reduced to its offset within the current
 * period (flipped for odd periods) and both indexes are clamped to
 * [0, size-1].
 * \param s  the texcoords
 * \param size  the texture image size
 * \param icoord0  returns first texture indexes
 * \param icoord1  returns second texture indexes
 * \param w  returns blend factor/weight between texture indexes
 */
static void
wrap_linear_mirror_repeat(const float s[4], unsigned size,
                          int icoord0[4], int icoord1[4], float w[4])
{
   unsigned k;

   for (k = 0; k < 4; k++) {
      const int whole = util_ifloor(s[k]);
      /* odd periods run backwards */
      float u = (whole & 1) ? 1.0F - (s[k] - (float) whole)
                            : s[k] - (float) whole;
      int first, second;

      u = u * size - 0.5F;
      first = util_ifloor(u);
      second = first + 1;

      icoord0[k] = (first < 0) ? 0 : first;
      icoord1[k] = (second >= (int) size) ? (int) (size - 1) : second;
      w[k] = FRAC(u);
   }
}
365
366
/**
 * Linear-sampling texel indexes and weights for the mirror-clamp
 * wrap mode.  Works on |s|, with magnitudes >= 1 treated as exactly 1.
 * The indexes are not clamped.
 * \param s  the texcoords
 * \param size  the texture image size
 * \param icoord0  returns first texture indexes
 * \param icoord1  returns second texture indexes (icoord0 + 1)
 * \param w  returns blend factor/weight between texture indexes
 */
static void
wrap_linear_mirror_clamp(const float s[4], unsigned size,
                         int icoord0[4], int icoord1[4], float w[4])
{
   unsigned k;

   for (k = 0; k < 4; k++) {
      float u = fabsf(s[k]);
      int first;

      u = (u >= 1.0F) ? (float) size : u * size;
      u -= 0.5F;
      first = util_ifloor(u);

      icoord0[k] = first;
      icoord1[k] = first + 1;
      w[k] = FRAC(u);
   }
}
384
385
/**
 * Linear-sampling texel indexes and weights for the mirror-clamp-to-edge
 * wrap mode.  Works on |s| (magnitudes >= 1 treated as 1) and clamps
 * both indexes to [0, size-1].
 * \param s  the texcoords
 * \param size  the texture image size
 * \param icoord0  returns first texture indexes
 * \param icoord1  returns second texture indexes
 * \param w  returns blend factor/weight between texture indexes
 */
static void
wrap_linear_mirror_clamp_to_edge(const float s[4], unsigned size,
                                 int icoord0[4], int icoord1[4], float w[4])
{
   unsigned k;

   for (k = 0; k < 4; k++) {
      float u = fabsf(s[k]);
      int first, second;

      u = (u >= 1.0F) ? (float) size : u * size;
      u -= 0.5F;
      first = util_ifloor(u);
      /* the neighbour is derived from the unclamped first index */
      second = first + 1;

      icoord0[k] = (first < 0) ? 0 : first;
      icoord1[k] = (second >= (int) size) ? (int) (size - 1) : second;
      w[k] = FRAC(u);
   }
}
407
408
409static void
410wrap_linear_mirror_clamp_to_border(const float s[4], unsigned size,
411                                   int icoord0[4], int icoord1[4], float w[4])
412{
413   const float min = -1.0F / (2.0F * size);
414   const float max = 1.0F - min;
415   uint ch;
416   for (ch = 0; ch < 4; ch++) {
417      float u = fabsf(s[ch]);
418      if (u <= min)
419         u = min * size;
420      else if (u >= max)
421         u = max * size;
422      else
423         u *= size;
424      u -= 0.5F;
425      icoord0[ch] = util_ifloor(u);
426      icoord1[ch] = icoord0[ch] + 1;
427      w[ch] = FRAC(u);
428   }
429}
430
431
/**
 * For RECT textures / unnormalized texcoords.
 * Only a subset of wrap modes supported.
 *
 * Nearest sampling, clamp: the floored texcoord is clamped to [0, size-1].
 * \param s  the incoming (unnormalized) texcoords
 * \param size  the texture image size
 * \param icoord  returns the integer texcoords
 */
static void
wrap_nearest_unorm_clamp(const float s[4], unsigned size, int icoord[4])
{
   const int last = (int) size - 1;
   unsigned k;

   for (k = 0; k < 4; k++) {
      const int texel = util_ifloor(s[k]);
      icoord[k] = CLAMP(texel, 0, last);
   }
}
445
446
/**
 * Nearest sampling with unnormalized texcoords.
 * Handles clamp_to_edge and clamp_to_border: the texcoord is clamped
 * to [0.5, size - 0.5] before being floored.
 * \param s  the incoming (unnormalized) texcoords
 * \param size  the texture image size
 * \param icoord  returns the integer texcoords
 */
static void
wrap_nearest_unorm_clamp_to_border(const float s[4], unsigned size,
                                   int icoord[4])
{
   const float hi = (float) size - 0.5F;
   unsigned k;

   for (k = 0; k < 4; k++) {
      const float u = CLAMP(s[k], 0.5F, hi);
      icoord[k] = util_ifloor(u);
   }
}
459
460
/**
 * For RECT textures / unnormalized texcoords.
 * Only a subset of wrap modes supported.
 *
 * Linear sampling, clamp.
 * \param s  the (unnormalized) texcoords
 * \param size  the texture image size
 * \param icoord0  returns first texture indexes
 * \param icoord1  returns second texture indexes (icoord0 + 1)
 * \param w  returns blend factor/weight between texture indexes
 */
static void
wrap_linear_unorm_clamp(const float s[4], unsigned size,
                        int icoord0[4], int icoord1[4], float w[4])
{
   const float hi = (float) size - 1.0f;
   unsigned k;

   for (k = 0; k < 4; k++) {
      /* Not exactly what the spec says, but it matches NVIDIA output */
      const float shifted = s[k] - 0.5F;
      const float u = CLAMP(shifted, 0.0f, hi);
      const int first = util_ifloor(u);

      icoord0[k] = first;
      icoord1[k] = first + 1;
      w[k] = FRAC(u);
   }
}
478
479
/**
 * Linear sampling with unnormalized texcoords, clamp-to-border variant.
 * The texcoord is clamped to [0.5, size - 0.5] and the second index is
 * additionally limited to size-1.
 * \param s  the (unnormalized) texcoords
 * \param size  the texture image size
 * \param icoord0  returns first texture indexes
 * \param icoord1  returns second texture indexes
 * \param w  returns blend factor/weight between texture indexes
 */
static void
wrap_linear_unorm_clamp_to_border(const float s[4], unsigned size,
                                  int icoord0[4], int icoord1[4], float w[4])
{
   const float hi = (float) size - 0.5F;
   const int last = (int) size - 1;
   unsigned k;

   for (k = 0; k < 4; k++) {
      float u = CLAMP(s[k], 0.5F, hi);
      int first, second;

      u -= 0.5F;
      first = util_ifloor(u);
      second = first + 1;

      icoord0[k] = first;
      icoord1[k] = (second > last) ? (int) (size - 1) : second;
      w[k] = FRAC(u);
   }
}
495
496
497
498/**
499 * Examine the quad's texture coordinates to compute the partial
500 * derivatives w.r.t X and Y, then compute lambda (level of detail).
501 */
502static float
503compute_lambda_1d(const struct sp_sampler_varient *samp,
504                  const float s[QUAD_SIZE],
505                  const float t[QUAD_SIZE],
506                  const float p[QUAD_SIZE],
507                  float lodbias)
508{
509   const struct pipe_texture *texture = samp->texture;
510   const struct pipe_sampler_state *sampler = samp->sampler;
511   float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
512   float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
513   float rho = MAX2(dsdx, dsdy) * texture->width[0];
514   float lambda;
515
516   lambda = util_fast_log2(rho);
517   lambda += lodbias + sampler->lod_bias;
518   lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod);
519
520   return lambda;
521}
522
523
524static float
525compute_lambda_2d(const struct sp_sampler_varient *samp,
526                  const float s[QUAD_SIZE],
527                  const float t[QUAD_SIZE],
528                  const float p[QUAD_SIZE],
529                  float lodbias)
530{
531   const struct pipe_texture *texture = samp->texture;
532   const struct pipe_sampler_state *sampler = samp->sampler;
533   float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
534   float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
535   float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
536   float dtdy = fabsf(t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]);
537   float maxx = MAX2(dsdx, dsdy) * texture->width[0];
538   float maxy = MAX2(dtdx, dtdy) * texture->height[0];
539   float rho  = MAX2(maxx, maxy);
540   float lambda;
541
542   lambda = util_fast_log2(rho);
543   lambda += lodbias + sampler->lod_bias;
544   lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod);
545
546   return lambda;
547}
548
549
550static float
551compute_lambda_3d(const struct sp_sampler_varient *samp,
552                  const float s[QUAD_SIZE],
553                  const float t[QUAD_SIZE],
554                  const float p[QUAD_SIZE],
555                  float lodbias)
556{
557   const struct pipe_texture *texture = samp->texture;
558   const struct pipe_sampler_state *sampler = samp->sampler;
559   float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
560   float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
561   float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
562   float dtdy = fabsf(t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]);
563   float dpdx = fabsf(p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]);
564   float dpdy = fabsf(p[QUAD_TOP_LEFT]     - p[QUAD_BOTTOM_LEFT]);
565   float maxx = MAX2(dsdx, dsdy) * texture->width[0];
566   float maxy = MAX2(dtdx, dtdy) * texture->height[0];
567   float maxz = MAX2(dpdx, dpdy) * texture->depth[0];
568   float rho, lambda;
569
570   rho = MAX2(maxx, maxy);
571   rho = MAX2(rho, maxz);
572
573   lambda = util_fast_log2(rho);
574   lambda += lodbias + sampler->lod_bias;
575   lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod);
576
577   return lambda;
578}
579
580
581/**
582 * Compute lambda for a vertex texture sampler.
583 * Since there aren't derivatives to use, just return the LOD bias.
584 */
585static float
586compute_lambda_vert(const struct sp_sampler_varient *samp,
587                    const float s[QUAD_SIZE],
588                    const float t[QUAD_SIZE],
589                    const float p[QUAD_SIZE],
590                    float lodbias)
591{
592   return lodbias;
593}
594
595
596
597/**
598 * Get a texel from a texture, using the texture tile cache.
599 *
600 * \param addr  the template tex address containing cube, z, face info.
601 * \param x  the x coord of texel within 2D image
602 * \param y  the y coord of texel within 2D image
603 * \param rgba  the quad to put the texel/color into
604 *
605 * XXX maybe move this into sp_tex_tile_cache.c and merge with the
606 * sp_get_cached_tile_tex() function.  Also, get 4 texels instead of 1...
607 */
608
609
610
611
612static INLINE const float *
613get_texel_2d_no_border(const struct sp_sampler_varient *samp,
614		       union tex_tile_address addr, int x, int y)
615{
616   const struct softpipe_tex_cached_tile *tile;
617
618   addr.bits.x = x / TILE_SIZE;
619   addr.bits.y = y / TILE_SIZE;
620   y %= TILE_SIZE;
621   x %= TILE_SIZE;
622
623   tile = sp_get_cached_tile_tex(samp->cache, addr);
624
625   return &tile->data.color[y][x][0];
626}
627
628
629static INLINE const float *
630get_texel_2d(const struct sp_sampler_varient *samp,
631	     union tex_tile_address addr, int x, int y)
632{
633   const struct pipe_texture *texture = samp->texture;
634   unsigned level = addr.bits.level;
635
636   if (x < 0 || x >= (int) texture->width[level] ||
637       y < 0 || y >= (int) texture->height[level]) {
638      return samp->sampler->border_color;
639   }
640   else {
641      return get_texel_2d_no_border( samp, addr, x, y );
642   }
643}
644
645
646/* Gather a quad of adjacent texels within a tile:
647 */
648static INLINE void
649get_texel_quad_2d_no_border_single_tile(const struct sp_sampler_varient *samp,
650					union tex_tile_address addr,
651					unsigned x, unsigned y,
652					const float *out[4])
653{
654   const struct softpipe_tex_cached_tile *tile;
655
656   addr.bits.x = x / TILE_SIZE;
657   addr.bits.y = y / TILE_SIZE;
658   y %= TILE_SIZE;
659   x %= TILE_SIZE;
660
661   tile = sp_get_cached_tile_tex(samp->cache, addr);
662
663   out[0] = &tile->data.color[y  ][x  ][0];
664   out[1] = &tile->data.color[y  ][x+1][0];
665   out[2] = &tile->data.color[y+1][x  ][0];
666   out[3] = &tile->data.color[y+1][x+1][0];
667}
668
669
670/* Gather a quad of potentially non-adjacent texels:
671 */
672static INLINE void
673get_texel_quad_2d_no_border(const struct sp_sampler_varient *samp,
674			    union tex_tile_address addr,
675			    int x0, int y0,
676			    int x1, int y1,
677			    const float *out[4])
678{
679   out[0] = get_texel_2d_no_border( samp, addr, x0, y0 );
680   out[1] = get_texel_2d_no_border( samp, addr, x1, y0 );
681   out[2] = get_texel_2d_no_border( samp, addr, x0, y1 );
682   out[3] = get_texel_2d_no_border( samp, addr, x1, y1 );
683}
684
685/* Can involve a lot of unnecessary checks for border color:
686 */
687static INLINE void
688get_texel_quad_2d(const struct sp_sampler_varient *samp,
689		  union tex_tile_address addr,
690		  int x0, int y0,
691		  int x1, int y1,
692		  const float *out[4])
693{
694   out[0] = get_texel_2d( samp, addr, x0, y0 );
695   out[1] = get_texel_2d( samp, addr, x1, y0 );
696   out[3] = get_texel_2d( samp, addr, x1, y1 );
697   out[2] = get_texel_2d( samp, addr, x0, y1 );
698}
699
700
701
702/* 3d varients:
703 */
704static INLINE const float *
705get_texel_3d_no_border(const struct sp_sampler_varient *samp,
706                       union tex_tile_address addr, int x, int y, int z)
707{
708   const struct softpipe_tex_cached_tile *tile;
709
710   addr.bits.x = x / TILE_SIZE;
711   addr.bits.y = y / TILE_SIZE;
712   addr.bits.z = z;
713   y %= TILE_SIZE;
714   x %= TILE_SIZE;
715
716   tile = sp_get_cached_tile_tex(samp->cache, addr);
717
718   return &tile->data.color[y][x][0];
719}
720
721
722static INLINE const float *
723get_texel_3d(const struct sp_sampler_varient *samp,
724	     union tex_tile_address addr, int x, int y, int z)
725{
726   const struct pipe_texture *texture = samp->texture;
727   unsigned level = addr.bits.level;
728
729   if (x < 0 || x >= (int) texture->width[level] ||
730       y < 0 || y >= (int) texture->height[level] ||
731       z < 0 || z >= (int) texture->depth[level]) {
732      return samp->sampler->border_color;
733   }
734   else {
735      return get_texel_3d_no_border( samp, addr, x, y, z );
736   }
737}
738
739
740/**
741 * Given the logbase2 of a mipmap's base level size and a mipmap level,
742 * return the size (in texels) of that mipmap level.
743 * For example, if level[0].width = 256 then base_pot will be 8.
744 * If level = 2, then we'll return 64 (the width at level=2).
745 * Return 1 if level > base_pot.
746 */
747static INLINE unsigned
748pot_level_size(unsigned base_pot, unsigned level)
749{
750   return (base_pot >= level) ? (1 << (base_pot - level)) : 1;
751}
752
753
754/* Some image-filter fastpaths:
755 */
756static INLINE void
757img_filter_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
758                                const float s[QUAD_SIZE],
759                                const float t[QUAD_SIZE],
760                                const float p[QUAD_SIZE],
761                                float lodbias,
762                                float rgba[NUM_CHANNELS][QUAD_SIZE])
763{
764   const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
765   unsigned  j;
766   unsigned level = samp->level;
767   unsigned xpot = pot_level_size(samp->xpot, level);
768   unsigned ypot = pot_level_size(samp->ypot, level);
769   unsigned xmax = (xpot - 1) & (TILE_SIZE - 1); /* MIN2(TILE_SIZE, xpot) - 1; */
770   unsigned ymax = (ypot - 1) & (TILE_SIZE - 1); /* MIN2(TILE_SIZE, ypot) - 1; */
771   union tex_tile_address addr;
772
773   addr.value = 0;
774   addr.bits.level = samp->level;
775
776   for (j = 0; j < QUAD_SIZE; j++) {
777      int c;
778
779      float u = s[j] * xpot - 0.5F;
780      float v = t[j] * ypot - 0.5F;
781
782      int uflr = util_ifloor(u);
783      int vflr = util_ifloor(v);
784
785      float xw = u - (float)uflr;
786      float yw = v - (float)vflr;
787
788      int x0 = uflr & (xpot - 1);
789      int y0 = vflr & (ypot - 1);
790
791      const float *tx[4];
792
793      /* Can we fetch all four at once:
794       */
795      if (x0 < xmax && y0 < ymax) {
796         get_texel_quad_2d_no_border_single_tile(samp, addr, x0, y0, tx);
797      }
798      else {
799         unsigned x1 = (x0 + 1) & (xpot - 1);
800         unsigned y1 = (y0 + 1) & (ypot - 1);
801         get_texel_quad_2d_no_border(samp, addr, x0, y0, x1, y1, tx);
802      }
803
804      /* interpolate R, G, B, A */
805      for (c = 0; c < 4; c++) {
806         rgba[c][j] = lerp_2d(xw, yw,
807                              tx[0][c], tx[1][c],
808                              tx[2][c], tx[3][c]);
809      }
810   }
811}
812
813
814static INLINE void
815img_filter_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler,
816                                 const float s[QUAD_SIZE],
817                                 const float t[QUAD_SIZE],
818                                 const float p[QUAD_SIZE],
819                                 float lodbias,
820                                 float rgba[NUM_CHANNELS][QUAD_SIZE])
821{
822   const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
823   unsigned  j;
824   unsigned level = samp->level;
825   unsigned xpot = pot_level_size(samp->xpot, level);
826   unsigned ypot = pot_level_size(samp->ypot, level);
827   union tex_tile_address addr;
828
829   addr.value = 0;
830   addr.bits.level = samp->level;
831
832   for (j = 0; j < QUAD_SIZE; j++) {
833      int c;
834
835      float u = s[j] * xpot;
836      float v = t[j] * ypot;
837
838      int uflr = util_ifloor(u);
839      int vflr = util_ifloor(v);
840
841      int x0 = uflr & (xpot - 1);
842      int y0 = vflr & (ypot - 1);
843
844      const float *out = get_texel_2d_no_border(samp, addr, x0, y0);
845
846      for (c = 0; c < 4; c++) {
847         rgba[c][j] = out[c];
848      }
849   }
850}
851
852
853static INLINE void
854img_filter_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler,
855                                const float s[QUAD_SIZE],
856                                const float t[QUAD_SIZE],
857                                const float p[QUAD_SIZE],
858                                float lodbias,
859                                float rgba[NUM_CHANNELS][QUAD_SIZE])
860{
861   const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
862   unsigned  j;
863   unsigned level = samp->level;
864   unsigned xpot = pot_level_size(samp->xpot, level);
865   unsigned ypot = pot_level_size(samp->ypot, level);
866   union tex_tile_address addr;
867
868   addr.value = 0;
869   addr.bits.level = samp->level;
870
871   for (j = 0; j < QUAD_SIZE; j++) {
872      int c;
873
874      float u = s[j] * xpot;
875      float v = t[j] * ypot;
876
877      int x0, y0;
878      const float *out;
879
880      x0 = util_ifloor(u);
881      if (x0 < 0)
882         x0 = 0;
883      else if (x0 > xpot - 1)
884         x0 = xpot - 1;
885
886      y0 = util_ifloor(v);
887      if (y0 < 0)
888         y0 = 0;
889      else if (y0 > ypot - 1)
890         y0 = ypot - 1;
891
892      out = get_texel_2d_no_border(samp, addr, x0, y0);
893
894      for (c = 0; c < 4; c++) {
895         rgba[c][j] = out[c];
896      }
897   }
898}
899
900
901static void
902img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler,
903                        const float s[QUAD_SIZE],
904                        const float t[QUAD_SIZE],
905                        const float p[QUAD_SIZE],
906                        float lodbias,
907                        float rgba[NUM_CHANNELS][QUAD_SIZE])
908{
909   const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
910   const struct pipe_texture *texture = samp->texture;
911   unsigned level0, j;
912   int width;
913   int x[4];
914   union tex_tile_address addr;
915
916   level0 = samp->level;
917   width = texture->width[level0];
918
919   assert(width > 0);
920
921   addr.value = 0;
922   addr.bits.level = samp->level;
923
924   samp->nearest_texcoord_s(s, width, x);
925
926   for (j = 0; j < QUAD_SIZE; j++) {
927      const float *out = get_texel_2d(samp, addr, x[j], 0);
928      int c;
929      for (c = 0; c < 4; c++) {
930         rgba[c][j] = out[c];
931      }
932   }
933}
934
935
936static void
937img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler,
938                      const float s[QUAD_SIZE],
939                      const float t[QUAD_SIZE],
940                      const float p[QUAD_SIZE],
941                      float lodbias,
942                      float rgba[NUM_CHANNELS][QUAD_SIZE])
943{
944   const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
945   const struct pipe_texture *texture = samp->texture;
946   unsigned level0, j;
947   int width, height;
948   int x[4], y[4];
949   union tex_tile_address addr;
950
951
952   level0 = samp->level;
953   width = texture->width[level0];
954   height = texture->height[level0];
955
956   assert(width > 0);
957   assert(height > 0);
958
959   addr.value = 0;
960   addr.bits.level = samp->level;
961
962   samp->nearest_texcoord_s(s, width, x);
963   samp->nearest_texcoord_t(t, height, y);
964
965   for (j = 0; j < QUAD_SIZE; j++) {
966      const float *out = get_texel_2d(samp, addr, x[j], y[j]);
967      int c;
968      for (c = 0; c < 4; c++) {
969         rgba[c][j] = out[c];
970      }
971   }
972}
973
974
975static inline union tex_tile_address
976face(union tex_tile_address addr, unsigned face )
977{
978   addr.bits.face = face;
979   return addr;
980}
981
982
983static void
984img_filter_cube_nearest(struct tgsi_sampler *tgsi_sampler,
985                        const float s[QUAD_SIZE],
986                        const float t[QUAD_SIZE],
987                        const float p[QUAD_SIZE],
988                        float lodbias,
989                        float rgba[NUM_CHANNELS][QUAD_SIZE])
990{
991   const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
992   const struct pipe_texture *texture = samp->texture;
993   const unsigned *faces = samp->faces; /* zero when not cube-mapping */
994   unsigned level0, j;
995   int width, height;
996   int x[4], y[4];
997   union tex_tile_address addr;
998
999   level0 = samp->level;
1000   width = texture->width[level0];
1001   height = texture->height[level0];
1002
1003   assert(width > 0);
1004   assert(height > 0);
1005
1006   addr.value = 0;
1007   addr.bits.level = samp->level;
1008
1009   samp->nearest_texcoord_s(s, width, x);
1010   samp->nearest_texcoord_t(t, height, y);
1011
1012   for (j = 0; j < QUAD_SIZE; j++) {
1013      const float *out = get_texel_2d(samp, face(addr, faces[j]), x[j], y[j]);
1014      int c;
1015      for (c = 0; c < 4; c++) {
1016         rgba[c][j] = out[c];
1017      }
1018   }
1019}
1020
1021
1022static void
1023img_filter_3d_nearest(struct tgsi_sampler *tgsi_sampler,
1024                      const float s[QUAD_SIZE],
1025                      const float t[QUAD_SIZE],
1026                      const float p[QUAD_SIZE],
1027                      float lodbias,
1028                      float rgba[NUM_CHANNELS][QUAD_SIZE])
1029{
1030   const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1031   const struct pipe_texture *texture = samp->texture;
1032   unsigned level0, j;
1033   int width, height, depth;
1034   int x[4], y[4], z[4];
1035   union tex_tile_address addr;
1036
1037   level0 = samp->level;
1038   width = texture->width[level0];
1039   height = texture->height[level0];
1040   depth = texture->depth[level0];
1041
1042   assert(width > 0);
1043   assert(height > 0);
1044   assert(depth > 0);
1045
1046   samp->nearest_texcoord_s(s, width,  x);
1047   samp->nearest_texcoord_t(t, height, y);
1048   samp->nearest_texcoord_p(p, depth,  z);
1049
1050   addr.value = 0;
1051   addr.bits.level = samp->level;
1052
1053   for (j = 0; j < QUAD_SIZE; j++) {
1054      const float *out = get_texel_3d(samp, addr, x[j], y[j], z[j]);
1055      int c;
1056      for (c = 0; c < 4; c++) {
1057         rgba[c][j] = out[c];
1058      }
1059   }
1060}
1061
1062
1063static void
1064img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler,
1065                     const float s[QUAD_SIZE],
1066                     const float t[QUAD_SIZE],
1067                     const float p[QUAD_SIZE],
1068                     float lodbias,
1069                     float rgba[NUM_CHANNELS][QUAD_SIZE])
1070{
1071   const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1072   const struct pipe_texture *texture = samp->texture;
1073   unsigned level0, j;
1074   int width;
1075   int x0[4], x1[4];
1076   float xw[4]; /* weights */
1077   union tex_tile_address addr;
1078
1079   level0 = samp->level;
1080   width = texture->width[level0];
1081
1082   assert(width > 0);
1083
1084   addr.value = 0;
1085   addr.bits.level = samp->level;
1086
1087   samp->linear_texcoord_s(s, width, x0, x1, xw);
1088
1089   for (j = 0; j < QUAD_SIZE; j++) {
1090      const float *tx0 = get_texel_2d(samp, addr, x0[j], 0);
1091      const float *tx1 = get_texel_2d(samp, addr, x1[j], 0);
1092      int c;
1093
1094      /* interpolate R, G, B, A */
1095      for (c = 0; c < 4; c++) {
1096         rgba[c][j] = lerp(xw[j], tx0[c], tx1[c]);
1097      }
1098   }
1099}
1100
1101
1102static void
1103img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler,
1104                     const float s[QUAD_SIZE],
1105                     const float t[QUAD_SIZE],
1106                     const float p[QUAD_SIZE],
1107                     float lodbias,
1108                     float rgba[NUM_CHANNELS][QUAD_SIZE])
1109{
1110   const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1111   const struct pipe_texture *texture = samp->texture;
1112   unsigned level0, j;
1113   int width, height;
1114   int x0[4], y0[4], x1[4], y1[4];
1115   float xw[4], yw[4]; /* weights */
1116   union tex_tile_address addr;
1117
1118   level0 = samp->level;
1119   width = texture->width[level0];
1120   height = texture->height[level0];
1121
1122   assert(width > 0);
1123   assert(height > 0);
1124
1125   addr.value = 0;
1126   addr.bits.level = samp->level;
1127
1128   samp->linear_texcoord_s(s, width,  x0, x1, xw);
1129   samp->linear_texcoord_t(t, height, y0, y1, yw);
1130
1131   for (j = 0; j < QUAD_SIZE; j++) {
1132      const float *tx0 = get_texel_2d(samp, addr, x0[j], y0[j]);
1133      const float *tx1 = get_texel_2d(samp, addr, x1[j], y0[j]);
1134      const float *tx2 = get_texel_2d(samp, addr, x0[j], y1[j]);
1135      const float *tx3 = get_texel_2d(samp, addr, x1[j], y1[j]);
1136      int c;
1137
1138      /* interpolate R, G, B, A */
1139      for (c = 0; c < 4; c++) {
1140         rgba[c][j] = lerp_2d(xw[j], yw[j],
1141                              tx0[c], tx1[c],
1142                              tx2[c], tx3[c]);
1143      }
1144   }
1145}
1146
1147
1148static void
1149img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler,
1150                       const float s[QUAD_SIZE],
1151                       const float t[QUAD_SIZE],
1152                       const float p[QUAD_SIZE],
1153                       float lodbias,
1154                       float rgba[NUM_CHANNELS][QUAD_SIZE])
1155{
1156   const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1157   const struct pipe_texture *texture = samp->texture;
1158   const unsigned *faces = samp->faces; /* zero when not cube-mapping */
1159   unsigned level0, j;
1160   int width, height;
1161   int x0[4], y0[4], x1[4], y1[4];
1162   float xw[4], yw[4]; /* weights */
1163   union tex_tile_address addr;
1164
1165   level0 = samp->level;
1166   width = texture->width[level0];
1167   height = texture->height[level0];
1168
1169   assert(width > 0);
1170   assert(height > 0);
1171
1172   addr.value = 0;
1173   addr.bits.level = samp->level;
1174
1175   samp->linear_texcoord_s(s, width,  x0, x1, xw);
1176   samp->linear_texcoord_t(t, height, y0, y1, yw);
1177
1178   for (j = 0; j < QUAD_SIZE; j++) {
1179      union tex_tile_address addrj = face(addr, faces[j]);
1180      const float *tx0 = get_texel_2d(samp, addrj, x0[j], y0[j]);
1181      const float *tx1 = get_texel_2d(samp, addrj, x1[j], y0[j]);
1182      const float *tx2 = get_texel_2d(samp, addrj, x0[j], y1[j]);
1183      const float *tx3 = get_texel_2d(samp, addrj, x1[j], y1[j]);
1184      int c;
1185
1186      /* interpolate R, G, B, A */
1187      for (c = 0; c < 4; c++) {
1188         rgba[c][j] = lerp_2d(xw[j], yw[j],
1189                              tx0[c], tx1[c],
1190                              tx2[c], tx3[c]);
1191      }
1192   }
1193}
1194
1195
1196static void
1197img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler,
1198                     const float s[QUAD_SIZE],
1199                     const float t[QUAD_SIZE],
1200                     const float p[QUAD_SIZE],
1201                     float lodbias,
1202                     float rgba[NUM_CHANNELS][QUAD_SIZE])
1203{
1204   const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1205   const struct pipe_texture *texture = samp->texture;
1206   unsigned level0, j;
1207   int width, height, depth;
1208   int x0[4], x1[4], y0[4], y1[4], z0[4], z1[4];
1209   float xw[4], yw[4], zw[4]; /* interpolation weights */
1210   union tex_tile_address addr;
1211
1212   level0 = samp->level;
1213   width = texture->width[level0];
1214   height = texture->height[level0];
1215   depth = texture->depth[level0];
1216
1217   addr.value = 0;
1218   addr.bits.level = level0;
1219
1220   assert(width > 0);
1221   assert(height > 0);
1222   assert(depth > 0);
1223
1224   samp->linear_texcoord_s(s, width,  x0, x1, xw);
1225   samp->linear_texcoord_t(t, height, y0, y1, yw);
1226   samp->linear_texcoord_p(p, depth,  z0, z1, zw);
1227
1228   for (j = 0; j < QUAD_SIZE; j++) {
1229      int c;
1230
1231      const float *tx00 = get_texel_3d(samp, addr, x0[j], y0[j], z0[j]);
1232      const float *tx01 = get_texel_3d(samp, addr, x1[j], y0[j], z0[j]);
1233      const float *tx02 = get_texel_3d(samp, addr, x0[j], y1[j], z0[j]);
1234      const float *tx03 = get_texel_3d(samp, addr, x1[j], y1[j], z0[j]);
1235
1236      const float *tx10 = get_texel_3d(samp, addr, x0[j], y0[j], z1[j]);
1237      const float *tx11 = get_texel_3d(samp, addr, x1[j], y0[j], z1[j]);
1238      const float *tx12 = get_texel_3d(samp, addr, x0[j], y1[j], z1[j]);
1239      const float *tx13 = get_texel_3d(samp, addr, x1[j], y1[j], z1[j]);
1240
1241      /* interpolate R, G, B, A */
1242      for (c = 0; c < 4; c++) {
1243         rgba[c][j] = lerp_3d(xw[j], yw[j], zw[j],
1244                              tx00[c], tx01[c],
1245                              tx02[c], tx03[c],
1246                              tx10[c], tx11[c],
1247                              tx12[c], tx13[c]);
1248      }
1249   }
1250}
1251
1252
1253static void
1254mip_filter_linear(struct tgsi_sampler *tgsi_sampler,
1255                  const float s[QUAD_SIZE],
1256                  const float t[QUAD_SIZE],
1257                  const float p[QUAD_SIZE],
1258                  float lodbias,
1259                  float rgba[NUM_CHANNELS][QUAD_SIZE])
1260{
1261   struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1262   const struct pipe_texture *texture = samp->texture;
1263   int level0;
1264   float lambda;
1265
1266   lambda = samp->compute_lambda(samp, s, t, p, lodbias);
1267   level0 = (int)lambda;
1268
1269   if (lambda < 0.0) {
1270      samp->level = 0;
1271      samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba );
1272   }
1273   else if (level0 >= texture->last_level) {
1274      samp->level = texture->last_level;
1275      samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba );
1276   }
1277   else {
1278      float levelBlend = lambda - level0;
1279      float rgba0[4][4];
1280      float rgba1[4][4];
1281      int c,j;
1282
1283      samp->level = level0;
1284      samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba0 );
1285
1286      samp->level = level0+1;
1287      samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba1 );
1288
1289      for (j = 0; j < QUAD_SIZE; j++) {
1290         for (c = 0; c < 4; c++) {
1291            rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]);
1292         }
1293      }
1294   }
1295}
1296
1297
1298static void
1299mip_filter_nearest(struct tgsi_sampler *tgsi_sampler,
1300                   const float s[QUAD_SIZE],
1301                   const float t[QUAD_SIZE],
1302                   const float p[QUAD_SIZE],
1303                   float lodbias,
1304                   float rgba[NUM_CHANNELS][QUAD_SIZE])
1305{
1306   struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1307   const struct pipe_texture *texture = samp->texture;
1308   float lambda;
1309
1310   lambda = samp->compute_lambda(samp, s, t, p, lodbias);
1311
1312   if (lambda < 0.0) {
1313      samp->level = 0;
1314      samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba );
1315   }
1316   else {
1317      samp->level = (int)(lambda + 0.5) ;
1318      samp->level = MIN2(samp->level, (int)texture->last_level);
1319      samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba );
1320   }
1321
1322#if 0
1323   printf("RGBA %g %g %g %g, %g %g %g %g, %g %g %g %g, %g %g %g %g\n",
1324          rgba[0][0], rgba[1][0], rgba[2][0], rgba[3][0],
1325          rgba[0][1], rgba[1][1], rgba[2][1], rgba[3][1],
1326          rgba[0][2], rgba[1][2], rgba[2][2], rgba[3][2],
1327          rgba[0][3], rgba[1][3], rgba[2][3], rgba[3][3]);
1328#endif
1329}
1330
1331
1332static void
1333mip_filter_none(struct tgsi_sampler *tgsi_sampler,
1334                const float s[QUAD_SIZE],
1335                const float t[QUAD_SIZE],
1336                const float p[QUAD_SIZE],
1337                float lodbias,
1338                float rgba[NUM_CHANNELS][QUAD_SIZE])
1339{
1340   struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1341   float lambda = samp->compute_lambda(samp, s, t, p, lodbias);
1342
1343   if (lambda < 0.0) {
1344      samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba );
1345   }
1346   else {
1347      samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba );
1348   }
1349}
1350
1351
1352
1353/**
1354 * Specialized version of mip_filter_linear with hard-wired calls to
1355 * 2d lambda calculation and 2d_linear_repeat_POT img filters.
1356 */
1357static void
1358mip_filter_linear_2d_linear_repeat_POT(
1359   struct tgsi_sampler *tgsi_sampler,
1360   const float s[QUAD_SIZE],
1361   const float t[QUAD_SIZE],
1362   const float p[QUAD_SIZE],
1363   float lodbias,
1364   float rgba[NUM_CHANNELS][QUAD_SIZE])
1365{
1366   struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1367   const struct pipe_texture *texture = samp->texture;
1368   int level0;
1369   float lambda;
1370
1371   lambda = compute_lambda_2d(samp, s, t, p, lodbias);
1372   level0 = (int)lambda;
1373
1374   /* Catches both negative and large values of level0:
1375    */
1376   if ((unsigned)level0 >= texture->last_level) {
1377      if (level0 < 0)
1378         samp->level = 0;
1379      else
1380         samp->level = texture->last_level;
1381
1382      img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba );
1383   }
1384   else {
1385      float levelBlend = lambda - level0;
1386      float rgba0[4][4];
1387      float rgba1[4][4];
1388      int c,j;
1389
1390      samp->level = level0;
1391      img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba0 );
1392
1393      samp->level = level0+1;
1394      img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba1 );
1395
1396      for (j = 0; j < QUAD_SIZE; j++) {
1397         for (c = 0; c < 4; c++) {
1398            rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]);
1399         }
1400      }
1401   }
1402}
1403
1404
1405
1406/**
1407 * Do shadow/depth comparisons.
1408 */
1409static void
1410sample_compare(struct tgsi_sampler *tgsi_sampler,
1411               const float s[QUAD_SIZE],
1412               const float t[QUAD_SIZE],
1413               const float p[QUAD_SIZE],
1414               float lodbias,
1415               float rgba[NUM_CHANNELS][QUAD_SIZE])
1416{
1417   struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1418   const struct pipe_sampler_state *sampler = samp->sampler;
1419   int j, k0, k1, k2, k3;
1420   float val;
1421
1422   samp->mip_filter( tgsi_sampler, s, t, p, lodbias, rgba );
1423
1424   /**
1425    * Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
1426    * When we sampled the depth texture, the depth value was put into all
1427    * RGBA channels.  We look at the red channel here.
1428    */
1429
1430   /* compare four texcoords vs. four texture samples */
1431   switch (sampler->compare_func) {
1432   case PIPE_FUNC_LESS:
1433      k0 = p[0] < rgba[0][0];
1434      k1 = p[1] < rgba[0][1];
1435      k2 = p[2] < rgba[0][2];
1436      k3 = p[3] < rgba[0][3];
1437      break;
1438   case PIPE_FUNC_LEQUAL:
1439      k0 = p[0] <= rgba[0][0];
1440      k1 = p[1] <= rgba[0][1];
1441      k2 = p[2] <= rgba[0][2];
1442      k3 = p[3] <= rgba[0][3];
1443      break;
1444   case PIPE_FUNC_GREATER:
1445      k0 = p[0] > rgba[0][0];
1446      k1 = p[1] > rgba[0][1];
1447      k2 = p[2] > rgba[0][2];
1448      k3 = p[3] > rgba[0][3];
1449      break;
1450   case PIPE_FUNC_GEQUAL:
1451      k0 = p[0] >= rgba[0][0];
1452      k1 = p[1] >= rgba[0][1];
1453      k2 = p[2] >= rgba[0][2];
1454      k3 = p[3] >= rgba[0][3];
1455      break;
1456   case PIPE_FUNC_EQUAL:
1457      k0 = p[0] == rgba[0][0];
1458      k1 = p[1] == rgba[0][1];
1459      k2 = p[2] == rgba[0][2];
1460      k3 = p[3] == rgba[0][3];
1461      break;
1462   case PIPE_FUNC_NOTEQUAL:
1463      k0 = p[0] != rgba[0][0];
1464      k1 = p[1] != rgba[0][1];
1465      k2 = p[2] != rgba[0][2];
1466      k3 = p[3] != rgba[0][3];
1467      break;
1468   case PIPE_FUNC_ALWAYS:
1469      k0 = k1 = k2 = k3 = 1;
1470      break;
1471   case PIPE_FUNC_NEVER:
1472      k0 = k1 = k2 = k3 = 0;
1473      break;
1474   default:
1475      k0 = k1 = k2 = k3 = 0;
1476      assert(0);
1477      break;
1478   }
1479
1480   /* convert four pass/fail values to an intensity in [0,1] */
1481   val = 0.25F * (k0 + k1 + k2 + k3);
1482
1483   /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
1484   for (j = 0; j < 4; j++) {
1485      rgba[0][j] = rgba[1][j] = rgba[2][j] = val;
1486      rgba[3][j] = 1.0F;
1487   }
1488}
1489
1490
1491/**
1492 * Compute which cube face is referenced by each texcoord and put that
1493 * info into the sampler faces[] array.  Then sample the cube faces
1494 */
1495static void
1496sample_cube(struct tgsi_sampler *tgsi_sampler,
1497            const float s[QUAD_SIZE],
1498            const float t[QUAD_SIZE],
1499            const float p[QUAD_SIZE],
1500            float lodbias,
1501            float rgba[NUM_CHANNELS][QUAD_SIZE])
1502{
1503   struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
1504   unsigned j;
1505   float ssss[4], tttt[4];
1506
1507   /*
1508     major axis
1509     direction     target                             sc     tc    ma
1510     ----------    -------------------------------    ---    ---   ---
1511     +rx          TEXTURE_CUBE_MAP_POSITIVE_X_EXT    -rz    -ry   rx
1512     -rx          TEXTURE_CUBE_MAP_NEGATIVE_X_EXT    +rz    -ry   rx
1513     +ry          TEXTURE_CUBE_MAP_POSITIVE_Y_EXT    +rx    +rz   ry
1514     -ry          TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT    +rx    -rz   ry
1515     +rz          TEXTURE_CUBE_MAP_POSITIVE_Z_EXT    +rx    -ry   rz
1516     -rz          TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT    -rx    -ry   rz
1517   */
1518   for (j = 0; j < QUAD_SIZE; j++) {
1519      float rx = s[j];
1520      float ry = t[j];
1521      float rz = p[j];
1522      const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz);
1523      unsigned face;
1524      float sc, tc, ma;
1525
1526      if (arx >= ary && arx >= arz) {
1527         if (rx >= 0.0F) {
1528            face = PIPE_TEX_FACE_POS_X;
1529            sc = -rz;
1530            tc = -ry;
1531            ma = arx;
1532         }
1533         else {
1534            face = PIPE_TEX_FACE_NEG_X;
1535            sc = rz;
1536            tc = -ry;
1537            ma = arx;
1538         }
1539      }
1540      else if (ary >= arx && ary >= arz) {
1541         if (ry >= 0.0F) {
1542            face = PIPE_TEX_FACE_POS_Y;
1543            sc = rx;
1544            tc = rz;
1545            ma = ary;
1546         }
1547         else {
1548            face = PIPE_TEX_FACE_NEG_Y;
1549            sc = rx;
1550            tc = -rz;
1551            ma = ary;
1552         }
1553      }
1554      else {
1555         if (rz > 0.0F) {
1556            face = PIPE_TEX_FACE_POS_Z;
1557            sc = rx;
1558            tc = -ry;
1559            ma = arz;
1560         }
1561         else {
1562            face = PIPE_TEX_FACE_NEG_Z;
1563            sc = -rx;
1564            tc = -ry;
1565            ma = arz;
1566         }
1567      }
1568
1569      {
1570	 const float ima = 1.0 / ma;
1571	 ssss[j] = ( sc * ima + 1.0F ) * 0.5F;
1572	 tttt[j] = ( tc * ima + 1.0F ) * 0.5F;
1573	 samp->faces[j] = face;
1574      }
1575   }
1576
1577   /* In our little pipeline, the compare stage is next.  If compare
1578    * is not active, this will point somewhere deeper into the
1579    * pipeline, eg. to mip_filter or even img_filter.
1580    */
1581   samp->compare(tgsi_sampler, ssss, tttt, NULL, lodbias, rgba);
1582}
1583
1584
1585
1586static wrap_nearest_func
1587get_nearest_unorm_wrap(unsigned mode)
1588{
1589   switch (mode) {
1590   case PIPE_TEX_WRAP_CLAMP:
1591      return wrap_nearest_unorm_clamp;
1592   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
1593   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
1594      return wrap_nearest_unorm_clamp_to_border;
1595   default:
1596      assert(0);
1597      return wrap_nearest_unorm_clamp;
1598   }
1599}
1600
1601
1602static wrap_nearest_func
1603get_nearest_wrap(unsigned mode)
1604{
1605   switch (mode) {
1606   case PIPE_TEX_WRAP_REPEAT:
1607      return wrap_nearest_repeat;
1608   case PIPE_TEX_WRAP_CLAMP:
1609      return wrap_nearest_clamp;
1610   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
1611      return wrap_nearest_clamp_to_edge;
1612   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
1613      return wrap_nearest_clamp_to_border;
1614   case PIPE_TEX_WRAP_MIRROR_REPEAT:
1615      return wrap_nearest_mirror_repeat;
1616   case PIPE_TEX_WRAP_MIRROR_CLAMP:
1617      return wrap_nearest_mirror_clamp;
1618   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
1619      return wrap_nearest_mirror_clamp_to_edge;
1620   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
1621      return wrap_nearest_mirror_clamp_to_border;
1622   default:
1623      assert(0);
1624      return wrap_nearest_repeat;
1625   }
1626}
1627
1628
1629static wrap_linear_func
1630get_linear_unorm_wrap(unsigned mode)
1631{
1632   switch (mode) {
1633   case PIPE_TEX_WRAP_CLAMP:
1634      return wrap_linear_unorm_clamp;
1635   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
1636   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
1637      return wrap_linear_unorm_clamp_to_border;
1638   default:
1639      assert(0);
1640      return wrap_linear_unorm_clamp;
1641   }
1642}
1643
1644
1645static wrap_linear_func
1646get_linear_wrap(unsigned mode)
1647{
1648   switch (mode) {
1649   case PIPE_TEX_WRAP_REPEAT:
1650      return wrap_linear_repeat;
1651   case PIPE_TEX_WRAP_CLAMP:
1652      return wrap_linear_clamp;
1653   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
1654      return wrap_linear_clamp_to_edge;
1655   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
1656      return wrap_linear_clamp_to_border;
1657   case PIPE_TEX_WRAP_MIRROR_REPEAT:
1658      return wrap_linear_mirror_repeat;
1659   case PIPE_TEX_WRAP_MIRROR_CLAMP:
1660      return wrap_linear_mirror_clamp;
1661   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
1662      return wrap_linear_mirror_clamp_to_edge;
1663   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
1664      return wrap_linear_mirror_clamp_to_border;
1665   default:
1666      assert(0);
1667      return wrap_linear_repeat;
1668   }
1669}
1670
1671
1672static compute_lambda_func
1673get_lambda_func(const union sp_sampler_key key)
1674{
1675   if (key.bits.processor == TGSI_PROCESSOR_VERTEX)
1676      return compute_lambda_vert;
1677
1678   switch (key.bits.target) {
1679   case PIPE_TEXTURE_1D:
1680      return compute_lambda_1d;
1681   case PIPE_TEXTURE_2D:
1682   case PIPE_TEXTURE_CUBE:
1683      return compute_lambda_2d;
1684   case PIPE_TEXTURE_3D:
1685      return compute_lambda_3d;
1686   default:
1687      assert(0);
1688      return compute_lambda_1d;
1689   }
1690}
1691
1692
1693static filter_func
1694get_img_filter(const union sp_sampler_key key,
1695               unsigned filter,
1696               const struct pipe_sampler_state *sampler)
1697{
1698   switch (key.bits.target) {
1699   case PIPE_TEXTURE_1D:
1700      if (filter == PIPE_TEX_FILTER_NEAREST)
1701         return img_filter_1d_nearest;
1702      else
1703         return img_filter_1d_linear;
1704      break;
1705   case PIPE_TEXTURE_2D:
1706      /* Try for fast path:
1707       */
1708      if (key.bits.is_pot &&
1709          sampler->wrap_s == sampler->wrap_t &&
1710          sampler->normalized_coords)
1711      {
1712         switch (sampler->wrap_s) {
1713         case PIPE_TEX_WRAP_REPEAT:
1714            switch (filter) {
1715            case PIPE_TEX_FILTER_NEAREST:
1716               return img_filter_2d_nearest_repeat_POT;
1717            case PIPE_TEX_FILTER_LINEAR:
1718               return img_filter_2d_linear_repeat_POT;
1719            default:
1720               break;
1721            }
1722            break;
1723         case PIPE_TEX_WRAP_CLAMP:
1724            switch (filter) {
1725            case PIPE_TEX_FILTER_NEAREST:
1726               return img_filter_2d_nearest_clamp_POT;
1727            default:
1728               break;
1729            }
1730         }
1731      }
1732      /* Otherwise use default versions:
1733       */
1734      if (filter == PIPE_TEX_FILTER_NEAREST)
1735         return img_filter_2d_nearest;
1736      else
1737         return img_filter_2d_linear;
1738      break;
1739   case PIPE_TEXTURE_CUBE:
1740      if (filter == PIPE_TEX_FILTER_NEAREST)
1741         return img_filter_cube_nearest;
1742      else
1743         return img_filter_cube_linear;
1744      break;
1745   case PIPE_TEXTURE_3D:
1746      if (filter == PIPE_TEX_FILTER_NEAREST)
1747         return img_filter_3d_nearest;
1748      else
1749         return img_filter_3d_linear;
1750      break;
1751   default:
1752      assert(0);
1753      return img_filter_1d_nearest;
1754   }
1755}
1756
1757
1758/**
1759 * Bind the given texture object and texture cache to the sampler varient.
1760 */
1761void
1762sp_sampler_varient_bind_texture( struct sp_sampler_varient *samp,
1763                                 struct softpipe_tex_tile_cache *tex_cache,
1764                                 const struct pipe_texture *texture )
1765{
1766   const struct pipe_sampler_state *sampler = samp->sampler;
1767
1768   samp->texture = texture;
1769   samp->cache = tex_cache;
1770   samp->xpot = util_unsigned_logbase2( texture->width[0] );
1771   samp->ypot = util_unsigned_logbase2( texture->height[0] );
1772   samp->level = CLAMP((int) sampler->min_lod, 0, (int) texture->last_level);
1773}
1774
1775
/**
 * Destroy a sampler varient by releasing its storage with FREE().
 *
 * \param samp  the sampler varient to free
 */
void
sp_sampler_varient_destroy( struct sp_sampler_varient *samp )
{
   FREE(samp);
}
1781
1782
1783/**
1784 * Create a sampler varient for a given set of non-orthogonal state.
1785 */
1786struct sp_sampler_varient *
1787sp_create_sampler_varient( const struct pipe_sampler_state *sampler,
1788                           const union sp_sampler_key key )
1789{
1790   struct sp_sampler_varient *samp = CALLOC_STRUCT(sp_sampler_varient);
1791   if (!samp)
1792      return NULL;
1793
1794   samp->sampler = sampler;
1795   samp->key = key;
1796
1797   /* Note that (for instance) linear_texcoord_s and
1798    * nearest_texcoord_s may be active at the same time, if the
1799    * sampler min_img_filter differs from its mag_img_filter.
1800    */
1801   if (sampler->normalized_coords) {
1802      samp->linear_texcoord_s = get_linear_wrap( sampler->wrap_s );
1803      samp->linear_texcoord_t = get_linear_wrap( sampler->wrap_t );
1804      samp->linear_texcoord_p = get_linear_wrap( sampler->wrap_r );
1805
1806      samp->nearest_texcoord_s = get_nearest_wrap( sampler->wrap_s );
1807      samp->nearest_texcoord_t = get_nearest_wrap( sampler->wrap_t );
1808      samp->nearest_texcoord_p = get_nearest_wrap( sampler->wrap_r );
1809   }
1810   else {
1811      samp->linear_texcoord_s = get_linear_unorm_wrap( sampler->wrap_s );
1812      samp->linear_texcoord_t = get_linear_unorm_wrap( sampler->wrap_t );
1813      samp->linear_texcoord_p = get_linear_unorm_wrap( sampler->wrap_r );
1814
1815      samp->nearest_texcoord_s = get_nearest_unorm_wrap( sampler->wrap_s );
1816      samp->nearest_texcoord_t = get_nearest_unorm_wrap( sampler->wrap_t );
1817      samp->nearest_texcoord_p = get_nearest_unorm_wrap( sampler->wrap_r );
1818   }
1819
1820   samp->compute_lambda = get_lambda_func( key );
1821
1822   samp->min_img_filter = get_img_filter(key, sampler->min_img_filter, sampler);
1823   samp->mag_img_filter = get_img_filter(key, sampler->mag_img_filter, sampler);
1824
1825   switch (sampler->min_mip_filter) {
1826   case PIPE_TEX_MIPFILTER_NONE:
1827      if (sampler->min_img_filter == sampler->mag_img_filter)
1828         samp->mip_filter = samp->min_img_filter;
1829      else
1830         samp->mip_filter = mip_filter_none;
1831      break;
1832
1833   case PIPE_TEX_MIPFILTER_NEAREST:
1834      samp->mip_filter = mip_filter_nearest;
1835      break;
1836
1837   case PIPE_TEX_MIPFILTER_LINEAR:
1838      if (key.bits.is_pot &&
1839          sampler->min_img_filter == sampler->mag_img_filter &&
1840          sampler->normalized_coords &&
1841          sampler->wrap_s == PIPE_TEX_WRAP_REPEAT &&
1842          sampler->wrap_t == PIPE_TEX_WRAP_REPEAT &&
1843          sampler->min_img_filter == PIPE_TEX_FILTER_LINEAR)
1844      {
1845         samp->mip_filter = mip_filter_linear_2d_linear_repeat_POT;
1846      }
1847      else
1848      {
1849         samp->mip_filter = mip_filter_linear;
1850      }
1851      break;
1852   }
1853
1854   if (sampler->compare_mode != FALSE) {
1855      samp->compare = sample_compare;
1856   }
1857   else {
1858      /* Skip compare operation by promoting the mip_filter function
1859       * pointer:
1860       */
1861      samp->compare = samp->mip_filter;
1862   }
1863
1864   if (key.bits.target == PIPE_TEXTURE_CUBE) {
1865      samp->base.get_samples = sample_cube;
1866   }
1867   else {
1868      samp->faces[0] = 0;
1869      samp->faces[1] = 0;
1870      samp->faces[2] = 0;
1871      samp->faces[3] = 0;
1872
1873      /* Skip cube face determination by promoting the compare
1874       * function pointer:
1875       */
1876      samp->base.get_samples = samp->compare;
1877   }
1878
1879   return samp;
1880}
1881