sp_tex_sample.c revision 1fd40e506c2207664f0c3f435e4614472ea4c540
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 * Copyright 2008 VMware, Inc.  All rights reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29/**
30 * Texture sampling
31 *
32 * Authors:
33 *   Brian Paul
34 */
35
36#include "sp_context.h"
37#include "sp_quad.h"
38#include "sp_surface.h"
39#include "sp_texture.h"
40#include "sp_tex_sample.h"
41#include "sp_tile_cache.h"
42#include "pipe/p_context.h"
43#include "pipe/p_defines.h"
44#include "util/u_math.h"
45#include "util/u_memory.h"
46
47
48
/*
 * FRAC(x) evaluates to x minus util_ifloor(x).
 *
 * This has to be exact: otherwise linear-filtered textures sometimes show
 * 1-pixel bands of improperly weighted texels.  The tests/texwrap.c demo
 * is a good test.
 *
 * Note that for x < 0 the result is not the conventional fractional part
 * of x; it is 1 - true_frac(x) instead.
 *
 * The argument is expanded twice, so it must be free of side effects.
 */
#define FRAC(x)  ((x) - util_ifloor(x))
57
58
59/**
60 * Linear interpolation macro
61 */
62static INLINE float
63lerp(float a, float v0, float v1)
64{
65   return v0 + a * (v1 - v0);
66}
67
68
69/**
70 * Do 2D/biliner interpolation of float values.
71 * v00, v10, v01 and v11 are typically four texture samples in a square/box.
72 * a and b are the horizontal and vertical interpolants.
73 * It's important that this function is inlined when compiled with
74 * optimization!  If we find that's not true on some systems, convert
75 * to a macro.
76 */
77static INLINE float
78lerp_2d(float a, float b,
79        float v00, float v10, float v01, float v11)
80{
81   const float temp0 = lerp(a, v00, v10);
82   const float temp1 = lerp(a, v01, v11);
83   return lerp(b, temp0, temp1);
84}
85
86
87/**
88 * As above, but 3D interpolation of 8 values.
89 */
90static INLINE float
91lerp_3d(float a, float b, float c,
92        float v000, float v100, float v010, float v110,
93        float v001, float v101, float v011, float v111)
94{
95   const float temp0 = lerp_2d(a, b, v000, v100, v010, v110);
96   const float temp1 = lerp_2d(a, b, v001, v101, v011, v111);
97   return lerp(c, temp0, temp1);
98}
99
100
101
/**
 * Wrap a (possibly negative) integer texel index A into [0, B-1] for
 * texture repeat.
 *
 * A plain signed A % B gives a non-positive result for A < 0.  Merely
 * casting A to unsigned first is only correct when B divides 2^32 (i.e.
 * B is a power of two); for any other size it selects the wrong texel,
 * e.g. (unsigned) -1 % 3 == 0 where 2 is required.  So take the signed
 * remainder, shift it into the non-negative range and reduce once more.
 *
 * A is evaluated exactly once.  B is evaluated three times and must have
 * no side effects; it must be positive and representable as an int.
 * The result has type unsigned.
 */
#define REMAINDER(A, B) \
   ((unsigned) ((((int) (A) % (int) (B)) + (int) (B)) % (int) (B)))
107
108
109/**
110 * Apply texture coord wrapping mode and return integer texture indexes
111 * for a vector of four texcoords (S or T or P).
112 * \param wrapMode  PIPE_TEX_WRAP_x
113 * \param s  the incoming texcoords
114 * \param size  the texture image size
115 * \param icoord  returns the integer texcoords
116 * \return  integer texture index
117 */
118static INLINE void
119nearest_texcoord_4(unsigned wrapMode, const float s[4], unsigned size,
120                   int icoord[4])
121{
122   uint ch;
123   switch (wrapMode) {
124   case PIPE_TEX_WRAP_REPEAT:
125      /* s limited to [0,1) */
126      /* i limited to [0,size-1] */
127      for (ch = 0; ch < 4; ch++) {
128         int i = util_ifloor(s[ch] * size);
129         icoord[ch] = REMAINDER(i, size);
130      }
131      return;
132   case PIPE_TEX_WRAP_CLAMP:
133      /* s limited to [0,1] */
134      /* i limited to [0,size-1] */
135      for (ch = 0; ch < 4; ch++) {
136         if (s[ch] <= 0.0F)
137            icoord[ch] = 0;
138         else if (s[ch] >= 1.0F)
139            icoord[ch] = size - 1;
140         else
141            icoord[ch] = util_ifloor(s[ch] * size);
142      }
143      return;
144   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
145      {
146         /* s limited to [min,max] */
147         /* i limited to [0, size-1] */
148         const float min = 1.0F / (2.0F * size);
149         const float max = 1.0F - min;
150         for (ch = 0; ch < 4; ch++) {
151            if (s[ch] < min)
152               icoord[ch] = 0;
153            else if (s[ch] > max)
154               icoord[ch] = size - 1;
155            else
156               icoord[ch] = util_ifloor(s[ch] * size);
157         }
158      }
159      return;
160   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
161      {
162         /* s limited to [min,max] */
163         /* i limited to [-1, size] */
164         const float min = -1.0F / (2.0F * size);
165         const float max = 1.0F - min;
166         for (ch = 0; ch < 4; ch++) {
167            if (s[ch] <= min)
168               icoord[ch] = -1;
169            else if (s[ch] >= max)
170               icoord[ch] = size;
171            else
172               icoord[ch] = util_ifloor(s[ch] * size);
173         }
174      }
175      return;
176   case PIPE_TEX_WRAP_MIRROR_REPEAT:
177      {
178         const float min = 1.0F / (2.0F * size);
179         const float max = 1.0F - min;
180         for (ch = 0; ch < 4; ch++) {
181            const int flr = util_ifloor(s[ch]);
182            float u;
183            if (flr & 1)
184               u = 1.0F - (s[ch] - (float) flr);
185            else
186               u = s[ch] - (float) flr;
187            if (u < min)
188               icoord[ch] = 0;
189            else if (u > max)
190               icoord[ch] = size - 1;
191            else
192               icoord[ch] = util_ifloor(u * size);
193         }
194      }
195      return;
196   case PIPE_TEX_WRAP_MIRROR_CLAMP:
197      for (ch = 0; ch < 4; ch++) {
198         /* s limited to [0,1] */
199         /* i limited to [0,size-1] */
200         const float u = fabsf(s[ch]);
201         if (u <= 0.0F)
202            icoord[ch] = 0;
203         else if (u >= 1.0F)
204            icoord[ch] = size - 1;
205         else
206            icoord[ch] = util_ifloor(u * size);
207      }
208      return;
209   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
210      {
211         /* s limited to [min,max] */
212         /* i limited to [0, size-1] */
213         const float min = 1.0F / (2.0F * size);
214         const float max = 1.0F - min;
215         for (ch = 0; ch < 4; ch++) {
216            const float u = fabsf(s[ch]);
217            if (u < min)
218               icoord[ch] = 0;
219            else if (u > max)
220               icoord[ch] = size - 1;
221            else
222               icoord[ch] = util_ifloor(u * size);
223         }
224      }
225      return;
226   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
227      {
228         /* s limited to [min,max] */
229         /* i limited to [0, size-1] */
230         const float min = -1.0F / (2.0F * size);
231         const float max = 1.0F - min;
232         for (ch = 0; ch < 4; ch++) {
233            const float u = fabsf(s[ch]);
234            if (u < min)
235               icoord[ch] = -1;
236            else if (u > max)
237               icoord[ch] = size;
238            else
239               icoord[ch] = util_ifloor(u * size);
240         }
241      }
242      return;
243   default:
244      assert(0);
245   }
246}
247
248
249/**
250 * Used to compute texel locations for linear sampling for four texcoords.
251 * \param wrapMode  PIPE_TEX_WRAP_x
252 * \param s  the texcoords
253 * \param size  the texture image size
254 * \param icoord0  returns first texture indexes
255 * \param icoord1  returns second texture indexes (usually icoord0 + 1)
256 * \param w  returns blend factor/weight between texture indexes
257 * \param icoord  returns the computed integer texture coords
258 */
259static INLINE void
260linear_texcoord_4(unsigned wrapMode, const float s[4], unsigned size,
261                  int icoord0[4], int icoord1[4], float w[4])
262{
263   uint ch;
264
265   switch (wrapMode) {
266   case PIPE_TEX_WRAP_REPEAT:
267      for (ch = 0; ch < 4; ch++) {
268         float u = s[ch] * size - 0.5F;
269         icoord0[ch] = REMAINDER(util_ifloor(u), size);
270         icoord1[ch] = REMAINDER(icoord0[ch] + 1, size);
271         w[ch] = FRAC(u);
272      }
273      break;;
274   case PIPE_TEX_WRAP_CLAMP:
275      for (ch = 0; ch < 4; ch++) {
276         float u = CLAMP(s[ch], 0.0F, 1.0F);
277         u = u * size - 0.5f;
278         icoord0[ch] = util_ifloor(u);
279         icoord1[ch] = icoord0[ch] + 1;
280         w[ch] = FRAC(u);
281      }
282      break;;
283   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
284      for (ch = 0; ch < 4; ch++) {
285         float u = CLAMP(s[ch], 0.0F, 1.0F);
286         u = u * size - 0.5f;
287         icoord0[ch] = util_ifloor(u);
288         icoord1[ch] = icoord0[ch] + 1;
289         if (icoord0[ch] < 0)
290            icoord0[ch] = 0;
291         if (icoord1[ch] >= (int) size)
292            icoord1[ch] = size - 1;
293         w[ch] = FRAC(u);
294      }
295      break;;
296   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
297      {
298         const float min = -1.0F / (2.0F * size);
299         const float max = 1.0F - min;
300         for (ch = 0; ch < 4; ch++) {
301            float u = CLAMP(s[ch], min, max);
302            u = u * size - 0.5f;
303            icoord0[ch] = util_ifloor(u);
304            icoord1[ch] = icoord0[ch] + 1;
305            w[ch] = FRAC(u);
306         }
307      }
308      break;;
309   case PIPE_TEX_WRAP_MIRROR_REPEAT:
310      for (ch = 0; ch < 4; ch++) {
311         const int flr = util_ifloor(s[ch]);
312         float u;
313         if (flr & 1)
314            u = 1.0F - (s[ch] - (float) flr);
315         else
316            u = s[ch] - (float) flr;
317         u = u * size - 0.5F;
318         icoord0[ch] = util_ifloor(u);
319         icoord1[ch] = icoord0[ch] + 1;
320         if (icoord0[ch] < 0)
321            icoord0[ch] = 0;
322         if (icoord1[ch] >= (int) size)
323            icoord1[ch] = size - 1;
324         w[ch] = FRAC(u);
325      }
326      break;;
327   case PIPE_TEX_WRAP_MIRROR_CLAMP:
328      for (ch = 0; ch < 4; ch++) {
329         float u = fabsf(s[ch]);
330         if (u >= 1.0F)
331            u = (float) size;
332         else
333            u *= size;
334         u -= 0.5F;
335         icoord0[ch] = util_ifloor(u);
336         icoord1[ch] = icoord0[ch] + 1;
337         w[ch] = FRAC(u);
338      }
339      break;;
340   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
341      for (ch = 0; ch < 4; ch++) {
342         float u = fabsf(s[ch]);
343         if (u >= 1.0F)
344            u = (float) size;
345         else
346            u *= size;
347         u -= 0.5F;
348         icoord0[ch] = util_ifloor(u);
349         icoord1[ch] = icoord0[ch] + 1;
350         if (icoord0[ch] < 0)
351            icoord0[ch] = 0;
352         if (icoord1[ch] >= (int) size)
353            icoord1[ch] = size - 1;
354         w[ch] = FRAC(u);
355      }
356      break;;
357   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
358      {
359         const float min = -1.0F / (2.0F * size);
360         const float max = 1.0F - min;
361         for (ch = 0; ch < 4; ch++) {
362            float u = fabsf(s[ch]);
363            if (u <= min)
364               u = min * size;
365            else if (u >= max)
366               u = max * size;
367            else
368               u *= size;
369            u -= 0.5F;
370            icoord0[ch] = util_ifloor(u);
371            icoord1[ch] = icoord0[ch] + 1;
372            w[ch] = FRAC(u);
373         }
374      }
375      break;;
376   default:
377      assert(0);
378   }
379}
380
381
382/**
383 * For RECT textures / unnormalized texcoords
384 * Only a subset of wrap modes supported.
385 */
386static INLINE void
387nearest_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size,
388                          int icoord[4])
389{
390   uint ch;
391   switch (wrapMode) {
392   case PIPE_TEX_WRAP_CLAMP:
393      for (ch = 0; ch < 4; ch++) {
394         int i = util_ifloor(s[ch]);
395         icoord[ch]= CLAMP(i, 0, (int) size-1);
396      }
397      return;
398   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
399      /* fall-through */
400   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
401      for (ch = 0; ch < 4; ch++) {
402         icoord[ch]= util_ifloor( CLAMP(s[ch], 0.5F, (float) size - 0.5F) );
403      }
404      return;
405   default:
406      assert(0);
407   }
408}
409
410
411/**
412 * For RECT textures / unnormalized texcoords.
413 * Only a subset of wrap modes supported.
414 */
415static INLINE void
416linear_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size,
417                         int icoord0[4], int icoord1[4], float w[4])
418{
419   uint ch;
420   switch (wrapMode) {
421   case PIPE_TEX_WRAP_CLAMP:
422      for (ch = 0; ch < 4; ch++) {
423         /* Not exactly what the spec says, but it matches NVIDIA output */
424         float u = CLAMP(s[ch] - 0.5F, 0.0f, (float) size - 1.0f);
425         icoord0[ch] = util_ifloor(u);
426         icoord1[ch] = icoord0[ch] + 1;
427         w[ch] = FRAC(u);
428      }
429      return;
430   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
431      /* fall-through */
432   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
433      for (ch = 0; ch < 4; ch++) {
434         float u = CLAMP(s[ch], 0.5F, (float) size - 0.5F);
435         u -= 0.5F;
436         icoord0[ch] = util_ifloor(u);
437         icoord1[ch] = icoord0[ch] + 1;
438         if (icoord1[ch] > (int) size - 1)
439            icoord1[ch] = size - 1;
440         w[ch] = FRAC(u);
441      }
442      break;
443   default:
444      assert(0);
445   }
446}
447
448
449static unsigned
450choose_cube_face(float rx, float ry, float rz, float *newS, float *newT)
451{
452   /*
453      major axis
454      direction     target                             sc     tc    ma
455      ----------    -------------------------------    ---    ---   ---
456       +rx          TEXTURE_CUBE_MAP_POSITIVE_X_EXT    -rz    -ry   rx
457       -rx          TEXTURE_CUBE_MAP_NEGATIVE_X_EXT    +rz    -ry   rx
458       +ry          TEXTURE_CUBE_MAP_POSITIVE_Y_EXT    +rx    +rz   ry
459       -ry          TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT    +rx    -rz   ry
460       +rz          TEXTURE_CUBE_MAP_POSITIVE_Z_EXT    +rx    -ry   rz
461       -rz          TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT    -rx    -ry   rz
462   */
463   const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz);
464   unsigned face;
465   float sc, tc, ma;
466
467   if (arx > ary && arx > arz) {
468      if (rx >= 0.0F) {
469         face = PIPE_TEX_FACE_POS_X;
470         sc = -rz;
471         tc = -ry;
472         ma = arx;
473      }
474      else {
475         face = PIPE_TEX_FACE_NEG_X;
476         sc = rz;
477         tc = -ry;
478         ma = arx;
479      }
480   }
481   else if (ary > arx && ary > arz) {
482      if (ry >= 0.0F) {
483         face = PIPE_TEX_FACE_POS_Y;
484         sc = rx;
485         tc = rz;
486         ma = ary;
487      }
488      else {
489         face = PIPE_TEX_FACE_NEG_Y;
490         sc = rx;
491         tc = -rz;
492         ma = ary;
493      }
494   }
495   else {
496      if (rz > 0.0F) {
497         face = PIPE_TEX_FACE_POS_Z;
498         sc = rx;
499         tc = -ry;
500         ma = arz;
501      }
502      else {
503         face = PIPE_TEX_FACE_NEG_Z;
504         sc = -rx;
505         tc = -ry;
506         ma = arz;
507      }
508   }
509
510   *newS = ( sc / ma + 1.0F ) * 0.5F;
511   *newT = ( tc / ma + 1.0F ) * 0.5F;
512
513   return face;
514}
515
516
517/**
518 * Examine the quad's texture coordinates to compute the partial
519 * derivatives w.r.t X and Y, then compute lambda (level of detail).
520 *
521 * This is only done for fragment shaders, not vertex shaders.
522 */
523static float
524compute_lambda(const struct pipe_texture *tex,
525               const struct pipe_sampler_state *sampler,
526               const float s[QUAD_SIZE],
527               const float t[QUAD_SIZE],
528               const float p[QUAD_SIZE],
529               float lodbias)
530{
531   float rho, lambda;
532
533   assert(sampler->normalized_coords);
534
535   assert(s);
536   {
537      float dsdx = s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT];
538      float dsdy = s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT];
539      dsdx = fabsf(dsdx);
540      dsdy = fabsf(dsdy);
541      rho = MAX2(dsdx, dsdy) * tex->width[0];
542   }
543   if (t) {
544      float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT];
545      float dtdy = t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT];
546      float max;
547      dtdx = fabsf(dtdx);
548      dtdy = fabsf(dtdy);
549      max = MAX2(dtdx, dtdy) * tex->height[0];
550      rho = MAX2(rho, max);
551   }
552   if (p) {
553      float dpdx = p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT];
554      float dpdy = p[QUAD_TOP_LEFT]     - p[QUAD_BOTTOM_LEFT];
555      float max;
556      dpdx = fabsf(dpdx);
557      dpdy = fabsf(dpdy);
558      max = MAX2(dpdx, dpdy) * tex->depth[0];
559      rho = MAX2(rho, max);
560   }
561
562   lambda = util_fast_log2(rho);
563   lambda += lodbias + sampler->lod_bias;
564   lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod);
565
566   return lambda;
567}
568
569
570/**
571 * Do several things here:
572 * 1. Compute lambda from the texcoords, if needed
573 * 2. Determine if we're minifying or magnifying
574 * 3. If minifying, choose mipmap levels
575 * 4. Return image filter to use within mipmap images
576 * \param level0  Returns first mipmap level to sample from
577 * \param level1  Returns second mipmap level to sample from
578 * \param levelBlend  Returns blend factor between levels, in [0,1]
579 * \param imgFilter  Returns either the min or mag filter, depending on lambda
580 */
581static void
582choose_mipmap_levels(const struct pipe_texture *texture,
583                     const struct pipe_sampler_state *sampler,
584                     const float s[QUAD_SIZE],
585                     const float t[QUAD_SIZE],
586                     const float p[QUAD_SIZE],
587                     boolean computeLambda,
588                     float lodbias,
589                     unsigned *level0, unsigned *level1, float *levelBlend,
590                     unsigned *imgFilter)
591{
592   if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
593      /* no mipmap selection needed */
594      *level0 = *level1 = CLAMP((int) sampler->min_lod,
595                                0, (int) texture->last_level);
596
597      if (sampler->min_img_filter != sampler->mag_img_filter) {
598         /* non-mipmapped texture, but still need to determine if doing
599          * minification or magnification.
600          */
601         float lambda = compute_lambda(texture, sampler, s, t, p, lodbias);
602         if (lambda <= 0.0) {
603            *imgFilter = sampler->mag_img_filter;
604         }
605         else {
606            *imgFilter = sampler->min_img_filter;
607         }
608      }
609      else {
610         *imgFilter = sampler->mag_img_filter;
611      }
612   }
613   else {
614      float lambda;
615
616      if (computeLambda)
617         /* fragment shader */
618         lambda = compute_lambda(texture, sampler, s, t, p, lodbias);
619      else
620         /* vertex shader */
621         lambda = lodbias; /* not really a bias, but absolute LOD */
622
623      if (lambda <= 0.0) { /* XXX threshold depends on the filter */
624         /* magnifying */
625         *imgFilter = sampler->mag_img_filter;
626         *level0 = *level1 = 0;
627      }
628      else {
629         /* minifying */
630         *imgFilter = sampler->min_img_filter;
631
632         /* choose mipmap level(s) and compute the blend factor between them */
633         if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
634            /* Nearest mipmap level */
635            const int lvl = (int) (lambda + 0.5);
636            *level0 =
637            *level1 = CLAMP(lvl, 0, (int) texture->last_level);
638         }
639         else {
640            /* Linear interpolation between mipmap levels */
641            const int lvl = (int) lambda;
642            *level0 = CLAMP(lvl,     0, (int) texture->last_level);
643            *level1 = CLAMP(lvl + 1, 0, (int) texture->last_level);
644            *levelBlend = FRAC(lambda);  /* blending weight between levels */
645         }
646      }
647   }
648}
649
650
651/**
652 * Get a texel from a texture, using the texture tile cache.
653 *
654 * \param face  the cube face in 0..5
655 * \param level  the mipmap level
656 * \param x  the x coord of texel within 2D image
657 * \param y  the y coord of texel within 2D image
658 * \param z  which slice of a 3D texture
659 * \param rgba  the quad to put the texel/color into
660 * \param j  which element of the rgba quad to write to
661 *
662 * XXX maybe move this into sp_tile_cache.c and merge with the
663 * sp_get_cached_tile_tex() function.  Also, get 4 texels instead of 1...
664 */
665static void
666get_texel_quad_2d(const struct tgsi_sampler *tgsi_sampler,
667                  unsigned face, unsigned level, int x, int y,
668                  const float *out[4])
669{
670   const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
671
672   const struct softpipe_cached_tile *tile
673      = sp_get_cached_tile_tex(samp->cache,
674                               tile_address(x, y, 0, face, level));
675
676   y %= TILE_SIZE;
677   x %= TILE_SIZE;
678
679   out[0] = &tile->data.color[y  ][x  ][0];
680   out[1] = &tile->data.color[y  ][x+1][0];
681   out[2] = &tile->data.color[y+1][x  ][0];
682   out[3] = &tile->data.color[y+1][x+1][0];
683}
684
685static INLINE const float *
686get_texel_2d_ptr(const struct tgsi_sampler *tgsi_sampler,
687                 unsigned face, unsigned level, int x, int y)
688{
689   const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
690
691   const struct softpipe_cached_tile *tile
692      = sp_get_cached_tile_tex(samp->cache,
693                               tile_address(x, y, 0, face, level));
694
695   y %= TILE_SIZE;
696   x %= TILE_SIZE;
697
698   return &tile->data.color[y][x][0];
699}
700
701
/**
 * Get pointers to four texels of a 2D texture that may live in different
 * tiles.  Each texel is looked up separately with get_texel_2d_ptr().
 * \param face  the cube face
 * \param level  the mipmap level
 * \param x0, y0  coords of the first column / row
 * \param x1, y1  coords of the second column / row
 * \param out  returns pointers to the texels at (x0,y0), (x1,y0),
 *             (x0,y1) and (x1,y1), in that order
 */
static void
get_texel_quad_2d_mt(const struct tgsi_sampler *tgsi_sampler,
                     unsigned face, unsigned level,
                     int x0, int y0,
                     int x1, int y1,
                     const float *out[4])
{
   /* the lookups are issued in the same order as the output slots */
   out[0] = get_texel_2d_ptr( tgsi_sampler, face, level, x0, y0 );
   out[1] = get_texel_2d_ptr( tgsi_sampler, face, level, x1, y0 );
   out[2] = get_texel_2d_ptr( tgsi_sampler, face, level, x0, y1 );
   out[3] = get_texel_2d_ptr( tgsi_sampler, face, level, x1, y1 );
}
718
719static void
720get_texel(const struct tgsi_sampler *tgsi_sampler,
721                 unsigned face, unsigned level, int x, int y, int z,
722                 float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j)
723{
724   const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
725   const struct pipe_texture *texture = samp->texture;
726   const struct pipe_sampler_state *sampler = samp->sampler;
727
728   if (x < 0 || x >= (int) texture->width[level] ||
729       y < 0 || y >= (int) texture->height[level] ||
730       z < 0 || z >= (int) texture->depth[level]) {
731      rgba[0][j] = sampler->border_color[0];
732      rgba[1][j] = sampler->border_color[1];
733      rgba[2][j] = sampler->border_color[2];
734      rgba[3][j] = sampler->border_color[3];
735   }
736   else {
737      const unsigned tx = x % TILE_SIZE;
738      const unsigned ty = y % TILE_SIZE;
739      const struct softpipe_cached_tile *tile;
740
741      tile = sp_get_cached_tile_tex(samp->cache,
742                                    tile_address(x, y, z, face, level));
743
744      rgba[0][j] = tile->data.color[ty][tx][0];
745      rgba[1][j] = tile->data.color[ty][tx][1];
746      rgba[2][j] = tile->data.color[ty][tx][2];
747      rgba[3][j] = tile->data.color[ty][tx][3];
748      if (0)
749      {
750         debug_printf("Get texel %f %f %f %f from %s\n",
751                      rgba[0][j], rgba[1][j], rgba[2][j], rgba[3][j],
752                      pf_name(texture->format));
753      }
754   }
755}
756
757
758/**
759 * Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
760 * When we sampled the depth texture, the depth value was put into all
761 * RGBA channels.  We look at the red channel here.
762 * \param rgba  quad of (depth) texel values
763 * \param p  texture 'P' components for four pixels in quad
764 * \param j  which pixel in the quad to test [0..3]
765 */
766static INLINE void
767shadow_compare(const struct pipe_sampler_state *sampler,
768               float rgba[NUM_CHANNELS][QUAD_SIZE],
769               const float p[QUAD_SIZE],
770               uint j)
771{
772   int k;
773   switch (sampler->compare_func) {
774   case PIPE_FUNC_LESS:
775      k = p[j] < rgba[0][j];
776      break;
777   case PIPE_FUNC_LEQUAL:
778      k = p[j] <= rgba[0][j];
779      break;
780   case PIPE_FUNC_GREATER:
781      k = p[j] > rgba[0][j];
782      break;
783   case PIPE_FUNC_GEQUAL:
784      k = p[j] >= rgba[0][j];
785      break;
786   case PIPE_FUNC_EQUAL:
787      k = p[j] == rgba[0][j];
788      break;
789   case PIPE_FUNC_NOTEQUAL:
790      k = p[j] != rgba[0][j];
791      break;
792   case PIPE_FUNC_ALWAYS:
793      k = 1;
794      break;
795   case PIPE_FUNC_NEVER:
796      k = 0;
797      break;
798   default:
799      k = 0;
800      assert(0);
801      break;
802   }
803
804   /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
805   rgba[0][j] = rgba[1][j] = rgba[2][j] = (float) k;
806   rgba[3][j] = 1.0F;
807}
808
809
810/**
811 * As above, but do four z/texture comparisons.
812 */
813static INLINE void
814shadow_compare4(const struct pipe_sampler_state *sampler,
815                float rgba[NUM_CHANNELS][QUAD_SIZE],
816                const float p[QUAD_SIZE])
817{
818   int j, k0, k1, k2, k3;
819   float val;
820
821   /* compare four texcoords vs. four texture samples */
822   switch (sampler->compare_func) {
823   case PIPE_FUNC_LESS:
824      k0 = p[0] < rgba[0][0];
825      k1 = p[1] < rgba[0][1];
826      k2 = p[2] < rgba[0][2];
827      k3 = p[3] < rgba[0][3];
828      break;
829   case PIPE_FUNC_LEQUAL:
830      k0 = p[0] <= rgba[0][0];
831      k1 = p[1] <= rgba[0][1];
832      k2 = p[2] <= rgba[0][2];
833      k3 = p[3] <= rgba[0][3];
834      break;
835   case PIPE_FUNC_GREATER:
836      k0 = p[0] > rgba[0][0];
837      k1 = p[1] > rgba[0][1];
838      k2 = p[2] > rgba[0][2];
839      k3 = p[3] > rgba[0][3];
840      break;
841   case PIPE_FUNC_GEQUAL:
842      k0 = p[0] >= rgba[0][0];
843      k1 = p[1] >= rgba[0][1];
844      k2 = p[2] >= rgba[0][2];
845      k3 = p[3] >= rgba[0][3];
846      break;
847   case PIPE_FUNC_EQUAL:
848      k0 = p[0] == rgba[0][0];
849      k1 = p[1] == rgba[0][1];
850      k2 = p[2] == rgba[0][2];
851      k3 = p[3] == rgba[0][3];
852      break;
853   case PIPE_FUNC_NOTEQUAL:
854      k0 = p[0] != rgba[0][0];
855      k1 = p[1] != rgba[0][1];
856      k2 = p[2] != rgba[0][2];
857      k3 = p[3] != rgba[0][3];
858      break;
859   case PIPE_FUNC_ALWAYS:
860      k0 = k1 = k2 = k3 = 1;
861      break;
862   case PIPE_FUNC_NEVER:
863      k0 = k1 = k2 = k3 = 0;
864      break;
865   default:
866      k0 = k1 = k2 = k3 = 0;
867      assert(0);
868      break;
869   }
870
871   /* convert four pass/fail values to an intensity in [0,1] */
872   val = 0.25F * (k0 + k1 + k2 + k3);
873
874   /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
875   for (j = 0; j < 4; j++) {
876      rgba[0][j] = rgba[1][j] = rgba[2][j] = val;
877      rgba[3][j] = 1.0F;
878   }
879}
880
881
882
883static void
884sp_get_samples_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
885                                    const float s[QUAD_SIZE],
886                                    const float t[QUAD_SIZE],
887                                    const float p[QUAD_SIZE],
888                                    float lodbias,
889                                    float rgba[NUM_CHANNELS][QUAD_SIZE])
890{
891   const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
892   unsigned  j;
893   unsigned level = samp->level;
894   unsigned xpot = 1 << (samp->xpot - level);
895   unsigned ypot = 1 << (samp->ypot - level);
896   unsigned xmax = (xpot - 1) & (TILE_SIZE - 1); /* MIN2(TILE_SIZE, xpot) - 1; */
897   unsigned ymax = (ypot - 1) & (TILE_SIZE - 1); /* MIN2(TILE_SIZE, ypot) - 1; */
898
899   for (j = 0; j < QUAD_SIZE; j++) {
900      int c;
901
902      float u = s[j] * xpot - 0.5F;
903      float v = t[j] * ypot - 0.5F;
904
905      int uflr = util_ifloor(u);
906      int vflr = util_ifloor(v);
907
908      float xw = u - (float)uflr;
909      float yw = v - (float)vflr;
910
911      int x0 = uflr & (xpot - 1);
912      int y0 = vflr & (ypot - 1);
913
914      const float *tx[4];
915
916
917      /* Can we fetch all four at once:
918       */
919      if (x0 < xmax && y0 < ymax)
920      {
921         get_texel_quad_2d(tgsi_sampler, 0, level, x0, y0, tx);
922      }
923      else
924      {
925         unsigned x1 = (x0 + 1) & (xpot - 1);
926         unsigned y1 = (y0 + 1) & (ypot - 1);
927         get_texel_quad_2d_mt(tgsi_sampler, 0, level,
928                              x0, y0, x1, y1, tx);
929      }
930
931
932      /* interpolate R, G, B, A */
933      for (c = 0; c < 4; c++) {
934         rgba[c][j] = lerp_2d(xw, yw,
935                              tx[0][c], tx[1][c],
936                              tx[2][c], tx[3][c]);
937      }
938   }
939}
940
941
942static void
943sp_get_samples_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler,
944                                     const float s[QUAD_SIZE],
945                                     const float t[QUAD_SIZE],
946                                     const float p[QUAD_SIZE],
947                                     float lodbias,
948                                     float rgba[NUM_CHANNELS][QUAD_SIZE])
949{
950   const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
951   unsigned  j;
952   unsigned level = samp->level;
953   unsigned xpot = 1 << (samp->xpot - level);
954   unsigned ypot = 1 << (samp->ypot - level);
955
956   for (j = 0; j < QUAD_SIZE; j++) {
957      int c;
958
959      float u = s[j] * xpot;
960      float v = t[j] * ypot;
961
962      int uflr = util_ifloor(u);
963      int vflr = util_ifloor(v);
964
965      int x0 = uflr & (xpot - 1);
966      int y0 = vflr & (ypot - 1);
967
968      const float *out = get_texel_2d_ptr(tgsi_sampler, 0, level, x0, y0);
969
970      for (c = 0; c < 4; c++) {
971         rgba[c][j] = out[c];
972      }
973   }
974}
975
976
977static void
978sp_get_samples_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler,
979                                     const float s[QUAD_SIZE],
980                                     const float t[QUAD_SIZE],
981                                     const float p[QUAD_SIZE],
982                                     float lodbias,
983                                     float rgba[NUM_CHANNELS][QUAD_SIZE])
984{
985   const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
986   unsigned  j;
987   unsigned level = samp->level;
988   unsigned xpot = (1<<samp->xpot);
989   unsigned ypot = (1<<samp->ypot);
990
991   for (j = 0; j < QUAD_SIZE; j++) {
992      int c;
993
994      float u = s[j] * xpot;
995      float v = t[j] * ypot;
996
997      int x0, y0;
998      const float *out;
999
1000      x0 = util_ifloor(u);
1001      if (x0 < 0)
1002         x0 = 0;
1003      else if (x0 > xpot - 1)
1004         x0 = xpot - 1;
1005
1006      y0 = util_ifloor(v);
1007      if (y0 < 0)
1008         y0 = 0;
1009      else if (y0 > ypot - 1)
1010         y0 = ypot - 1;
1011
1012      out = get_texel_2d_ptr(tgsi_sampler, 0, level, x0, y0);
1013
1014      for (c = 0; c < 4; c++) {
1015         rgba[c][j] = out[c];
1016      }
1017   }
1018}
1019
1020
1021static void
1022sp_get_samples_2d_linear_mip_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
1023                                               const float s[QUAD_SIZE],
1024                                               const float t[QUAD_SIZE],
1025                                               const float p[QUAD_SIZE],
1026                                               float lodbias,
1027                                               float rgba[NUM_CHANNELS][QUAD_SIZE])
1028{
1029   struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
1030   const struct pipe_texture *texture = samp->texture;
1031   const struct pipe_sampler_state *sampler = samp->sampler;
1032   int level0;
1033   float lambda;
1034
1035   lambda = compute_lambda(texture, sampler, s, t, p, lodbias);
1036   level0 = (int)lambda;
1037
1038   if (lambda < 0.0) {
1039      samp->level = 0;
1040      sp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
1041                                           s, t, p, 0, rgba );
1042   }
1043   else if (level0 >= texture->last_level) {
1044      samp->level = texture->last_level;
1045      sp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
1046                                           s, t, p, 0, rgba );
1047   }
1048   else {
1049      float levelBlend = lambda - level0;
1050      float rgba0[4][4];
1051      float rgba1[4][4];
1052      int c,j;
1053
1054      samp->level = level0;
1055      sp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
1056                                           s, t, p, 0, rgba0 );
1057
1058      samp->level = level0+1;
1059      sp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
1060                                           s, t, p, 0, rgba1 );
1061
1062      for (j = 0; j < QUAD_SIZE; j++) {
1063         for (c = 0; c < 4; c++) {
1064            rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]);
1065         }
1066      }
1067   }
1068}
1069
1070/**
1071 * Common code for sampling 1D/2D/cube textures.
1072 * Could probably extend for 3D...
1073 */
1074static void
1075sp_get_samples_2d_common(const struct tgsi_sampler *tgsi_sampler,
1076                         const float s[QUAD_SIZE],
1077                         const float t[QUAD_SIZE],
1078                         const float p[QUAD_SIZE],
1079                         boolean computeLambda,
1080                         float lodbias,
1081                         float rgba[NUM_CHANNELS][QUAD_SIZE],
1082                         const unsigned faces[4])
1083{
1084   const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
1085   const struct pipe_texture *texture = samp->texture;
1086   const struct pipe_sampler_state *sampler = samp->sampler;
1087   unsigned level0, level1, j, imgFilter;
1088   int width, height;
1089   float levelBlend;
1090
1091   choose_mipmap_levels(texture, sampler, s, t, p, computeLambda, lodbias,
1092                        &level0, &level1, &levelBlend, &imgFilter);
1093
1094   assert(sampler->normalized_coords);
1095
1096   width = texture->width[level0];
1097   height = texture->height[level0];
1098
1099   assert(width > 0);
1100
1101   switch (imgFilter) {
1102   case PIPE_TEX_FILTER_NEAREST:
1103      {
1104         int x[4], y[4];
1105         nearest_texcoord_4(sampler->wrap_s, s, width, x);
1106         nearest_texcoord_4(sampler->wrap_t, t, height, y);
1107
1108         for (j = 0; j < QUAD_SIZE; j++) {
1109            get_texel(tgsi_sampler, faces[j], level0, x[j], y[j], 0, rgba, j);
1110            if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
1111               shadow_compare(sampler, rgba, p, j);
1112            }
1113
1114            if (level0 != level1) {
1115               /* get texels from second mipmap level and blend */
1116               float rgba2[4][4];
1117               unsigned c;
1118               x[j] /= 2;
1119               y[j] /= 2;
1120               get_texel(tgsi_sampler, faces[j], level1, x[j], y[j], 0,
1121                         rgba2, j);
1122               if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){
1123                  shadow_compare(sampler, rgba2, p, j);
1124               }
1125
1126               for (c = 0; c < NUM_CHANNELS; c++) {
1127                  rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]);
1128               }
1129            }
1130         }
1131      }
1132      break;
1133   case PIPE_TEX_FILTER_LINEAR:
1134   case PIPE_TEX_FILTER_ANISO:
1135      {
1136         int x0[4], y0[4], x1[4], y1[4];
1137         float xw[4], yw[4]; /* weights */
1138
1139         linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw);
1140         linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw);
1141
1142         for (j = 0; j < QUAD_SIZE; j++) {
1143            float tx[4][4]; /* texels */
1144            int c;
1145            get_texel(tgsi_sampler, faces[j], level0, x0[j], y0[j], 0, tx, 0);
1146            get_texel(tgsi_sampler, faces[j], level0, x1[j], y0[j], 0, tx, 1);
1147            get_texel(tgsi_sampler, faces[j], level0, x0[j], y1[j], 0, tx, 2);
1148            get_texel(tgsi_sampler, faces[j], level0, x1[j], y1[j], 0, tx, 3);
1149            if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
1150               shadow_compare4(sampler, tx, p);
1151            }
1152
1153            /* interpolate R, G, B, A */
1154            for (c = 0; c < 4; c++) {
1155               rgba[c][j] = lerp_2d(xw[j], yw[j],
1156                                    tx[c][0], tx[c][1],
1157                                    tx[c][2], tx[c][3]);
1158            }
1159
1160            if (level0 != level1) {
1161               /* get texels from second mipmap level and blend */
1162               float rgba2[4][4];
1163
1164               /* XXX: This is incorrect -- will often end up with (x0
1165                *  == x1 && y0 == y1), meaning that we fetch the same
1166                *  texel four times and linearly interpolate between
1167                *  identical values.  The correct approach would be to
1168                *  call linear_texcoord again for the second level.
1169                */
1170               x0[j] /= 2;
1171               y0[j] /= 2;
1172               x1[j] /= 2;
1173               y1[j] /= 2;
1174               get_texel(tgsi_sampler, faces[j], level1, x0[j], y0[j], 0, tx, 0);
1175               get_texel(tgsi_sampler, faces[j], level1, x1[j], y0[j], 0, tx, 1);
1176               get_texel(tgsi_sampler, faces[j], level1, x0[j], y1[j], 0, tx, 2);
1177               get_texel(tgsi_sampler, faces[j], level1, x1[j], y1[j], 0, tx, 3);
1178               if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){
1179                  shadow_compare4(sampler, tx, p);
1180               }
1181
1182               /* interpolate R, G, B, A */
1183               for (c = 0; c < 4; c++) {
1184                  rgba2[c][j] = lerp_2d(xw[j], yw[j],
1185                                        tx[c][0], tx[c][1], tx[c][2], tx[c][3]);
1186               }
1187
1188               for (c = 0; c < NUM_CHANNELS; c++) {
1189                  rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]);
1190               }
1191            }
1192         }
1193      }
1194      break;
1195   default:
1196      assert(0);
1197   }
1198}
1199
1200
1201static INLINE void
1202sp_get_samples_1d(const struct tgsi_sampler *sampler,
1203                  const float s[QUAD_SIZE],
1204                  const float t[QUAD_SIZE],
1205                  const float p[QUAD_SIZE],
1206                  boolean computeLambda,
1207                  float lodbias,
1208                  float rgba[NUM_CHANNELS][QUAD_SIZE])
1209{
1210   static const unsigned faces[4] = {0, 0, 0, 0};
1211   static const float tzero[4] = {0, 0, 0, 0};
1212   sp_get_samples_2d_common(sampler, s, tzero, NULL,
1213                            computeLambda, lodbias, rgba, faces);
1214}
1215
1216
1217static INLINE void
1218sp_get_samples_2d(const struct tgsi_sampler *sampler,
1219                  const float s[QUAD_SIZE],
1220                  const float t[QUAD_SIZE],
1221                  const float p[QUAD_SIZE],
1222                  boolean computeLambda,
1223                  float lodbias,
1224                  float rgba[NUM_CHANNELS][QUAD_SIZE])
1225{
1226   static const unsigned faces[4] = {0, 0, 0, 0};
1227   sp_get_samples_2d_common(sampler, s, t, p,
1228                            computeLambda, lodbias, rgba, faces);
1229}
1230
1231
1232static INLINE void
1233sp_get_samples_3d(const struct tgsi_sampler *tgsi_sampler,
1234                  const float s[QUAD_SIZE],
1235                  const float t[QUAD_SIZE],
1236                  const float p[QUAD_SIZE],
1237                  boolean computeLambda,
1238                  float lodbias,
1239                  float rgba[NUM_CHANNELS][QUAD_SIZE])
1240{
1241   const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
1242   const struct pipe_texture *texture = samp->texture;
1243   const struct pipe_sampler_state *sampler = samp->sampler;
1244   /* get/map pipe_surfaces corresponding to 3D tex slices */
1245   unsigned level0, level1, j, imgFilter;
1246   int width, height, depth;
1247   float levelBlend;
1248   const uint face = 0;
1249
1250   choose_mipmap_levels(texture, sampler, s, t, p, computeLambda, lodbias,
1251                        &level0, &level1, &levelBlend, &imgFilter);
1252
1253   assert(sampler->normalized_coords);
1254
1255   width = texture->width[level0];
1256   height = texture->height[level0];
1257   depth = texture->depth[level0];
1258
1259   assert(width > 0);
1260   assert(height > 0);
1261   assert(depth > 0);
1262
1263   switch (imgFilter) {
1264   case PIPE_TEX_FILTER_NEAREST:
1265      {
1266         int x[4], y[4], z[4];
1267         nearest_texcoord_4(sampler->wrap_s, s, width, x);
1268         nearest_texcoord_4(sampler->wrap_t, t, height, y);
1269         nearest_texcoord_4(sampler->wrap_r, p, depth, z);
1270         for (j = 0; j < QUAD_SIZE; j++) {
1271            get_texel(tgsi_sampler, face, level0, x[j], y[j], z[j], rgba, j);
1272            if (level0 != level1) {
1273               /* get texels from second mipmap level and blend */
1274               float rgba2[4][4];
1275               unsigned c;
1276               x[j] /= 2;
1277               y[j] /= 2;
1278               z[j] /= 2;
1279               get_texel(tgsi_sampler, face, level1, x[j], y[j], z[j], rgba2, j);
1280               for (c = 0; c < NUM_CHANNELS; c++) {
1281                  rgba[c][j] = lerp(levelBlend, rgba2[c][j], rgba[c][j]);
1282               }
1283            }
1284         }
1285      }
1286      break;
1287   case PIPE_TEX_FILTER_LINEAR:
1288   case PIPE_TEX_FILTER_ANISO:
1289      {
1290         int x0[4], x1[4], y0[4], y1[4], z0[4], z1[4];
1291         float xw[4], yw[4], zw[4]; /* interpolation weights */
1292         linear_texcoord_4(sampler->wrap_s, s, width,  x0, x1, xw);
1293         linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw);
1294         linear_texcoord_4(sampler->wrap_r, p, depth,  z0, z1, zw);
1295
1296         for (j = 0; j < QUAD_SIZE; j++) {
1297            int c;
1298            float tx0[4][4], tx1[4][4];
1299            get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z0[j], tx0, 0);
1300            get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z0[j], tx0, 1);
1301            get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z0[j], tx0, 2);
1302            get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z0[j], tx0, 3);
1303            get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z1[j], tx1, 0);
1304            get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z1[j], tx1, 1);
1305            get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z1[j], tx1, 2);
1306            get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z1[j], tx1, 3);
1307
1308            /* interpolate R, G, B, A */
1309            for (c = 0; c < 4; c++) {
1310               rgba[c][j] = lerp_3d(xw[j], yw[j], zw[j],
1311                                    tx0[c][0], tx0[c][1],
1312                                    tx0[c][2], tx0[c][3],
1313                                    tx1[c][0], tx1[c][1],
1314                                    tx1[c][2], tx1[c][3]);
1315            }
1316
1317            if (level0 != level1) {
1318               /* get texels from second mipmap level and blend */
1319               float rgba2[4][4];
1320               x0[j] /= 2;
1321               y0[j] /= 2;
1322               z0[j] /= 2;
1323               x1[j] /= 2;
1324               y1[j] /= 2;
1325               z1[j] /= 2;
1326               get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z0[j], tx0, 0);
1327               get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z0[j], tx0, 1);
1328               get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z0[j], tx0, 2);
1329               get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z0[j], tx0, 3);
1330               get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z1[j], tx1, 0);
1331               get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z1[j], tx1, 1);
1332               get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z1[j], tx1, 2);
1333               get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z1[j], tx1, 3);
1334
1335               /* interpolate R, G, B, A */
1336               for (c = 0; c < 4; c++) {
1337                  rgba2[c][j] = lerp_3d(xw[j], yw[j], zw[j],
1338                                        tx0[c][0], tx0[c][1],
1339                                        tx0[c][2], tx0[c][3],
1340                                        tx1[c][0], tx1[c][1],
1341                                        tx1[c][2], tx1[c][3]);
1342               }
1343
1344               /* blend mipmap levels */
1345               for (c = 0; c < NUM_CHANNELS; c++) {
1346                  rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]);
1347               }
1348            }
1349         }
1350      }
1351      break;
1352   default:
1353      assert(0);
1354   }
1355}
1356
1357
1358static void
1359sp_get_samples_cube(const struct tgsi_sampler *sampler,
1360                    const float s[QUAD_SIZE],
1361                    const float t[QUAD_SIZE],
1362                    const float p[QUAD_SIZE],
1363                    boolean computeLambda,
1364                    float lodbias,
1365                    float rgba[NUM_CHANNELS][QUAD_SIZE])
1366{
1367   unsigned faces[QUAD_SIZE], j;
1368   float ssss[4], tttt[4];
1369   for (j = 0; j < QUAD_SIZE; j++) {
1370      faces[j] = choose_cube_face(s[j], t[j], p[j], ssss + j, tttt + j);
1371   }
1372   sp_get_samples_2d_common(sampler, ssss, tttt, NULL,
1373                            computeLambda, lodbias, rgba, faces);
1374}
1375
1376
1377static void
1378sp_get_samples_rect(const struct tgsi_sampler *tgsi_sampler,
1379                    const float s[QUAD_SIZE],
1380                    const float t[QUAD_SIZE],
1381                    const float p[QUAD_SIZE],
1382                    boolean computeLambda,
1383                    float lodbias,
1384                    float rgba[NUM_CHANNELS][QUAD_SIZE])
1385{
1386   const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
1387   const struct pipe_texture *texture = samp->texture;
1388   const struct pipe_sampler_state *sampler = samp->sampler;
1389   const uint face = 0;
1390   unsigned level0, level1, j, imgFilter;
1391   int width, height;
1392   float levelBlend;
1393
1394   choose_mipmap_levels(texture, sampler, s, t, p, computeLambda, lodbias,
1395                        &level0, &level1, &levelBlend, &imgFilter);
1396
1397   /* texture RECTS cannot be mipmapped */
1398   assert(level0 == level1);
1399
1400   width = texture->width[level0];
1401   height = texture->height[level0];
1402
1403   assert(width > 0);
1404
1405   switch (imgFilter) {
1406   case PIPE_TEX_FILTER_NEAREST:
1407      {
1408         int x[4], y[4];
1409         nearest_texcoord_unnorm_4(sampler->wrap_s, s, width, x);
1410         nearest_texcoord_unnorm_4(sampler->wrap_t, t, height, y);
1411         for (j = 0; j < QUAD_SIZE; j++) {
1412            get_texel(tgsi_sampler, face, level0, x[j], y[j], 0, rgba, j);
1413            if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
1414               shadow_compare(sampler, rgba, p, j);
1415            }
1416         }
1417      }
1418      break;
1419   case PIPE_TEX_FILTER_LINEAR:
1420   case PIPE_TEX_FILTER_ANISO:
1421      {
1422         int x0[4], y0[4], x1[4], y1[4];
1423         float xw[4], yw[4]; /* weights */
1424         linear_texcoord_unnorm_4(sampler->wrap_s, s, width,  x0, x1, xw);
1425         linear_texcoord_unnorm_4(sampler->wrap_t, t, height, y0, y1, yw);
1426         for (j = 0; j < QUAD_SIZE; j++) {
1427            float tx[4][4]; /* texels */
1428            int c;
1429            get_texel(tgsi_sampler, face, level0, x0[j], y0[j], 0, tx, 0);
1430            get_texel(tgsi_sampler, face, level0, x1[j], y0[j], 0, tx, 1);
1431            get_texel(tgsi_sampler, face, level0, x0[j], y1[j], 0, tx, 2);
1432            get_texel(tgsi_sampler, face, level0, x1[j], y1[j], 0, tx, 3);
1433            if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
1434               shadow_compare4(sampler, tx, p);
1435            }
1436            for (c = 0; c < 4; c++) {
1437               rgba[c][j] = lerp_2d(xw[j], yw[j],
1438                                    tx[c][0], tx[c][1], tx[c][2], tx[c][3]);
1439            }
1440         }
1441      }
1442      break;
1443   default:
1444      assert(0);
1445   }
1446}
1447
1448
1449/**
1450 * Common code for vertex/fragment program texture sampling.
1451 */
1452static INLINE void
1453sp_get_samples(struct tgsi_sampler *tgsi_sampler,
1454               const float s[QUAD_SIZE],
1455               const float t[QUAD_SIZE],
1456               const float p[QUAD_SIZE],
1457               boolean computeLambda,
1458               float lodbias,
1459               float rgba[NUM_CHANNELS][QUAD_SIZE])
1460{
1461   const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
1462   const struct pipe_texture *texture = samp->texture;
1463   const struct pipe_sampler_state *sampler = samp->sampler;
1464
1465   if (!texture)
1466      return;
1467
1468   switch (texture->target) {
1469   case PIPE_TEXTURE_1D:
1470      assert(sampler->normalized_coords);
1471      sp_get_samples_1d(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba);
1472      break;
1473   case PIPE_TEXTURE_2D:
1474      if (sampler->normalized_coords)
1475         sp_get_samples_2d(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba);
1476      else
1477         sp_get_samples_rect(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba);
1478      break;
1479   case PIPE_TEXTURE_3D:
1480      assert(sampler->normalized_coords);
1481      sp_get_samples_3d(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba);
1482      break;
1483   case PIPE_TEXTURE_CUBE:
1484      assert(sampler->normalized_coords);
1485      sp_get_samples_cube(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba);
1486      break;
1487   default:
1488      assert(0);
1489   }
1490
1491#if 0 /* DEBUG */
1492   {
1493      int i;
1494      printf("Sampled at %f, %f, %f:\n", s[0], t[0], p[0]);
1495      for (i = 0; i < 4; i++) {
1496         printf("Frag %d: %f %f %f %f\n", i,
1497                rgba[0][i],
1498                rgba[1][i],
1499                rgba[2][i],
1500                rgba[3][i]);
1501      }
1502   }
1503#endif
1504}
1505
1506static void
1507sp_get_samples_fallback(struct tgsi_sampler *tgsi_sampler,
1508                        const float s[QUAD_SIZE],
1509                        const float t[QUAD_SIZE],
1510                        const float p[QUAD_SIZE],
1511                        float lodbias,
1512                        float rgba[NUM_CHANNELS][QUAD_SIZE])
1513{
1514   sp_get_samples(tgsi_sampler, s, t, p, TRUE, lodbias, rgba);
1515}
1516
1517/**
1518 * Called via tgsi_sampler::get_samples() when running a fragment shader.
1519 * Get four filtered RGBA values from the sampler's texture.
1520 */
1521void
1522sp_get_samples_fragment(struct tgsi_sampler *tgsi_sampler,
1523                        const float s[QUAD_SIZE],
1524                        const float t[QUAD_SIZE],
1525                        const float p[QUAD_SIZE],
1526                        float lodbias,
1527                        float rgba[NUM_CHANNELS][QUAD_SIZE])
1528{
1529   struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
1530   const struct pipe_texture *texture = samp->texture;
1531   const struct pipe_sampler_state *sampler = samp->sampler;
1532
1533   tgsi_sampler->get_samples = sp_get_samples_fallback;
1534
1535   /* Try to hook in a faster sampler.  Ultimately we'll have to
1536    * code-generate these.  Luckily most of this looks like it is
1537    * orthogonal state within the sampler.
1538    */
1539   if (texture->target == PIPE_TEXTURE_2D &&
1540       sampler->min_img_filter == sampler->mag_img_filter &&
1541       sampler->wrap_s == sampler->wrap_t &&
1542       sampler->compare_mode == FALSE &&
1543       sampler->normalized_coords)
1544   {
1545      samp->xpot = util_unsigned_logbase2( samp->texture->width[0] );
1546      samp->ypot = util_unsigned_logbase2( samp->texture->height[0] );
1547
1548      if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
1549         samp->level = CLAMP((int) sampler->min_lod,
1550                             0, (int) texture->last_level);
1551
1552         if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) {
1553            switch (sampler->min_img_filter) {
1554            case PIPE_TEX_FILTER_NEAREST:
1555               tgsi_sampler->get_samples = sp_get_samples_2d_nearest_repeat_POT;
1556               break;
1557            case PIPE_TEX_FILTER_LINEAR:
1558               tgsi_sampler->get_samples = sp_get_samples_2d_linear_repeat_POT;
1559               break;
1560            default:
1561               break;
1562            }
1563         }
1564         else if (sampler->wrap_s == PIPE_TEX_WRAP_CLAMP) {
1565            switch (sampler->min_img_filter) {
1566            case PIPE_TEX_FILTER_NEAREST:
1567               tgsi_sampler->get_samples = sp_get_samples_2d_nearest_clamp_POT;
1568               break;
1569            default:
1570               break;
1571            }
1572         }
1573      }
1574      else if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1575         if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) {
1576            switch (sampler->min_img_filter) {
1577            case PIPE_TEX_FILTER_LINEAR:
1578               tgsi_sampler->get_samples = sp_get_samples_2d_linear_mip_linear_repeat_POT;
1579               break;
1580            default:
1581               break;
1582            }
1583         }
1584      }
1585   }
1586   else if (0) {
1587      _debug_printf("target %d/%d min_mip %d/%d min_img %d/%d wrap %d/%d compare %d/%d norm %d/%d\n",
1588                    texture->target, PIPE_TEXTURE_2D,
1589                    sampler->min_mip_filter, PIPE_TEX_MIPFILTER_NONE,
1590                    sampler->min_img_filter, sampler->mag_img_filter,
1591                    sampler->wrap_s, sampler->wrap_t,
1592                    sampler->compare_mode, FALSE,
1593                    sampler->normalized_coords, TRUE);
1594   }
1595
1596   tgsi_sampler->get_samples( tgsi_sampler, s, t, p, lodbias, rgba );
1597}
1598
1599
1600/**
1601 * Called via tgsi_sampler::get_samples() when running a vertex shader.
1602 * Get four filtered RGBA values from the sampler's texture.
1603 */
1604void
1605sp_get_samples_vertex(struct tgsi_sampler *tgsi_sampler,
1606                      const float s[QUAD_SIZE],
1607                      const float t[QUAD_SIZE],
1608                      const float p[QUAD_SIZE],
1609                      float lodbias,
1610                      float rgba[NUM_CHANNELS][QUAD_SIZE])
1611{
1612   sp_get_samples(tgsi_sampler, s, t, p, FALSE, lodbias, rgba);
1613}
1614