1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 * Copyright 2008-2010 VMware, Inc.  All rights reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29/**
30 * Texture sampling
31 *
32 * Authors:
33 *   Brian Paul
34 *   Keith Whitwell
35 */
36
37#include "pipe/p_context.h"
38#include "pipe/p_defines.h"
39#include "pipe/p_shader_tokens.h"
40#include "util/u_math.h"
41#include "util/u_memory.h"
42#include "sp_quad.h"   /* only for #define QUAD_* tokens */
43#include "sp_tex_sample.h"
44#include "sp_tex_tile_cache.h"
45
46
47/** Set to one to help debug texture sampling */
48#define DEBUG_TEX 0
49
50
51/*
52 * Return fractional part of 'f'.  Used for computing interpolation weights.
53 * Need to be careful with negative values.
54 * Note, if this function isn't perfect you'll sometimes see 1-pixel bands
55 * of improperly weighted linear-filtered textures.
56 * The tests/texwrap.c demo is a good test.
57 */
58static INLINE float
59frac(float f)
60{
61   return f - floorf(f);
62}
63
64
65
/**
 * Linear interpolation between two values.
 * \param a   blend weight; a = 0 yields v0, a = 1 yields v1
 * \param v0  value at weight 0
 * \param v1  value at weight 1
 */
static INLINE float
lerp(float a, float v0, float v1)
{
   const float delta = v1 - v0;

   return v0 + a * delta;
}
74
75
76/**
77 * Do 2D/bilinear interpolation of float values.
78 * v00, v10, v01 and v11 are typically four texture samples in a square/box.
79 * a and b are the horizontal and vertical interpolants.
80 * It's important that this function is inlined when compiled with
81 * optimization!  If we find that's not true on some systems, convert
82 * to a macro.
83 */
84static INLINE float
85lerp_2d(float a, float b,
86        float v00, float v10, float v01, float v11)
87{
88   const float temp0 = lerp(a, v00, v10);
89   const float temp1 = lerp(a, v01, v11);
90   return lerp(b, temp0, temp1);
91}
92
93
94/**
95 * As above, but 3D interpolation of 8 values.
96 */
97static INLINE float
98lerp_3d(float a, float b, float c,
99        float v000, float v100, float v010, float v110,
100        float v001, float v101, float v011, float v111)
101{
102   const float temp0 = lerp_2d(a, b, v000, v100, v010, v110);
103   const float temp1 = lerp_2d(a, b, v001, v101, v011, v111);
104   return lerp(c, temp0, temp1);
105}
106
107
108
/**
 * Compute coord modulo size for repeat wrap modes, always returning a
 * value in [0, size-1].
 *
 * C's % operator truncates toward zero, so a negative coord yields a
 * negative (or zero) remainder.  That remainder is folded back into range
 * by adding size once.  Unlike biasing coord by a fixed multiple of size,
 * this is correct for any negative coord and any (non-power-of-two) size.
 *
 * \param coord  integer texel coordinate, may be negative
 * \param size   texture image size; must be non-zero
 * \return  the wrapped coordinate
 */
static INLINE int
repeat(int coord, unsigned size)
{
   const int isize = (int) size;
   int r = coord % isize;

   if (r < 0)
      r += isize;

   return r;
}
120
121
/**
 * Nearest-filter wrap functions.
 * Each of the wrap_nearest_* functions applies one texture coord wrapping
 * mode to a single texcoord (S or T or P) and produces an integer texture
 * index.
 * \param s  the incoming texcoord
 * \param size  the texture image size
 * \param icoord  returns the integer texcoord
 */

/**
 * Repeat wrap for nearest sampling: scale the coord by the image size,
 * floor it, then wrap the result with repeat().
 */
static void
wrap_nearest_repeat(float s, unsigned size, int *icoord)
{
   /* i limited to [0,size-1] by repeat() */
   const int texel = util_ifloor(s * size);

   *icoord = repeat(texel, size);
}
139
140
/**
 * Clamp wrap for nearest sampling.
 * Coords at or below 0 select texel 0, coords at or above 1 select the
 * last texel; anything in between is scaled by size and floored.
 */
static void
wrap_nearest_clamp(float s, unsigned size, int *icoord)
{
   if (s <= 0.0F) {
      *icoord = 0;
      return;
   }

   if (s >= 1.0F) {
      *icoord = size - 1;
      return;
   }

   /* 0 < s < 1 here */
   *icoord = util_ifloor(s * size);
}
153
154
/**
 * Clamp-to-edge wrap for nearest sampling.
 * Coords below 1/(2*size) select texel 0 and coords above 1 - 1/(2*size)
 * select the last texel; anything in between is scaled and floored.
 */
static void
wrap_nearest_clamp_to_edge(float s, unsigned size, int *icoord)
{
   /* half a texel, expressed in normalized coords */
   const float half_texel = 1.0F / (2.0F * size);
   const float upper = 1.0F - half_texel;
   int texel;

   if (s < half_texel)
      texel = 0;
   else if (s > upper)
      texel = size - 1;
   else
      texel = util_ifloor(s * size);

   /* texel limited to [0, size-1] */
   *icoord = texel;
}
169
170
/**
 * Clamp-to-border wrap for nearest sampling.
 * Coords at or below -1/(2*size) yield index -1 and coords at or above
 * 1 + 1/(2*size) yield index 'size'; these two out-of-range indices are
 * the ones the get_texel_* helpers in this file map to the border color.
 */
static void
wrap_nearest_clamp_to_border(float s, unsigned size, int *icoord)
{
   const float lo = -1.0F / (2.0F * size);
   const float hi = 1.0F - lo;

   if (s <= lo) {
      *icoord = -1;
      return;
   }

   if (s >= hi) {
      *icoord = size;
      return;
   }

   /* i limited to [-1, size] */
   *icoord = util_ifloor(s * size);
}
185
186
/**
 * Mirrored-repeat wrap for nearest sampling.
 * The fractional part of the coord is used directly when floor(s) is even
 * and flipped (1 - frac) when it is odd; the result is then handled like
 * clamp-to-edge.
 */
static void
wrap_nearest_mirror_repeat(float s, unsigned size, int *icoord)
{
   const float half_texel = 1.0F / (2.0F * size);
   const float upper = 1.0F - half_texel;
   const int odd_period = util_ifloor(s) & 1;
   const float f = frac(s);
   /* odd periods run backwards */
   const float u = odd_period ? 1.0F - f : f;
   int texel;

   if (u < half_texel)
      texel = 0;
   else if (u > upper)
      texel = size - 1;
   else
      texel = util_ifloor(u * size);

   *icoord = texel;
}
203
204
/**
 * Mirror-clamp wrap for nearest sampling.
 * Works on |s|: values at or above 1 select the last texel, otherwise
 * |s| is scaled by size and floored.
 */
static void
wrap_nearest_mirror_clamp(float s, unsigned size, int *icoord)
{
   /* mirror about zero */
   const float u = fabsf(s);

   /* i limited to [0,size-1] */
   *icoord = (u <= 0.0F) ? 0
           : (u >= 1.0F) ? (int) (size - 1)
           : util_ifloor(u * size);
}
218
219
/**
 * Mirror-clamp-to-edge wrap for nearest sampling.
 * Same as clamp-to-edge, but applied to |s|.
 */
static void
wrap_nearest_mirror_clamp_to_edge(float s, unsigned size, int *icoord)
{
   const float half_texel = 1.0F / (2.0F * size);
   const float upper = 1.0F - half_texel;
   const float mirrored = fabsf(s);

   if (mirrored < half_texel) {
      *icoord = 0;
      return;
   }

   if (mirrored > upper) {
      *icoord = size - 1;
      return;
   }

   /* i limited to [0, size-1] */
   *icoord = util_ifloor(mirrored * size);
}
235
236
/**
 * Mirror-clamp-to-border wrap for nearest sampling.
 *
 * Works on u = |s|.  Since u is never negative, it can never fall below
 * the lower border limit of -1/(2*size), so only the upper limit
 * 1 + 1/(2*size) needs testing; the former lower-limit test could never
 * fire and has been dropped.
 *
 * \param s  the incoming texcoord
 * \param size  the texture image size
 * \param icoord  returns the integer texcoord; 'size' denotes the border
 */
static void
wrap_nearest_mirror_clamp_to_border(float s, unsigned size, int *icoord)
{
   /* u limited to [0, max] */
   /* i limited to [0, size] */
   const float max = 1.0F + 1.0F / (2.0F * size);
   const float u = fabsf(s);

   if (u > max)
      *icoord = size;
   else
      *icoord = util_ifloor(u * size);
}
252
253
/**
 * Linear-filter wrap functions.
 * Each of the wrap_linear_* functions computes the two texel locations
 * and the blend weight used for linear sampling along one axis.
 * \param s  the texcoord
 * \param size  the texture image size
 * \param icoord0  returns first texture index
 * \param icoord1  returns second texture index (usually icoord0 + 1)
 * \param w  returns blend factor/weight between texture indices
 */

/**
 * Repeat wrap for linear sampling: both indices are wrapped with repeat().
 */
static void
wrap_linear_repeat(float s, unsigned size,
                   int *icoord0, int *icoord1, float *w)
{
   /* shift by half a texel so that the weight is relative to texel centers */
   const float u = s * size - 0.5F;
   const int i0 = repeat(util_ifloor(u), size);

   *icoord0 = i0;
   *icoord1 = repeat(i0 + 1, size);
   *w = frac(u);
}
273
274
/**
 * Clamp wrap for linear sampling.
 * The coord is clamped to [0,1] before scaling; the resulting indices
 * themselves are not clamped here.
 */
static void
wrap_linear_clamp(float s, unsigned size,
                  int *icoord0, int *icoord1, float *w)
{
   const float clamped = CLAMP(s, 0.0F, 1.0F);
   const float u = clamped * size - 0.5f;
   const int i0 = util_ifloor(u);

   *icoord0 = i0;
   *icoord1 = i0 + 1;
   *w = frac(u);
}
285
286
/**
 * Clamp-to-edge wrap for linear sampling.
 * The coord is clamped to [0,1], then the first index is kept at or above
 * 0 and the second at or below size-1.
 */
static void
wrap_linear_clamp_to_edge(float s, unsigned size,
                          int *icoord0, int *icoord1, float *w)
{
   const float clamped = CLAMP(s, 0.0F, 1.0F);
   const float u = clamped * size - 0.5f;
   int i0 = util_ifloor(u);
   int i1 = i0 + 1;   /* taken before i0 is clamped */

   if (i0 < 0)
      i0 = 0;
   if (i1 >= (int) size)
      i1 = size - 1;

   *icoord0 = i0;
   *icoord1 = i1;
   *w = frac(u);
}
301
302
/**
 * Clamp-to-border wrap for linear sampling.
 * The coord is clamped to [-1/(2*size), 1 + 1/(2*size)] before scaling;
 * the resulting indices are not clamped here.
 */
static void
wrap_linear_clamp_to_border(float s, unsigned size,
                            int *icoord0, int *icoord1, float *w)
{
   const float lo = -1.0F / (2.0F * size);
   const float hi = 1.0F - lo;
   const float clamped = CLAMP(s, lo, hi);
   const float u = clamped * size - 0.5f;
   const int i0 = util_ifloor(u);

   *icoord0 = i0;
   *icoord1 = i0 + 1;
   *w = frac(u);
}
315
316
/**
 * Mirrored-repeat wrap for linear sampling.
 * The fractional part of the coord is flipped when floor(s) is odd; the
 * two indices are then kept inside [0, size-1].
 */
static void
wrap_linear_mirror_repeat(float s, unsigned size,
                          int *icoord0, int *icoord1, float *w)
{
   const int odd_period = util_ifloor(s) & 1;
   const float f = frac(s);
   /* odd periods run backwards */
   const float mirrored = odd_period ? 1.0F - f : f;
   const float u = mirrored * size - 0.5F;
   int i0 = util_ifloor(u);
   int i1 = i0 + 1;   /* taken before i0 is clamped */

   if (i0 < 0)
      i0 = 0;
   if (i1 >= (int) size)
      i1 = size - 1;

   *icoord0 = i0;
   *icoord1 = i1;
   *w = frac(u);
}
334
335
/**
 * Mirror-clamp wrap for linear sampling.
 * Works on |s|, which is capped at 1 before being scaled by size; the
 * resulting indices are not clamped here.
 */
static void
wrap_linear_mirror_clamp(float s, unsigned size,
                         int *icoord0, int *icoord1, float *w)
{
   const float mirrored = fabsf(s);
   float u = (mirrored >= 1.0F) ? (float) size : mirrored * size;
   int i0;

   u -= 0.5F;
   i0 = util_ifloor(u);

   *icoord0 = i0;
   *icoord1 = i0 + 1;
   *w = frac(u);
}
350
351
/**
 * Mirror-clamp-to-edge wrap for linear sampling.
 * Works on |s|, capped at 1 before scaling; the first index is kept at or
 * above 0 and the second at or below size-1.
 */
static void
wrap_linear_mirror_clamp_to_edge(float s, unsigned size,
                                 int *icoord0, int *icoord1, float *w)
{
   const float mirrored = fabsf(s);
   float u = (mirrored >= 1.0F) ? (float) size : mirrored * size;
   int i0, i1;

   u -= 0.5F;
   i0 = util_ifloor(u);
   i1 = i0 + 1;   /* taken before i0 is clamped */

   if (i0 < 0)
      i0 = 0;
   if (i1 >= (int) size)
      i1 = size - 1;

   *icoord0 = i0;
   *icoord1 = i1;
   *w = frac(u);
}
370
371
/**
 * Mirror-clamp-to-border wrap for linear sampling.
 *
 * Works on u = |s|.  Since u is never negative, it can never reach the
 * lower border limit of -1/(2*size), so only the upper limit
 * 1 + 1/(2*size) is applied; the former lower-limit test could never fire
 * and has been dropped.  The resulting indices are not clamped here.
 *
 * \param s  the texcoord
 * \param size  the texture image size
 * \param icoord0  returns first texture index
 * \param icoord1  returns second texture index (icoord0 + 1)
 * \param w  returns blend factor/weight between texture indices
 */
static void
wrap_linear_mirror_clamp_to_border(float s, unsigned size,
                                   int *icoord0, int *icoord1, float *w)
{
   const float max = 1.0F + 1.0F / (2.0F * size);
   float u = fabsf(s);

   if (u >= max)
      u = max * size;
   else
      u *= size;
   u -= 0.5F;

   *icoord0 = util_ifloor(u);
   *icoord1 = *icoord0 + 1;
   *w = frac(u);
}
390
391
/**
 * PIPE_TEX_WRAP_CLAMP for nearest sampling, unnormalized coords.
 * The coord is floored first, then the integer is clamped to [0, size-1].
 */
static void
wrap_nearest_unorm_clamp(float s, unsigned size, int *icoord)
{
   const int texel = util_ifloor(s);

   *icoord = CLAMP(texel, 0, (int) size-1);
}
401
402
/**
 * PIPE_TEX_WRAP_CLAMP_TO_BORDER for nearest sampling, unnormalized coords.
 * The coord is clamped to [-0.5, size + 0.5] and then floored.
 */
static void
wrap_nearest_unorm_clamp_to_border(float s, unsigned size, int *icoord)
{
   const float clamped = CLAMP(s, -0.5F, (float) size + 0.5F);

   *icoord = util_ifloor(clamped);
}
411
412
/**
 * PIPE_TEX_WRAP_CLAMP_TO_EDGE for nearest sampling, unnormalized coords.
 * The coord is clamped to [0.5, size - 0.5] and then floored.
 */
static void
wrap_nearest_unorm_clamp_to_edge(float s, unsigned size, int *icoord)
{
   const float clamped = CLAMP(s, 0.5F, (float) size - 0.5F);

   *icoord = util_ifloor(clamped);
}
421
422
/**
 * PIPE_TEX_WRAP_CLAMP for linear sampling, unnormalized coords.
 * The half-texel-shifted coord is clamped to [0, size - 1].
 */
static void
wrap_linear_unorm_clamp(float s, unsigned size,
                        int *icoord0, int *icoord1, float *w)
{
   /* Not exactly what the spec says, but it matches NVIDIA output */
   const float u = CLAMP(s - 0.5F, 0.0f, (float) size - 1.0f);
   const int i0 = util_ifloor(u);

   *icoord0 = i0;
   *icoord1 = i0 + 1;
   *w = frac(u);
}
436
437
/**
 * PIPE_TEX_WRAP_CLAMP_TO_BORDER for linear sampling, unnormalized coords.
 * The coord is clamped to [-0.5, size + 0.5] and shifted by half a texel;
 * the second index is additionally kept at or below size-1.
 */
static void
wrap_linear_unorm_clamp_to_border(float s, unsigned size,
                                  int *icoord0, int *icoord1, float *w)
{
   const float clamped = CLAMP(s, -0.5F, (float) size + 0.5F);
   const float u = clamped - 0.5F;
   const int i0 = util_ifloor(u);
   int i1 = i0 + 1;

   if (i1 > (int) size - 1)
      i1 = size - 1;

   *icoord0 = i0;
   *icoord1 = i1;
   *w = frac(u);
}
453
454
/**
 * PIPE_TEX_WRAP_CLAMP_TO_EDGE for linear sampling, unnormalized coords.
 * The coord is clamped to [0.5, size - 0.5] and shifted by half a texel;
 * the second index is additionally kept at or below size-1.
 */
static void
wrap_linear_unorm_clamp_to_edge(float s, unsigned size,
                                int *icoord0, int *icoord1, float *w)
{
   const float clamped = CLAMP(s, +0.5F, (float) size - 0.5F);
   const float u = clamped - 0.5F;
   const int i0 = util_ifloor(u);
   int i1 = i0 + 1;

   if (i1 > (int) size - 1)
      i1 = size - 1;

   *icoord0 = i0;
   *icoord1 = i1;
   *w = frac(u);
}
470
471
472/**
473 * Do coordinate to array index conversion.  For array textures.
474 */
475static INLINE void
476wrap_array_layer(float coord, unsigned size, int *layer)
477{
478   int c = util_ifloor(coord + 0.5F);
479   *layer = CLAMP(c, 0, size - 1);
480}
481
482
483/**
484 * Examine the quad's texture coordinates to compute the partial
485 * derivatives w.r.t X and Y, then compute lambda (level of detail).
486 */
487static float
488compute_lambda_1d(const struct sp_sampler_variant *samp,
489                  const float s[TGSI_QUAD_SIZE],
490                  const float t[TGSI_QUAD_SIZE],
491                  const float p[TGSI_QUAD_SIZE])
492{
493   const struct pipe_resource *texture = samp->view->texture;
494   float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
495   float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
496   float rho = MAX2(dsdx, dsdy) * u_minify(texture->width0, samp->view->u.tex.first_level);
497
498   return util_fast_log2(rho);
499}
500
501
502static float
503compute_lambda_2d(const struct sp_sampler_variant *samp,
504                  const float s[TGSI_QUAD_SIZE],
505                  const float t[TGSI_QUAD_SIZE],
506                  const float p[TGSI_QUAD_SIZE])
507{
508   const struct pipe_resource *texture = samp->view->texture;
509   float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
510   float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
511   float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
512   float dtdy = fabsf(t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]);
513   float maxx = MAX2(dsdx, dsdy) * u_minify(texture->width0, samp->view->u.tex.first_level);
514   float maxy = MAX2(dtdx, dtdy) * u_minify(texture->height0, samp->view->u.tex.first_level);
515   float rho  = MAX2(maxx, maxy);
516
517   return util_fast_log2(rho);
518}
519
520
521static float
522compute_lambda_3d(const struct sp_sampler_variant *samp,
523                  const float s[TGSI_QUAD_SIZE],
524                  const float t[TGSI_QUAD_SIZE],
525                  const float p[TGSI_QUAD_SIZE])
526{
527   const struct pipe_resource *texture = samp->view->texture;
528   float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
529   float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
530   float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
531   float dtdy = fabsf(t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]);
532   float dpdx = fabsf(p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]);
533   float dpdy = fabsf(p[QUAD_TOP_LEFT]     - p[QUAD_BOTTOM_LEFT]);
534   float maxx = MAX2(dsdx, dsdy) * u_minify(texture->width0, samp->view->u.tex.first_level);
535   float maxy = MAX2(dtdx, dtdy) * u_minify(texture->height0, samp->view->u.tex.first_level);
536   float maxz = MAX2(dpdx, dpdy) * u_minify(texture->depth0, samp->view->u.tex.first_level);
537   float rho;
538
539   rho = MAX2(maxx, maxy);
540   rho = MAX2(rho, maxz);
541
542   return util_fast_log2(rho);
543}
544
545
546/**
547 * Compute lambda for a vertex texture sampler.
548 * Since there aren't derivatives to use, just return 0.
549 */
550static float
551compute_lambda_vert(const struct sp_sampler_variant *samp,
552                    const float s[TGSI_QUAD_SIZE],
553                    const float t[TGSI_QUAD_SIZE],
554                    const float p[TGSI_QUAD_SIZE])
555{
556   return 0.0f;
557}
558
559
560
561/**
562 * Get a texel from a texture, using the texture tile cache.
563 *
564 * \param addr  the template tex address containing cube, z, face info.
565 * \param x  the x coord of texel within 2D image
566 * \param y  the y coord of texel within 2D image
567 * \param rgba  the quad to put the texel/color into
568 *
569 * XXX maybe move this into sp_tex_tile_cache.c and merge with the
570 * sp_get_cached_tile_tex() function.
571 */
572
573
574
575
576static INLINE const float *
577get_texel_2d_no_border(const struct sp_sampler_variant *samp,
578		       union tex_tile_address addr, int x, int y)
579{
580   const struct softpipe_tex_cached_tile *tile;
581
582   addr.bits.x = x / TILE_SIZE;
583   addr.bits.y = y / TILE_SIZE;
584   y %= TILE_SIZE;
585   x %= TILE_SIZE;
586
587   tile = sp_get_cached_tile_tex(samp->cache, addr);
588
589   return &tile->data.color[y][x][0];
590}
591
592
593static INLINE const float *
594get_texel_2d(const struct sp_sampler_variant *samp,
595	     union tex_tile_address addr, int x, int y)
596{
597   const struct pipe_resource *texture = samp->view->texture;
598   unsigned level = addr.bits.level;
599
600   if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
601       y < 0 || y >= (int) u_minify(texture->height0, level)) {
602      return samp->sampler->border_color.f;
603   }
604   else {
605      return get_texel_2d_no_border( samp, addr, x, y );
606   }
607}
608
609
610/* Gather a quad of adjacent texels within a tile:
611 */
612static INLINE void
613get_texel_quad_2d_no_border_single_tile(const struct sp_sampler_variant *samp,
614					union tex_tile_address addr,
615					unsigned x, unsigned y,
616					const float *out[4])
617{
618   const struct softpipe_tex_cached_tile *tile;
619
620   addr.bits.x = x / TILE_SIZE;
621   addr.bits.y = y / TILE_SIZE;
622   y %= TILE_SIZE;
623   x %= TILE_SIZE;
624
625   tile = sp_get_cached_tile_tex(samp->cache, addr);
626
627   out[0] = &tile->data.color[y  ][x  ][0];
628   out[1] = &tile->data.color[y  ][x+1][0];
629   out[2] = &tile->data.color[y+1][x  ][0];
630   out[3] = &tile->data.color[y+1][x+1][0];
631}
632
633
634/* Gather a quad of potentially non-adjacent texels:
635 */
636static INLINE void
637get_texel_quad_2d_no_border(const struct sp_sampler_variant *samp,
638			    union tex_tile_address addr,
639			    int x0, int y0,
640			    int x1, int y1,
641			    const float *out[4])
642{
643   out[0] = get_texel_2d_no_border( samp, addr, x0, y0 );
644   out[1] = get_texel_2d_no_border( samp, addr, x1, y0 );
645   out[2] = get_texel_2d_no_border( samp, addr, x0, y1 );
646   out[3] = get_texel_2d_no_border( samp, addr, x1, y1 );
647}
648
649/* Can involve a lot of unnecessary checks for border color:
650 */
651static INLINE void
652get_texel_quad_2d(const struct sp_sampler_variant *samp,
653		  union tex_tile_address addr,
654		  int x0, int y0,
655		  int x1, int y1,
656		  const float *out[4])
657{
658   out[0] = get_texel_2d( samp, addr, x0, y0 );
659   out[1] = get_texel_2d( samp, addr, x1, y0 );
660   out[3] = get_texel_2d( samp, addr, x1, y1 );
661   out[2] = get_texel_2d( samp, addr, x0, y1 );
662}
663
664
665
666/* 3d variants:
667 */
668static INLINE const float *
669get_texel_3d_no_border(const struct sp_sampler_variant *samp,
670                       union tex_tile_address addr, int x, int y, int z)
671{
672   const struct softpipe_tex_cached_tile *tile;
673
674   addr.bits.x = x / TILE_SIZE;
675   addr.bits.y = y / TILE_SIZE;
676   addr.bits.z = z;
677   y %= TILE_SIZE;
678   x %= TILE_SIZE;
679
680   tile = sp_get_cached_tile_tex(samp->cache, addr);
681
682   return &tile->data.color[y][x][0];
683}
684
685
686static INLINE const float *
687get_texel_3d(const struct sp_sampler_variant *samp,
688	     union tex_tile_address addr, int x, int y, int z)
689{
690   const struct pipe_resource *texture = samp->view->texture;
691   unsigned level = addr.bits.level;
692
693   if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
694       y < 0 || y >= (int) u_minify(texture->height0, level) ||
695       z < 0 || z >= (int) u_minify(texture->depth0, level)) {
696      return samp->sampler->border_color.f;
697   }
698   else {
699      return get_texel_3d_no_border( samp, addr, x, y, z );
700   }
701}
702
703
704/* Get texel pointer for 1D array texture */
705static INLINE const float *
706get_texel_1d_array(const struct sp_sampler_variant *samp,
707                   union tex_tile_address addr, int x, int y)
708{
709   const struct pipe_resource *texture = samp->view->texture;
710   unsigned level = addr.bits.level;
711
712   if (x < 0 || x >= (int) u_minify(texture->width0, level)) {
713      return samp->sampler->border_color.f;
714   }
715   else {
716      return get_texel_2d_no_border(samp, addr, x, y);
717   }
718}
719
720
721/* Get texel pointer for 2D array texture */
722static INLINE const float *
723get_texel_2d_array(const struct sp_sampler_variant *samp,
724                   union tex_tile_address addr, int x, int y, int layer)
725{
726   const struct pipe_resource *texture = samp->view->texture;
727   unsigned level = addr.bits.level;
728
729   assert(layer < (int) texture->array_size);
730   assert(layer >= 0);
731
732   if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
733       y < 0 || y >= (int) u_minify(texture->height0, level)) {
734      return samp->sampler->border_color.f;
735   }
736   else {
737      return get_texel_3d_no_border(samp, addr, x, y, layer);
738   }
739}
740
741
/**
 * Given the logbase2 of a mipmap's base level size and a mipmap level,
 * return the size (in texels) of that mipmap level.
 * For example, if level[0].width = 256 then base_pot will be 8.
 * If level = 2, then we'll return 64 (the width at level=2).
 * Return 1 if level > base_pot.
 *
 * The shift is done on an unsigned 1 so that the result matches the
 * unsigned return type and a shift count of 31 stays well-defined.
 */
static INLINE unsigned
pot_level_size(unsigned base_pot, unsigned level)
{
   if (base_pot < level)
      return 1;

   return 1u << (base_pot - level);
}
754
755
756static void
757print_sample(const char *function, const float *rgba)
758{
759   debug_printf("%s %g %g %g %g\n",
760                function,
761                rgba[0], rgba[TGSI_NUM_CHANNELS], rgba[2*TGSI_NUM_CHANNELS], rgba[3*TGSI_NUM_CHANNELS]);
762}
763
764
765static void
766print_sample_4(const char *function, float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
767{
768   debug_printf("%s %g %g %g %g, %g %g %g %g, %g %g %g %g, %g %g %g %g\n",
769                function,
770                rgba[0][0], rgba[1][0], rgba[2][0], rgba[3][0],
771                rgba[0][1], rgba[1][1], rgba[2][1], rgba[3][1],
772                rgba[0][2], rgba[1][2], rgba[2][2], rgba[3][2],
773                rgba[0][3], rgba[1][3], rgba[2][3], rgba[3][3]);
774}
775
776/* Some image-filter fastpaths:
777 */
778static INLINE void
779img_filter_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
780                                float s,
781                                float t,
782                                float p,
783				unsigned level,
784                                unsigned face_id,
785                                enum tgsi_sampler_control control,
786                                float *rgba)
787{
788   const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
789   unsigned xpot = pot_level_size(samp->xpot, level);
790   unsigned ypot = pot_level_size(samp->ypot, level);
791   unsigned xmax = (xpot - 1) & (TILE_SIZE - 1); /* MIN2(TILE_SIZE, xpot) - 1; */
792   unsigned ymax = (ypot - 1) & (TILE_SIZE - 1); /* MIN2(TILE_SIZE, ypot) - 1; */
793   union tex_tile_address addr;
794   int c;
795
796
797
798   float u = s * xpot - 0.5F;
799   float v = t * ypot - 0.5F;
800
801   int uflr = util_ifloor(u);
802   int vflr = util_ifloor(v);
803
804   float xw = u - (float)uflr;
805   float yw = v - (float)vflr;
806
807   int x0 = uflr & (xpot - 1);
808   int y0 = vflr & (ypot - 1);
809
810   const float *tx[4];
811
812   addr.value = 0;
813   addr.bits.level = level;
814
815   /* Can we fetch all four at once:
816    */
817   if (x0 < xmax && y0 < ymax) {
818      get_texel_quad_2d_no_border_single_tile(samp, addr, x0, y0, tx);
819   }
820   else {
821      unsigned x1 = (x0 + 1) & (xpot - 1);
822      unsigned y1 = (y0 + 1) & (ypot - 1);
823      get_texel_quad_2d_no_border(samp, addr, x0, y0, x1, y1, tx);
824   }
825
826   /* interpolate R, G, B, A */
827   for (c = 0; c < TGSI_QUAD_SIZE; c++) {
828      rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
829                                       tx[0][c], tx[1][c],
830                                       tx[2][c], tx[3][c]);
831   }
832
833   if (DEBUG_TEX) {
834      print_sample(__FUNCTION__, rgba);
835   }
836}
837
838
839static INLINE void
840img_filter_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler,
841                                 float s,
842                                 float t,
843                                 float p,
844                                 unsigned level,
845                                 unsigned face_id,
846                                 enum tgsi_sampler_control control,
847                                 float rgba[TGSI_QUAD_SIZE])
848{
849   const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
850   unsigned xpot = pot_level_size(samp->xpot, level);
851   unsigned ypot = pot_level_size(samp->ypot, level);
852   const float *out;
853   union tex_tile_address addr;
854   int c;
855
856   float u = s * xpot;
857   float v = t * ypot;
858
859   int uflr = util_ifloor(u);
860   int vflr = util_ifloor(v);
861
862   int x0 = uflr & (xpot - 1);
863   int y0 = vflr & (ypot - 1);
864
865   addr.value = 0;
866   addr.bits.level = level;
867
868   out = get_texel_2d_no_border(samp, addr, x0, y0);
869   for (c = 0; c < TGSI_QUAD_SIZE; c++)
870      rgba[TGSI_NUM_CHANNELS*c] = out[c];
871
872   if (DEBUG_TEX) {
873      print_sample(__FUNCTION__, rgba);
874   }
875}
876
877
878static INLINE void
879img_filter_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler,
880                                float s,
881                                float t,
882                                float p,
883                                unsigned level,
884                                unsigned face_id,
885                                enum tgsi_sampler_control control,
886                                float rgba[TGSI_QUAD_SIZE])
887{
888   const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
889   unsigned xpot = pot_level_size(samp->xpot, level);
890   unsigned ypot = pot_level_size(samp->ypot, level);
891   union tex_tile_address addr;
892   int c;
893
894   float u = s * xpot;
895   float v = t * ypot;
896
897   int x0, y0;
898   const float *out;
899
900   addr.value = 0;
901   addr.bits.level = level;
902
903   x0 = util_ifloor(u);
904   if (x0 < 0)
905      x0 = 0;
906   else if (x0 > xpot - 1)
907      x0 = xpot - 1;
908
909   y0 = util_ifloor(v);
910   if (y0 < 0)
911      y0 = 0;
912   else if (y0 > ypot - 1)
913      y0 = ypot - 1;
914
915   out = get_texel_2d_no_border(samp, addr, x0, y0);
916   for (c = 0; c < TGSI_QUAD_SIZE; c++)
917      rgba[TGSI_NUM_CHANNELS*c] = out[c];
918
919   if (DEBUG_TEX) {
920      print_sample(__FUNCTION__, rgba);
921   }
922}
923
924
925static void
926img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler,
927                      float s,
928                      float t,
929                      float p,
930                      unsigned level,
931                      unsigned face_id,
932                      enum tgsi_sampler_control control,
933                      float rgba[TGSI_QUAD_SIZE])
934{
935   const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
936   const struct pipe_resource *texture = samp->view->texture;
937   int width;
938   int x;
939   union tex_tile_address addr;
940   const float *out;
941   int c;
942
943   width = u_minify(texture->width0, level);
944
945   assert(width > 0);
946
947   addr.value = 0;
948   addr.bits.level = level;
949
950   samp->nearest_texcoord_s(s, width, &x);
951
952   out = get_texel_2d(samp, addr, x, 0);
953   for (c = 0; c < TGSI_QUAD_SIZE; c++)
954      rgba[TGSI_NUM_CHANNELS*c] = out[c];
955
956   if (DEBUG_TEX) {
957      print_sample(__FUNCTION__, rgba);
958   }
959}
960
961
962static void
963img_filter_1d_array_nearest(struct tgsi_sampler *tgsi_sampler,
964                            float s,
965                            float t,
966                            float p,
967                            unsigned level,
968                            unsigned face_id,
969                            enum tgsi_sampler_control control,
970                            float *rgba)
971{
972   const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
973   const struct pipe_resource *texture = samp->view->texture;
974   int width;
975   int x, layer;
976   union tex_tile_address addr;
977   const float *out;
978   int c;
979
980   width = u_minify(texture->width0, level);
981
982   assert(width > 0);
983
984   addr.value = 0;
985   addr.bits.level = level;
986
987   samp->nearest_texcoord_s(s, width, &x);
988   wrap_array_layer(t, texture->array_size, &layer);
989
990   out = get_texel_1d_array(samp, addr, x, layer);
991   for (c = 0; c < TGSI_QUAD_SIZE; c++)
992      rgba[TGSI_NUM_CHANNELS*c] = out[c];
993
994   if (DEBUG_TEX) {
995      print_sample(__FUNCTION__, rgba);
996   }
997}
998
999
1000static void
1001img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler,
1002                      float s,
1003                      float t,
1004                      float p,
1005                      unsigned level,
1006                      unsigned face_id,
1007                      enum tgsi_sampler_control control,
1008                      float *rgba)
1009{
1010   const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1011   const struct pipe_resource *texture = samp->view->texture;
1012   int width, height;
1013   int x, y;
1014   union tex_tile_address addr;
1015   const float *out;
1016   int c;
1017
1018   width = u_minify(texture->width0, level);
1019   height = u_minify(texture->height0, level);
1020
1021   assert(width > 0);
1022   assert(height > 0);
1023
1024   addr.value = 0;
1025   addr.bits.level = level;
1026
1027   samp->nearest_texcoord_s(s, width, &x);
1028   samp->nearest_texcoord_t(t, height, &y);
1029
1030   out = get_texel_2d(samp, addr, x, y);
1031   for (c = 0; c < TGSI_QUAD_SIZE; c++)
1032      rgba[TGSI_NUM_CHANNELS*c] = out[c];
1033
1034   if (DEBUG_TEX) {
1035      print_sample(__FUNCTION__, rgba);
1036   }
1037}
1038
1039
1040static void
1041img_filter_2d_array_nearest(struct tgsi_sampler *tgsi_sampler,
1042                            float s,
1043                            float t,
1044                            float p,
1045                            unsigned level,
1046                            unsigned face_id,
1047                            enum tgsi_sampler_control control,
1048                            float *rgba)
1049{
1050   const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1051   const struct pipe_resource *texture = samp->view->texture;
1052   int width, height;
1053   int x, y, layer;
1054   union tex_tile_address addr;
1055   const float *out;
1056   int c;
1057
1058   width = u_minify(texture->width0, level);
1059   height = u_minify(texture->height0, level);
1060
1061   assert(width > 0);
1062   assert(height > 0);
1063
1064   addr.value = 0;
1065   addr.bits.level = level;
1066
1067   samp->nearest_texcoord_s(s, width, &x);
1068   samp->nearest_texcoord_t(t, height, &y);
1069   wrap_array_layer(p, texture->array_size, &layer);
1070
1071   out = get_texel_2d_array(samp, addr, x, y, layer);
1072   for (c = 0; c < TGSI_QUAD_SIZE; c++)
1073      rgba[TGSI_NUM_CHANNELS*c] = out[c];
1074
1075   if (DEBUG_TEX) {
1076      print_sample(__FUNCTION__, rgba);
1077   }
1078}
1079
1080
1081static INLINE union tex_tile_address
1082face(union tex_tile_address addr, unsigned face )
1083{
1084   addr.bits.face = face;
1085   return addr;
1086}
1087
1088
1089static void
1090img_filter_cube_nearest(struct tgsi_sampler *tgsi_sampler,
1091                        float s,
1092                        float t,
1093                        float p,
1094                        unsigned level,
1095                        unsigned face_id,
1096                        enum tgsi_sampler_control control,
1097                        float *rgba)
1098{
1099   const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1100   const struct pipe_resource *texture = samp->view->texture;
1101   int width, height;
1102   int x, y;
1103   union tex_tile_address addr;
1104   const float *out;
1105   int c;
1106
1107   width = u_minify(texture->width0, level);
1108   height = u_minify(texture->height0, level);
1109
1110   assert(width > 0);
1111   assert(height > 0);
1112
1113   addr.value = 0;
1114   addr.bits.level = level;
1115
1116   samp->nearest_texcoord_s(s, width, &x);
1117   samp->nearest_texcoord_t(t, height, &y);
1118
1119   out = get_texel_2d(samp, face(addr, face_id), x, y);
1120   for (c = 0; c < TGSI_QUAD_SIZE; c++)
1121      rgba[TGSI_NUM_CHANNELS*c] = out[c];
1122
1123   if (DEBUG_TEX) {
1124      print_sample(__FUNCTION__, rgba);
1125   }
1126}
1127
1128
1129static void
1130img_filter_3d_nearest(struct tgsi_sampler *tgsi_sampler,
1131                      float s,
1132                      float t,
1133                      float p,
1134                      unsigned level,
1135                      unsigned face_id,
1136                      enum tgsi_sampler_control control,
1137                      float *rgba)
1138{
1139   const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1140   const struct pipe_resource *texture = samp->view->texture;
1141   int width, height, depth;
1142   int x, y, z;
1143   union tex_tile_address addr;
1144   const float *out;
1145   int c;
1146
1147   width = u_minify(texture->width0, level);
1148   height = u_minify(texture->height0, level);
1149   depth = u_minify(texture->depth0, level);
1150
1151   assert(width > 0);
1152   assert(height > 0);
1153   assert(depth > 0);
1154
1155   samp->nearest_texcoord_s(s, width,  &x);
1156   samp->nearest_texcoord_t(t, height, &y);
1157   samp->nearest_texcoord_p(p, depth,  &z);
1158
1159   addr.value = 0;
1160   addr.bits.level = level;
1161
1162   out = get_texel_3d(samp, addr, x, y, z);
1163   for (c = 0; c < TGSI_QUAD_SIZE; c++)
1164      rgba[TGSI_NUM_CHANNELS*c] = out[c];
1165}
1166
1167
1168static void
1169img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler,
1170                     float s,
1171                     float t,
1172                     float p,
1173                     unsigned level,
1174                     unsigned face_id,
1175                     enum tgsi_sampler_control control,
1176                     float *rgba)
1177{
1178   const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1179   const struct pipe_resource *texture = samp->view->texture;
1180   int width;
1181   int x0, x1;
1182   float xw; /* weights */
1183   union tex_tile_address addr;
1184   const float *tx0, *tx1;
1185   int c;
1186
1187   width = u_minify(texture->width0, level);
1188
1189   assert(width > 0);
1190
1191   addr.value = 0;
1192   addr.bits.level = level;
1193
1194   samp->linear_texcoord_s(s, width, &x0, &x1, &xw);
1195
1196   tx0 = get_texel_2d(samp, addr, x0, 0);
1197   tx1 = get_texel_2d(samp, addr, x1, 0);
1198
1199   /* interpolate R, G, B, A */
1200   for (c = 0; c < TGSI_QUAD_SIZE; c++)
1201      rgba[TGSI_NUM_CHANNELS*c] = lerp(xw, tx0[c], tx1[c]);
1202}
1203
1204
1205static void
1206img_filter_1d_array_linear(struct tgsi_sampler *tgsi_sampler,
1207                           float s,
1208                           float t,
1209                           float p,
1210                           unsigned level,
1211                           unsigned face_id,
1212                           enum tgsi_sampler_control control,
1213                           float *rgba)
1214{
1215   const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1216   const struct pipe_resource *texture = samp->view->texture;
1217   int width;
1218   int x0, x1, layer;
1219   float xw; /* weights */
1220   union tex_tile_address addr;
1221   const float *tx0, *tx1;
1222   int c;
1223
1224   width = u_minify(texture->width0, level);
1225
1226   assert(width > 0);
1227
1228   addr.value = 0;
1229   addr.bits.level = level;
1230
1231   samp->linear_texcoord_s(s, width, &x0, &x1, &xw);
1232   wrap_array_layer(t, texture->array_size, &layer);
1233
1234   tx0 = get_texel_1d_array(samp, addr, x0, layer);
1235   tx1 = get_texel_1d_array(samp, addr, x1, layer);
1236
1237   /* interpolate R, G, B, A */
1238   for (c = 0; c < TGSI_QUAD_SIZE; c++)
1239      rgba[TGSI_NUM_CHANNELS*c] = lerp(xw, tx0[c], tx1[c]);
1240}
1241
1242
1243static void
1244img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler,
1245                     float s,
1246                     float t,
1247                     float p,
1248                     unsigned level,
1249                     unsigned face_id,
1250                     enum tgsi_sampler_control control,
1251                     float *rgba)
1252{
1253   const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1254   const struct pipe_resource *texture = samp->view->texture;
1255   int width, height;
1256   int x0, y0, x1, y1;
1257   float xw, yw; /* weights */
1258   union tex_tile_address addr;
1259   const float *tx0, *tx1, *tx2, *tx3;
1260   int c;
1261
1262   width = u_minify(texture->width0, level);
1263   height = u_minify(texture->height0, level);
1264
1265   assert(width > 0);
1266   assert(height > 0);
1267
1268   addr.value = 0;
1269   addr.bits.level = level;
1270
1271   samp->linear_texcoord_s(s, width,  &x0, &x1, &xw);
1272   samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
1273
1274   tx0 = get_texel_2d(samp, addr, x0, y0);
1275   tx1 = get_texel_2d(samp, addr, x1, y0);
1276   tx2 = get_texel_2d(samp, addr, x0, y1);
1277   tx3 = get_texel_2d(samp, addr, x1, y1);
1278
1279   /* interpolate R, G, B, A */
1280   for (c = 0; c < TGSI_QUAD_SIZE; c++)
1281      rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1282                                          tx0[c], tx1[c],
1283                                          tx2[c], tx3[c]);
1284}
1285
1286
1287static void
1288img_filter_2d_array_linear(struct tgsi_sampler *tgsi_sampler,
1289                           float s,
1290                           float t,
1291                           float p,
1292                           unsigned level,
1293                           unsigned face_id,
1294                           enum tgsi_sampler_control control,
1295                           float *rgba)
1296{
1297   const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1298   const struct pipe_resource *texture = samp->view->texture;
1299   int width, height;
1300   int x0, y0, x1, y1, layer;
1301   float xw, yw; /* weights */
1302   union tex_tile_address addr;
1303   const float *tx0, *tx1, *tx2, *tx3;
1304   int c;
1305
1306   width = u_minify(texture->width0, level);
1307   height = u_minify(texture->height0, level);
1308
1309   assert(width > 0);
1310   assert(height > 0);
1311
1312   addr.value = 0;
1313   addr.bits.level = level;
1314
1315   samp->linear_texcoord_s(s, width,  &x0, &x1, &xw);
1316   samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
1317   wrap_array_layer(p, texture->array_size, &layer);
1318
1319   tx0 = get_texel_2d_array(samp, addr, x0, y0, layer);
1320   tx1 = get_texel_2d_array(samp, addr, x1, y0, layer);
1321   tx2 = get_texel_2d_array(samp, addr, x0, y1, layer);
1322   tx3 = get_texel_2d_array(samp, addr, x1, y1, layer);
1323
1324   /* interpolate R, G, B, A */
1325   for (c = 0; c < TGSI_QUAD_SIZE; c++)
1326      rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1327                                          tx0[c], tx1[c],
1328                                          tx2[c], tx3[c]);
1329}
1330
1331
1332static void
1333img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler,
1334                       float s,
1335                       float t,
1336                       float p,
1337                       unsigned level,
1338                       unsigned face_id,
1339                       enum tgsi_sampler_control control,
1340                       float *rgba)
1341{
1342   const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1343   const struct pipe_resource *texture = samp->view->texture;
1344   int width, height;
1345   int x0, y0, x1, y1;
1346   float xw, yw; /* weights */
1347   union tex_tile_address addr, addrj;
1348   const float *tx0, *tx1, *tx2, *tx3;
1349   int c;
1350
1351   width = u_minify(texture->width0, level);
1352   height = u_minify(texture->height0, level);
1353
1354   assert(width > 0);
1355   assert(height > 0);
1356
1357   addr.value = 0;
1358   addr.bits.level = level;
1359
1360   samp->linear_texcoord_s(s, width,  &x0, &x1, &xw);
1361   samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
1362
1363   addrj = face(addr, face_id);
1364   tx0 = get_texel_2d(samp, addrj, x0, y0);
1365   tx1 = get_texel_2d(samp, addrj, x1, y0);
1366   tx2 = get_texel_2d(samp, addrj, x0, y1);
1367   tx3 = get_texel_2d(samp, addrj, x1, y1);
1368
1369   /* interpolate R, G, B, A */
1370   for (c = 0; c < TGSI_QUAD_SIZE; c++)
1371      rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1372                                          tx0[c], tx1[c],
1373                                          tx2[c], tx3[c]);
1374}
1375
1376
1377static void
1378img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler,
1379                     float s,
1380                     float t,
1381                     float p,
1382                     unsigned level,
1383                     unsigned face_id,
1384                     enum tgsi_sampler_control control,
1385                     float *rgba)
1386{
1387   const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1388   const struct pipe_resource *texture = samp->view->texture;
1389   int width, height, depth;
1390   int x0, x1, y0, y1, z0, z1;
1391   float xw, yw, zw; /* interpolation weights */
1392   union tex_tile_address addr;
1393   const float *tx00, *tx01, *tx02, *tx03, *tx10, *tx11, *tx12, *tx13;
1394   int c;
1395
1396   width = u_minify(texture->width0, level);
1397   height = u_minify(texture->height0, level);
1398   depth = u_minify(texture->depth0, level);
1399
1400   addr.value = 0;
1401   addr.bits.level = level;
1402
1403   assert(width > 0);
1404   assert(height > 0);
1405   assert(depth > 0);
1406
1407   samp->linear_texcoord_s(s, width,  &x0, &x1, &xw);
1408   samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
1409   samp->linear_texcoord_p(p, depth,  &z0, &z1, &zw);
1410
1411
1412   tx00 = get_texel_3d(samp, addr, x0, y0, z0);
1413   tx01 = get_texel_3d(samp, addr, x1, y0, z0);
1414   tx02 = get_texel_3d(samp, addr, x0, y1, z0);
1415   tx03 = get_texel_3d(samp, addr, x1, y1, z0);
1416
1417   tx10 = get_texel_3d(samp, addr, x0, y0, z1);
1418   tx11 = get_texel_3d(samp, addr, x1, y0, z1);
1419   tx12 = get_texel_3d(samp, addr, x0, y1, z1);
1420   tx13 = get_texel_3d(samp, addr, x1, y1, z1);
1421
1422      /* interpolate R, G, B, A */
1423   for (c = 0; c < TGSI_QUAD_SIZE; c++)
1424      rgba[TGSI_NUM_CHANNELS*c] =  lerp_3d(xw, yw, zw,
1425                                           tx00[c], tx01[c],
1426                                           tx02[c], tx03[c],
1427                                           tx10[c], tx11[c],
1428                                           tx12[c], tx13[c]);
1429}
1430
1431
1432/* Calculate level of detail for every fragment.
1433 * Note that lambda has already been biased by global LOD bias.
1434 */
1435static INLINE void
1436compute_lod(const struct pipe_sampler_state *sampler,
1437            const float biased_lambda,
1438            const float lodbias[TGSI_QUAD_SIZE],
1439            float lod[TGSI_QUAD_SIZE])
1440{
1441   uint i;
1442
1443   for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1444      lod[i] = biased_lambda + lodbias[i];
1445      lod[i] = CLAMP(lod[i], sampler->min_lod, sampler->max_lod);
1446   }
1447}
1448
1449
1450static void
1451mip_filter_linear(struct tgsi_sampler *tgsi_sampler,
1452                  const float s[TGSI_QUAD_SIZE],
1453                  const float t[TGSI_QUAD_SIZE],
1454                  const float p[TGSI_QUAD_SIZE],
1455                  const float c0[TGSI_QUAD_SIZE],
1456                  enum tgsi_sampler_control control,
1457                  float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
1458{
1459   struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1460   const struct pipe_resource *texture = samp->view->texture;
1461   int j;
1462   float lod[TGSI_QUAD_SIZE];
1463
1464   if (control == tgsi_sampler_lod_bias) {
1465      float lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
1466      compute_lod(samp->sampler, lambda, c0, lod);
1467   } else {
1468      assert(control == tgsi_sampler_lod_explicit);
1469
1470      memcpy(lod, c0, sizeof(lod));
1471   }
1472
1473   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1474      int level0 = samp->view->u.tex.first_level + (int)lod[j];
1475
1476      if (lod[j] < 0.0)
1477         samp->mag_img_filter(tgsi_sampler, s[j], t[j], p[j], samp->view->u.tex.first_level, samp->faces[j], tgsi_sampler_lod_bias, &rgba[0][j]);
1478
1479      else if (level0 >= texture->last_level)
1480         samp->min_img_filter(tgsi_sampler, s[j], t[j], p[j], texture->last_level, samp->faces[j], tgsi_sampler_lod_bias, &rgba[0][j]);
1481
1482      else {
1483         float levelBlend = frac(lod[j]);
1484         float rgbax[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
1485         int c;
1486
1487         samp->min_img_filter(tgsi_sampler, s[j], t[j], p[j], level0,   samp->faces[j], tgsi_sampler_lod_bias, &rgbax[0][0]);
1488         samp->min_img_filter(tgsi_sampler, s[j], t[j], p[j], level0+1, samp->faces[j], tgsi_sampler_lod_bias, &rgbax[0][1]);
1489
1490         for (c = 0; c < 4; c++) {
1491            rgba[c][j] = lerp(levelBlend, rgbax[c][0], rgbax[c][1]);
1492         }
1493      }
1494   }
1495
1496   if (DEBUG_TEX) {
1497      print_sample_4(__FUNCTION__, rgba);
1498   }
1499}
1500
1501
1502/**
1503 * Compute nearest mipmap level from texcoords.
1504 * Then sample the texture level for four elements of a quad.
1505 * \param c0  the LOD bias factors, or absolute LODs (depending on control)
1506 */
1507static void
1508mip_filter_nearest(struct tgsi_sampler *tgsi_sampler,
1509                   const float s[TGSI_QUAD_SIZE],
1510                   const float t[TGSI_QUAD_SIZE],
1511                   const float p[TGSI_QUAD_SIZE],
1512                   const float c0[TGSI_QUAD_SIZE],
1513                   enum tgsi_sampler_control control,
1514                   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
1515{
1516   struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1517   const struct pipe_resource *texture = samp->view->texture;
1518   float lod[TGSI_QUAD_SIZE];
1519   int j;
1520
1521   if (control == tgsi_sampler_lod_bias) {
1522      float lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
1523      compute_lod(samp->sampler, lambda, c0, lod);
1524   } else {
1525      assert(control == tgsi_sampler_lod_explicit);
1526
1527      memcpy(lod, c0, sizeof(lod));
1528   }
1529
1530   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1531      if (lod[j] < 0.0)
1532         samp->mag_img_filter(tgsi_sampler, s[j], t[j], p[j], samp->view->u.tex.first_level, samp->faces[j], tgsi_sampler_lod_bias, &rgba[0][j]);
1533      else {
1534         float level = samp->view->u.tex.first_level + (int)(lod[j] + 0.5F) ;
1535         level = MIN2(level, (int)texture->last_level);
1536         samp->min_img_filter(tgsi_sampler, s[j], t[j], p[j], level, samp->faces[j], tgsi_sampler_lod_bias, &rgba[0][j]);
1537      }
1538   }
1539
1540   if (DEBUG_TEX) {
1541      print_sample_4(__FUNCTION__, rgba);
1542   }
1543}
1544
1545
1546static void
1547mip_filter_none(struct tgsi_sampler *tgsi_sampler,
1548                const float s[TGSI_QUAD_SIZE],
1549                const float t[TGSI_QUAD_SIZE],
1550                const float p[TGSI_QUAD_SIZE],
1551                const float c0[TGSI_QUAD_SIZE],
1552                enum tgsi_sampler_control control,
1553                float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
1554{
1555   struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1556   float lod[TGSI_QUAD_SIZE];
1557   int j;
1558
1559   if (control == tgsi_sampler_lod_bias) {
1560      float lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
1561      compute_lod(samp->sampler, lambda, c0, lod);
1562   } else {
1563      assert(control == tgsi_sampler_lod_explicit);
1564
1565      memcpy(lod, c0, sizeof(lod));
1566   }
1567
1568   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1569      if (lod[j] < 0.0) {
1570         samp->mag_img_filter(tgsi_sampler, s[j], t[j], p[j], samp->view->u.tex.first_level, samp->faces[j], tgsi_sampler_lod_bias, &rgba[0][j]);
1571      }
1572      else {
1573         samp->min_img_filter(tgsi_sampler, s[j], t[j], p[j], samp->view->u.tex.first_level, samp->faces[j], tgsi_sampler_lod_bias, &rgba[0][j]);
1574      }
1575   }
1576}
1577
1578
1579static void
1580mip_filter_none_no_filter_select(struct tgsi_sampler *tgsi_sampler,
1581                                     const float s[TGSI_QUAD_SIZE],
1582                                     const float t[TGSI_QUAD_SIZE],
1583                                     const float p[TGSI_QUAD_SIZE],
1584                                     const float c0[TGSI_QUAD_SIZE],
1585                                     enum tgsi_sampler_control control,
1586                                     float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
1587{
1588   struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1589   int j;
1590
1591   for (j = 0; j < TGSI_QUAD_SIZE; j++)
1592      samp->mag_img_filter(tgsi_sampler, s[j], t[j], p[j], samp->view->u.tex.first_level, samp->faces[j], tgsi_sampler_lod_bias, &rgba[0][j]);
1593}
1594
1595
/* For anisotropic filtering */
#define WEIGHT_LUT_SIZE 1024

/* Lazily created weight table for EWA sampling; NULL until
 * create_filter_table() has succeeded.
 */
static float *weightLut = NULL;

/**
 * Creates the look-up table used to speed-up EWA sampling
 *
 * Entry i holds exp(-alpha * r2) with alpha = 2 and
 * r2 = i / (WEIGHT_LUT_SIZE - 1), i.e. r2 runs from 0 to 1 over the table.
 * The table is built once; later calls return immediately.
 *
 * If the allocation fails, weightLut is left NULL (so a later call can
 * retry) instead of writing through a NULL pointer.
 */
static void
create_filter_table(void)
{
   const float alpha = 2;
   float *lut;
   unsigned i;

   if (weightLut)
      return;

   lut = MALLOC(WEIGHT_LUT_SIZE * sizeof *lut);
   if (!lut)
      return;

   for (i = 0; i < WEIGHT_LUT_SIZE; ++i) {
      float r2 = (float) i / (float) (WEIGHT_LUT_SIZE - 1);
      float weight = (float) exp(-alpha * r2);
      lut[i] = weight;
   }

   /* publish only once the table is completely filled in */
   weightLut = lut;
}
1619
1620
/**
 * Elliptical weighted average (EWA) filter for producing high quality
 * anisotropic filtered results.
 * Based on the Higher Quality Elliptical Weighted Average Filter
 * published by Paul S. Heckbert in his Master's Thesis
 * "Fundamentals of Texture Mapping and Image Warping" (1989)
 *
 * For each of the quad's fragments, the texels inside the bounding box of
 * the filter ellipse are visited; those that fall inside the ellipse are
 * fetched through samp->min_img_filter and accumulated, weighted by
 * weightLut[].  The output is the weighted sum divided by the sum of the
 * weights.
 *
 * weightLut is indexed without a NULL check, so create_filter_table()
 * must have built it before this function runs.
 *
 * \param s, t     texcoords of the quad's fragments
 * \param p        forwarded to samp->min_img_filter
 * \param level    mipmap level to sample; also sets the derivative scaling
 * \param control  not used here
 * \param dudx, dvdx, dudy, dvdy  texcoord derivatives, shared by the quad
 * \param rgba     receives the filtered result, one column per fragment
 */
static void
img_filter_2d_ewa(struct tgsi_sampler *tgsi_sampler,
                  const float s[TGSI_QUAD_SIZE],
                  const float t[TGSI_QUAD_SIZE],
                  const float p[TGSI_QUAD_SIZE],
                  unsigned level,
                  enum tgsi_sampler_control control,
                  const float dudx, const float dvdx,
                  const float dudy, const float dvdy,
                  float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
{
   const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
   const struct pipe_resource *texture = samp->view->texture;

   /* ??? Won't the image filters blow up if level is negative?
    * NOTE(review): 'level' is unsigned, so the test below cannot catch a
    * negative value; a negative int passed by a caller would arrive here
    * as a very large level -- confirm callers never do that.
    */
   unsigned level0 = level > 0 ? level : 0;
   float scaling = 1.0 / (1 << level0);
   int width = u_minify(texture->width0, level0);
   int height = u_minify(texture->height0, level0);

   /* derivatives rescaled from base-level texels to level0 texels */
   float ux = dudx * scaling;
   float vx = dvdx * scaling;
   float uy = dudy * scaling;
   float vy = dvdy * scaling;

   /* compute ellipse coefficients to bound the region:
    * A*x*x + B*x*y + C*y*y = F.
    */
   float A = vx*vx+vy*vy+1;
   float B = -2*(ux*vx+uy*vy);
   float C = ux*ux+uy*uy+1;
   float F = A*C-B*B/4.0;

   /* check if it is an ellipse */
   /* ASSERT(F > 0.0); */

   /* Compute the ellipse's (u,v) bounding box in texture space */
   float d = -B*B+4.0*C*A;
   float box_u = 2.0 / d * sqrt(d*C*F); /* box_u -> half of bbox width  */
   float box_v = 2.0 / d * sqrt(A*d*F); /* box_v -> half of bbox height */

   float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
   /* pending texel fetches: coordinates and weights, flushed in batches */
   float s_buffer[TGSI_QUAD_SIZE];
   float t_buffer[TGSI_QUAD_SIZE];
   float weight_buffer[TGSI_QUAD_SIZE];
   unsigned buffer_next;
   int j;
   float den; /* = 0.0F; */
   float ddq;
   float U; /* = u0 - tex_u; */
   int v;

   /* Scale ellipse formula to directly index the Filter Lookup Table.
    * i.e. scale so that F = WEIGHT_LUT_SIZE-1
    */
   double formScale = (double) (WEIGHT_LUT_SIZE - 1) / F;
   A *= formScale;
   B *= formScale;
   C *= formScale;
   /* F *= formScale; */ /* no need to scale F as we don't use it below here */

   /* For each quad, the du and dx values are the same and so the ellipse is
    * also the same. Note that texel/image access can only be performed using
    * a quad, i.e. it is not possible to get the pixel value for a single
    * tex coord. In order to have a better performance, the access is buffered
    * using the s_buffer/t_buffer and weight_buffer. Only when the buffer is
    * full, then the pixel values are read from the image.
    */
   ddq = 2 * A;

   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
      /* Heckbert MS thesis, p. 59; scan over the bounding box of the ellipse
       * and incrementally update the value of Ax^2+Bxy+Cy^2; when this
       * value, q, is less than F, we're inside the ellipse
       */
      float tex_u = -0.5F + s[j] * texture->width0 * scaling;
      float tex_v = -0.5F + t[j] * texture->height0 * scaling;

      int u0 = (int) floorf(tex_u - box_u);
      int u1 = (int) ceilf(tex_u + box_u);
      int v0 = (int) floorf(tex_v - box_v);
      int v1 = (int) ceilf(tex_v + box_v);

      /* num[] is the weighted colour sum, den the sum of the weights */
      float num[4] = {0.0F, 0.0F, 0.0F, 0.0F};
      buffer_next = 0;
      den = 0;
      U = u0 - tex_u;
      for (v = v0; v <= v1; ++v) {
         float V = v - tex_v;
         /* q is the quadratic form at (U, V); dq is its increment for one
          * step in u, and ddq the (constant) increment of dq
          */
         float dq = A * (2 * U + 1) + B * V;
         float q = (C * V + B * U) * V + A * U * U;

         int u;
         for (u = u0; u <= u1; ++u) {
            /* Note that the ellipse has been pre-scaled so F =
             * WEIGHT_LUT_SIZE - 1
             */
            if (q < WEIGHT_LUT_SIZE) {
               /* as a LUT is used, q must never be negative;
                * should not happen, though
                */
               const int qClamped = q >= 0.0F ? q : 0;
               float weight = weightLut[qClamped];

               weight_buffer[buffer_next] = weight;
               s_buffer[buffer_next] = u / ((float) width);
               t_buffer[buffer_next] = v / ((float) height);

               buffer_next++;
               if (buffer_next == TGSI_QUAD_SIZE) {
                  /* 4 texel coords are in the buffer -> read it now */
                  unsigned jj;
                  /* it is assumed that samp->min_img_filter is set to
                   * img_filter_2d_nearest or one of the
                   * accelerated img_filter_2d_nearest_XXX functions.
                   *
                   * NOTE(review): p[] is indexed by the buffer slot jj, not
                   * by the fragment j -- presumably harmless because a 2D
                   * nearest filter ignores p; confirm.
                   */
                  for (jj = 0; jj < buffer_next; jj++) {
                     samp->min_img_filter(tgsi_sampler, s_buffer[jj], t_buffer[jj], p[jj], level, samp->faces[j],
                                          tgsi_sampler_lod_bias, &rgba_temp[0][jj]);
                     num[0] += weight_buffer[jj] * rgba_temp[0][jj];
                     num[1] += weight_buffer[jj] * rgba_temp[1][jj];
                     num[2] += weight_buffer[jj] * rgba_temp[2][jj];
                     num[3] += weight_buffer[jj] * rgba_temp[3][jj];
                  }

                  buffer_next = 0;
               }

               den += weight;
            }
            q += dq;
            dq += ddq;
         }
      }

      /* if the tex coord buffer contains unread values, we will read
       * them now.
       */
      if (buffer_next > 0) {
         unsigned jj;
         /* it is assumed that samp->min_img_filter is set to
          * img_filter_2d_nearest or one of the
          * accelerated img_filter_2d_nearest_XXX functions.
          */
         for (jj = 0; jj < buffer_next; jj++) {
            samp->min_img_filter(tgsi_sampler, s_buffer[jj], t_buffer[jj], p[jj], level, samp->faces[j],
                                 tgsi_sampler_lod_bias, &rgba_temp[0][jj]);
            num[0] += weight_buffer[jj] * rgba_temp[0][jj];
            num[1] += weight_buffer[jj] * rgba_temp[1][jj];
            num[2] += weight_buffer[jj] * rgba_temp[2][jj];
            num[3] += weight_buffer[jj] * rgba_temp[3][jj];
         }
      }

      if (den <= 0.0F) {
         /* Reaching this place would mean that no pixels intersected
          * the ellipse.  This should never happen because the filter
          * we use always intersects at least one pixel.
          */

         /*rgba[0]=0;
         rgba[1]=0;
         rgba[2]=0;
         rgba[3]=0;*/
         /* not enough pixels in resampling, resort to direct interpolation */
         samp->min_img_filter(tgsi_sampler, s[j], t[j], p[j], level, samp->faces[j],
                              tgsi_sampler_lod_bias, &rgba_temp[0][j]);
         den = 1;
         num[0] = rgba_temp[0][j];
         num[1] = rgba_temp[1][j];
         num[2] = rgba_temp[2][j];
         num[3] = rgba_temp[3][j];
      }

      /* normalize the weighted sum */
      rgba[0][j] = num[0] / den;
      rgba[1][j] = num[1] / den;
      rgba[2][j] = num[2] / den;
      rgba[3][j] = num[3] / den;
   }
}
1808
1809
1810/**
1811 * Sample 2D texture using an anisotropic filter.
1812 */
1813static void
1814mip_filter_linear_aniso(struct tgsi_sampler *tgsi_sampler,
1815                        const float s[TGSI_QUAD_SIZE],
1816                        const float t[TGSI_QUAD_SIZE],
1817                        const float p[TGSI_QUAD_SIZE],
1818                        const float c0[TGSI_QUAD_SIZE],
1819                        enum tgsi_sampler_control control,
1820                        float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
1821{
1822   struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1823   const struct pipe_resource *texture = samp->view->texture;
1824   int level0;
1825   float lambda;
1826   float lod[TGSI_QUAD_SIZE];
1827
1828   float s_to_u = u_minify(texture->width0, samp->view->u.tex.first_level);
1829   float t_to_v = u_minify(texture->height0, samp->view->u.tex.first_level);
1830   float dudx = (s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]) * s_to_u;
1831   float dudy = (s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]) * s_to_u;
1832   float dvdx = (t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]) * t_to_v;
1833   float dvdy = (t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]) * t_to_v;
1834
1835   if (control == tgsi_sampler_lod_bias) {
1836      /* note: instead of working with Px and Py, we will use the
1837       * squared length instead, to avoid sqrt.
1838       */
1839      float Px2 = dudx * dudx + dvdx * dvdx;
1840      float Py2 = dudy * dudy + dvdy * dvdy;
1841
1842      float Pmax2;
1843      float Pmin2;
1844      float e;
1845      const float maxEccentricity = samp->sampler->max_anisotropy * samp->sampler->max_anisotropy;
1846
1847      if (Px2 < Py2) {
1848         Pmax2 = Py2;
1849         Pmin2 = Px2;
1850      }
1851      else {
1852         Pmax2 = Px2;
1853         Pmin2 = Py2;
1854      }
1855
1856      /* if the eccentricity of the ellipse is too big, scale up the shorter
1857       * of the two vectors to limit the maximum amount of work per pixel
1858       */
1859      e = Pmax2 / Pmin2;
1860      if (e > maxEccentricity) {
1861         /* float s=e / maxEccentricity;
1862            minor[0] *= s;
1863            minor[1] *= s;
1864            Pmin2 *= s; */
1865         Pmin2 = Pmax2 / maxEccentricity;
1866      }
1867
1868      /* note: we need to have Pmin=sqrt(Pmin2) here, but we can avoid
1869       * this since 0.5*log(x) = log(sqrt(x))
1870       */
1871      lambda = 0.5F * util_fast_log2(Pmin2) + samp->sampler->lod_bias;
1872      compute_lod(samp->sampler, lambda, c0, lod);
1873   }
1874   else {
1875      assert(control == tgsi_sampler_lod_explicit);
1876
1877      memcpy(lod, c0, sizeof(lod));
1878   }
1879
1880   /* XXX: Take into account all lod values.
1881    */
1882   lambda = lod[0];
1883   level0 = samp->view->u.tex.first_level + (int)lambda;
1884
1885   /* If the ellipse covers the whole image, we can
1886    * simply return the average of the whole image.
1887    */
1888   if (level0 >= (int) texture->last_level) {
1889      int j;
1890      for (j = 0; j < TGSI_QUAD_SIZE; j++)
1891         samp->min_img_filter(tgsi_sampler, s[j], t[j], p[j], texture->last_level, samp->faces[j], tgsi_sampler_lod_bias, &rgba[0][j]);
1892   }
1893   else {
1894      /* don't bother interpolating between multiple LODs; it doesn't
1895       * seem to be worth the extra running time.
1896       */
1897      img_filter_2d_ewa(tgsi_sampler, s, t, p, level0, tgsi_sampler_lod_bias,
1898                        dudx, dvdx, dudy, dvdy, rgba);
1899   }
1900
1901   if (DEBUG_TEX) {
1902      print_sample_4(__FUNCTION__, rgba);
1903   }
1904}
1905
1906
1907/**
1908 * Specialized version of mip_filter_linear with hard-wired calls to
1909 * 2d lambda calculation and 2d_linear_repeat_POT img filters.
1910 */
1911static void
1912mip_filter_linear_2d_linear_repeat_POT(
1913   struct tgsi_sampler *tgsi_sampler,
1914   const float s[TGSI_QUAD_SIZE],
1915   const float t[TGSI_QUAD_SIZE],
1916   const float p[TGSI_QUAD_SIZE],
1917   const float c0[TGSI_QUAD_SIZE],
1918   enum tgsi_sampler_control control,
1919   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
1920{
1921   struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1922   const struct pipe_resource *texture = samp->view->texture;
1923   int j;
1924   float lambda;
1925   float lod[TGSI_QUAD_SIZE];
1926
1927   if (control == tgsi_sampler_lod_bias) {
1928      lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
1929      compute_lod(samp->sampler, lambda, c0, lod);
1930   } else {
1931      assert(control == tgsi_sampler_lod_explicit);
1932
1933      memcpy(lod, c0, sizeof(lod));
1934   }
1935
1936   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1937      int level0 = samp->view->u.tex.first_level + (int)lod[j];
1938
1939      /* Catches both negative and large values of level0:
1940       */
1941      if ((unsigned)level0 >= texture->last_level) {
1942         if (level0 < 0)
1943            img_filter_2d_linear_repeat_POT(tgsi_sampler, s[j], t[j], p[j], samp->view->u.tex.first_level, samp->faces[j], tgsi_sampler_lod_bias, &rgba[0][j]);
1944         else
1945            img_filter_2d_linear_repeat_POT(tgsi_sampler, s[j], t[j], p[j], samp->view->texture->last_level, samp->faces[j], tgsi_sampler_lod_bias, &rgba[0][j]);
1946
1947      }
1948      else {
1949         float levelBlend = frac(lod[j]);
1950         float rgbax[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
1951         int c;
1952
1953         img_filter_2d_linear_repeat_POT(tgsi_sampler, s[j], t[j], p[j], level0,   samp->faces[j], tgsi_sampler_lod_bias, &rgbax[0][0]);
1954         img_filter_2d_linear_repeat_POT(tgsi_sampler, s[j], t[j], p[j], level0+1, samp->faces[j], tgsi_sampler_lod_bias, &rgbax[0][1]);
1955
1956         for (c = 0; c < TGSI_NUM_CHANNELS; c++)
1957            rgba[c][j] = lerp(levelBlend, rgbax[c][0], rgbax[c][1]);
1958      }
1959   }
1960
1961   if (DEBUG_TEX) {
1962      print_sample_4(__FUNCTION__, rgba);
1963   }
1964}
1965
1966
1967/**
1968 * Do shadow/depth comparisons.
1969 */
1970static void
1971sample_compare(struct tgsi_sampler *tgsi_sampler,
1972               const float s[TGSI_QUAD_SIZE],
1973               const float t[TGSI_QUAD_SIZE],
1974               const float p[TGSI_QUAD_SIZE],
1975               const float c0[TGSI_QUAD_SIZE],
1976               enum tgsi_sampler_control control,
1977               float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
1978{
1979   struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1980   const struct pipe_sampler_state *sampler = samp->sampler;
1981   int j, k0, k1, k2, k3;
1982   float val;
1983   float pc0, pc1, pc2, pc3;
1984
1985   samp->mip_filter(tgsi_sampler, s, t, p, c0, control, rgba);
1986
1987   /**
1988    * Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
1989    * for 2D Array texture we need to use the 'c0' (aka Q).
1990    * When we sampled the depth texture, the depth value was put into all
1991    * RGBA channels.  We look at the red channel here.
1992    */
1993
1994   if (samp->view->texture->target == PIPE_TEXTURE_2D_ARRAY ||
1995       samp->view->texture->target == PIPE_TEXTURE_CUBE) {
1996      pc0 = CLAMP(c0[0], 0.0F, 1.0F);
1997      pc1 = CLAMP(c0[1], 0.0F, 1.0F);
1998      pc2 = CLAMP(c0[2], 0.0F, 1.0F);
1999      pc3 = CLAMP(c0[3], 0.0F, 1.0F);
2000   } else {
2001      pc0 = CLAMP(p[0], 0.0F, 1.0F);
2002      pc1 = CLAMP(p[1], 0.0F, 1.0F);
2003      pc2 = CLAMP(p[2], 0.0F, 1.0F);
2004      pc3 = CLAMP(p[3], 0.0F, 1.0F);
2005   }
2006   /* compare four texcoords vs. four texture samples */
2007   switch (sampler->compare_func) {
2008   case PIPE_FUNC_LESS:
2009      k0 = pc0 < rgba[0][0];
2010      k1 = pc1 < rgba[0][1];
2011      k2 = pc2 < rgba[0][2];
2012      k3 = pc3 < rgba[0][3];
2013      break;
2014   case PIPE_FUNC_LEQUAL:
2015      k0 = pc0 <= rgba[0][0];
2016      k1 = pc1 <= rgba[0][1];
2017      k2 = pc2 <= rgba[0][2];
2018      k3 = pc3 <= rgba[0][3];
2019      break;
2020   case PIPE_FUNC_GREATER:
2021      k0 = pc0 > rgba[0][0];
2022      k1 = pc1 > rgba[0][1];
2023      k2 = pc2 > rgba[0][2];
2024      k3 = pc3 > rgba[0][3];
2025      break;
2026   case PIPE_FUNC_GEQUAL:
2027      k0 = pc0 >= rgba[0][0];
2028      k1 = pc1 >= rgba[0][1];
2029      k2 = pc2 >= rgba[0][2];
2030      k3 = pc3 >= rgba[0][3];
2031      break;
2032   case PIPE_FUNC_EQUAL:
2033      k0 = pc0 == rgba[0][0];
2034      k1 = pc1 == rgba[0][1];
2035      k2 = pc2 == rgba[0][2];
2036      k3 = pc3 == rgba[0][3];
2037      break;
2038   case PIPE_FUNC_NOTEQUAL:
2039      k0 = pc0 != rgba[0][0];
2040      k1 = pc1 != rgba[0][1];
2041      k2 = pc2 != rgba[0][2];
2042      k3 = pc3 != rgba[0][3];
2043      break;
2044   case PIPE_FUNC_ALWAYS:
2045      k0 = k1 = k2 = k3 = 1;
2046      break;
2047   case PIPE_FUNC_NEVER:
2048      k0 = k1 = k2 = k3 = 0;
2049      break;
2050   default:
2051      k0 = k1 = k2 = k3 = 0;
2052      assert(0);
2053      break;
2054   }
2055
2056   if (sampler->mag_img_filter == PIPE_TEX_FILTER_LINEAR) {
2057      /* convert four pass/fail values to an intensity in [0,1] */
2058      val = 0.25F * (k0 + k1 + k2 + k3);
2059
2060      /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
2061      for (j = 0; j < 4; j++) {
2062	 rgba[0][j] = rgba[1][j] = rgba[2][j] = val;
2063	 rgba[3][j] = 1.0F;
2064      }
2065   } else {
2066      for (j = 0; j < 4; j++) {
2067	 rgba[0][j] = k0;
2068	 rgba[1][j] = k1;
2069	 rgba[2][j] = k2;
2070	 rgba[3][j] = 1.0F;
2071      }
2072   }
2073}
2074
2075
2076/**
2077 * Use 3D texcoords to choose a cube face, then sample the 2D cube faces.
2078 * Put face info into the sampler faces[] array.
2079 */
2080static void
2081sample_cube(struct tgsi_sampler *tgsi_sampler,
2082            const float s[TGSI_QUAD_SIZE],
2083            const float t[TGSI_QUAD_SIZE],
2084            const float p[TGSI_QUAD_SIZE],
2085            const float c0[TGSI_QUAD_SIZE],
2086            enum tgsi_sampler_control control,
2087            float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2088{
2089   struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
2090   unsigned j;
2091   float ssss[4], tttt[4];
2092
2093   /* Not actually used, but the intermediate steps that do the
2094    * dereferencing don't know it.
2095    */
2096   static const float pppp[4] = { 0, 0, 0, 0 };
2097
2098   /*
2099     major axis
2100     direction    target                             sc     tc    ma
2101     ----------   -------------------------------    ---    ---   ---
2102     +rx          TEXTURE_CUBE_MAP_POSITIVE_X_EXT    -rz    -ry   rx
2103     -rx          TEXTURE_CUBE_MAP_NEGATIVE_X_EXT    +rz    -ry   rx
2104     +ry          TEXTURE_CUBE_MAP_POSITIVE_Y_EXT    +rx    +rz   ry
2105     -ry          TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT    +rx    -rz   ry
2106     +rz          TEXTURE_CUBE_MAP_POSITIVE_Z_EXT    +rx    -ry   rz
2107     -rz          TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT    -rx    -ry   rz
2108   */
2109
2110   /* Choose the cube face and compute new s/t coords for the 2D face.
2111    *
2112    * Use the same cube face for all four pixels in the quad.
2113    *
2114    * This isn't ideal, but if we want to use a different cube face
2115    * per pixel in the quad, we'd have to also compute the per-face
2116    * LOD here too.  That's because the four post-face-selection
2117    * texcoords are no longer related to each other (they're
2118    * per-face!)  so we can't use subtraction to compute the partial
2119    * deriviates to compute the LOD.  Doing so (near cube edges
2120    * anyway) gives us pretty much random values.
2121    */
2122   {
2123      /* use the average of the four pixel's texcoords to choose the face */
2124      const float rx = 0.25F * (s[0] + s[1] + s[2] + s[3]);
2125      const float ry = 0.25F * (t[0] + t[1] + t[2] + t[3]);
2126      const float rz = 0.25F * (p[0] + p[1] + p[2] + p[3]);
2127      const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz);
2128
2129      if (arx >= ary && arx >= arz) {
2130         float sign = (rx >= 0.0F) ? 1.0F : -1.0F;
2131         uint face = (rx >= 0.0F) ? PIPE_TEX_FACE_POS_X : PIPE_TEX_FACE_NEG_X;
2132         for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2133            const float ima = -0.5F / fabsf(s[j]);
2134            ssss[j] = sign *  p[j] * ima + 0.5F;
2135            tttt[j] =         t[j] * ima + 0.5F;
2136            samp->faces[j] = face;
2137         }
2138      }
2139      else if (ary >= arx && ary >= arz) {
2140         float sign = (ry >= 0.0F) ? 1.0F : -1.0F;
2141         uint face = (ry >= 0.0F) ? PIPE_TEX_FACE_POS_Y : PIPE_TEX_FACE_NEG_Y;
2142         for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2143            const float ima = -0.5F / fabsf(t[j]);
2144            ssss[j] =        -s[j] * ima + 0.5F;
2145            tttt[j] = sign * -p[j] * ima + 0.5F;
2146            samp->faces[j] = face;
2147         }
2148      }
2149      else {
2150         float sign = (rz >= 0.0F) ? 1.0F : -1.0F;
2151         uint face = (rz >= 0.0F) ? PIPE_TEX_FACE_POS_Z : PIPE_TEX_FACE_NEG_Z;
2152         for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2153            const float ima = -0.5F / fabsf(p[j]);
2154            ssss[j] = sign * -s[j] * ima + 0.5F;
2155            tttt[j] =         t[j] * ima + 0.5F;
2156            samp->faces[j] = face;
2157         }
2158      }
2159   }
2160
2161   /* In our little pipeline, the compare stage is next.  If compare
2162    * is not active, this will point somewhere deeper into the
2163    * pipeline, eg. to mip_filter or even img_filter.
2164    */
2165   samp->compare(tgsi_sampler, ssss, tttt, pppp, c0, control, rgba);
2166}
2167
2168
2169static void
2170do_swizzling(const struct sp_sampler_variant *samp,
2171             float in[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE],
2172             float out[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2173{
2174   int j;
2175   const unsigned swizzle_r = samp->key.bits.swizzle_r;
2176   const unsigned swizzle_g = samp->key.bits.swizzle_g;
2177   const unsigned swizzle_b = samp->key.bits.swizzle_b;
2178   const unsigned swizzle_a = samp->key.bits.swizzle_a;
2179
2180   switch (swizzle_r) {
2181   case PIPE_SWIZZLE_ZERO:
2182      for (j = 0; j < 4; j++)
2183         out[0][j] = 0.0f;
2184      break;
2185   case PIPE_SWIZZLE_ONE:
2186      for (j = 0; j < 4; j++)
2187         out[0][j] = 1.0f;
2188      break;
2189   default:
2190      assert(swizzle_r < 4);
2191      for (j = 0; j < 4; j++)
2192         out[0][j] = in[swizzle_r][j];
2193   }
2194
2195   switch (swizzle_g) {
2196   case PIPE_SWIZZLE_ZERO:
2197      for (j = 0; j < 4; j++)
2198         out[1][j] = 0.0f;
2199      break;
2200   case PIPE_SWIZZLE_ONE:
2201      for (j = 0; j < 4; j++)
2202         out[1][j] = 1.0f;
2203      break;
2204   default:
2205      assert(swizzle_g < 4);
2206      for (j = 0; j < 4; j++)
2207         out[1][j] = in[swizzle_g][j];
2208   }
2209
2210   switch (swizzle_b) {
2211   case PIPE_SWIZZLE_ZERO:
2212      for (j = 0; j < 4; j++)
2213         out[2][j] = 0.0f;
2214      break;
2215   case PIPE_SWIZZLE_ONE:
2216      for (j = 0; j < 4; j++)
2217         out[2][j] = 1.0f;
2218      break;
2219   default:
2220      assert(swizzle_b < 4);
2221      for (j = 0; j < 4; j++)
2222         out[2][j] = in[swizzle_b][j];
2223   }
2224
2225   switch (swizzle_a) {
2226   case PIPE_SWIZZLE_ZERO:
2227      for (j = 0; j < 4; j++)
2228         out[3][j] = 0.0f;
2229      break;
2230   case PIPE_SWIZZLE_ONE:
2231      for (j = 0; j < 4; j++)
2232         out[3][j] = 1.0f;
2233      break;
2234   default:
2235      assert(swizzle_a < 4);
2236      for (j = 0; j < 4; j++)
2237         out[3][j] = in[swizzle_a][j];
2238   }
2239}
2240
2241
2242static void
2243sample_swizzle(struct tgsi_sampler *tgsi_sampler,
2244               const float s[TGSI_QUAD_SIZE],
2245               const float t[TGSI_QUAD_SIZE],
2246               const float p[TGSI_QUAD_SIZE],
2247               const float c0[TGSI_QUAD_SIZE],
2248               enum tgsi_sampler_control control,
2249               float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2250{
2251   struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
2252   float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2253
2254   samp->sample_target(tgsi_sampler, s, t, p, c0, control, rgba_temp);
2255
2256   do_swizzling(samp, rgba_temp, rgba);
2257}
2258
2259
2260static wrap_nearest_func
2261get_nearest_unorm_wrap(unsigned mode)
2262{
2263   switch (mode) {
2264   case PIPE_TEX_WRAP_CLAMP:
2265      return wrap_nearest_unorm_clamp;
2266   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2267      return wrap_nearest_unorm_clamp_to_edge;
2268   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2269      return wrap_nearest_unorm_clamp_to_border;
2270   default:
2271      assert(0);
2272      return wrap_nearest_unorm_clamp;
2273   }
2274}
2275
2276
2277static wrap_nearest_func
2278get_nearest_wrap(unsigned mode)
2279{
2280   switch (mode) {
2281   case PIPE_TEX_WRAP_REPEAT:
2282      return wrap_nearest_repeat;
2283   case PIPE_TEX_WRAP_CLAMP:
2284      return wrap_nearest_clamp;
2285   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2286      return wrap_nearest_clamp_to_edge;
2287   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2288      return wrap_nearest_clamp_to_border;
2289   case PIPE_TEX_WRAP_MIRROR_REPEAT:
2290      return wrap_nearest_mirror_repeat;
2291   case PIPE_TEX_WRAP_MIRROR_CLAMP:
2292      return wrap_nearest_mirror_clamp;
2293   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
2294      return wrap_nearest_mirror_clamp_to_edge;
2295   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
2296      return wrap_nearest_mirror_clamp_to_border;
2297   default:
2298      assert(0);
2299      return wrap_nearest_repeat;
2300   }
2301}
2302
2303
2304static wrap_linear_func
2305get_linear_unorm_wrap(unsigned mode)
2306{
2307   switch (mode) {
2308   case PIPE_TEX_WRAP_CLAMP:
2309      return wrap_linear_unorm_clamp;
2310   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2311      return wrap_linear_unorm_clamp_to_edge;
2312   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2313      return wrap_linear_unorm_clamp_to_border;
2314   default:
2315      assert(0);
2316      return wrap_linear_unorm_clamp;
2317   }
2318}
2319
2320
2321static wrap_linear_func
2322get_linear_wrap(unsigned mode)
2323{
2324   switch (mode) {
2325   case PIPE_TEX_WRAP_REPEAT:
2326      return wrap_linear_repeat;
2327   case PIPE_TEX_WRAP_CLAMP:
2328      return wrap_linear_clamp;
2329   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2330      return wrap_linear_clamp_to_edge;
2331   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2332      return wrap_linear_clamp_to_border;
2333   case PIPE_TEX_WRAP_MIRROR_REPEAT:
2334      return wrap_linear_mirror_repeat;
2335   case PIPE_TEX_WRAP_MIRROR_CLAMP:
2336      return wrap_linear_mirror_clamp;
2337   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
2338      return wrap_linear_mirror_clamp_to_edge;
2339   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
2340      return wrap_linear_mirror_clamp_to_border;
2341   default:
2342      assert(0);
2343      return wrap_linear_repeat;
2344   }
2345}
2346
2347
2348/**
2349 * Is swizzling needed for the given state key?
2350 */
2351static INLINE bool
2352any_swizzle(union sp_sampler_key key)
2353{
2354   return (key.bits.swizzle_r != PIPE_SWIZZLE_RED ||
2355           key.bits.swizzle_g != PIPE_SWIZZLE_GREEN ||
2356           key.bits.swizzle_b != PIPE_SWIZZLE_BLUE ||
2357           key.bits.swizzle_a != PIPE_SWIZZLE_ALPHA);
2358}
2359
2360
2361static compute_lambda_func
2362get_lambda_func(const union sp_sampler_key key)
2363{
2364   if (key.bits.processor == TGSI_PROCESSOR_VERTEX)
2365      return compute_lambda_vert;
2366
2367   switch (key.bits.target) {
2368   case PIPE_TEXTURE_1D:
2369   case PIPE_TEXTURE_1D_ARRAY:
2370      return compute_lambda_1d;
2371   case PIPE_TEXTURE_2D:
2372   case PIPE_TEXTURE_2D_ARRAY:
2373   case PIPE_TEXTURE_RECT:
2374   case PIPE_TEXTURE_CUBE:
2375      return compute_lambda_2d;
2376   case PIPE_TEXTURE_3D:
2377      return compute_lambda_3d;
2378   default:
2379      assert(0);
2380      return compute_lambda_1d;
2381   }
2382}
2383
2384
2385static img_filter_func
2386get_img_filter(const union sp_sampler_key key,
2387               unsigned filter,
2388               const struct pipe_sampler_state *sampler)
2389{
2390   switch (key.bits.target) {
2391   case PIPE_TEXTURE_1D:
2392      if (filter == PIPE_TEX_FILTER_NEAREST)
2393         return img_filter_1d_nearest;
2394      else
2395         return img_filter_1d_linear;
2396      break;
2397   case PIPE_TEXTURE_1D_ARRAY:
2398      if (filter == PIPE_TEX_FILTER_NEAREST)
2399         return img_filter_1d_array_nearest;
2400      else
2401         return img_filter_1d_array_linear;
2402      break;
2403   case PIPE_TEXTURE_2D:
2404   case PIPE_TEXTURE_RECT:
2405      /* Try for fast path:
2406       */
2407      if (key.bits.is_pot &&
2408          sampler->wrap_s == sampler->wrap_t &&
2409          sampler->normalized_coords)
2410      {
2411         switch (sampler->wrap_s) {
2412         case PIPE_TEX_WRAP_REPEAT:
2413            switch (filter) {
2414            case PIPE_TEX_FILTER_NEAREST:
2415               return img_filter_2d_nearest_repeat_POT;
2416            case PIPE_TEX_FILTER_LINEAR:
2417               return img_filter_2d_linear_repeat_POT;
2418            default:
2419               break;
2420            }
2421            break;
2422         case PIPE_TEX_WRAP_CLAMP:
2423            switch (filter) {
2424            case PIPE_TEX_FILTER_NEAREST:
2425               return img_filter_2d_nearest_clamp_POT;
2426            default:
2427               break;
2428            }
2429         }
2430      }
2431      /* Otherwise use default versions:
2432       */
2433      if (filter == PIPE_TEX_FILTER_NEAREST)
2434         return img_filter_2d_nearest;
2435      else
2436         return img_filter_2d_linear;
2437      break;
2438   case PIPE_TEXTURE_2D_ARRAY:
2439      if (filter == PIPE_TEX_FILTER_NEAREST)
2440         return img_filter_2d_array_nearest;
2441      else
2442         return img_filter_2d_array_linear;
2443      break;
2444   case PIPE_TEXTURE_CUBE:
2445      if (filter == PIPE_TEX_FILTER_NEAREST)
2446         return img_filter_cube_nearest;
2447      else
2448         return img_filter_cube_linear;
2449      break;
2450   case PIPE_TEXTURE_3D:
2451      if (filter == PIPE_TEX_FILTER_NEAREST)
2452         return img_filter_3d_nearest;
2453      else
2454         return img_filter_3d_linear;
2455      break;
2456   default:
2457      assert(0);
2458      return img_filter_1d_nearest;
2459   }
2460}
2461
2462
2463/**
2464 * Bind the given texture object and texture cache to the sampler variant.
2465 */
2466void
2467sp_sampler_variant_bind_view( struct sp_sampler_variant *samp,
2468                              struct softpipe_tex_tile_cache *tex_cache,
2469                              const struct pipe_sampler_view *view )
2470{
2471   const struct pipe_resource *texture = view->texture;
2472
2473   samp->view = view;
2474   samp->cache = tex_cache;
2475   samp->xpot = util_logbase2( texture->width0 );
2476   samp->ypot = util_logbase2( texture->height0 );
2477}
2478
2479
/**
 * Release a sampler variant obtained from sp_create_sampler_variant().
 */
void
sp_sampler_variant_destroy( struct sp_sampler_variant *samp )
{
   FREE( samp );
}
2485
2486
2487static void
2488sample_get_dims(struct tgsi_sampler *tgsi_sampler, int level,
2489		int dims[4])
2490{
2491    struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
2492    const struct pipe_sampler_view *view = samp->view;
2493    const struct pipe_resource *texture = view->texture;
2494
2495    /* undefined according to EXT_gpu_program */
2496    level += view->u.tex.first_level;
2497    if (level > view->u.tex.last_level)
2498	return;
2499
2500    dims[0] = u_minify(texture->width0, level);
2501
2502    switch(texture->target) {
2503    case PIPE_TEXTURE_1D_ARRAY:
2504       dims[1] = texture->array_size;
2505       /* fallthrough */
2506    case PIPE_TEXTURE_1D:
2507    case PIPE_BUFFER:
2508       return;
2509    case PIPE_TEXTURE_2D_ARRAY:
2510       dims[2] = texture->array_size;
2511       /* fallthrough */
2512    case PIPE_TEXTURE_2D:
2513    case PIPE_TEXTURE_CUBE:
2514    case PIPE_TEXTURE_RECT:
2515       dims[1] = u_minify(texture->height0, level);
2516       return;
2517    case PIPE_TEXTURE_3D:
2518       dims[1] = u_minify(texture->height0, level);
2519       dims[2] = u_minify(texture->depth0, level);
2520       return;
2521    default:
2522       assert(!"unexpected texture target in sample_get_dims()");
2523       return;
2524    }
2525}
2526
2527/**
2528 * This function is only used for getting unfiltered texels via the
2529 * TXF opcode.  The GL spec says that out-of-bounds texel fetches
2530 * produce undefined results.  Instead of crashing, lets just clamp
2531 * coords to the texture image size.
2532 */
2533static void
2534sample_get_texels(struct tgsi_sampler *tgsi_sampler,
2535                  const int v_i[TGSI_QUAD_SIZE],
2536                  const int v_j[TGSI_QUAD_SIZE],
2537                  const int v_k[TGSI_QUAD_SIZE],
2538                  const int lod[TGSI_QUAD_SIZE],
2539                  const int8_t offset[3],
2540                  float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2541{
2542   const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
2543   union tex_tile_address addr;
2544   const struct pipe_resource *texture = samp->view->texture;
2545   int j, c;
2546   const float *tx;
2547   const bool need_swizzle = any_swizzle(samp->key);
2548   int width, height, depth, layers;
2549
2550   addr.value = 0;
2551   /* TODO write a better test for LOD */
2552   addr.bits.level = lod[0];
2553
2554   width = u_minify(texture->width0, addr.bits.level);
2555   height = u_minify(texture->height0, addr.bits.level);
2556   depth = u_minify(texture->depth0, addr.bits.level);
2557   layers = texture->array_size;
2558
2559   switch(texture->target) {
2560   case PIPE_TEXTURE_1D:
2561      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2562         int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
2563	 tx = get_texel_2d(samp, addr, x, 0);
2564	 for (c = 0; c < 4; c++) {
2565	    rgba[c][j] = tx[c];
2566	 }
2567      }
2568      break;
2569   case PIPE_TEXTURE_1D_ARRAY:
2570      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2571         int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
2572         int y = CLAMP(v_j[j], 0, layers - 1);
2573	 tx = get_texel_1d_array(samp, addr, x, y);
2574	 for (c = 0; c < 4; c++) {
2575	    rgba[c][j] = tx[c];
2576	 }
2577      }
2578      break;
2579   case PIPE_TEXTURE_2D:
2580   case PIPE_TEXTURE_RECT:
2581      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2582         int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
2583         int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
2584	 tx = get_texel_2d(samp, addr, x, y);
2585	 for (c = 0; c < 4; c++) {
2586	    rgba[c][j] = tx[c];
2587	 }
2588      }
2589      break;
2590   case PIPE_TEXTURE_2D_ARRAY:
2591      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2592         int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
2593         int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
2594         int layer = CLAMP(v_k[j], 0, layers - 1);
2595	 tx = get_texel_2d_array(samp, addr, x, y, layer);
2596	 for (c = 0; c < 4; c++) {
2597	    rgba[c][j] = tx[c];
2598	 }
2599      }
2600      break;
2601   case PIPE_TEXTURE_3D:
2602      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2603         int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
2604         int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
2605         int z = CLAMP(v_k[j] + offset[2], 0, depth - 1);
2606
2607	 tx = get_texel_3d(samp, addr, x, y, z);
2608	 for (c = 0; c < 4; c++) {
2609	    rgba[c][j] = tx[c];
2610	 }
2611      }
2612      break;
2613   case PIPE_TEXTURE_CUBE: /* TXF can't work on CUBE according to spec */
2614   default:
2615      assert(!"Unknown or CUBE texture type in TXF processing\n");
2616      break;
2617   }
2618
2619   if (need_swizzle) {
2620      float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2621      memcpy(rgba_temp, rgba, sizeof(rgba_temp));
2622      do_swizzling(samp, rgba_temp, rgba);
2623   }
2624}
2625
2626
2627/**
2628 * Create a sampler variant for a given set of non-orthogonal state.
2629 */
2630struct sp_sampler_variant *
2631sp_create_sampler_variant( const struct pipe_sampler_state *sampler,
2632                           const union sp_sampler_key key )
2633{
2634   struct sp_sampler_variant *samp = CALLOC_STRUCT(sp_sampler_variant);
2635   if (!samp)
2636      return NULL;
2637
2638   samp->sampler = sampler;
2639   samp->key = key;
2640
2641   /* Note that (for instance) linear_texcoord_s and
2642    * nearest_texcoord_s may be active at the same time, if the
2643    * sampler min_img_filter differs from its mag_img_filter.
2644    */
2645   if (sampler->normalized_coords) {
2646      samp->linear_texcoord_s = get_linear_wrap( sampler->wrap_s );
2647      samp->linear_texcoord_t = get_linear_wrap( sampler->wrap_t );
2648      samp->linear_texcoord_p = get_linear_wrap( sampler->wrap_r );
2649
2650      samp->nearest_texcoord_s = get_nearest_wrap( sampler->wrap_s );
2651      samp->nearest_texcoord_t = get_nearest_wrap( sampler->wrap_t );
2652      samp->nearest_texcoord_p = get_nearest_wrap( sampler->wrap_r );
2653   }
2654   else {
2655      samp->linear_texcoord_s = get_linear_unorm_wrap( sampler->wrap_s );
2656      samp->linear_texcoord_t = get_linear_unorm_wrap( sampler->wrap_t );
2657      samp->linear_texcoord_p = get_linear_unorm_wrap( sampler->wrap_r );
2658
2659      samp->nearest_texcoord_s = get_nearest_unorm_wrap( sampler->wrap_s );
2660      samp->nearest_texcoord_t = get_nearest_unorm_wrap( sampler->wrap_t );
2661      samp->nearest_texcoord_p = get_nearest_unorm_wrap( sampler->wrap_r );
2662   }
2663
2664   samp->compute_lambda = get_lambda_func( key );
2665
2666   samp->min_img_filter = get_img_filter(key, sampler->min_img_filter, sampler);
2667   samp->mag_img_filter = get_img_filter(key, sampler->mag_img_filter, sampler);
2668
2669   switch (sampler->min_mip_filter) {
2670   case PIPE_TEX_MIPFILTER_NONE:
2671      if (sampler->min_img_filter == sampler->mag_img_filter)
2672         samp->mip_filter = mip_filter_none_no_filter_select;
2673      else
2674         samp->mip_filter = mip_filter_none;
2675      break;
2676
2677   case PIPE_TEX_MIPFILTER_NEAREST:
2678      samp->mip_filter = mip_filter_nearest;
2679      break;
2680
2681   case PIPE_TEX_MIPFILTER_LINEAR:
2682      if (key.bits.is_pot &&
2683          key.bits.target == PIPE_TEXTURE_2D &&
2684          sampler->min_img_filter == sampler->mag_img_filter &&
2685          sampler->normalized_coords &&
2686          sampler->wrap_s == PIPE_TEX_WRAP_REPEAT &&
2687          sampler->wrap_t == PIPE_TEX_WRAP_REPEAT &&
2688          sampler->min_img_filter == PIPE_TEX_FILTER_LINEAR) {
2689         samp->mip_filter = mip_filter_linear_2d_linear_repeat_POT;
2690      }
2691      else {
2692         samp->mip_filter = mip_filter_linear;
2693      }
2694
2695      /* Anisotropic filtering extension. */
2696      if (sampler->max_anisotropy > 1) {
2697      	samp->mip_filter = mip_filter_linear_aniso;
2698
2699      	/* Override min_img_filter:
2700      	 * min_img_filter needs to be set to NEAREST since we need to access
2701      	 * each texture pixel as it is and weight it later; using linear
2702      	 * filters will have incorrect results.
2703      	 * By setting the filter to NEAREST here, we can avoid calling the
2704      	 * generic img_filter_2d_nearest in the anisotropic filter function,
2705      	 * making it possible to use one of the accelerated implementations
2706      	 */
2707      	samp->min_img_filter = get_img_filter(key, PIPE_TEX_FILTER_NEAREST, sampler);
2708
2709      	/* on first access create the lookup table containing the filter weights. */
2710        if (!weightLut) {
2711           create_filter_table();
2712        }
2713      }
2714
2715      break;
2716   }
2717
2718   if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) {
2719      samp->compare = sample_compare;
2720   }
2721   else {
2722      /* Skip compare operation by promoting the mip_filter function
2723       * pointer:
2724       */
2725      samp->compare = samp->mip_filter;
2726   }
2727
2728   if (key.bits.target == PIPE_TEXTURE_CUBE) {
2729      samp->sample_target = sample_cube;
2730   }
2731   else {
2732      samp->faces[0] = 0;
2733      samp->faces[1] = 0;
2734      samp->faces[2] = 0;
2735      samp->faces[3] = 0;
2736
2737      /* Skip cube face determination by promoting the compare
2738       * function pointer:
2739       */
2740      samp->sample_target = samp->compare;
2741   }
2742
2743   if (any_swizzle(key)) {
2744      samp->base.get_samples = sample_swizzle;
2745   }
2746   else {
2747      samp->base.get_samples = samp->sample_target;
2748   }
2749
2750   samp->base.get_dims = sample_get_dims;
2751   samp->base.get_texel = sample_get_texels;
2752   return samp;
2753}
2754