u_gen_mipmap.c revision ac400ffce62be47fc77e8d10cabcd39b92b6c627
1/**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 * Copyright 2008  VMware, Inc.  All rights reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29/**
30 * @file
31 * Mipmap generation utility
32 *
33 * @author Brian Paul
34 */
35
36
37#include "pipe/p_context.h"
38#include "util/u_debug.h"
39#include "pipe/p_defines.h"
40#include "pipe/p_inlines.h"
41#include "pipe/p_shader_tokens.h"
42#include "pipe/p_state.h"
43
44#include "util/u_memory.h"
45#include "util/u_draw_quad.h"
46#include "util/u_gen_mipmap.h"
47#include "util/u_simple_shaders.h"
48#include "util/u_math.h"
49
50#include "cso_cache/cso_context.h"
51
52
53struct gen_mipmap_state
54{
55   struct pipe_context *pipe;
56   struct cso_context *cso;
57
58   struct pipe_blend_state blend;
59   struct pipe_depth_stencil_alpha_state depthstencil;
60   struct pipe_rasterizer_state rasterizer;
61   struct pipe_sampler_state sampler;
62
63   void *vs;
64   void *fs;
65
66   struct pipe_buffer *vbuf;  /**< quad vertices */
67   unsigned vbuf_slot;
68
69   float vertices[4][2][4];   /**< vertex/texcoords for quad */
70};
71
72
73
/**
 * Storage type of the texels handed to the row filters.
 *
 * The numeric values are spelled out only for readability; they are the
 * same values the compiler assigned implicitly before.
 */
enum dtype
{
   DTYPE_UBYTE              = 0, /**< one ubyte per component */
   DTYPE_UBYTE_3_3_2        = 1, /**< one ubyte per texel, fields at bits 0-1, 2-4, 5-7 */
   DTYPE_USHORT             = 2, /**< one ushort per component */
   DTYPE_USHORT_4_4_4_4     = 3, /**< one ushort per texel, four 4-bit fields */
   DTYPE_USHORT_5_6_5       = 4, /**< one ushort per texel, fields at bits 0-4, 5-10, 11-15 */
   DTYPE_USHORT_1_5_5_5_REV = 5, /**< one ushort per texel, 5-bit fields at 0, 5, 10 and 1 bit at 15 */
   DTYPE_UINT               = 6, /**< one uint per texel */
   DTYPE_FLOAT              = 7, /**< one float per component */
   DTYPE_HALF_FLOAT         = 8  /**< one 16-bit half float per component */
};
86
87
/**
 * Storage type for an IEEE 754 binary16 ("half") value: 1 sign bit,
 * 5 exponent bits (bias 15) and 10 mantissa bits.  A fixed-width type is
 * used because the conversions below rely on the value being exactly
 * 16 bits wide.
 */
typedef uint16_t half_float;


/**
 * Convert a 32-bit float to the nearest half float.
 *
 * Rounding is round-to-nearest, ties-to-even.  Values too large for a half
 * become +/-infinity, values too small become (signed) zero or a half
 * subnormal, and any NaN becomes a quiet NaN with the same sign.
 *
 * \param f  value to convert
 * \return   the binary16 bit pattern
 */
static half_float
float_to_half(float f)
{
   union { float f; uint32_t u; } bits;
   uint32_t sign, mant, half, rem;
   int e;

   bits.f = f;
   sign = (bits.u >> 16) & 0x8000u;
   e = (int) ((bits.u >> 23) & 0xffu);
   mant = bits.u & 0x007fffffu;

   if (e == 0xff) {
      /* Infinity keeps a zero mantissa; NaN gets the quiet bit set. */
      return (half_float) (sign | (mant ? 0x7e00u : 0x7c00u));
   }

   /* Re-bias the exponent from float (127) to half (15). */
   e += 15 - 127;

   if (e >= 0x1f) {
      /* Magnitude exceeds the largest finite half: overflow to infinity. */
      return (half_float) (sign | 0x7c00u);
   }

   if (e <= 0) {
      uint32_t shift, halfway;

      if (e < -10) {
         /* Below half of the smallest subnormal: rounds to zero. */
         return (half_float) sign;
      }

      /* Half subnormal: shift the full 24-bit significand into place. */
      mant |= 0x00800000u;
      shift = (uint32_t) (14 - e);            /* 14..24 */
      half = mant >> shift;
      rem = mant & ((1u << shift) - 1u);
      halfway = 1u << (shift - 1u);
      if (rem > halfway || (rem == halfway && (half & 1u))) {
         /* A carry out of the mantissa yields the smallest normal value. */
         half++;
      }
      return (half_float) (sign | half);
   }

   /* Normal number: keep the top 10 mantissa bits and round on the rest. */
   half = ((uint32_t) e << 10) | (mant >> 13);
   rem = mant & 0x1fffu;
   if (rem > 0x1000u || (rem == 0x1000u && (half & 1u))) {
      /* A carry may bump the exponent, possibly up to infinity. */
      half++;
   }
   return (half_float) (sign | half);
}


/**
 * Convert a half float to a 32-bit float.  The conversion is exact: every
 * half value (including subnormals, infinities and NaNs) is representable
 * as a float.
 *
 * \param h  binary16 bit pattern
 * \return   the same value as a float
 */
static float
half_to_float(half_float h)
{
   union { float f; uint32_t u; } bits;
   const uint32_t sign = ((uint32_t) h & 0x8000u) << 16;
   uint32_t e = ((uint32_t) h >> 10) & 0x1fu;
   uint32_t mant = (uint32_t) h & 0x3ffu;

   if (e == 0x1f) {
      /* Infinity or NaN: maximum float exponent, mantissa carried over. */
      bits.u = sign | 0x7f800000u | (mant << 13);
   }
   else if (e != 0) {
      /* Normal number: re-bias the exponent from 15 to 127. */
      bits.u = sign | ((e + (127 - 15)) << 23) | (mant << 13);
   }
   else if (mant == 0) {
      /* Signed zero. */
      bits.u = sign;
   }
   else {
      /* Subnormal half: normalize so the leading 1 becomes implicit. */
      e = 127 - 15 + 1;
      while (!(mant & 0x400u)) {
         mant <<= 1;
         e--;
      }
      bits.u = sign | (e << 23) | ((mant & 0x3ffu) << 13);
   }

   return bits.f;
}
104
105
106
107
/**
 * \name Support macros for the row filters (used by do_row_3D)
 *
 * The macro madness is here for two reasons.  First, it compacts the code
 * slightly.  Second, it makes it much easier to adjust the specifics of the
 * filter to tune the rounding characteristics.
 *
 * All of these expect the enclosing scope to provide srcRowA..srcRowD and
 * dstRow (for the DECLARE_* macros) or rowA..rowD, dst and the loop indices
 * i, j, k (for the FILTER_* macros).
 */
/*@{*/

/** Declare typed row pointers for texels made of \p e components of type \p t. */
#define DECLARE_ROW_POINTERS(t, e) \
      const t(*rowA)[e] = (const t(*)[e]) srcRowA; \
      const t(*rowB)[e] = (const t(*)[e]) srcRowB; \
      const t(*rowC)[e] = (const t(*)[e]) srcRowC; \
      const t(*rowD)[e] = (const t(*)[e]) srcRowD; \
      t(*dst)[e] = (t(*)[e]) dstRow

/** Declare typed row pointers for texels stored as a single value of type \p t. */
#define DECLARE_ROW_POINTERS0(t) \
      const t *rowA = (const t *) srcRowA; \
      const t *rowB = (const t *) srcRowB; \
      const t *rowC = (const t *) srcRowC; \
      const t *rowD = (const t *) srcRowD; \
      t *dst = (t *) dstRow

/**
 * Rounded average of eight unsigned values: (sum + 4) / 8.
 * Every argument and the whole expansion are parenthesized so the macro is
 * safe to use with arbitrary argument expressions and inside larger
 * expressions.
 */
#define FILTER_SUM_3D(Aj, Ak, Bj, Bk, Cj, Ck, Dj, Dk) \
   (((unsigned) (Aj) + (unsigned) (Ak) \
     + (unsigned) (Bj) + (unsigned) (Bk) \
     + (unsigned) (Cj) + (unsigned) (Ck) \
     + (unsigned) (Dj) + (unsigned) (Dk) \
     + 4) >> 3)

/** Filter integer component \p e of the current texel. */
#define FILTER_3D(e) \
   do { \
      dst[i][e] = FILTER_SUM_3D(rowA[j][e], rowA[k][e], \
                                rowB[j][e], rowB[k][e], \
                                rowC[j][e], rowC[k][e], \
                                rowD[j][e], rowD[k][e]); \
   } while(0)

/** Filter float component \p e of the current texel. */
#define FILTER_F_3D(e) \
   do { \
      dst[i][e] = (rowA[j][e] + rowA[k][e] \
                   + rowB[j][e] + rowB[k][e] \
                   + rowC[j][e] + rowC[k][e] \
                   + rowD[j][e] + rowD[k][e]) * 0.125F; \
   } while(0)

/** Filter half-float component \p e of the current texel via float math. */
#define FILTER_HF_3D(e) \
   do { \
      const float aj = half_to_float(rowA[j][e]); \
      const float ak = half_to_float(rowA[k][e]); \
      const float bj = half_to_float(rowB[j][e]); \
      const float bk = half_to_float(rowB[k][e]); \
      const float cj = half_to_float(rowC[j][e]); \
      const float ck = half_to_float(rowC[k][e]); \
      const float dj = half_to_float(rowD[j][e]); \
      const float dk = half_to_float(rowD[k][e]); \
      dst[i][e] = float_to_half((aj + ak + bj + bk + cj + ck + dj + dk) \
                                      * 0.125F); \
   } while(0)
/*@}*/
167
168
169/**
170 * Average together two rows of a source image to produce a single new
171 * row in the dest image.  It's legal for the two source rows to point
172 * to the same data.  The source width must be equal to either the
173 * dest width or two times the dest width.
174 * \param datatype  GL_UNSIGNED_BYTE, GL_UNSIGNED_SHORT, GL_FLOAT, etc.
175 * \param comps  number of components per pixel (1..4)
176 */
177static void
178do_row(enum dtype datatype, uint comps, int srcWidth,
179       const void *srcRowA, const void *srcRowB,
180       int dstWidth, void *dstRow)
181{
182   const uint k0 = (srcWidth == dstWidth) ? 0 : 1;
183   const uint colStride = (srcWidth == dstWidth) ? 1 : 2;
184
185   assert(comps >= 1);
186   assert(comps <= 4);
187
188   /* This assertion is no longer valid with non-power-of-2 textures
189   assert(srcWidth == dstWidth || srcWidth == 2 * dstWidth);
190   */
191
192   if (datatype == DTYPE_UBYTE && comps == 4) {
193      uint i, j, k;
194      const ubyte(*rowA)[4] = (const ubyte(*)[4]) srcRowA;
195      const ubyte(*rowB)[4] = (const ubyte(*)[4]) srcRowB;
196      ubyte(*dst)[4] = (ubyte(*)[4]) dstRow;
197      for (i = j = 0, k = k0; i < (uint) dstWidth;
198           i++, j += colStride, k += colStride) {
199         dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4;
200         dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4;
201         dst[i][2] = (rowA[j][2] + rowA[k][2] + rowB[j][2] + rowB[k][2]) / 4;
202         dst[i][3] = (rowA[j][3] + rowA[k][3] + rowB[j][3] + rowB[k][3]) / 4;
203      }
204   }
205   else if (datatype == DTYPE_UBYTE && comps == 3) {
206      uint i, j, k;
207      const ubyte(*rowA)[3] = (const ubyte(*)[3]) srcRowA;
208      const ubyte(*rowB)[3] = (const ubyte(*)[3]) srcRowB;
209      ubyte(*dst)[3] = (ubyte(*)[3]) dstRow;
210      for (i = j = 0, k = k0; i < (uint) dstWidth;
211           i++, j += colStride, k += colStride) {
212         dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4;
213         dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4;
214         dst[i][2] = (rowA[j][2] + rowA[k][2] + rowB[j][2] + rowB[k][2]) / 4;
215      }
216   }
217   else if (datatype == DTYPE_UBYTE && comps == 2) {
218      uint i, j, k;
219      const ubyte(*rowA)[2] = (const ubyte(*)[2]) srcRowA;
220      const ubyte(*rowB)[2] = (const ubyte(*)[2]) srcRowB;
221      ubyte(*dst)[2] = (ubyte(*)[2]) dstRow;
222      for (i = j = 0, k = k0; i < (uint) dstWidth;
223           i++, j += colStride, k += colStride) {
224         dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) >> 2;
225         dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) >> 2;
226      }
227   }
228   else if (datatype == DTYPE_UBYTE && comps == 1) {
229      uint i, j, k;
230      const ubyte *rowA = (const ubyte *) srcRowA;
231      const ubyte *rowB = (const ubyte *) srcRowB;
232      ubyte *dst = (ubyte *) dstRow;
233      for (i = j = 0, k = k0; i < (uint) dstWidth;
234           i++, j += colStride, k += colStride) {
235         dst[i] = (rowA[j] + rowA[k] + rowB[j] + rowB[k]) >> 2;
236      }
237   }
238
239   else if (datatype == DTYPE_USHORT && comps == 4) {
240      uint i, j, k;
241      const ushort(*rowA)[4] = (const ushort(*)[4]) srcRowA;
242      const ushort(*rowB)[4] = (const ushort(*)[4]) srcRowB;
243      ushort(*dst)[4] = (ushort(*)[4]) dstRow;
244      for (i = j = 0, k = k0; i < (uint) dstWidth;
245           i++, j += colStride, k += colStride) {
246         dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4;
247         dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4;
248         dst[i][2] = (rowA[j][2] + rowA[k][2] + rowB[j][2] + rowB[k][2]) / 4;
249         dst[i][3] = (rowA[j][3] + rowA[k][3] + rowB[j][3] + rowB[k][3]) / 4;
250      }
251   }
252   else if (datatype == DTYPE_USHORT && comps == 3) {
253      uint i, j, k;
254      const ushort(*rowA)[3] = (const ushort(*)[3]) srcRowA;
255      const ushort(*rowB)[3] = (const ushort(*)[3]) srcRowB;
256      ushort(*dst)[3] = (ushort(*)[3]) dstRow;
257      for (i = j = 0, k = k0; i < (uint) dstWidth;
258           i++, j += colStride, k += colStride) {
259         dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4;
260         dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4;
261         dst[i][2] = (rowA[j][2] + rowA[k][2] + rowB[j][2] + rowB[k][2]) / 4;
262      }
263   }
264   else if (datatype == DTYPE_USHORT && comps == 2) {
265      uint i, j, k;
266      const ushort(*rowA)[2] = (const ushort(*)[2]) srcRowA;
267      const ushort(*rowB)[2] = (const ushort(*)[2]) srcRowB;
268      ushort(*dst)[2] = (ushort(*)[2]) dstRow;
269      for (i = j = 0, k = k0; i < (uint) dstWidth;
270           i++, j += colStride, k += colStride) {
271         dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4;
272         dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4;
273      }
274   }
275   else if (datatype == DTYPE_USHORT && comps == 1) {
276      uint i, j, k;
277      const ushort *rowA = (const ushort *) srcRowA;
278      const ushort *rowB = (const ushort *) srcRowB;
279      ushort *dst = (ushort *) dstRow;
280      for (i = j = 0, k = k0; i < (uint) dstWidth;
281           i++, j += colStride, k += colStride) {
282         dst[i] = (rowA[j] + rowA[k] + rowB[j] + rowB[k]) / 4;
283      }
284   }
285
286   else if (datatype == DTYPE_FLOAT && comps == 4) {
287      uint i, j, k;
288      const float(*rowA)[4] = (const float(*)[4]) srcRowA;
289      const float(*rowB)[4] = (const float(*)[4]) srcRowB;
290      float(*dst)[4] = (float(*)[4]) dstRow;
291      for (i = j = 0, k = k0; i < (uint) dstWidth;
292           i++, j += colStride, k += colStride) {
293         dst[i][0] = (rowA[j][0] + rowA[k][0] +
294                      rowB[j][0] + rowB[k][0]) * 0.25F;
295         dst[i][1] = (rowA[j][1] + rowA[k][1] +
296                      rowB[j][1] + rowB[k][1]) * 0.25F;
297         dst[i][2] = (rowA[j][2] + rowA[k][2] +
298                      rowB[j][2] + rowB[k][2]) * 0.25F;
299         dst[i][3] = (rowA[j][3] + rowA[k][3] +
300                      rowB[j][3] + rowB[k][3]) * 0.25F;
301      }
302   }
303   else if (datatype == DTYPE_FLOAT && comps == 3) {
304      uint i, j, k;
305      const float(*rowA)[3] = (const float(*)[3]) srcRowA;
306      const float(*rowB)[3] = (const float(*)[3]) srcRowB;
307      float(*dst)[3] = (float(*)[3]) dstRow;
308      for (i = j = 0, k = k0; i < (uint) dstWidth;
309           i++, j += colStride, k += colStride) {
310         dst[i][0] = (rowA[j][0] + rowA[k][0] +
311                      rowB[j][0] + rowB[k][0]) * 0.25F;
312         dst[i][1] = (rowA[j][1] + rowA[k][1] +
313                      rowB[j][1] + rowB[k][1]) * 0.25F;
314         dst[i][2] = (rowA[j][2] + rowA[k][2] +
315                      rowB[j][2] + rowB[k][2]) * 0.25F;
316      }
317   }
318   else if (datatype == DTYPE_FLOAT && comps == 2) {
319      uint i, j, k;
320      const float(*rowA)[2] = (const float(*)[2]) srcRowA;
321      const float(*rowB)[2] = (const float(*)[2]) srcRowB;
322      float(*dst)[2] = (float(*)[2]) dstRow;
323      for (i = j = 0, k = k0; i < (uint) dstWidth;
324           i++, j += colStride, k += colStride) {
325         dst[i][0] = (rowA[j][0] + rowA[k][0] +
326                      rowB[j][0] + rowB[k][0]) * 0.25F;
327         dst[i][1] = (rowA[j][1] + rowA[k][1] +
328                      rowB[j][1] + rowB[k][1]) * 0.25F;
329      }
330   }
331   else if (datatype == DTYPE_FLOAT && comps == 1) {
332      uint i, j, k;
333      const float *rowA = (const float *) srcRowA;
334      const float *rowB = (const float *) srcRowB;
335      float *dst = (float *) dstRow;
336      for (i = j = 0, k = k0; i < (uint) dstWidth;
337           i++, j += colStride, k += colStride) {
338         dst[i] = (rowA[j] + rowA[k] + rowB[j] + rowB[k]) * 0.25F;
339      }
340   }
341
342#if 0
343   else if (datatype == HALF_DTYPE_FLOAT && comps == 4) {
344      uint i, j, k, comp;
345      const half_float(*rowA)[4] = (const half_float(*)[4]) srcRowA;
346      const half_float(*rowB)[4] = (const half_float(*)[4]) srcRowB;
347      half_float(*dst)[4] = (half_float(*)[4]) dstRow;
348      for (i = j = 0, k = k0; i < (uint) dstWidth;
349           i++, j += colStride, k += colStride) {
350         for (comp = 0; comp < 4; comp++) {
351            float aj, ak, bj, bk;
352            aj = half_to_float(rowA[j][comp]);
353            ak = half_to_float(rowA[k][comp]);
354            bj = half_to_float(rowB[j][comp]);
355            bk = half_to_float(rowB[k][comp]);
356            dst[i][comp] = float_to_half((aj + ak + bj + bk) * 0.25F);
357         }
358      }
359   }
360   else if (datatype == DTYPE_HALF_FLOAT && comps == 3) {
361      uint i, j, k, comp;
362      const half_float(*rowA)[3] = (const half_float(*)[3]) srcRowA;
363      const half_float(*rowB)[3] = (const half_float(*)[3]) srcRowB;
364      half_float(*dst)[3] = (half_float(*)[3]) dstRow;
365      for (i = j = 0, k = k0; i < (uint) dstWidth;
366           i++, j += colStride, k += colStride) {
367         for (comp = 0; comp < 3; comp++) {
368            float aj, ak, bj, bk;
369            aj = half_to_float(rowA[j][comp]);
370            ak = half_to_float(rowA[k][comp]);
371            bj = half_to_float(rowB[j][comp]);
372            bk = half_to_float(rowB[k][comp]);
373            dst[i][comp] = float_to_half((aj + ak + bj + bk) * 0.25F);
374         }
375      }
376   }
377   else if (datatype == DTYPE_HALF_FLOAT && comps == 2) {
378      uint i, j, k, comp;
379      const half_float(*rowA)[2] = (const half_float(*)[2]) srcRowA;
380      const half_float(*rowB)[2] = (const half_float(*)[2]) srcRowB;
381      half_float(*dst)[2] = (half_float(*)[2]) dstRow;
382      for (i = j = 0, k = k0; i < (uint) dstWidth;
383           i++, j += colStride, k += colStride) {
384         for (comp = 0; comp < 2; comp++) {
385            float aj, ak, bj, bk;
386            aj = half_to_float(rowA[j][comp]);
387            ak = half_to_float(rowA[k][comp]);
388            bj = half_to_float(rowB[j][comp]);
389            bk = half_to_float(rowB[k][comp]);
390            dst[i][comp] = float_to_half((aj + ak + bj + bk) * 0.25F);
391         }
392      }
393   }
394   else if (datatype == DTYPE_HALF_FLOAT && comps == 1) {
395      uint i, j, k;
396      const half_float *rowA = (const half_float *) srcRowA;
397      const half_float *rowB = (const half_float *) srcRowB;
398      half_float *dst = (half_float *) dstRow;
399      for (i = j = 0, k = k0; i < (uint) dstWidth;
400           i++, j += colStride, k += colStride) {
401         float aj, ak, bj, bk;
402         aj = half_to_float(rowA[j]);
403         ak = half_to_float(rowA[k]);
404         bj = half_to_float(rowB[j]);
405         bk = half_to_float(rowB[k]);
406         dst[i] = float_to_half((aj + ak + bj + bk) * 0.25F);
407      }
408   }
409#endif
410
411   else if (datatype == DTYPE_UINT && comps == 1) {
412      uint i, j, k;
413      const uint *rowA = (const uint *) srcRowA;
414      const uint *rowB = (const uint *) srcRowB;
415      uint *dst = (uint *) dstRow;
416      for (i = j = 0, k = k0; i < (uint) dstWidth;
417           i++, j += colStride, k += colStride) {
418         dst[i] = rowA[j] / 4 + rowA[k] / 4 + rowB[j] / 4 + rowB[k] / 4;
419      }
420   }
421
422   else if (datatype == DTYPE_USHORT_5_6_5 && comps == 3) {
423      uint i, j, k;
424      const ushort *rowA = (const ushort *) srcRowA;
425      const ushort *rowB = (const ushort *) srcRowB;
426      ushort *dst = (ushort *) dstRow;
427      for (i = j = 0, k = k0; i < (uint) dstWidth;
428           i++, j += colStride, k += colStride) {
429         const int rowAr0 = rowA[j] & 0x1f;
430         const int rowAr1 = rowA[k] & 0x1f;
431         const int rowBr0 = rowB[j] & 0x1f;
432         const int rowBr1 = rowB[k] & 0x1f;
433         const int rowAg0 = (rowA[j] >> 5) & 0x3f;
434         const int rowAg1 = (rowA[k] >> 5) & 0x3f;
435         const int rowBg0 = (rowB[j] >> 5) & 0x3f;
436         const int rowBg1 = (rowB[k] >> 5) & 0x3f;
437         const int rowAb0 = (rowA[j] >> 11) & 0x1f;
438         const int rowAb1 = (rowA[k] >> 11) & 0x1f;
439         const int rowBb0 = (rowB[j] >> 11) & 0x1f;
440         const int rowBb1 = (rowB[k] >> 11) & 0x1f;
441         const int red = (rowAr0 + rowAr1 + rowBr0 + rowBr1) >> 2;
442         const int green = (rowAg0 + rowAg1 + rowBg0 + rowBg1) >> 2;
443         const int blue = (rowAb0 + rowAb1 + rowBb0 + rowBb1) >> 2;
444         dst[i] = (blue << 11) | (green << 5) | red;
445      }
446   }
447   else if (datatype == DTYPE_USHORT_4_4_4_4 && comps == 4) {
448      uint i, j, k;
449      const ushort *rowA = (const ushort *) srcRowA;
450      const ushort *rowB = (const ushort *) srcRowB;
451      ushort *dst = (ushort *) dstRow;
452      for (i = j = 0, k = k0; i < (uint) dstWidth;
453           i++, j += colStride, k += colStride) {
454         const int rowAr0 = rowA[j] & 0xf;
455         const int rowAr1 = rowA[k] & 0xf;
456         const int rowBr0 = rowB[j] & 0xf;
457         const int rowBr1 = rowB[k] & 0xf;
458         const int rowAg0 = (rowA[j] >> 4) & 0xf;
459         const int rowAg1 = (rowA[k] >> 4) & 0xf;
460         const int rowBg0 = (rowB[j] >> 4) & 0xf;
461         const int rowBg1 = (rowB[k] >> 4) & 0xf;
462         const int rowAb0 = (rowA[j] >> 8) & 0xf;
463         const int rowAb1 = (rowA[k] >> 8) & 0xf;
464         const int rowBb0 = (rowB[j] >> 8) & 0xf;
465         const int rowBb1 = (rowB[k] >> 8) & 0xf;
466         const int rowAa0 = (rowA[j] >> 12) & 0xf;
467         const int rowAa1 = (rowA[k] >> 12) & 0xf;
468         const int rowBa0 = (rowB[j] >> 12) & 0xf;
469         const int rowBa1 = (rowB[k] >> 12) & 0xf;
470         const int red = (rowAr0 + rowAr1 + rowBr0 + rowBr1) >> 2;
471         const int green = (rowAg0 + rowAg1 + rowBg0 + rowBg1) >> 2;
472         const int blue = (rowAb0 + rowAb1 + rowBb0 + rowBb1) >> 2;
473         const int alpha = (rowAa0 + rowAa1 + rowBa0 + rowBa1) >> 2;
474         dst[i] = (alpha << 12) | (blue << 8) | (green << 4) | red;
475      }
476   }
477   else if (datatype == DTYPE_USHORT_1_5_5_5_REV && comps == 4) {
478      uint i, j, k;
479      const ushort *rowA = (const ushort *) srcRowA;
480      const ushort *rowB = (const ushort *) srcRowB;
481      ushort *dst = (ushort *) dstRow;
482      for (i = j = 0, k = k0; i < (uint) dstWidth;
483           i++, j += colStride, k += colStride) {
484         const int rowAr0 = rowA[j] & 0x1f;
485         const int rowAr1 = rowA[k] & 0x1f;
486         const int rowBr0 = rowB[j] & 0x1f;
487         const int rowBr1 = rowB[k] & 0x1f;
488         const int rowAg0 = (rowA[j] >> 5) & 0x1f;
489         const int rowAg1 = (rowA[k] >> 5) & 0x1f;
490         const int rowBg0 = (rowB[j] >> 5) & 0x1f;
491         const int rowBg1 = (rowB[k] >> 5) & 0x1f;
492         const int rowAb0 = (rowA[j] >> 10) & 0x1f;
493         const int rowAb1 = (rowA[k] >> 10) & 0x1f;
494         const int rowBb0 = (rowB[j] >> 10) & 0x1f;
495         const int rowBb1 = (rowB[k] >> 10) & 0x1f;
496         const int rowAa0 = (rowA[j] >> 15) & 0x1;
497         const int rowAa1 = (rowA[k] >> 15) & 0x1;
498         const int rowBa0 = (rowB[j] >> 15) & 0x1;
499         const int rowBa1 = (rowB[k] >> 15) & 0x1;
500         const int red = (rowAr0 + rowAr1 + rowBr0 + rowBr1) >> 2;
501         const int green = (rowAg0 + rowAg1 + rowBg0 + rowBg1) >> 2;
502         const int blue = (rowAb0 + rowAb1 + rowBb0 + rowBb1) >> 2;
503         const int alpha = (rowAa0 + rowAa1 + rowBa0 + rowBa1) >> 2;
504         dst[i] = (alpha << 15) | (blue << 10) | (green << 5) | red;
505      }
506   }
507   else if (datatype == DTYPE_UBYTE_3_3_2 && comps == 3) {
508      uint i, j, k;
509      const ubyte *rowA = (const ubyte *) srcRowA;
510      const ubyte *rowB = (const ubyte *) srcRowB;
511      ubyte *dst = (ubyte *) dstRow;
512      for (i = j = 0, k = k0; i < (uint) dstWidth;
513           i++, j += colStride, k += colStride) {
514         const int rowAr0 = rowA[j] & 0x3;
515         const int rowAr1 = rowA[k] & 0x3;
516         const int rowBr0 = rowB[j] & 0x3;
517         const int rowBr1 = rowB[k] & 0x3;
518         const int rowAg0 = (rowA[j] >> 2) & 0x7;
519         const int rowAg1 = (rowA[k] >> 2) & 0x7;
520         const int rowBg0 = (rowB[j] >> 2) & 0x7;
521         const int rowBg1 = (rowB[k] >> 2) & 0x7;
522         const int rowAb0 = (rowA[j] >> 5) & 0x7;
523         const int rowAb1 = (rowA[k] >> 5) & 0x7;
524         const int rowBb0 = (rowB[j] >> 5) & 0x7;
525         const int rowBb1 = (rowB[k] >> 5) & 0x7;
526         const int red = (rowAr0 + rowAr1 + rowBr0 + rowBr1) >> 2;
527         const int green = (rowAg0 + rowAg1 + rowBg0 + rowBg1) >> 2;
528         const int blue = (rowAb0 + rowAb1 + rowBb0 + rowBb1) >> 2;
529         dst[i] = (blue << 5) | (green << 2) | red;
530      }
531   }
532   else {
533      debug_printf("bad format in do_row()");
534   }
535}
536
537
/**
 * Average together four rows of a source image to produce a single new
 * row in the dest image.  It's legal for the source rows to point
 * to the same data.  The source width must be equal to either the
 * dest width or two times the dest width.
 *
 * \param datatype  Storage type of the texels: an \c enum \c dtype value
 *                  such as \c DTYPE_UBYTE, \c DTYPE_USHORT or \c DTYPE_FLOAT
 * \param comps     number of components per pixel (1..4)
 * \param srcWidth  Width of a row in the source data
 * \param srcRowA   Pointer to one of the rows of source data
 * \param srcRowB   Pointer to one of the rows of source data
 * \param srcRowC   Pointer to one of the rows of source data
 * \param srcRowD   Pointer to one of the rows of source data
 * \param dstWidth  Width of a row in the destination data
 * \param dstRow    Pointer to the row of destination data
 */
555static void
556do_row_3D(enum dtype datatype, uint comps, int srcWidth,
557          const void *srcRowA, const void *srcRowB,
558          const void *srcRowC, const void *srcRowD,
559          int dstWidth, void *dstRow)
560{
561   const uint k0 = (srcWidth == dstWidth) ? 0 : 1;
562   const uint colStride = (srcWidth == dstWidth) ? 1 : 2;
563   uint i, j, k;
564
565   assert(comps >= 1);
566   assert(comps <= 4);
567
568   if ((datatype == DTYPE_UBYTE) && (comps == 4)) {
569      DECLARE_ROW_POINTERS(ubyte, 4);
570
571      for (i = j = 0, k = k0; i < (uint) dstWidth;
572           i++, j += colStride, k += colStride) {
573         FILTER_3D(0);
574         FILTER_3D(1);
575         FILTER_3D(2);
576         FILTER_3D(3);
577      }
578   }
579   else if ((datatype == DTYPE_UBYTE) && (comps == 3)) {
580      DECLARE_ROW_POINTERS(ubyte, 3);
581
582      for (i = j = 0, k = k0; i < (uint) dstWidth;
583           i++, j += colStride, k += colStride) {
584         FILTER_3D(0);
585         FILTER_3D(1);
586         FILTER_3D(2);
587      }
588   }
589   else if ((datatype == DTYPE_UBYTE) && (comps == 2)) {
590      DECLARE_ROW_POINTERS(ubyte, 2);
591
592      for (i = j = 0, k = k0; i < (uint) dstWidth;
593           i++, j += colStride, k += colStride) {
594         FILTER_3D(0);
595         FILTER_3D(1);
596      }
597   }
598   else if ((datatype == DTYPE_UBYTE) && (comps == 1)) {
599      DECLARE_ROW_POINTERS(ubyte, 1);
600
601      for (i = j = 0, k = k0; i < (uint) dstWidth;
602           i++, j += colStride, k += colStride) {
603         FILTER_3D(0);
604      }
605   }
606   else if ((datatype == DTYPE_USHORT) && (comps == 4)) {
607      DECLARE_ROW_POINTERS(ushort, 4);
608
609      for (i = j = 0, k = k0; i < (uint) dstWidth;
610           i++, j += colStride, k += colStride) {
611         FILTER_3D(0);
612         FILTER_3D(1);
613         FILTER_3D(2);
614         FILTER_3D(3);
615      }
616   }
617   else if ((datatype == DTYPE_USHORT) && (comps == 3)) {
618      DECLARE_ROW_POINTERS(ushort, 3);
619
620      for (i = j = 0, k = k0; i < (uint) dstWidth;
621           i++, j += colStride, k += colStride) {
622         FILTER_3D(0);
623         FILTER_3D(1);
624         FILTER_3D(2);
625      }
626   }
627   else if ((datatype == DTYPE_USHORT) && (comps == 2)) {
628      DECLARE_ROW_POINTERS(ushort, 2);
629
630      for (i = j = 0, k = k0; i < (uint) dstWidth;
631           i++, j += colStride, k += colStride) {
632         FILTER_3D(0);
633         FILTER_3D(1);
634      }
635   }
636   else if ((datatype == DTYPE_USHORT) && (comps == 1)) {
637      DECLARE_ROW_POINTERS(ushort, 1);
638
639      for (i = j = 0, k = k0; i < (uint) dstWidth;
640           i++, j += colStride, k += colStride) {
641         FILTER_3D(0);
642      }
643   }
644   else if ((datatype == DTYPE_FLOAT) && (comps == 4)) {
645      DECLARE_ROW_POINTERS(float, 4);
646
647      for (i = j = 0, k = k0; i < (uint) dstWidth;
648           i++, j += colStride, k += colStride) {
649         FILTER_F_3D(0);
650         FILTER_F_3D(1);
651         FILTER_F_3D(2);
652         FILTER_F_3D(3);
653      }
654   }
655   else if ((datatype == DTYPE_FLOAT) && (comps == 3)) {
656      DECLARE_ROW_POINTERS(float, 3);
657
658      for (i = j = 0, k = k0; i < (uint) dstWidth;
659           i++, j += colStride, k += colStride) {
660         FILTER_F_3D(0);
661         FILTER_F_3D(1);
662         FILTER_F_3D(2);
663      }
664   }
665   else if ((datatype == DTYPE_FLOAT) && (comps == 2)) {
666      DECLARE_ROW_POINTERS(float, 2);
667
668      for (i = j = 0, k = k0; i < (uint) dstWidth;
669           i++, j += colStride, k += colStride) {
670         FILTER_F_3D(0);
671         FILTER_F_3D(1);
672      }
673   }
674   else if ((datatype == DTYPE_FLOAT) && (comps == 1)) {
675      DECLARE_ROW_POINTERS(float, 1);
676
677      for (i = j = 0, k = k0; i < (uint) dstWidth;
678           i++, j += colStride, k += colStride) {
679         FILTER_F_3D(0);
680      }
681   }
682   else if ((datatype == DTYPE_HALF_FLOAT) && (comps == 4)) {
683      DECLARE_ROW_POINTERS(half_float, 4);
684
685      for (i = j = 0, k = k0; i < (uint) dstWidth;
686           i++, j += colStride, k += colStride) {
687         FILTER_HF_3D(0);
688         FILTER_HF_3D(1);
689         FILTER_HF_3D(2);
690         FILTER_HF_3D(3);
691      }
692   }
693   else if ((datatype == DTYPE_HALF_FLOAT) && (comps == 3)) {
694      DECLARE_ROW_POINTERS(half_float, 4);
695
696      for (i = j = 0, k = k0; i < (uint) dstWidth;
697           i++, j += colStride, k += colStride) {
698         FILTER_HF_3D(0);
699         FILTER_HF_3D(1);
700         FILTER_HF_3D(2);
701      }
702   }
703   else if ((datatype == DTYPE_HALF_FLOAT) && (comps == 2)) {
704      DECLARE_ROW_POINTERS(half_float, 4);
705
706      for (i = j = 0, k = k0; i < (uint) dstWidth;
707           i++, j += colStride, k += colStride) {
708         FILTER_HF_3D(0);
709         FILTER_HF_3D(1);
710      }
711   }
712   else if ((datatype == DTYPE_HALF_FLOAT) && (comps == 1)) {
713      DECLARE_ROW_POINTERS(half_float, 4);
714
715      for (i = j = 0, k = k0; i < (uint) dstWidth;
716           i++, j += colStride, k += colStride) {
717         FILTER_HF_3D(0);
718      }
719   }
720   else if ((datatype == DTYPE_UINT) && (comps == 1)) {
721      const uint *rowA = (const uint *) srcRowA;
722      const uint *rowB = (const uint *) srcRowB;
723      const uint *rowC = (const uint *) srcRowC;
724      const uint *rowD = (const uint *) srcRowD;
725      float *dst = (float *) dstRow;
726
727      for (i = j = 0, k = k0; i < (uint) dstWidth;
728           i++, j += colStride, k += colStride) {
729         const uint64_t tmp = (((uint64_t) rowA[j] + (uint64_t) rowA[k])
730                               + ((uint64_t) rowB[j] + (uint64_t) rowB[k])
731                               + ((uint64_t) rowC[j] + (uint64_t) rowC[k])
732                               + ((uint64_t) rowD[j] + (uint64_t) rowD[k]));
733         dst[i] = (float)((double) tmp * 0.125);
734      }
735   }
736   else if ((datatype == DTYPE_USHORT_5_6_5) && (comps == 3)) {
737      DECLARE_ROW_POINTERS0(ushort);
738
739      for (i = j = 0, k = k0; i < (uint) dstWidth;
740           i++, j += colStride, k += colStride) {
741         const int rowAr0 = rowA[j] & 0x1f;
742         const int rowAr1 = rowA[k] & 0x1f;
743         const int rowBr0 = rowB[j] & 0x1f;
744         const int rowBr1 = rowB[k] & 0x1f;
745         const int rowCr0 = rowC[j] & 0x1f;
746         const int rowCr1 = rowC[k] & 0x1f;
747         const int rowDr0 = rowD[j] & 0x1f;
748         const int rowDr1 = rowD[k] & 0x1f;
749         const int rowAg0 = (rowA[j] >> 5) & 0x3f;
750         const int rowAg1 = (rowA[k] >> 5) & 0x3f;
751         const int rowBg0 = (rowB[j] >> 5) & 0x3f;
752         const int rowBg1 = (rowB[k] >> 5) & 0x3f;
753         const int rowCg0 = (rowC[j] >> 5) & 0x3f;
754         const int rowCg1 = (rowC[k] >> 5) & 0x3f;
755         const int rowDg0 = (rowD[j] >> 5) & 0x3f;
756         const int rowDg1 = (rowD[k] >> 5) & 0x3f;
757         const int rowAb0 = (rowA[j] >> 11) & 0x1f;
758         const int rowAb1 = (rowA[k] >> 11) & 0x1f;
759         const int rowBb0 = (rowB[j] >> 11) & 0x1f;
760         const int rowBb1 = (rowB[k] >> 11) & 0x1f;
761         const int rowCb0 = (rowC[j] >> 11) & 0x1f;
762         const int rowCb1 = (rowC[k] >> 11) & 0x1f;
763         const int rowDb0 = (rowD[j] >> 11) & 0x1f;
764         const int rowDb1 = (rowD[k] >> 11) & 0x1f;
765         const int r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1,
766                                       rowCr0, rowCr1, rowDr0, rowDr1);
767         const int g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1,
768                                       rowCg0, rowCg1, rowDg0, rowDg1);
769         const int b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1,
770                                       rowCb0, rowCb1, rowDb0, rowDb1);
771         dst[i] = (b << 11) | (g << 5) | r;
772      }
773   }
774   else if ((datatype == DTYPE_USHORT_4_4_4_4) && (comps == 4)) {
775      DECLARE_ROW_POINTERS0(ushort);
776
777      for (i = j = 0, k = k0; i < (uint) dstWidth;
778           i++, j += colStride, k += colStride) {
779         const int rowAr0 = rowA[j] & 0xf;
780         const int rowAr1 = rowA[k] & 0xf;
781         const int rowBr0 = rowB[j] & 0xf;
782         const int rowBr1 = rowB[k] & 0xf;
783         const int rowCr0 = rowC[j] & 0xf;
784         const int rowCr1 = rowC[k] & 0xf;
785         const int rowDr0 = rowD[j] & 0xf;
786         const int rowDr1 = rowD[k] & 0xf;
787         const int rowAg0 = (rowA[j] >> 4) & 0xf;
788         const int rowAg1 = (rowA[k] >> 4) & 0xf;
789         const int rowBg0 = (rowB[j] >> 4) & 0xf;
790         const int rowBg1 = (rowB[k] >> 4) & 0xf;
791         const int rowCg0 = (rowC[j] >> 4) & 0xf;
792         const int rowCg1 = (rowC[k] >> 4) & 0xf;
793         const int rowDg0 = (rowD[j] >> 4) & 0xf;
794         const int rowDg1 = (rowD[k] >> 4) & 0xf;
795         const int rowAb0 = (rowA[j] >> 8) & 0xf;
796         const int rowAb1 = (rowA[k] >> 8) & 0xf;
797         const int rowBb0 = (rowB[j] >> 8) & 0xf;
798         const int rowBb1 = (rowB[k] >> 8) & 0xf;
799         const int rowCb0 = (rowC[j] >> 8) & 0xf;
800         const int rowCb1 = (rowC[k] >> 8) & 0xf;
801         const int rowDb0 = (rowD[j] >> 8) & 0xf;
802         const int rowDb1 = (rowD[k] >> 8) & 0xf;
803         const int rowAa0 = (rowA[j] >> 12) & 0xf;
804         const int rowAa1 = (rowA[k] >> 12) & 0xf;
805         const int rowBa0 = (rowB[j] >> 12) & 0xf;
806         const int rowBa1 = (rowB[k] >> 12) & 0xf;
807         const int rowCa0 = (rowC[j] >> 12) & 0xf;
808         const int rowCa1 = (rowC[k] >> 12) & 0xf;
809         const int rowDa0 = (rowD[j] >> 12) & 0xf;
810         const int rowDa1 = (rowD[k] >> 12) & 0xf;
811         const int r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1,
812                                       rowCr0, rowCr1, rowDr0, rowDr1);
813         const int g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1,
814                                       rowCg0, rowCg1, rowDg0, rowDg1);
815         const int b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1,
816                                       rowCb0, rowCb1, rowDb0, rowDb1);
817         const int a = FILTER_SUM_3D(rowAa0, rowAa1, rowBa0, rowBa1,
818                                       rowCa0, rowCa1, rowDa0, rowDa1);
819
820         dst[i] = (a << 12) | (b << 8) | (g << 4) | r;
821      }
822   }
823   else if ((datatype == DTYPE_USHORT_1_5_5_5_REV) && (comps == 4)) {
824      DECLARE_ROW_POINTERS0(ushort);
825
826      for (i = j = 0, k = k0; i < (uint) dstWidth;
827           i++, j += colStride, k += colStride) {
828         const int rowAr0 = rowA[j] & 0x1f;
829         const int rowAr1 = rowA[k] & 0x1f;
830         const int rowBr0 = rowB[j] & 0x1f;
831         const int rowBr1 = rowB[k] & 0x1f;
832         const int rowCr0 = rowC[j] & 0x1f;
833         const int rowCr1 = rowC[k] & 0x1f;
834         const int rowDr0 = rowD[j] & 0x1f;
835         const int rowDr1 = rowD[k] & 0x1f;
836         const int rowAg0 = (rowA[j] >> 5) & 0x1f;
837         const int rowAg1 = (rowA[k] >> 5) & 0x1f;
838         const int rowBg0 = (rowB[j] >> 5) & 0x1f;
839         const int rowBg1 = (rowB[k] >> 5) & 0x1f;
840         const int rowCg0 = (rowC[j] >> 5) & 0x1f;
841         const int rowCg1 = (rowC[k] >> 5) & 0x1f;
842         const int rowDg0 = (rowD[j] >> 5) & 0x1f;
843         const int rowDg1 = (rowD[k] >> 5) & 0x1f;
844         const int rowAb0 = (rowA[j] >> 10) & 0x1f;
845         const int rowAb1 = (rowA[k] >> 10) & 0x1f;
846         const int rowBb0 = (rowB[j] >> 10) & 0x1f;
847         const int rowBb1 = (rowB[k] >> 10) & 0x1f;
848         const int rowCb0 = (rowC[j] >> 10) & 0x1f;
849         const int rowCb1 = (rowC[k] >> 10) & 0x1f;
850         const int rowDb0 = (rowD[j] >> 10) & 0x1f;
851         const int rowDb1 = (rowD[k] >> 10) & 0x1f;
852         const int rowAa0 = (rowA[j] >> 15) & 0x1;
853         const int rowAa1 = (rowA[k] >> 15) & 0x1;
854         const int rowBa0 = (rowB[j] >> 15) & 0x1;
855         const int rowBa1 = (rowB[k] >> 15) & 0x1;
856         const int rowCa0 = (rowC[j] >> 15) & 0x1;
857         const int rowCa1 = (rowC[k] >> 15) & 0x1;
858         const int rowDa0 = (rowD[j] >> 15) & 0x1;
859         const int rowDa1 = (rowD[k] >> 15) & 0x1;
860         const int r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1,
861                                       rowCr0, rowCr1, rowDr0, rowDr1);
862         const int g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1,
863                                       rowCg0, rowCg1, rowDg0, rowDg1);
864         const int b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1,
865                                       rowCb0, rowCb1, rowDb0, rowDb1);
866         const int a = FILTER_SUM_3D(rowAa0, rowAa1, rowBa0, rowBa1,
867                                       rowCa0, rowCa1, rowDa0, rowDa1);
868
869         dst[i] = (a << 15) | (b << 10) | (g << 5) | r;
870      }
871   }
872   else if ((datatype == DTYPE_UBYTE_3_3_2) && (comps == 3)) {
873      DECLARE_ROW_POINTERS0(ushort);
874
875      for (i = j = 0, k = k0; i < (uint) dstWidth;
876           i++, j += colStride, k += colStride) {
877         const int rowAr0 = rowA[j] & 0x3;
878         const int rowAr1 = rowA[k] & 0x3;
879         const int rowBr0 = rowB[j] & 0x3;
880         const int rowBr1 = rowB[k] & 0x3;
881         const int rowCr0 = rowC[j] & 0x3;
882         const int rowCr1 = rowC[k] & 0x3;
883         const int rowDr0 = rowD[j] & 0x3;
884         const int rowDr1 = rowD[k] & 0x3;
885         const int rowAg0 = (rowA[j] >> 2) & 0x7;
886         const int rowAg1 = (rowA[k] >> 2) & 0x7;
887         const int rowBg0 = (rowB[j] >> 2) & 0x7;
888         const int rowBg1 = (rowB[k] >> 2) & 0x7;
889         const int rowCg0 = (rowC[j] >> 2) & 0x7;
890         const int rowCg1 = (rowC[k] >> 2) & 0x7;
891         const int rowDg0 = (rowD[j] >> 2) & 0x7;
892         const int rowDg1 = (rowD[k] >> 2) & 0x7;
893         const int rowAb0 = (rowA[j] >> 5) & 0x7;
894         const int rowAb1 = (rowA[k] >> 5) & 0x7;
895         const int rowBb0 = (rowB[j] >> 5) & 0x7;
896         const int rowBb1 = (rowB[k] >> 5) & 0x7;
897         const int rowCb0 = (rowC[j] >> 5) & 0x7;
898         const int rowCb1 = (rowC[k] >> 5) & 0x7;
899         const int rowDb0 = (rowD[j] >> 5) & 0x7;
900         const int rowDb1 = (rowD[k] >> 5) & 0x7;
901         const int r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1,
902                                       rowCr0, rowCr1, rowDr0, rowDr1);
903         const int g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1,
904                                       rowCg0, rowCg1, rowDg0, rowDg1);
905         const int b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1,
906                                       rowCb0, rowCb1, rowDb0, rowDb1);
907         dst[i] = (b << 5) | (g << 2) | r;
908      }
909   }
910   else {
911      debug_printf("bad format in do_row_3D()");
912   }
913}
914
915
916
917static void
918format_to_type_comps(enum pipe_format pformat,
919                     enum dtype *datatype, uint *comps)
920{
921   /* XXX I think this could be implemented in terms of the pf_*() functions */
922   switch (pformat) {
923   case PIPE_FORMAT_A8R8G8B8_UNORM:
924   case PIPE_FORMAT_X8R8G8B8_UNORM:
925   case PIPE_FORMAT_B8G8R8A8_UNORM:
926   case PIPE_FORMAT_B8G8R8X8_UNORM:
927   case PIPE_FORMAT_R8G8B8A8_SRGB:
928   case PIPE_FORMAT_R8G8B8X8_SRGB:
929   case PIPE_FORMAT_A8R8G8B8_SRGB:
930   case PIPE_FORMAT_X8R8G8B8_SRGB:
931   case PIPE_FORMAT_B8G8R8A8_SRGB:
932   case PIPE_FORMAT_B8G8R8X8_SRGB:
933   case PIPE_FORMAT_R8G8B8_SRGB:
934      *datatype = DTYPE_UBYTE;
935      *comps = 4;
936      return;
937   case PIPE_FORMAT_A1R5G5B5_UNORM:
938      *datatype = DTYPE_USHORT_1_5_5_5_REV;
939      *comps = 4;
940      return;
941   case PIPE_FORMAT_A4R4G4B4_UNORM:
942      *datatype = DTYPE_USHORT_4_4_4_4;
943      *comps = 4;
944      return;
945   case PIPE_FORMAT_R5G6B5_UNORM:
946      *datatype = DTYPE_USHORT_5_6_5;
947      *comps = 3;
948      return;
949   case PIPE_FORMAT_L8_UNORM:
950   case PIPE_FORMAT_L8_SRGB:
951   case PIPE_FORMAT_A8_UNORM:
952   case PIPE_FORMAT_I8_UNORM:
953      *datatype = DTYPE_UBYTE;
954      *comps = 1;
955      return;
956   case PIPE_FORMAT_A8L8_UNORM:
957   case PIPE_FORMAT_A8L8_SRGB:
958      *datatype = DTYPE_UBYTE;
959      *comps = 2;
960      return;
961   default:
962      assert(0);
963      *datatype = DTYPE_UBYTE;
964      *comps = 0;
965      break;
966   }
967}
968
969
970static void
971reduce_1d(enum pipe_format pformat,
972          int srcWidth, const ubyte *srcPtr,
973          int dstWidth, ubyte *dstPtr)
974{
975   enum dtype datatype;
976   uint comps;
977
978   format_to_type_comps(pformat, &datatype, &comps);
979
980   /* we just duplicate the input row, kind of hack, saves code */
981   do_row(datatype, comps,
982          srcWidth, srcPtr, srcPtr,
983          dstWidth, dstPtr);
984}
985
986
987/**
988 * Strides are in bytes.  If zero, it'll be computed as width * bpp.
989 */
990static void
991reduce_2d(enum pipe_format pformat,
992          int srcWidth, int srcHeight,
993          int srcRowStride, const ubyte *srcPtr,
994          int dstWidth, int dstHeight,
995          int dstRowStride, ubyte *dstPtr)
996{
997   enum dtype datatype;
998   uint comps;
999   const int bpt = pf_get_blocksize(pformat);
1000   const ubyte *srcA, *srcB;
1001   ubyte *dst;
1002   int row;
1003
1004   format_to_type_comps(pformat, &datatype, &comps);
1005
1006   if (!srcRowStride)
1007      srcRowStride = bpt * srcWidth;
1008
1009   if (!dstRowStride)
1010      dstRowStride = bpt * dstWidth;
1011
1012   /* Compute src and dst pointers */
1013   srcA = srcPtr;
1014   if (srcHeight > 1)
1015      srcB = srcA + srcRowStride;
1016   else
1017      srcB = srcA;
1018   dst = dstPtr;
1019
1020   for (row = 0; row < dstHeight; row++) {
1021      do_row(datatype, comps,
1022             srcWidth, srcA, srcB,
1023             dstWidth, dst);
1024      srcA += 2 * srcRowStride;
1025      srcB += 2 * srcRowStride;
1026      dst += dstRowStride;
1027   }
1028}
1029
1030
1031static void
1032reduce_3d(enum pipe_format pformat,
1033          int srcWidth, int srcHeight, int srcDepth,
1034          int srcRowStride, const ubyte *srcPtr,
1035          int dstWidth, int dstHeight, int dstDepth,
1036          int dstRowStride, ubyte *dstPtr)
1037{
1038   const int bpt = pf_get_blocksize(pformat);
1039   const int border = 0;
1040   int img, row;
1041   int bytesPerSrcImage, bytesPerDstImage;
1042   int bytesPerSrcRow, bytesPerDstRow;
1043   int srcImageOffset, srcRowOffset;
1044   enum dtype datatype;
1045   uint comps;
1046
1047   format_to_type_comps(pformat, &datatype, &comps);
1048
1049   bytesPerSrcImage = srcWidth * srcHeight * bpt;
1050   bytesPerDstImage = dstWidth * dstHeight * bpt;
1051
1052   bytesPerSrcRow = srcWidth * bpt;
1053   bytesPerDstRow = dstWidth * bpt;
1054
1055   /* Offset between adjacent src images to be averaged together */
1056   srcImageOffset = (srcDepth == dstDepth) ? 0 : bytesPerSrcImage;
1057
1058   /* Offset between adjacent src rows to be averaged together */
1059   srcRowOffset = (srcHeight == dstHeight) ? 0 : srcWidth * bpt;
1060
1061   /*
1062    * Need to average together up to 8 src pixels for each dest pixel.
1063    * Break that down into 3 operations:
1064    *   1. take two rows from source image and average them together.
1065    *   2. take two rows from next source image and average them together.
1066    *   3. take the two averaged rows and average them for the final dst row.
1067    */
1068
1069   /*
1070   _mesa_printf("mip3d %d x %d x %d  ->  %d x %d x %d\n",
1071          srcWidth, srcHeight, srcDepth, dstWidth, dstHeight, dstDepth);
1072   */
1073
1074   for (img = 0; img < dstDepth; img++) {
1075      /* first source image pointer, skipping border */
1076      const ubyte *imgSrcA = srcPtr
1077         + (bytesPerSrcImage + bytesPerSrcRow + border) * bpt * border
1078         + img * (bytesPerSrcImage + srcImageOffset);
1079      /* second source image pointer, skipping border */
1080      const ubyte *imgSrcB = imgSrcA + srcImageOffset;
1081      /* address of the dest image, skipping border */
1082      ubyte *imgDst = dstPtr
1083         + (bytesPerDstImage + bytesPerDstRow + border) * bpt * border
1084         + img * bytesPerDstImage;
1085
1086      /* setup the four source row pointers and the dest row pointer */
1087      const ubyte *srcImgARowA = imgSrcA;
1088      const ubyte *srcImgARowB = imgSrcA + srcRowOffset;
1089      const ubyte *srcImgBRowA = imgSrcB;
1090      const ubyte *srcImgBRowB = imgSrcB + srcRowOffset;
1091      ubyte *dstImgRow = imgDst;
1092
1093      for (row = 0; row < dstHeight; row++) {
1094         do_row_3D(datatype, comps, srcWidth,
1095                   srcImgARowA, srcImgARowB,
1096                   srcImgBRowA, srcImgBRowB,
1097                   dstWidth, dstImgRow);
1098
1099         /* advance to next rows */
1100         srcImgARowA += bytesPerSrcRow + srcRowOffset;
1101         srcImgARowB += bytesPerSrcRow + srcRowOffset;
1102         srcImgBRowA += bytesPerSrcRow + srcRowOffset;
1103         srcImgBRowB += bytesPerSrcRow + srcRowOffset;
1104         dstImgRow += bytesPerDstRow;
1105      }
1106   }
1107}
1108
1109
1110
1111
1112static void
1113make_1d_mipmap(struct gen_mipmap_state *ctx,
1114               struct pipe_texture *pt,
1115               uint face, uint baseLevel, uint lastLevel)
1116{
1117   struct pipe_context *pipe = ctx->pipe;
1118   struct pipe_screen *screen = pipe->screen;
1119   const uint zslice = 0;
1120   uint dstLevel;
1121
1122   for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) {
1123      const uint srcLevel = dstLevel - 1;
1124      struct pipe_transfer *srcTrans, *dstTrans;
1125      void *srcMap, *dstMap;
1126
1127      srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice,
1128                                          PIPE_TRANSFER_READ, 0, 0,
1129                                          u_minify(pt->width0, srcLevel),
1130                                          u_minify(pt->height0, srcLevel));
1131      dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice,
1132                                          PIPE_TRANSFER_WRITE, 0, 0,
1133                                          u_minify(pt->width0, dstLevel),
1134                                          u_minify(pt->height0, dstLevel));
1135
1136      srcMap = (ubyte *) screen->transfer_map(screen, srcTrans);
1137      dstMap = (ubyte *) screen->transfer_map(screen, dstTrans);
1138
1139      reduce_1d(pt->format,
1140                srcTrans->width, srcMap,
1141                dstTrans->width, dstMap);
1142
1143      screen->transfer_unmap(screen, srcTrans);
1144      screen->transfer_unmap(screen, dstTrans);
1145
1146      screen->tex_transfer_destroy(srcTrans);
1147      screen->tex_transfer_destroy(dstTrans);
1148   }
1149}
1150
1151
1152static void
1153make_2d_mipmap(struct gen_mipmap_state *ctx,
1154               struct pipe_texture *pt,
1155               uint face, uint baseLevel, uint lastLevel)
1156{
1157   struct pipe_context *pipe = ctx->pipe;
1158   struct pipe_screen *screen = pipe->screen;
1159   const uint zslice = 0;
1160   uint dstLevel;
1161
1162   assert(pf_get_blockwidth(pt->format) == 1);
1163   assert(pf_get_blockheight(pt->format) == 1);
1164
1165   for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) {
1166      const uint srcLevel = dstLevel - 1;
1167      struct pipe_transfer *srcTrans, *dstTrans;
1168      ubyte *srcMap, *dstMap;
1169
1170      srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice,
1171                                          PIPE_TRANSFER_READ, 0, 0,
1172                                          u_minify(pt->width0, srcLevel),
1173                                          u_minify(pt->height0, srcLevel));
1174      dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice,
1175                                          PIPE_TRANSFER_WRITE, 0, 0,
1176                                          u_minify(pt->width0, dstLevel),
1177                                          u_minify(pt->height0, dstLevel));
1178
1179      srcMap = (ubyte *) screen->transfer_map(screen, srcTrans);
1180      dstMap = (ubyte *) screen->transfer_map(screen, dstTrans);
1181
1182      reduce_2d(pt->format,
1183                srcTrans->width, srcTrans->height,
1184                srcTrans->stride, srcMap,
1185                dstTrans->width, dstTrans->height,
1186                dstTrans->stride, dstMap);
1187
1188      screen->transfer_unmap(screen, srcTrans);
1189      screen->transfer_unmap(screen, dstTrans);
1190
1191      screen->tex_transfer_destroy(srcTrans);
1192      screen->tex_transfer_destroy(dstTrans);
1193   }
1194}
1195
1196
1197static void
1198make_3d_mipmap(struct gen_mipmap_state *ctx,
1199               struct pipe_texture *pt,
1200               uint face, uint baseLevel, uint lastLevel)
1201{
1202#if 0
1203   struct pipe_context *pipe = ctx->pipe;
1204   struct pipe_screen *screen = pipe->screen;
1205   uint dstLevel, zslice = 0;
1206
1207   assert(pf_get_blockwidth(pt->format) == 1);
1208   assert(pf_get_blockheight(pt->format) == 1);
1209
1210   for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) {
1211      const uint srcLevel = dstLevel - 1;
1212      struct pipe_transfer *srcTrans, *dstTrans;
1213      ubyte *srcMap, *dstMap;
1214
1215      srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice,
1216                                          PIPE_TRANSFER_READ, 0, 0,
1217                                          u_minify(pt->width0, srcLevel),
1218                                          u_minify(pt->height0, srcLevel));
1219      dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice,
1220                                          PIPE_TRANSFER_WRITE, 0, 0,
1221                                          u_minify(pt->width0, dstLevel),
1222                                          u_minify(pt->height0, dstLevel));
1223
1224      srcMap = (ubyte *) screen->transfer_map(screen, srcTrans);
1225      dstMap = (ubyte *) screen->transfer_map(screen, dstTrans);
1226
1227      reduce_3d(pt->format,
1228                srcTrans->width, srcTrans->height,
1229                srcTrans->stride, srcMap,
1230                dstTrans->width, dstTrans->height,
1231                dstTrans->stride, dstMap);
1232
1233      screen->transfer_unmap(screen, srcTrans);
1234      screen->transfer_unmap(screen, dstTrans);
1235
1236      screen->tex_transfer_destroy(srcTrans);
1237      screen->tex_transfer_destroy(dstTrans);
1238   }
1239#else
1240   (void) reduce_3d;
1241#endif
1242}
1243
1244
1245static void
1246fallback_gen_mipmap(struct gen_mipmap_state *ctx,
1247                    struct pipe_texture *pt,
1248                    uint face, uint baseLevel, uint lastLevel)
1249{
1250   switch (pt->target) {
1251   case PIPE_TEXTURE_1D:
1252      make_1d_mipmap(ctx, pt, face, baseLevel, lastLevel);
1253      break;
1254   case PIPE_TEXTURE_2D:
1255   case PIPE_TEXTURE_CUBE:
1256      make_2d_mipmap(ctx, pt, face, baseLevel, lastLevel);
1257      break;
1258   case PIPE_TEXTURE_3D:
1259      make_3d_mipmap(ctx, pt, face, baseLevel, lastLevel);
1260      break;
1261   default:
1262      assert(0);
1263   }
1264}
1265
1266
1267/**
1268 * Create a mipmap generation context.
1269 * The idea is to create one of these and re-use it each time we need to
1270 * generate a mipmap.
1271 */
1272struct gen_mipmap_state *
1273util_create_gen_mipmap(struct pipe_context *pipe,
1274                       struct cso_context *cso)
1275{
1276   struct gen_mipmap_state *ctx;
1277   uint i;
1278
1279   ctx = CALLOC_STRUCT(gen_mipmap_state);
1280   if (!ctx)
1281      return NULL;
1282
1283   ctx->pipe = pipe;
1284   ctx->cso = cso;
1285
1286   /* disabled blending/masking */
1287   memset(&ctx->blend, 0, sizeof(ctx->blend));
1288   ctx->blend.colormask = PIPE_MASK_RGBA;
1289
1290   /* no-op depth/stencil/alpha */
1291   memset(&ctx->depthstencil, 0, sizeof(ctx->depthstencil));
1292
1293   /* rasterizer */
1294   memset(&ctx->rasterizer, 0, sizeof(ctx->rasterizer));
1295   ctx->rasterizer.front_winding = PIPE_WINDING_CW;
1296   ctx->rasterizer.cull_mode = PIPE_WINDING_NONE;
1297   ctx->rasterizer.bypass_vs_clip_and_viewport = 1;
1298   ctx->rasterizer.gl_rasterization_rules = 1;
1299
1300   /* sampler state */
1301   memset(&ctx->sampler, 0, sizeof(ctx->sampler));
1302   ctx->sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
1303   ctx->sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
1304   ctx->sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
1305   ctx->sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST;
1306   ctx->sampler.normalized_coords = 1;
1307
1308   /* vertex shader - still needed to specify mapping from fragment
1309    * shader input semantics to vertex elements
1310    */
1311   {
1312      const uint semantic_names[] = { TGSI_SEMANTIC_POSITION,
1313                                      TGSI_SEMANTIC_GENERIC };
1314      const uint semantic_indexes[] = { 0, 0 };
1315      ctx->vs = util_make_vertex_passthrough_shader(pipe, 2, semantic_names,
1316                                                    semantic_indexes);
1317   }
1318
1319   /* fragment shader */
1320   ctx->fs = util_make_fragment_tex_shader(pipe);
1321
1322   /* vertex data that doesn't change */
1323   for (i = 0; i < 4; i++) {
1324      ctx->vertices[i][0][2] = 0.0f; /* z */
1325      ctx->vertices[i][0][3] = 1.0f; /* w */
1326      ctx->vertices[i][1][3] = 1.0f; /* q */
1327   }
1328
1329   /* Note: the actual vertex buffer is allocated as needed below */
1330
1331   return ctx;
1332}
1333
1334
1335/**
1336 * Get next "slot" of vertex space in the vertex buffer.
1337 * We're allocating one large vertex buffer and using it piece by piece.
1338 */
1339static unsigned
1340get_next_slot(struct gen_mipmap_state *ctx)
1341{
1342   const unsigned max_slots = 4096 / sizeof ctx->vertices;
1343
1344   if (ctx->vbuf_slot >= max_slots)
1345      util_gen_mipmap_flush( ctx );
1346
1347   if (!ctx->vbuf) {
1348      ctx->vbuf = pipe_buffer_create(ctx->pipe->screen,
1349                                     32,
1350                                     PIPE_BUFFER_USAGE_VERTEX,
1351                                     max_slots * sizeof ctx->vertices);
1352   }
1353
1354   return ctx->vbuf_slot++ * sizeof ctx->vertices;
1355}
1356
1357
1358static unsigned
1359set_vertex_data(struct gen_mipmap_state *ctx,
1360                enum pipe_texture_target tex_target,
1361                uint face, float width, float height)
1362{
1363   unsigned offset;
1364
1365   /* vert[0].position */
1366   ctx->vertices[0][0][0] = 0.0f; /*x*/
1367   ctx->vertices[0][0][1] = 0.0f; /*y*/
1368
1369   /* vert[1].position */
1370   ctx->vertices[1][0][0] = width;
1371   ctx->vertices[1][0][1] = 0.0f;
1372
1373   /* vert[2].position */
1374   ctx->vertices[2][0][0] = width;
1375   ctx->vertices[2][0][1] = height;
1376
1377   /* vert[3].position */
1378   ctx->vertices[3][0][0] = 0.0f;
1379   ctx->vertices[3][0][1] = height;
1380
1381   /* Setup vertex texcoords.  This is a little tricky for cube maps. */
1382   if (tex_target == PIPE_TEXTURE_CUBE) {
1383      static const float st[4][2] = {
1384         {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f}
1385      };
1386      float rx, ry, rz;
1387      uint i;
1388
1389      /* loop over quad verts */
1390      for (i = 0; i < 4; i++) {
1391         /* Compute sc = +/-scale and tc = +/-scale.
1392          * Not +/-1 to avoid cube face selection ambiguity near the edges,
1393          * though that can still sometimes happen with this scale factor...
1394          */
1395         const float scale = 0.9999f;
1396         const float sc = (2.0f * st[i][0] - 1.0f) * scale;
1397         const float tc = (2.0f * st[i][1] - 1.0f) * scale;
1398
1399         switch (face) {
1400         case PIPE_TEX_FACE_POS_X:
1401            rx = 1.0f;
1402            ry = -tc;
1403            rz = -sc;
1404            break;
1405         case PIPE_TEX_FACE_NEG_X:
1406            rx = -1.0f;
1407            ry = -tc;
1408            rz = sc;
1409            break;
1410         case PIPE_TEX_FACE_POS_Y:
1411            rx = sc;
1412            ry = 1.0f;
1413            rz = tc;
1414            break;
1415         case PIPE_TEX_FACE_NEG_Y:
1416            rx = sc;
1417            ry = -1.0f;
1418            rz = -tc;
1419            break;
1420         case PIPE_TEX_FACE_POS_Z:
1421            rx = sc;
1422            ry = -tc;
1423            rz = 1.0f;
1424            break;
1425         case PIPE_TEX_FACE_NEG_Z:
1426            rx = -sc;
1427            ry = -tc;
1428            rz = -1.0f;
1429            break;
1430         default:
1431            rx = ry = rz = 0.0f;
1432            assert(0);
1433         }
1434
1435         ctx->vertices[i][1][0] = rx; /*s*/
1436         ctx->vertices[i][1][1] = ry; /*t*/
1437         ctx->vertices[i][1][2] = rz; /*r*/
1438      }
1439   }
1440   else {
1441      /* 1D/2D */
1442      ctx->vertices[0][1][0] = 0.0f; /*s*/
1443      ctx->vertices[0][1][1] = 0.0f; /*t*/
1444      ctx->vertices[0][1][2] = 0.0f; /*r*/
1445
1446      ctx->vertices[1][1][0] = 1.0f;
1447      ctx->vertices[1][1][1] = 0.0f;
1448      ctx->vertices[1][1][2] = 0.0f;
1449
1450      ctx->vertices[2][1][0] = 1.0f;
1451      ctx->vertices[2][1][1] = 1.0f;
1452      ctx->vertices[2][1][2] = 0.0f;
1453
1454      ctx->vertices[3][1][0] = 0.0f;
1455      ctx->vertices[3][1][1] = 1.0f;
1456      ctx->vertices[3][1][2] = 0.0f;
1457   }
1458
1459   offset = get_next_slot( ctx );
1460
1461   pipe_buffer_write(ctx->pipe->screen, ctx->vbuf,
1462                     offset, sizeof(ctx->vertices), ctx->vertices);
1463
1464   return offset;
1465}
1466
1467
1468
1469/**
1470 * Destroy a mipmap generation context
1471 */
1472void
1473util_destroy_gen_mipmap(struct gen_mipmap_state *ctx)
1474{
1475   struct pipe_context *pipe = ctx->pipe;
1476
1477   pipe->delete_vs_state(pipe, ctx->vs);
1478   pipe->delete_fs_state(pipe, ctx->fs);
1479
1480   pipe_buffer_reference(&ctx->vbuf, NULL);
1481
1482   FREE(ctx);
1483}
1484
1485
1486
1487/* Release vertex buffer at end of frame to avoid synchronous
1488 * rendering.
1489 */
1490void util_gen_mipmap_flush( struct gen_mipmap_state *ctx )
1491{
1492   pipe_buffer_reference(&ctx->vbuf, NULL);
1493   ctx->vbuf_slot = 0;
1494}
1495
1496
1497/**
1498 * Generate mipmap images.  It's assumed all needed texture memory is
1499 * already allocated.
1500 *
1501 * \param pt  the texture to generate mipmap levels for
1502 * \param face  which cube face to generate mipmaps for (0 for non-cube maps)
1503 * \param baseLevel  the first mipmap level to use as a src
1504 * \param lastLevel  the last mipmap level to generate
1505 * \param filter  the minification filter used to generate mipmap levels with
1506 * \param filter  one of PIPE_TEX_FILTER_LINEAR, PIPE_TEX_FILTER_NEAREST
1507 */
1508void
1509util_gen_mipmap(struct gen_mipmap_state *ctx,
1510                struct pipe_texture *pt,
1511                uint face, uint baseLevel, uint lastLevel, uint filter)
1512{
1513   struct pipe_context *pipe = ctx->pipe;
1514   struct pipe_screen *screen = pipe->screen;
1515   struct pipe_framebuffer_state fb;
1516   uint dstLevel;
1517   uint zslice = 0;
1518   uint offset;
1519
1520   /* The texture object should have room for the levels which we're
1521    * about to generate.
1522    */
1523   assert(lastLevel <= pt->last_level);
1524
1525   /* If this fails, why are we here? */
1526   assert(lastLevel > baseLevel);
1527
1528   assert(filter == PIPE_TEX_FILTER_LINEAR ||
1529          filter == PIPE_TEX_FILTER_NEAREST);
1530
1531   /* check if we can render in the texture's format */
1532   if (!screen->is_format_supported(screen, pt->format, PIPE_TEXTURE_2D,
1533                                    PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)) {
1534      fallback_gen_mipmap(ctx, pt, face, baseLevel, lastLevel);
1535      return;
1536   }
1537
1538   /* save state (restored below) */
1539   cso_save_blend(ctx->cso);
1540   cso_save_depth_stencil_alpha(ctx->cso);
1541   cso_save_rasterizer(ctx->cso);
1542   cso_save_samplers(ctx->cso);
1543   cso_save_sampler_textures(ctx->cso);
1544   cso_save_framebuffer(ctx->cso);
1545   cso_save_fragment_shader(ctx->cso);
1546   cso_save_vertex_shader(ctx->cso);
1547
1548   /* bind our state */
1549   cso_set_blend(ctx->cso, &ctx->blend);
1550   cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil);
1551   cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
1552
1553   cso_set_fragment_shader_handle(ctx->cso, ctx->fs);
1554   cso_set_vertex_shader_handle(ctx->cso, ctx->vs);
1555
1556   /* init framebuffer state */
1557   memset(&fb, 0, sizeof(fb));
1558   fb.nr_cbufs = 1;
1559
1560   /* set min/mag to same filter for faster sw speed */
1561   ctx->sampler.mag_img_filter = filter;
1562   ctx->sampler.min_img_filter = filter;
1563
1564   /*
1565    * XXX for small mipmap levels, it may be faster to use the software
1566    * fallback path...
1567    */
1568   for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) {
1569      const uint srcLevel = dstLevel - 1;
1570
1571      struct pipe_surface *surf =
1572         screen->get_tex_surface(screen, pt, face, dstLevel, zslice,
1573                                 PIPE_BUFFER_USAGE_GPU_WRITE);
1574
1575      /*
1576       * Setup framebuffer / dest surface
1577       */
1578      fb.cbufs[0] = surf;
1579      fb.width = u_minify(pt->width0, dstLevel);
1580      fb.height = u_minify(pt->height0, dstLevel);
1581      cso_set_framebuffer(ctx->cso, &fb);
1582
1583      /*
1584       * Setup sampler state
1585       * Note: we should only have to set the min/max LOD clamps to ensure
1586       * we grab texels from the right mipmap level.  But some hardware
1587       * has trouble with min clamping so we also set the lod_bias to
1588       * try to work around that.
1589       */
1590      ctx->sampler.min_lod = ctx->sampler.max_lod = (float) srcLevel;
1591      ctx->sampler.lod_bias = (float) srcLevel;
1592      cso_single_sampler(ctx->cso, 0, &ctx->sampler);
1593      cso_single_sampler_done(ctx->cso);
1594
1595      cso_set_sampler_textures(ctx->cso, 1, &pt);
1596
1597      /* quad coords in window coords (bypassing vs, clip and viewport) */
1598      offset = set_vertex_data(ctx,
1599                               pt->target,
1600                               face,
1601                               (float) u_minify(pt->width0, dstLevel),
1602                               (float) u_minify(pt->height0, dstLevel));
1603
1604      util_draw_vertex_buffer(ctx->pipe,
1605                              ctx->vbuf,
1606                              offset,
1607                              PIPE_PRIM_TRIANGLE_FAN,
1608                              4,  /* verts */
1609                              2); /* attribs/vert */
1610
1611      pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
1612
1613      /* need to signal that the texture has changed _after_ rendering to it */
1614      pipe_surface_reference( &surf, NULL );
1615   }
1616
1617   /* restore state we changed */
1618   cso_restore_blend(ctx->cso);
1619   cso_restore_depth_stencil_alpha(ctx->cso);
1620   cso_restore_rasterizer(ctx->cso);
1621   cso_restore_samplers(ctx->cso);
1622   cso_restore_sampler_textures(ctx->cso);
1623   cso_restore_framebuffer(ctx->cso);
1624   cso_restore_fragment_shader(ctx->cso);
1625   cso_restore_vertex_shader(ctx->cso);
1626}
1627