/* r200_texstate.c revision 85b6d0c4df5358579dafcd3ddf5b1ab60bace38d */
/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_texstate.c,v 1.3 2003/02/15 22:18:47 dawes Exp $ */
/*
Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/
30
/*
 * Authors:
 *   Keith Whitwell <keith@tungstengraphics.com>
 */
35
36#include "glheader.h"
37#include "imports.h"
38#include "context.h"
39#include "macros.h"
40#include "texformat.h"
41#include "enums.h"
42
43#include "r200_context.h"
44#include "r200_state.h"
45#include "r200_ioctl.h"
46#include "r200_swtcl.h"
47#include "r200_tex.h"
48#include "r200_tcl.h"
49
50
/* Aliases that give every Mesa format name used in tx_table a matching
 * R200_TXFORMAT_<name> token, so the table macros can build the hardware
 * format by token pasting (R200_TXFORMAT_ ## f).
 */
#define R200_TXFORMAT_A8        R200_TXFORMAT_I8
#define R200_TXFORMAT_L8        R200_TXFORMAT_I8
#define R200_TXFORMAT_AL88      R200_TXFORMAT_AI88
#define R200_TXFORMAT_YCBCR     R200_TXFORMAT_YVYU422
#define R200_TXFORMAT_YCBCR_REV R200_TXFORMAT_VYUY422
#define R200_TXFORMAT_RGB_DXT1  R200_TXFORMAT_DXT1
#define R200_TXFORMAT_RGBA_DXT1 R200_TXFORMAT_DXT1
#define R200_TXFORMAT_RGBA_DXT3 R200_TXFORMAT_DXT23
#define R200_TXFORMAT_RGBA_DXT5 R200_TXFORMAT_DXT45
60
/* Designated-initializer helpers for tx_table.  Each one expands to a
 * single table entry, indexed by MESA_FORMAT_<f>, holding the
 * { format, filter } pair for that Mesa format:
 *
 *   _COLOR / _COLOR_REV  - plain hardware format, no filter bits
 *   _ALPHA / _ALPHA_REV  - hardware format with R200_TXFORMAT_ALPHA_IN_MAP
 *   _YUV                 - hardware format plus R200_YUV_TO_RGB filter bit
 *   _INVALID             - format 0xffffffff, rejected by VALID_FORMAT
 *
 * The _REV variants index the table by MESA_FORMAT_<f>_REV but use the
 * same R200_TXFORMAT_<f> value as the non-reversed entry.
 */
#define _COLOR(f) \
    [ MESA_FORMAT_ ## f ] = { R200_TXFORMAT_ ## f, 0 }
#define _COLOR_REV(f) \
    [ MESA_FORMAT_ ## f ## _REV ] = { R200_TXFORMAT_ ## f, 0 }
#define _ALPHA(f) \
    [ MESA_FORMAT_ ## f ] = { (R200_TXFORMAT_ ## f | R200_TXFORMAT_ALPHA_IN_MAP), 0 }
#define _ALPHA_REV(f) \
    [ MESA_FORMAT_ ## f ## _REV ] = { (R200_TXFORMAT_ ## f | R200_TXFORMAT_ALPHA_IN_MAP), 0 }
#define _YUV(f) \
    [ MESA_FORMAT_ ## f ] = { R200_TXFORMAT_ ## f, R200_YUV_TO_RGB }
#define _INVALID(f) \
    [ MESA_FORMAT_ ## f ] = { 0xffffffff, 0 }

/* True when f is within the range covered by tx_table (up to and
 * including MESA_FORMAT_RGBA_DXT5) and its entry is not marked invalid.
 * Note that f is evaluated twice.
 */
#define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5) \
                          && (tx_table[(f)].format != 0xffffffff) )
75
76static const struct {
77   GLuint format, filter;
78}
79tx_table[] =
80{
81   _ALPHA(RGBA8888),
82   _ALPHA_REV(RGBA8888),
83   _ALPHA(ARGB8888),
84   _ALPHA_REV(ARGB8888),
85   _INVALID(RGB888),
86   _COLOR(RGB565),
87   _COLOR_REV(RGB565),
88   _ALPHA(ARGB4444),
89   _ALPHA_REV(ARGB4444),
90   _ALPHA(ARGB1555),
91   _ALPHA_REV(ARGB1555),
92   _ALPHA(AL88),
93   _ALPHA_REV(AL88),
94   _ALPHA(A8),
95   _COLOR(L8),
96   _ALPHA(I8),
97   _INVALID(CI8),
98   _YUV(YCBCR),
99   _YUV(YCBCR_REV),
100   _INVALID(RGB_FXT1),
101   _INVALID(RGBA_FXT1),
102   _COLOR(RGB_DXT1),
103   _ALPHA(RGBA_DXT1),
104   _ALPHA(RGBA_DXT3),
105   _ALPHA(RGBA_DXT5),
106};
107
/* The entry-building macros are only meant for the tx_table initializer;
 * drop all of them so none leaks into the rest of the file.
 * NOTE(review): assumes nothing later in the file uses _COLOR_REV,
 * _ALPHA_REV or _YUV - confirm before merging.
 */
#undef _COLOR
#undef _COLOR_REV
#undef _ALPHA
#undef _ALPHA_REV
#undef _YUV
#undef _INVALID
111
112/**
113 * This function computes the number of bytes of storage needed for
114 * the given texture object (all mipmap levels, all cube faces).
115 * The \c image[face][level].x/y/width/height parameters for upload/blitting
116 * are computed here.  \c pp_txfilter, \c pp_txformat, etc. will be set here
117 * too.
118 *
119 * \param rmesa Context pointer
120 * \param tObj GL texture object whose images are to be posted to
121 *                 hardware state.
122 */
123static void r200SetTexImages( r200ContextPtr rmesa,
124			      struct gl_texture_object *tObj )
125{
126   r200TexObjPtr t = (r200TexObjPtr)tObj->DriverData;
127   const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
128   GLint curOffset, blitWidth;
129   GLint i, texelBytes;
130   GLint numLevels;
131   GLint log2Width, log2Height, log2Depth;
132
133   /* Set the hardware texture format
134    */
135
136   t->pp_txformat &= ~(R200_TXFORMAT_FORMAT_MASK |
137		       R200_TXFORMAT_ALPHA_IN_MAP);
138   t->pp_txfilter &= ~R200_YUV_TO_RGB;
139
140   if ( VALID_FORMAT( baseImage->TexFormat->MesaFormat ) ) {
141      t->pp_txformat |= tx_table[ baseImage->TexFormat->MesaFormat ].format;
142      t->pp_txfilter |= tx_table[ baseImage->TexFormat->MesaFormat ].filter;
143   }
144   else {
145      _mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__);
146      return;
147   }
148
149   texelBytes = baseImage->TexFormat->TexelBytes;
150
151   /* Compute which mipmap levels we really want to send to the hardware.
152    */
153
154   driCalculateTextureFirstLastLevel( (driTextureObject *) t );
155   log2Width  = tObj->Image[0][t->base.firstLevel]->WidthLog2;
156   log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2;
157   log2Depth  = tObj->Image[0][t->base.firstLevel]->DepthLog2;
158
159   numLevels = t->base.lastLevel - t->base.firstLevel + 1;
160
161   assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS);
162
163   /* Calculate mipmap offsets and dimensions for blitting (uploading)
164    * The idea is that we lay out the mipmap levels within a block of
165    * memory organized as a rectangle of width BLIT_WIDTH_BYTES.
166    */
167   curOffset = 0;
168   blitWidth = BLIT_WIDTH_BYTES;
169   t->tile_bits = 0;
170
171   /* figure out if this texture is suitable for tiling. */
172   if (texelBytes) {
173      if (rmesa->texmicrotile  && (tObj->Target != GL_TEXTURE_RECTANGLE_NV) &&
174      /* texrect might be able to use micro tiling too in theory? */
175	 (baseImage->Height > 1)) {
176	 /* allow 32 (bytes) x 1 mip (which will use two times the space
177	 the non-tiled version would use) max if base texture is large enough */
178	 if ((numLevels == 1) ||
179	   (((baseImage->Width * texelBytes / baseImage->Height) <= 32) &&
180	       (baseImage->Width * texelBytes > 64)) ||
181	    ((baseImage->Width * texelBytes / baseImage->Height) <= 16)) {
182	    t->tile_bits |= R200_TXO_MICRO_TILE;
183	 }
184      }
185      if (tObj->Target != GL_TEXTURE_RECTANGLE_NV) {
186	 /* we can set macro tiling even for small textures, they will be untiled anyway */
187	 t->tile_bits |= R200_TXO_MACRO_TILE;
188      }
189   }
190
191   for (i = 0; i < numLevels; i++) {
192      const struct gl_texture_image *texImage;
193      GLuint size;
194
195      texImage = tObj->Image[0][i + t->base.firstLevel];
196      if ( !texImage )
197	 break;
198
199      /* find image size in bytes */
200      if (texImage->IsCompressed) {
201      /* need to calculate the size AFTER padding even though the texture is
202         submitted without padding.
203         Only handle pot textures currently - don't know if npot is even possible,
204         size calculation would certainly need (trivial) adjustments.
205         Align (and later pad) to 32byte, not sure what that 64byte blit width is
206         good for? */
207         if ((t->pp_txformat & R200_TXFORMAT_FORMAT_MASK) == R200_TXFORMAT_DXT1) {
208            /* RGB_DXT1/RGBA_DXT1, 8 bytes per block */
209            if ((texImage->Width + 3) < 8) /* width one block */
210               size = texImage->CompressedSize * 4;
211            else if ((texImage->Width + 3) < 16)
212               size = texImage->CompressedSize * 2;
213            else size = texImage->CompressedSize;
214         }
215         else /* DXT3/5, 16 bytes per block */
216            if ((texImage->Width + 3) < 8)
217               size = texImage->CompressedSize * 2;
218            else size = texImage->CompressedSize;
219      }
220      else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
221	 size = ((texImage->Width * texelBytes + 63) & ~63) * texImage->Height;
222      }
223      else if (t->tile_bits & R200_TXO_MICRO_TILE) {
224	 /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
225	    though the actual offset may be different (if texture is less than
226	    32 bytes width) to the untiled case */
227	 int w = (texImage->Width * texelBytes * 2 + 31) & ~31;
228	 size = (w * ((texImage->Height + 1) / 2)) * texImage->Depth;
229	 blitWidth = MAX2(texImage->Width, 64 / texelBytes);
230      }
231      else {
232	 int w = (texImage->Width * texelBytes + 31) & ~31;
233	 size = w * texImage->Height * texImage->Depth;
234	 blitWidth = MAX2(texImage->Width, 64 / texelBytes);
235      }
236      assert(size > 0);
237
238      /* Align to 32-byte offset.  It is faster to do this unconditionally
239       * (no branch penalty).
240       */
241
242      curOffset = (curOffset + 0x1f) & ~0x1f;
243
244      if (texelBytes) {
245	 t->image[0][i].x = curOffset; /* fix x and y coords up later together with offset */
246	 t->image[0][i].y = 0;
247	 t->image[0][i].width = MIN2(size / texelBytes, blitWidth);
248	 t->image[0][i].height = (size / texelBytes) / t->image[0][i].width;
249      }
250      else {
251         t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES;
252         t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES;
253         t->image[0][i].width  = MIN2(size, BLIT_WIDTH_BYTES);
254         t->image[0][i].height = size / t->image[0][i].width;
255      }
256
257#if 0
258      /* for debugging only and only  applicable to non-rectangle targets */
259      assert(size % t->image[0][i].width == 0);
260      assert(t->image[0][i].x == 0
261             || (size < BLIT_WIDTH_BYTES && t->image[0][i].height == 1));
262#endif
263
264      if (0)
265         fprintf(stderr,
266                 "level %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n",
267                 i, texImage->Width, texImage->Height,
268                 t->image[0][i].x, t->image[0][i].y,
269                 t->image[0][i].width, t->image[0][i].height, size, curOffset);
270
271      curOffset += size;
272
273   }
274
275   /* Align the total size of texture memory block.
276    */
277   t->base.totalSize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
278
279   /* Setup remaining cube face blits, if needed */
280   if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
281      const GLuint faceSize = t->base.totalSize;
282      GLuint face;
283      /* reuse face 0 x/y/width/height - just update the offset when uploading */
284      for (face = 1; face < 6; face++) {
285         for (i = 0; i < numLevels; i++) {
286            t->image[face][i].x =  t->image[0][i].x;
287            t->image[face][i].y =  t->image[0][i].y;
288            t->image[face][i].width  = t->image[0][i].width;
289            t->image[face][i].height = t->image[0][i].height;
290         }
291      }
292      t->base.totalSize = 6 * faceSize; /* total texmem needed */
293   }
294
295
296   /* Hardware state:
297    */
298   t->pp_txfilter &= ~R200_MAX_MIP_LEVEL_MASK;
299   t->pp_txfilter |= (numLevels - 1) << R200_MAX_MIP_LEVEL_SHIFT;
300
301   t->pp_txformat &= ~(R200_TXFORMAT_WIDTH_MASK |
302		       R200_TXFORMAT_HEIGHT_MASK |
303                       R200_TXFORMAT_CUBIC_MAP_ENABLE |
304                       R200_TXFORMAT_F5_WIDTH_MASK |
305                       R200_TXFORMAT_F5_HEIGHT_MASK);
306   t->pp_txformat |= ((log2Width << R200_TXFORMAT_WIDTH_SHIFT) |
307		      (log2Height << R200_TXFORMAT_HEIGHT_SHIFT));
308
309   t->pp_txformat_x &= ~(R200_DEPTH_LOG2_MASK | R200_TEXCOORD_MASK);
310   if (tObj->Target == GL_TEXTURE_3D) {
311      t->pp_txformat_x |= (log2Depth << R200_DEPTH_LOG2_SHIFT);
312      t->pp_txformat_x |= R200_TEXCOORD_VOLUME;
313   }
314   else if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
315      ASSERT(log2Width == log2Height);
316      t->pp_txformat |= ((log2Width << R200_TXFORMAT_F5_WIDTH_SHIFT) |
317                         (log2Height << R200_TXFORMAT_F5_HEIGHT_SHIFT) |
318/* don't think we need this bit, if it exists at all - fglrx does not set it */
319                         (R200_TXFORMAT_CUBIC_MAP_ENABLE));
320      t->pp_txformat_x |= R200_TEXCOORD_CUBIC_ENV;
321      t->pp_cubic_faces = ((log2Width << R200_FACE_WIDTH_1_SHIFT) |
322                           (log2Height << R200_FACE_HEIGHT_1_SHIFT) |
323                           (log2Width << R200_FACE_WIDTH_2_SHIFT) |
324                           (log2Height << R200_FACE_HEIGHT_2_SHIFT) |
325                           (log2Width << R200_FACE_WIDTH_3_SHIFT) |
326                           (log2Height << R200_FACE_HEIGHT_3_SHIFT) |
327                           (log2Width << R200_FACE_WIDTH_4_SHIFT) |
328                           (log2Height << R200_FACE_HEIGHT_4_SHIFT));
329   }
330   else {
331      /* If we don't in fact send enough texture coordinates, q will be 1,
332       * making TEXCOORD_PROJ act like TEXCOORD_NONPROJ (Right?)
333       */
334      t->pp_txformat_x |= R200_TEXCOORD_PROJ;
335   }
336
337   t->pp_txsize = (((tObj->Image[0][t->base.firstLevel]->Width - 1) << 0) |
338                   ((tObj->Image[0][t->base.firstLevel]->Height - 1) << 16));
339
340   /* Only need to round to nearest 32 for textures, but the blitter
341    * requires 64-byte aligned pitches, and we may/may not need the
342    * blitter.   NPOT only!
343    */
344   if (baseImage->IsCompressed)
345      t->pp_txpitch = (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63);
346   else
347      t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63);
348   t->pp_txpitch -= 32;
349
350   t->dirty_state = TEX_ALL;
351
352   /* FYI: r200UploadTexImages( rmesa, t ) used to be called here */
353}
354
355
356
/* ================================================================
 * Texture combine functions
 */

/* GL_ARB_texture_env_combine support
 */
363
364/* The color tables have combine functions for GL_SRC_COLOR,
365 * GL_ONE_MINUS_SRC_COLOR, GL_SRC_ALPHA and GL_ONE_MINUS_SRC_ALPHA.
366 */
367static GLuint r200_register_color[][R200_MAX_TEXTURE_UNITS] =
368{
369   {
370      R200_TXC_ARG_A_R0_COLOR,
371      R200_TXC_ARG_A_R1_COLOR,
372      R200_TXC_ARG_A_R2_COLOR,
373      R200_TXC_ARG_A_R3_COLOR,
374      R200_TXC_ARG_A_R4_COLOR,
375      R200_TXC_ARG_A_R5_COLOR
376   },
377   {
378      R200_TXC_ARG_A_R0_COLOR | R200_TXC_COMP_ARG_A,
379      R200_TXC_ARG_A_R1_COLOR | R200_TXC_COMP_ARG_A,
380      R200_TXC_ARG_A_R2_COLOR | R200_TXC_COMP_ARG_A,
381      R200_TXC_ARG_A_R3_COLOR | R200_TXC_COMP_ARG_A,
382      R200_TXC_ARG_A_R4_COLOR | R200_TXC_COMP_ARG_A,
383      R200_TXC_ARG_A_R5_COLOR | R200_TXC_COMP_ARG_A
384   },
385   {
386      R200_TXC_ARG_A_R0_ALPHA,
387      R200_TXC_ARG_A_R1_ALPHA,
388      R200_TXC_ARG_A_R2_ALPHA,
389      R200_TXC_ARG_A_R3_ALPHA,
390      R200_TXC_ARG_A_R4_ALPHA,
391      R200_TXC_ARG_A_R5_ALPHA
392   },
393   {
394      R200_TXC_ARG_A_R0_ALPHA | R200_TXC_COMP_ARG_A,
395      R200_TXC_ARG_A_R1_ALPHA | R200_TXC_COMP_ARG_A,
396      R200_TXC_ARG_A_R2_ALPHA | R200_TXC_COMP_ARG_A,
397      R200_TXC_ARG_A_R3_ALPHA | R200_TXC_COMP_ARG_A,
398      R200_TXC_ARG_A_R4_ALPHA | R200_TXC_COMP_ARG_A,
399      R200_TXC_ARG_A_R5_ALPHA | R200_TXC_COMP_ARG_A
400   },
401};
402
403static GLuint r200_tfactor_color[] =
404{
405   R200_TXC_ARG_A_TFACTOR_COLOR,
406   R200_TXC_ARG_A_TFACTOR_COLOR | R200_TXC_COMP_ARG_A,
407   R200_TXC_ARG_A_TFACTOR_ALPHA,
408   R200_TXC_ARG_A_TFACTOR_ALPHA | R200_TXC_COMP_ARG_A
409};
410
411static GLuint r200_tfactor1_color[] =
412{
413   R200_TXC_ARG_A_TFACTOR1_COLOR,
414   R200_TXC_ARG_A_TFACTOR1_COLOR | R200_TXC_COMP_ARG_A,
415   R200_TXC_ARG_A_TFACTOR1_ALPHA,
416   R200_TXC_ARG_A_TFACTOR1_ALPHA | R200_TXC_COMP_ARG_A
417};
418
419static GLuint r200_primary_color[] =
420{
421   R200_TXC_ARG_A_DIFFUSE_COLOR,
422   R200_TXC_ARG_A_DIFFUSE_COLOR | R200_TXC_COMP_ARG_A,
423   R200_TXC_ARG_A_DIFFUSE_ALPHA,
424   R200_TXC_ARG_A_DIFFUSE_ALPHA | R200_TXC_COMP_ARG_A
425};
426
427/* GL_ZERO table - indices 0-3
428 * GL_ONE  table - indices 1-4
429 */
430static GLuint r200_zero_color[] =
431{
432   R200_TXC_ARG_A_ZERO,
433   R200_TXC_ARG_A_ZERO | R200_TXC_COMP_ARG_A,
434   R200_TXC_ARG_A_ZERO,
435   R200_TXC_ARG_A_ZERO | R200_TXC_COMP_ARG_A,
436   R200_TXC_ARG_A_ZERO
437};
438
439/* The alpha tables only have GL_SRC_ALPHA and GL_ONE_MINUS_SRC_ALPHA.
440 */
441static GLuint r200_register_alpha[][R200_MAX_TEXTURE_UNITS] =
442{
443   {
444      R200_TXA_ARG_A_R0_ALPHA,
445      R200_TXA_ARG_A_R1_ALPHA,
446      R200_TXA_ARG_A_R2_ALPHA,
447      R200_TXA_ARG_A_R3_ALPHA,
448      R200_TXA_ARG_A_R4_ALPHA,
449      R200_TXA_ARG_A_R5_ALPHA
450   },
451   {
452      R200_TXA_ARG_A_R0_ALPHA | R200_TXA_COMP_ARG_A,
453      R200_TXA_ARG_A_R1_ALPHA | R200_TXA_COMP_ARG_A,
454      R200_TXA_ARG_A_R2_ALPHA | R200_TXA_COMP_ARG_A,
455      R200_TXA_ARG_A_R3_ALPHA | R200_TXA_COMP_ARG_A,
456      R200_TXA_ARG_A_R4_ALPHA | R200_TXA_COMP_ARG_A,
457      R200_TXA_ARG_A_R5_ALPHA | R200_TXA_COMP_ARG_A
458   },
459};
460
461static GLuint r200_tfactor_alpha[] =
462{
463   R200_TXA_ARG_A_TFACTOR_ALPHA,
464   R200_TXA_ARG_A_TFACTOR_ALPHA | R200_TXA_COMP_ARG_A
465};
466
467static GLuint r200_tfactor1_alpha[] =
468{
469   R200_TXA_ARG_A_TFACTOR1_ALPHA,
470   R200_TXA_ARG_A_TFACTOR1_ALPHA | R200_TXA_COMP_ARG_A
471};
472
473static GLuint r200_primary_alpha[] =
474{
475   R200_TXA_ARG_A_DIFFUSE_ALPHA,
476   R200_TXA_ARG_A_DIFFUSE_ALPHA | R200_TXA_COMP_ARG_A
477};
478
479/* GL_ZERO table - indices 0-1
480 * GL_ONE  table - indices 1-2
481 */
482static GLuint r200_zero_alpha[] =
483{
484   R200_TXA_ARG_A_ZERO,
485   R200_TXA_ARG_A_ZERO | R200_TXA_COMP_ARG_A,
486   R200_TXA_ARG_A_ZERO,
487};
488
489
/* Extract the arg from slot A, shift it into the correct argument slot
 * and set the corresponding complement bit.
 *
 * Relies on two locals of the expanding function: color_arg[] (values
 * encoded for slot A) and color_combine (the word being built, which
 * must already be initialized because the macro only ORs into it).
 * "arg" is the destination slot letter pasted into the shift names.
 */
#define R200_COLOR_ARG( n, arg )                        \
do {                                                    \
   color_combine |=                                     \
      ((color_arg[n] & R200_TXC_ARG_A_MASK)             \
       << R200_TXC_ARG_##arg##_SHIFT);                  \
   color_combine |=                                     \
      ((color_arg[n] >> R200_TXC_COMP_ARG_A_SHIFT)      \
       << R200_TXC_COMP_ARG_##arg##_SHIFT);             \
} while (0)
502
/* Alpha counterpart of R200_COLOR_ARG: moves alpha_arg[n] (encoded for
 * slot A) and its complement bit into slot "arg" of alpha_combine.
 * Relies on the locals alpha_arg[] and alpha_combine of the expanding
 * function; alpha_combine must already be initialized.
 */
#define R200_ALPHA_ARG( n, arg )                        \
do {                                                    \
   alpha_combine |=                                     \
      ((alpha_arg[n] & R200_TXA_ARG_A_MASK)             \
       << R200_TXA_ARG_##arg##_SHIFT);                  \
   alpha_combine |=                                     \
      ((alpha_arg[n] >> R200_TXA_COMP_ARG_A_SHIFT)      \
       << R200_TXA_COMP_ARG_##arg##_SHIFT);             \
} while (0)
512
513
/* ================================================================
 * Texture unit state management
 */
517
518static GLboolean r200UpdateTextureEnv( GLcontext *ctx, int unit, int slot, GLuint replaceargs )
519{
520   r200ContextPtr rmesa = R200_CONTEXT(ctx);
521   const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
522   GLuint color_combine, alpha_combine;
523   GLuint color_scale = rmesa->hw.pix[slot].cmd[PIX_PP_TXCBLEND2] &
524      ~(R200_TXC_SCALE_MASK | R200_TXC_OUTPUT_REG_MASK | R200_TXC_TFACTOR_SEL_MASK |
525	R200_TXC_TFACTOR1_SEL_MASK);
526   GLuint alpha_scale = rmesa->hw.pix[slot].cmd[PIX_PP_TXABLEND2] &
527      ~(R200_TXA_DOT_ALPHA | R200_TXA_SCALE_MASK | R200_TXA_OUTPUT_REG_MASK |
528	R200_TXA_TFACTOR_SEL_MASK | R200_TXA_TFACTOR1_SEL_MASK);
529
530   /* texUnit->_Current can be NULL if and only if the texture unit is
531    * not actually enabled.
532    */
533   assert( (texUnit->_ReallyEnabled == 0)
534	   || (texUnit->_Current != NULL) );
535
536   if ( R200_DEBUG & DEBUG_TEXTURE ) {
537      fprintf( stderr, "%s( %p, %d )\n", __FUNCTION__, (void *)ctx, unit );
538   }
539
540   /* Set the texture environment state.  Isn't this nice and clean?
541    * The chip will automagically set the texture alpha to 0xff when
542    * the texture format does not include an alpha component.  This
543    * reduces the amount of special-casing we have to do, alpha-only
544    * textures being a notable exception.
545    */
546
547   color_scale |= ((rmesa->state.texture.unit[unit].outputreg + 1) << R200_TXC_OUTPUT_REG_SHIFT) |
548			(unit << R200_TXC_TFACTOR_SEL_SHIFT) |
549			(replaceargs << R200_TXC_TFACTOR1_SEL_SHIFT);
550   alpha_scale |= ((rmesa->state.texture.unit[unit].outputreg + 1) << R200_TXA_OUTPUT_REG_SHIFT) |
551			(unit << R200_TXA_TFACTOR_SEL_SHIFT) |
552			(replaceargs << R200_TXA_TFACTOR1_SEL_SHIFT);
553
554   if ( !texUnit->_ReallyEnabled ) {
555      assert( unit == 0);
556      color_combine = R200_TXC_ARG_A_ZERO | R200_TXC_ARG_B_ZERO
557	  | R200_TXC_ARG_C_DIFFUSE_COLOR | R200_TXC_OP_MADD;
558      alpha_combine = R200_TXA_ARG_A_ZERO | R200_TXA_ARG_B_ZERO
559	  | R200_TXA_ARG_C_DIFFUSE_ALPHA | R200_TXA_OP_MADD;
560   }
561   else {
562      GLuint color_arg[3], alpha_arg[3];
563      GLuint i;
564      const GLuint numColorArgs = texUnit->_CurrentCombine->_NumArgsRGB;
565      const GLuint numAlphaArgs = texUnit->_CurrentCombine->_NumArgsA;
566      GLuint RGBshift = texUnit->_CurrentCombine->ScaleShiftRGB;
567      GLuint Ashift = texUnit->_CurrentCombine->ScaleShiftA;
568
569
570      const GLint replaceoprgb =
571	 ctx->Texture.Unit[replaceargs]._CurrentCombine->OperandRGB[0] - GL_SRC_COLOR;
572      const GLint replaceopa =
573	 ctx->Texture.Unit[replaceargs]._CurrentCombine->OperandA[0] - GL_SRC_ALPHA;
574
575      /* Step 1:
576       * Extract the color and alpha combine function arguments.
577       */
578      for ( i = 0 ; i < numColorArgs ; i++ ) {
579	 GLint op = texUnit->_CurrentCombine->OperandRGB[i] - GL_SRC_COLOR;
580	 const GLint srcRGBi = texUnit->_CurrentCombine->SourceRGB[i];
581	 assert(op >= 0);
582	 assert(op <= 3);
583	 switch ( srcRGBi ) {
584	 case GL_TEXTURE:
585	    color_arg[i] = r200_register_color[op][unit];
586	    break;
587	 case GL_CONSTANT:
588	    color_arg[i] = r200_tfactor_color[op];
589	    break;
590	 case GL_PRIMARY_COLOR:
591	    color_arg[i] = r200_primary_color[op];
592	    break;
593	 case GL_PREVIOUS:
594	    if (replaceargs != unit) {
595	       const GLint srcRGBreplace =
596		  ctx->Texture.Unit[replaceargs]._CurrentCombine->SourceRGB[0];
597	       if (op >= 2) {
598		  op = op ^ replaceopa;
599	       }
600	       else {
601		  op = op ^ replaceoprgb;
602	       }
603	       switch (srcRGBreplace) {
604	       case GL_TEXTURE:
605		  color_arg[i] = r200_register_color[op][replaceargs];
606		  break;
607	       case GL_CONSTANT:
608		  color_arg[i] = r200_tfactor1_color[op];
609		  break;
610	       case GL_PRIMARY_COLOR:
611		  color_arg[i] = r200_primary_color[op];
612		  break;
613	       case GL_PREVIOUS:
614		  if (slot == 0)
615		     color_arg[i] = r200_primary_color[op];
616		  else
617		     color_arg[i] = r200_register_color[op]
618			[rmesa->state.texture.unit[replaceargs - 1].outputreg];
619		  break;
620	       case GL_ZERO:
621		  color_arg[i] = r200_zero_color[op];
622		  break;
623	       case GL_ONE:
624		  color_arg[i] = r200_zero_color[op+1];
625		  break;
626	       case GL_TEXTURE0:
627	       case GL_TEXTURE1:
628	       case GL_TEXTURE2:
629	       case GL_TEXTURE3:
630	       case GL_TEXTURE4:
631	       case GL_TEXTURE5:
632		  color_arg[i] = r200_register_color[op][srcRGBreplace - GL_TEXTURE0];
633		  break;
634	       default:
635	       return GL_FALSE;
636	       }
637	    }
638	    else {
639	       if (slot == 0)
640		  color_arg[i] = r200_primary_color[op];
641	       else
642		  color_arg[i] = r200_register_color[op]
643		     [rmesa->state.texture.unit[unit - 1].outputreg];
644            }
645	    break;
646	 case GL_ZERO:
647	    color_arg[i] = r200_zero_color[op];
648	    break;
649	 case GL_ONE:
650	    color_arg[i] = r200_zero_color[op+1];
651	    break;
652	 case GL_TEXTURE0:
653	 case GL_TEXTURE1:
654	 case GL_TEXTURE2:
655	 case GL_TEXTURE3:
656	 case GL_TEXTURE4:
657	 case GL_TEXTURE5:
658	    color_arg[i] = r200_register_color[op][srcRGBi - GL_TEXTURE0];
659	    break;
660	 default:
661	    return GL_FALSE;
662	 }
663      }
664
665      for ( i = 0 ; i < numAlphaArgs ; i++ ) {
666	 GLint op = texUnit->_CurrentCombine->OperandA[i] - GL_SRC_ALPHA;
667	 const GLint srcAi = texUnit->_CurrentCombine->SourceA[i];
668	 assert(op >= 0);
669	 assert(op <= 1);
670	 switch ( srcAi ) {
671	 case GL_TEXTURE:
672	    alpha_arg[i] = r200_register_alpha[op][unit];
673	    break;
674	 case GL_CONSTANT:
675	    alpha_arg[i] = r200_tfactor_alpha[op];
676	    break;
677	 case GL_PRIMARY_COLOR:
678	    alpha_arg[i] = r200_primary_alpha[op];
679	    break;
680	 case GL_PREVIOUS:
681	    if (replaceargs != unit) {
682	       const GLint srcAreplace =
683		  ctx->Texture.Unit[replaceargs]._CurrentCombine->SourceA[0];
684	       op = op ^ replaceopa;
685	       switch (srcAreplace) {
686	       case GL_TEXTURE:
687		  alpha_arg[i] = r200_register_alpha[op][replaceargs];
688		  break;
689	       case GL_CONSTANT:
690		  alpha_arg[i] = r200_tfactor1_alpha[op];
691		  break;
692	       case GL_PRIMARY_COLOR:
693		  alpha_arg[i] = r200_primary_alpha[op];
694		  break;
695	       case GL_PREVIOUS:
696		  if (slot == 0)
697		     alpha_arg[i] = r200_primary_alpha[op];
698		  else
699		     alpha_arg[i] = r200_register_alpha[op]
700			[rmesa->state.texture.unit[replaceargs - 1].outputreg];
701		  break;
702	       case GL_ZERO:
703		  alpha_arg[i] = r200_zero_alpha[op];
704		  break;
705	       case GL_ONE:
706		  alpha_arg[i] = r200_zero_alpha[op+1];
707		  break;
708	       case GL_TEXTURE0:
709	       case GL_TEXTURE1:
710	       case GL_TEXTURE2:
711	       case GL_TEXTURE3:
712	       case GL_TEXTURE4:
713	       case GL_TEXTURE5:
714		  alpha_arg[i] = r200_register_alpha[op][srcAreplace - GL_TEXTURE0];
715		  break;
716	       default:
717	       return GL_FALSE;
718	       }
719	    }
720	    else {
721	       if (slot == 0)
722		  alpha_arg[i] = r200_primary_alpha[op];
723	       else
724		  alpha_arg[i] = r200_register_alpha[op]
725		    [rmesa->state.texture.unit[unit - 1].outputreg];
726            }
727	    break;
728	 case GL_ZERO:
729	    alpha_arg[i] = r200_zero_alpha[op];
730	    break;
731	 case GL_ONE:
732	    alpha_arg[i] = r200_zero_alpha[op+1];
733	    break;
734	 case GL_TEXTURE0:
735	 case GL_TEXTURE1:
736	 case GL_TEXTURE2:
737	 case GL_TEXTURE3:
738	 case GL_TEXTURE4:
739	 case GL_TEXTURE5:
740	    alpha_arg[i] = r200_register_alpha[op][srcAi - GL_TEXTURE0];
741	    break;
742	 default:
743	    return GL_FALSE;
744	 }
745      }
746
747      /* Step 2:
748       * Build up the color and alpha combine functions.
749       */
750      switch ( texUnit->_CurrentCombine->ModeRGB ) {
751      case GL_REPLACE:
752	 color_combine = (R200_TXC_ARG_A_ZERO |
753			  R200_TXC_ARG_B_ZERO |
754			  R200_TXC_OP_MADD);
755	 R200_COLOR_ARG( 0, C );
756	 break;
757      case GL_MODULATE:
758	 color_combine = (R200_TXC_ARG_C_ZERO |
759			  R200_TXC_OP_MADD);
760	 R200_COLOR_ARG( 0, A );
761	 R200_COLOR_ARG( 1, B );
762	 break;
763      case GL_ADD:
764	 color_combine = (R200_TXC_ARG_B_ZERO |
765			  R200_TXC_COMP_ARG_B |
766			  R200_TXC_OP_MADD);
767	 R200_COLOR_ARG( 0, A );
768	 R200_COLOR_ARG( 1, C );
769	 break;
770      case GL_ADD_SIGNED:
771	 color_combine = (R200_TXC_ARG_B_ZERO |
772			  R200_TXC_COMP_ARG_B |
773			  R200_TXC_BIAS_ARG_C |	/* new */
774			  R200_TXC_OP_MADD); /* was ADDSIGNED */
775	 R200_COLOR_ARG( 0, A );
776	 R200_COLOR_ARG( 1, C );
777	 break;
778      case GL_SUBTRACT:
779	 color_combine = (R200_TXC_ARG_B_ZERO |
780			  R200_TXC_COMP_ARG_B |
781			  R200_TXC_NEG_ARG_C |
782			  R200_TXC_OP_MADD);
783	 R200_COLOR_ARG( 0, A );
784	 R200_COLOR_ARG( 1, C );
785	 break;
786      case GL_INTERPOLATE:
787	 color_combine = (R200_TXC_OP_LERP);
788	 R200_COLOR_ARG( 0, B );
789	 R200_COLOR_ARG( 1, A );
790	 R200_COLOR_ARG( 2, C );
791	 break;
792
793      case GL_DOT3_RGB_EXT:
794      case GL_DOT3_RGBA_EXT:
795	 /* The EXT version of the DOT3 extension does not support the
796	  * scale factor, but the ARB version (and the version in OpenGL
797	  * 1.3) does.
798	  */
799	 RGBshift = 0;
800	 /* FALLTHROUGH */
801
802      case GL_DOT3_RGB:
803      case GL_DOT3_RGBA:
804	 /* DOT3 works differently on R200 than on R100.  On R100, just
805	  * setting the DOT3 mode did everything for you.  On R200, the
806	  * driver has to enable the biasing and scale in the inputs to
807	  * put them in the proper [-1,1] range.  This is what the 4x and
808	  * the -0.5 in the DOT3 spec do.  The post-scale is then set
809	  * normally.
810	  */
811
812	 color_combine = (R200_TXC_ARG_C_ZERO |
813			  R200_TXC_OP_DOT3 |
814			  R200_TXC_BIAS_ARG_A |
815			  R200_TXC_BIAS_ARG_B |
816			  R200_TXC_SCALE_ARG_A |
817			  R200_TXC_SCALE_ARG_B);
818	 R200_COLOR_ARG( 0, A );
819	 R200_COLOR_ARG( 1, B );
820	 break;
821
822      case GL_MODULATE_ADD_ATI:
823	 color_combine = (R200_TXC_OP_MADD);
824	 R200_COLOR_ARG( 0, A );
825	 R200_COLOR_ARG( 1, C );
826	 R200_COLOR_ARG( 2, B );
827	 break;
828      case GL_MODULATE_SIGNED_ADD_ATI:
829	 color_combine = (R200_TXC_BIAS_ARG_C |	/* new */
830			  R200_TXC_OP_MADD); /* was ADDSIGNED */
831	 R200_COLOR_ARG( 0, A );
832	 R200_COLOR_ARG( 1, C );
833	 R200_COLOR_ARG( 2, B );
834	 break;
835      case GL_MODULATE_SUBTRACT_ATI:
836	 color_combine = (R200_TXC_NEG_ARG_C |
837			  R200_TXC_OP_MADD);
838	 R200_COLOR_ARG( 0, A );
839	 R200_COLOR_ARG( 1, C );
840	 R200_COLOR_ARG( 2, B );
841	 break;
842      default:
843	 return GL_FALSE;
844      }
845
846      switch ( texUnit->_CurrentCombine->ModeA ) {
847      case GL_REPLACE:
848	 alpha_combine = (R200_TXA_ARG_A_ZERO |
849			  R200_TXA_ARG_B_ZERO |
850			  R200_TXA_OP_MADD);
851	 R200_ALPHA_ARG( 0, C );
852	 break;
853      case GL_MODULATE:
854	 alpha_combine = (R200_TXA_ARG_C_ZERO |
855			  R200_TXA_OP_MADD);
856	 R200_ALPHA_ARG( 0, A );
857	 R200_ALPHA_ARG( 1, B );
858	 break;
859      case GL_ADD:
860	 alpha_combine = (R200_TXA_ARG_B_ZERO |
861			  R200_TXA_COMP_ARG_B |
862			  R200_TXA_OP_MADD);
863	 R200_ALPHA_ARG( 0, A );
864	 R200_ALPHA_ARG( 1, C );
865	 break;
866      case GL_ADD_SIGNED:
867	 alpha_combine = (R200_TXA_ARG_B_ZERO |
868			  R200_TXA_COMP_ARG_B |
869			  R200_TXA_BIAS_ARG_C |	/* new */
870			  R200_TXA_OP_MADD); /* was ADDSIGNED */
871	 R200_ALPHA_ARG( 0, A );
872	 R200_ALPHA_ARG( 1, C );
873	 break;
874      case GL_SUBTRACT:
875	 alpha_combine = (R200_TXA_ARG_B_ZERO |
876			  R200_TXA_COMP_ARG_B |
877			  R200_TXA_NEG_ARG_C |
878			  R200_TXA_OP_MADD);
879	 R200_ALPHA_ARG( 0, A );
880	 R200_ALPHA_ARG( 1, C );
881	 break;
882      case GL_INTERPOLATE:
883	 alpha_combine = (R200_TXA_OP_LERP);
884	 R200_ALPHA_ARG( 0, B );
885	 R200_ALPHA_ARG( 1, A );
886	 R200_ALPHA_ARG( 2, C );
887	 break;
888
889      case GL_MODULATE_ADD_ATI:
890	 alpha_combine = (R200_TXA_OP_MADD);
891	 R200_ALPHA_ARG( 0, A );
892	 R200_ALPHA_ARG( 1, C );
893	 R200_ALPHA_ARG( 2, B );
894	 break;
895      case GL_MODULATE_SIGNED_ADD_ATI:
896	 alpha_combine = (R200_TXA_BIAS_ARG_C |	/* new */
897			  R200_TXA_OP_MADD); /* was ADDSIGNED */
898	 R200_ALPHA_ARG( 0, A );
899	 R200_ALPHA_ARG( 1, C );
900	 R200_ALPHA_ARG( 2, B );
901	 break;
902      case GL_MODULATE_SUBTRACT_ATI:
903	 alpha_combine = (R200_TXA_NEG_ARG_C |
904			  R200_TXA_OP_MADD);
905	 R200_ALPHA_ARG( 0, A );
906	 R200_ALPHA_ARG( 1, C );
907	 R200_ALPHA_ARG( 2, B );
908	 break;
909      default:
910	 return GL_FALSE;
911      }
912
913      if ( (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGBA_EXT)
914	   || (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGBA) ) {
915	 alpha_scale |= R200_TXA_DOT_ALPHA;
916	 Ashift = RGBshift;
917      }
918
919      /* Step 3:
920       * Apply the scale factor.
921       */
922      color_scale |= (RGBshift << R200_TXC_SCALE_SHIFT);
923      alpha_scale |= (Ashift   << R200_TXA_SCALE_SHIFT);
924
925      /* All done!
926       */
927   }
928
929   if ( rmesa->hw.pix[slot].cmd[PIX_PP_TXCBLEND] != color_combine ||
930	rmesa->hw.pix[slot].cmd[PIX_PP_TXABLEND] != alpha_combine ||
931	rmesa->hw.pix[slot].cmd[PIX_PP_TXCBLEND2] != color_scale ||
932	rmesa->hw.pix[slot].cmd[PIX_PP_TXABLEND2] != alpha_scale) {
933      R200_STATECHANGE( rmesa, pix[slot] );
934      rmesa->hw.pix[slot].cmd[PIX_PP_TXCBLEND] = color_combine;
935      rmesa->hw.pix[slot].cmd[PIX_PP_TXABLEND] = alpha_combine;
936      rmesa->hw.pix[slot].cmd[PIX_PP_TXCBLEND2] = color_scale;
937      rmesa->hw.pix[slot].cmd[PIX_PP_TXABLEND2] = alpha_scale;
938   }
939
940   return GL_TRUE;
941}
942
943#define REF_COLOR 1
944#define REF_ALPHA 2
945
946static GLboolean r200UpdateAllTexEnv( GLcontext *ctx )
947{
948   r200ContextPtr rmesa = R200_CONTEXT(ctx);
949   GLint i, j, currslot;
950   GLint maxunitused = -1;
951   GLboolean texregfree[6] = {GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE};
952   GLubyte stageref[7] = {0, 0, 0, 0, 0, 0, 0};
953   GLint nextunit[R200_MAX_TEXTURE_UNITS] = {0, 0, 0, 0, 0, 0};
954   GLint currentnext = -1;
955   GLboolean ok;
956
957   /* find highest used unit */
958   for ( j = 0; j < R200_MAX_TEXTURE_UNITS; j++) {
959      if (ctx->Texture.Unit[j]._ReallyEnabled) {
960	 maxunitused = j;
961      }
962   }
963   stageref[maxunitused + 1] = REF_COLOR | REF_ALPHA;
964
965   for ( j = maxunitused; j >= 0; j-- ) {
966      const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[j];
967
968      rmesa->state.texture.unit[j].outputreg = -1;
969
970      if (stageref[j + 1]) {
971
972	 /* use the lowest available reg. That gets us automatically reg0 for the last stage.
973	    need this even for disabled units, as it may get referenced due to the replace
974	    optimization */
975	 for ( i = 0 ; i < R200_MAX_TEXTURE_UNITS; i++ ) {
976	    if (texregfree[i]) {
977	       rmesa->state.texture.unit[j].outputreg = i;
978	       break;
979	    }
980	 }
981	 if (rmesa->state.texture.unit[j].outputreg == -1) {
982	    /* no more free regs we can use. Need a fallback :-( */
983	    return GL_FALSE;
984         }
985
986         nextunit[j] = currentnext;
987
988         if (!texUnit->_ReallyEnabled) {
989	 /* the not enabled stages are referenced "indirectly",
990            must not cut off the lower stages */
991	    stageref[j] = REF_COLOR | REF_ALPHA;
992	    continue;
993         }
994	 currentnext = j;
995
996	 const GLuint numColorArgs = texUnit->_CurrentCombine->_NumArgsRGB;
997	 const GLuint numAlphaArgs = texUnit->_CurrentCombine->_NumArgsA;
998	 const GLboolean isdot3rgba = (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGBA) ||
999				      (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGBA_EXT);
1000
1001
1002	 /* check if we need the color part, special case for dot3_rgba
1003	    as if only the alpha part is referenced later on it still is using the color part */
1004	 if ((stageref[j + 1] & REF_COLOR) || isdot3rgba) {
1005	    for ( i = 0 ; i < numColorArgs ; i++ ) {
1006	       const GLuint srcRGBi = texUnit->_CurrentCombine->SourceRGB[i];
1007	       const GLuint op = texUnit->_CurrentCombine->OperandRGB[i];
1008	       switch ( srcRGBi ) {
1009	       case GL_PREVIOUS:
1010		  /* op 0/1 are referencing color, op 2/3 alpha */
1011		  stageref[j] |= (op >> 1) + 1;
1012	          break;
1013	       case GL_TEXTURE:
1014		  texregfree[j] = GL_FALSE;
1015		  break;
1016	       case GL_TEXTURE0:
1017	       case GL_TEXTURE1:
1018	       case GL_TEXTURE2:
1019	       case GL_TEXTURE3:
1020	       case GL_TEXTURE4:
1021	       case GL_TEXTURE5:
1022		  texregfree[srcRGBi - GL_TEXTURE0] = GL_FALSE;
1023	          break;
1024	       default: /* don't care about other sources here */
1025		  break;
1026	       }
1027	    }
1028	 }
1029
1030	 /* alpha args are ignored for dot3_rgba */
1031	 if ((stageref[j + 1] & REF_ALPHA) && !isdot3rgba) {
1032
1033	    for ( i = 0 ; i < numAlphaArgs ; i++ ) {
1034	       const GLuint srcAi = texUnit->_CurrentCombine->SourceA[i];
1035	       switch ( srcAi ) {
1036	       case GL_PREVIOUS:
1037		  stageref[j] |= REF_ALPHA;
1038		  break;
1039	       case GL_TEXTURE:
1040		  texregfree[j] = GL_FALSE;
1041		  break;
1042	       case GL_TEXTURE0:
1043	       case GL_TEXTURE1:
1044	       case GL_TEXTURE2:
1045	       case GL_TEXTURE3:
1046	       case GL_TEXTURE4:
1047	       case GL_TEXTURE5:
1048		  texregfree[srcAi - GL_TEXTURE0] = GL_FALSE;
1049		  break;
1050	       default: /* don't care about other sources here */
1051		  break;
1052	       }
1053	    }
1054	 }
1055      }
1056   }
1057
1058   /* don't enable texture sampling for units if the result is not used */
1059   for (i = 0; i < R200_MAX_TEXTURE_UNITS; i++) {
1060      if (ctx->Texture.Unit[i]._ReallyEnabled && !texregfree[i])
1061	 rmesa->state.texture.unit[i].unitneeded = ctx->Texture.Unit[i]._ReallyEnabled;
1062      else rmesa->state.texture.unit[i].unitneeded = 0;
1063   }
1064
1065   ok = GL_TRUE;
1066   currslot = 0;
1067   rmesa->state.envneeded = 1;
1068
1069   i = 0;
1070   while ((i <= maxunitused) && (i >= 0)) {
1071      /* only output instruction if the results are referenced */
1072      if (ctx->Texture.Unit[i]._ReallyEnabled && stageref[i+1]) {
1073         GLuint replaceunit = i;
1074	 /* try to optimize GL_REPLACE away (only one level deep though) */
1075	 if (	(ctx->Texture.Unit[i]._CurrentCombine->ModeRGB == GL_REPLACE) &&
1076		(ctx->Texture.Unit[i]._CurrentCombine->ModeA == GL_REPLACE) &&
1077		(ctx->Texture.Unit[i]._CurrentCombine->ScaleShiftRGB == 0) &&
1078		(ctx->Texture.Unit[i]._CurrentCombine->ScaleShiftA == 0) &&
1079		(nextunit[i] > 0) ) {
1080	    /* yippie! can optimize it away! */
1081	    replaceunit = i;
1082	    i = nextunit[i];
1083	 }
1084
1085	 /* need env instruction slot */
1086	 rmesa->state.envneeded |= 1 << currslot;
1087	 ok = r200UpdateTextureEnv( ctx, i, currslot, replaceunit );
1088	 if (!ok) return GL_FALSE;
1089	 currslot++;
1090      }
1091      i = i + 1;
1092   }
1093
1094   if (currslot == 0) {
1095      /* need one stage at least */
1096      rmesa->state.texture.unit[0].outputreg = 0;
1097      ok = r200UpdateTextureEnv( ctx, 0, 0, 0 );
1098   }
1099
1100   R200_STATECHANGE( rmesa, ctx );
1101   rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~(R200_TEX_BLEND_ENABLE_MASK | R200_MULTI_PASS_ENABLE);
1102   rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= rmesa->state.envneeded << R200_TEX_BLEND_0_ENABLE_SHIFT;
1103
1104   return ok;
1105}
1106
1107#undef REF_COLOR
1108#undef REF_ALPHA
1109
1110
/*
 * Masks of the register bits that are taken from the texture object.
 * import_tex_obj_state() clears exactly these bits in the unit's command
 * buffer and replaces them with the values cached in the texture object;
 * all other bits of the registers are left alone.
 */

/* texture-object owned bits of PP_TXFILTER */
#define TEXOBJ_TXFILTER_MASK            \
   ( R200_MAX_MIP_LEVEL_MASK            \
   | R200_MIN_FILTER_MASK               \
   | R200_MAG_FILTER_MASK               \
   | R200_MAX_ANISO_MASK                \
   | R200_YUV_TO_RGB                    \
   | R200_YUV_TEMPERATURE_MASK          \
   | R200_CLAMP_S_MASK                  \
   | R200_CLAMP_T_MASK                  \
   | R200_BORDER_MODE_D3D )

/* texture-object owned bits of PP_TXFORMAT */
#define TEXOBJ_TXFORMAT_MASK            \
   ( R200_TXFORMAT_WIDTH_MASK           \
   | R200_TXFORMAT_HEIGHT_MASK          \
   | R200_TXFORMAT_FORMAT_MASK          \
   | R200_TXFORMAT_F5_WIDTH_MASK        \
   | R200_TXFORMAT_F5_HEIGHT_MASK       \
   | R200_TXFORMAT_ALPHA_IN_MAP         \
   | R200_TXFORMAT_CUBIC_MAP_ENABLE     \
   | R200_TXFORMAT_NON_POWER2 )

/* texture-object owned bits of PP_TXFORMAT_X */
#define TEXOBJ_TXFORMAT_X_MASK          \
   ( R200_DEPTH_LOG2_MASK               \
   | R200_TEXCOORD_MASK                 \
   | R200_CLAMP_Q_MASK                  \
   | R200_VOLUME_FILTER_MASK )
1134
1135
1136static void import_tex_obj_state( r200ContextPtr rmesa,
1137				  int unit,
1138				  r200TexObjPtr texobj )
1139{
1140/* do not use RADEON_DB_STATE to avoid stale texture caches */
1141   GLuint *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0];
1142
1143   R200_STATECHANGE( rmesa, tex[unit] );
1144
1145   cmd[TEX_PP_TXFILTER] &= ~TEXOBJ_TXFILTER_MASK;
1146   cmd[TEX_PP_TXFILTER] |= texobj->pp_txfilter & TEXOBJ_TXFILTER_MASK;
1147   cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
1148   cmd[TEX_PP_TXFORMAT] |= texobj->pp_txformat & TEXOBJ_TXFORMAT_MASK;
1149   cmd[TEX_PP_TXFORMAT_X] &= ~TEXOBJ_TXFORMAT_X_MASK;
1150   cmd[TEX_PP_TXFORMAT_X] |= texobj->pp_txformat_x & TEXOBJ_TXFORMAT_X_MASK;
1151   cmd[TEX_PP_TXSIZE] = texobj->pp_txsize; /* NPOT only! */
1152   cmd[TEX_PP_TXPITCH] = texobj->pp_txpitch; /* NPOT only! */
1153   cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color;
1154   if (rmesa->r200Screen->drmSupportsFragShader) {
1155      cmd[TEX_PP_TXOFFSET_NEWDRM] = texobj->pp_txoffset;
1156   }
1157   else {
1158      cmd[TEX_PP_TXOFFSET_OLDDRM] = texobj->pp_txoffset;
1159   }
1160
1161   if (texobj->base.tObj->Target == GL_TEXTURE_CUBE_MAP) {
1162      GLuint *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0];
1163      GLuint bytesPerFace = texobj->base.totalSize / 6;
1164      ASSERT(texobj->base.totalSize % 6 == 0);
1165
1166      R200_STATECHANGE( rmesa, cube[unit] );
1167      cube_cmd[CUBE_PP_CUBIC_FACES] = texobj->pp_cubic_faces;
1168      if (rmesa->r200Screen->drmSupportsFragShader) {
1169	 /* that value is submitted twice. could change cube atom
1170	    to not include that command when new drm is used */
1171	 cmd[TEX_PP_CUBIC_FACES] = texobj->pp_cubic_faces;
1172      }
1173      cube_cmd[CUBE_PP_CUBIC_OFFSET_F1] = texobj->pp_txoffset + 1 * bytesPerFace;
1174      cube_cmd[CUBE_PP_CUBIC_OFFSET_F2] = texobj->pp_txoffset + 2 * bytesPerFace;
1175      cube_cmd[CUBE_PP_CUBIC_OFFSET_F3] = texobj->pp_txoffset + 3 * bytesPerFace;
1176      cube_cmd[CUBE_PP_CUBIC_OFFSET_F4] = texobj->pp_txoffset + 4 * bytesPerFace;
1177      cube_cmd[CUBE_PP_CUBIC_OFFSET_F5] = texobj->pp_txoffset + 5 * bytesPerFace;
1178   }
1179
1180   texobj->dirty_state &= ~(1<<unit);
1181}
1182
1183
1184static void set_texgen_matrix( r200ContextPtr rmesa,
1185			       GLuint unit,
1186			       const GLfloat *s_plane,
1187			       const GLfloat *t_plane,
1188			       const GLfloat *r_plane,
1189			       const GLfloat *q_plane )
1190{
1191   GLfloat m[16];
1192
1193   m[0]  = s_plane[0];
1194   m[4]  = s_plane[1];
1195   m[8]  = s_plane[2];
1196   m[12] = s_plane[3];
1197
1198   m[1]  = t_plane[0];
1199   m[5]  = t_plane[1];
1200   m[9]  = t_plane[2];
1201   m[13] = t_plane[3];
1202
1203   m[2]  = r_plane[0];
1204   m[6]  = r_plane[1];
1205   m[10] = r_plane[2];
1206   m[14] = r_plane[3];
1207
1208   m[3]  = q_plane[0];
1209   m[7]  = q_plane[1];
1210   m[11] = q_plane[2];
1211   m[15] = q_plane[3];
1212
1213   _math_matrix_loadf( &(rmesa->TexGenMatrix[unit]), m);
1214   _math_matrix_analyse( &(rmesa->TexGenMatrix[unit]) );
1215   rmesa->TexGenEnabled |= R200_TEXMAT_0_ENABLE<<unit;
1216}
1217
1218
1219static GLuint r200_need_dis_texgen(const GLbitfield texGenEnabled,
1220				   const GLfloat *planeS,
1221				   const GLfloat *planeT,
1222				   const GLfloat *planeR,
1223				   const GLfloat *planeQ)
1224{
1225   GLuint needtgenable = 0;
1226
1227   if (!(texGenEnabled & S_BIT)) {
1228      if (((texGenEnabled & T_BIT) && planeT[0] != 0.0) ||
1229	 ((texGenEnabled & R_BIT) && planeR[0] != 0.0) ||
1230	 ((texGenEnabled & Q_BIT) && planeQ[0] != 0.0)) {
1231	 needtgenable |= S_BIT;
1232      }
1233   }
1234   if (!(texGenEnabled & T_BIT)) {
1235      if (((texGenEnabled & S_BIT) && planeS[1] != 0.0) ||
1236	 ((texGenEnabled & R_BIT) && planeR[1] != 0.0) ||
1237	 ((texGenEnabled & Q_BIT) && planeQ[1] != 0.0)) {
1238	 needtgenable |= T_BIT;
1239     }
1240   }
1241   if (!(texGenEnabled & R_BIT)) {
1242      if (((texGenEnabled & S_BIT) && planeS[2] != 0.0) ||
1243	 ((texGenEnabled & T_BIT) && planeT[2] != 0.0) ||
1244	 ((texGenEnabled & Q_BIT) && planeQ[2] != 0.0)) {
1245	 needtgenable |= R_BIT;
1246      }
1247   }
1248   if (!(texGenEnabled & Q_BIT)) {
1249      if (((texGenEnabled & S_BIT) && planeS[3] != 0.0) ||
1250	 ((texGenEnabled & T_BIT) && planeT[3] != 0.0) ||
1251	 ((texGenEnabled & R_BIT) && planeR[3] != 0.0)) {
1252	 needtgenable |= Q_BIT;
1253      }
1254   }
1255
1256   return needtgenable;
1257}
1258
1259
1260/*
1261 * Returns GL_FALSE if fallback required.
1262 */
1263static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit )
1264{
1265   r200ContextPtr rmesa = R200_CONTEXT(ctx);
1266   const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
1267   GLuint inputshift = R200_TEXGEN_0_INPUT_SHIFT + unit*4;
1268   GLuint tgi, tgcm;
1269   GLuint mode = 0;
1270   GLboolean mixed_fallback = GL_FALSE;
1271   static const GLfloat I[16] = {
1272      1,  0,  0,  0,
1273      0,  1,  0,  0,
1274      0,  0,  1,  0,
1275      0,  0,  0,  1 };
1276   static const GLfloat reflect[16] = {
1277      -1,  0,  0,  0,
1278       0, -1,  0,  0,
1279       0,  0,  -1, 0,
1280       0,  0,  0,  1 };
1281
1282   rmesa->TexGenCompSel &= ~(R200_OUTPUT_TEX_0 << unit);
1283   rmesa->TexGenEnabled &= ~(R200_TEXGEN_TEXMAT_0_ENABLE<<unit);
1284   rmesa->TexGenEnabled &= ~(R200_TEXMAT_0_ENABLE<<unit);
1285   rmesa->TexGenNeedNormals[unit] = GL_FALSE;
1286   tgi = rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_1] & ~(R200_TEXGEN_INPUT_MASK <<
1287						   inputshift);
1288   tgcm = rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_2] & ~(R200_TEXGEN_COMP_MASK <<
1289						    (unit * 4));
1290
1291   if (0)
1292      fprintf(stderr, "%s unit %d\n", __FUNCTION__, unit);
1293
1294   if (texUnit->TexGenEnabled & S_BIT) {
1295      mode = texUnit->GenModeS;
1296   } else {
1297      tgcm |= R200_TEXGEN_COMP_S << (unit * 4);
1298   }
1299
1300   if (texUnit->TexGenEnabled & T_BIT) {
1301      if (texUnit->GenModeT != mode)
1302	 mixed_fallback = GL_TRUE;
1303   } else {
1304      tgcm |= R200_TEXGEN_COMP_T << (unit * 4);
1305   }
1306
1307   if (texUnit->TexGenEnabled & R_BIT) {
1308      if (texUnit->GenModeR != mode)
1309	 mixed_fallback = GL_TRUE;
1310   } else {
1311      tgcm |= R200_TEXGEN_COMP_R << (unit * 4);
1312   }
1313
1314   if (texUnit->TexGenEnabled & Q_BIT) {
1315      if (texUnit->GenModeQ != mode)
1316	 mixed_fallback = GL_TRUE;
1317   } else {
1318      tgcm |= R200_TEXGEN_COMP_Q << (unit * 4);
1319   }
1320
1321   if (mixed_fallback) {
1322      if (R200_DEBUG & DEBUG_FALLBACKS)
1323	 fprintf(stderr, "fallback mixed texgen, 0x%x (0x%x 0x%x 0x%x 0x%x)\n",
1324		 texUnit->TexGenEnabled, texUnit->GenModeS, texUnit->GenModeT,
1325		 texUnit->GenModeR, texUnit->GenModeQ);
1326      return GL_FALSE;
1327   }
1328
1329/* we CANNOT do mixed mode if the texgen mode requires a plane where the input
1330   is not enabled for texgen, since the planes are concatenated into texmat,
1331   and thus the input will come from texcoord rather than tex gen equation!
1332   Either fallback or just hope that those texcoords aren't really needed...
1333   Assuming the former will cause lots of unnecessary fallbacks, the latter will
1334   generate bogus results sometimes - it's pretty much impossible to really know
1335   when a fallback is needed, depends on texmat and what sort of texture is bound
1336   etc, - for now fallback if we're missing either S or T bits, there's a high
1337   probability we need the texcoords in that case.
1338   That's a lot of work for some obscure texgen mixed mode fixup - why oh why
1339   doesn't the chip just directly accept the plane parameters :-(. */
1340   switch (mode) {
1341   case GL_OBJECT_LINEAR: {
1342      GLuint needtgenable = r200_need_dis_texgen( texUnit->TexGenEnabled,
1343				texUnit->ObjectPlaneS, texUnit->ObjectPlaneT,
1344				texUnit->ObjectPlaneR, texUnit->ObjectPlaneQ );
1345      if (needtgenable & (S_BIT | T_BIT)) {
1346	 if (R200_DEBUG & DEBUG_FALLBACKS)
1347	 fprintf(stderr, "fallback mixed texgen / obj plane, 0x%x\n",
1348		 texUnit->TexGenEnabled);
1349	 return GL_FALSE;
1350      }
1351      if (needtgenable & (R_BIT)) {
1352	 tgcm &= ~(R200_TEXGEN_COMP_R << (unit * 4));
1353      }
1354      if (needtgenable & (Q_BIT)) {
1355	 tgcm &= ~(R200_TEXGEN_COMP_Q << (unit * 4));
1356      }
1357
1358      tgi |= R200_TEXGEN_INPUT_OBJ << inputshift;
1359      set_texgen_matrix( rmesa, unit,
1360	 (texUnit->TexGenEnabled & S_BIT) ? texUnit->ObjectPlaneS : I,
1361	 (texUnit->TexGenEnabled & T_BIT) ? texUnit->ObjectPlaneT : I + 4,
1362	 (texUnit->TexGenEnabled & R_BIT) ? texUnit->ObjectPlaneR : I + 8,
1363	 (texUnit->TexGenEnabled & Q_BIT) ? texUnit->ObjectPlaneQ : I + 12);
1364      }
1365      break;
1366
1367   case GL_EYE_LINEAR: {
1368      GLuint needtgenable = r200_need_dis_texgen( texUnit->TexGenEnabled,
1369				texUnit->EyePlaneS, texUnit->EyePlaneT,
1370				texUnit->EyePlaneR, texUnit->EyePlaneQ );
1371      if (needtgenable & (S_BIT | T_BIT)) {
1372	 if (R200_DEBUG & DEBUG_FALLBACKS)
1373	 fprintf(stderr, "fallback mixed texgen / eye plane, 0x%x\n",
1374		 texUnit->TexGenEnabled);
1375	 return GL_FALSE;
1376      }
1377      if (needtgenable & (R_BIT)) {
1378	 tgcm &= ~(R200_TEXGEN_COMP_R << (unit * 4));
1379      }
1380      if (needtgenable & (Q_BIT)) {
1381	 tgcm &= ~(R200_TEXGEN_COMP_Q << (unit * 4));
1382      }
1383      tgi |= R200_TEXGEN_INPUT_EYE << inputshift;
1384      set_texgen_matrix( rmesa, unit,
1385	 (texUnit->TexGenEnabled & S_BIT) ? texUnit->EyePlaneS : I,
1386	 (texUnit->TexGenEnabled & T_BIT) ? texUnit->EyePlaneT : I + 4,
1387	 (texUnit->TexGenEnabled & R_BIT) ? texUnit->EyePlaneR : I + 8,
1388	 (texUnit->TexGenEnabled & Q_BIT) ? texUnit->EyePlaneQ : I + 12);
1389      }
1390      break;
1391
1392   case GL_REFLECTION_MAP_NV:
1393      rmesa->TexGenNeedNormals[unit] = GL_TRUE;
1394      tgi |= R200_TEXGEN_INPUT_EYE_REFLECT << inputshift;
1395      /* pretty weird, must only negate when lighting is enabled? */
1396      if (ctx->Light.Enabled)
1397	 set_texgen_matrix( rmesa, unit,
1398	    (texUnit->TexGenEnabled & S_BIT) ? reflect : I,
1399	    (texUnit->TexGenEnabled & T_BIT) ? reflect + 4 : I + 4,
1400	    (texUnit->TexGenEnabled & R_BIT) ? reflect + 8 : I + 8,
1401	    I + 12);
1402      break;
1403
1404   case GL_NORMAL_MAP_NV:
1405      rmesa->TexGenNeedNormals[unit] = GL_TRUE;
1406      tgi |= R200_TEXGEN_INPUT_EYE_NORMAL<<inputshift;
1407      break;
1408
1409   case GL_SPHERE_MAP:
1410      rmesa->TexGenNeedNormals[unit] = GL_TRUE;
1411      tgi |= R200_TEXGEN_INPUT_SPHERE<<inputshift;
1412      break;
1413
1414   case 0:
1415      /* All texgen units were disabled, so just pass coords through. */
1416      tgi |= unit << inputshift;
1417      break;
1418
1419   default:
1420      /* Unsupported mode, fallback:
1421       */
1422      if (R200_DEBUG & DEBUG_FALLBACKS)
1423	 fprintf(stderr, "fallback unsupported texgen, %d\n",
1424		 texUnit->GenModeS);
1425      return GL_FALSE;
1426   }
1427
1428   rmesa->TexGenEnabled |= R200_TEXGEN_TEXMAT_0_ENABLE << unit;
1429   rmesa->TexGenCompSel |= R200_OUTPUT_TEX_0 << unit;
1430
1431   if (tgi != rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_1] ||
1432       tgcm != rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_2])
1433   {
1434      R200_STATECHANGE(rmesa, tcg);
1435      rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_1] = tgi;
1436      rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_2] = tgcm;
1437   }
1438
1439   return GL_TRUE;
1440}
1441
1442
1443static void disable_tex( GLcontext *ctx, int unit )
1444{
1445   r200ContextPtr rmesa = R200_CONTEXT(ctx);
1446
1447   if (rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (R200_TEX_0_ENABLE<<unit)) {
1448      /* Texture unit disabled */
1449      if ( rmesa->state.texture.unit[unit].texobj != NULL ) {
1450	 /* The old texture is no longer bound to this texture unit.
1451	  * Mark it as such.
1452	  */
1453
1454	 rmesa->state.texture.unit[unit].texobj->base.bound &= ~(1UL << unit);
1455	 rmesa->state.texture.unit[unit].texobj = NULL;
1456      }
1457
1458      R200_STATECHANGE( rmesa, ctx );
1459      rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~(R200_TEX_0_ENABLE << unit);
1460
1461      R200_STATECHANGE( rmesa, vtx );
1462      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] &= ~(7 << (unit * 3));
1463
1464      if (rmesa->TclFallback & (R200_TCL_FALLBACK_TEXGEN_0<<unit)) {
1465	 TCL_FALLBACK( ctx, (R200_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE);
1466      }
1467
1468      /* Actually want to keep all units less than max active texture
1469       * enabled, right?  Fix this for >2 texunits.
1470       */
1471
1472      {
1473	 GLuint tmp = rmesa->TexGenEnabled;
1474
1475	 rmesa->TexGenEnabled &= ~(R200_TEXGEN_TEXMAT_0_ENABLE<<unit);
1476	 rmesa->TexGenEnabled &= ~(R200_TEXMAT_0_ENABLE<<unit);
1477	 rmesa->TexGenNeedNormals[unit] = GL_FALSE;
1478	 rmesa->TexGenCompSel &= ~(R200_OUTPUT_TEX_0 << unit);
1479
1480	 if (tmp != rmesa->TexGenEnabled) {
1481	    rmesa->recheck_texgen[unit] = GL_TRUE;
1482	    rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
1483	 }
1484      }
1485   }
1486}
1487
1488void set_re_cntl_d3d( GLcontext *ctx, int unit, GLboolean use_d3d )
1489{
1490   r200ContextPtr rmesa = R200_CONTEXT(ctx);
1491
1492   GLuint re_cntl;
1493
1494   re_cntl = rmesa->hw.set.cmd[SET_RE_CNTL] & ~(R200_VTX_STQ0_D3D << (2 * unit));
1495   if (use_d3d)
1496      re_cntl |= R200_VTX_STQ0_D3D << (2 * unit);
1497
1498   if ( re_cntl != rmesa->hw.set.cmd[SET_RE_CNTL] ) {
1499      R200_STATECHANGE( rmesa, set );
1500      rmesa->hw.set.cmd[SET_RE_CNTL] = re_cntl;
1501   }
1502}
1503
1504static GLboolean enable_tex_2d( GLcontext *ctx, int unit )
1505{
1506   r200ContextPtr rmesa = R200_CONTEXT(ctx);
1507   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
1508   struct gl_texture_object *tObj = texUnit->_Current;
1509   r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
1510
1511   /* Need to load the 2d images associated with this unit.
1512    */
1513   if (t->pp_txformat & R200_TXFORMAT_NON_POWER2) {
1514      t->pp_txformat &= ~R200_TXFORMAT_NON_POWER2;
1515      t->base.dirty_images[0] = ~0;
1516   }
1517
1518   ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D);
1519
1520   if ( t->base.dirty_images[0] ) {
1521      R200_FIREVERTICES( rmesa );
1522      r200SetTexImages( rmesa, tObj );
1523      r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, 0 );
1524      if ( !t->base.memBlock )
1525	 return GL_FALSE;
1526   }
1527
1528   set_re_cntl_d3d( ctx, unit, GL_FALSE );
1529
1530   return GL_TRUE;
1531}
1532
#if ENABLE_HW_3D_TEXTURE
/*
 * Prepare the 3D texture currently bound to 'unit' for hardware use.
 *
 * If the texture object was last set up as non-power-of-two, that flag is
 * dropped and image set 0 is marked dirty.  Dirty images are laid out and
 * uploaded.  On success the D3D bit of the unit in RE_CNTL is set.
 *
 * Returns GL_FALSE if a mipmapping minification filter is selected or if
 * the texture has no memory block after the upload, GL_TRUE otherwise.
 */
static GLboolean enable_tex_3d( GLcontext *ctx, int unit )
{
   r200ContextPtr rmesa = R200_CONTEXT(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
   const GLboolean wasNpot = (t->pp_txformat & R200_TXFORMAT_NON_POWER2) != 0;

   /* Need to load the 3d images associated with this unit.
    */
   if (wasNpot) {
      t->pp_txformat &= ~R200_TXFORMAT_NON_POWER2;
      t->base.dirty_images[0] = ~0;
   }

   ASSERT(tObj->Target == GL_TEXTURE_3D);

   /* R100 & R200 do not support mipmaps for 3D textures.
    */
   if ( !(tObj->MinFilter == GL_NEAREST || tObj->MinFilter == GL_LINEAR) ) {
      return GL_FALSE;
   }

   if ( t->base.dirty_images[0] ) {
      R200_FIREVERTICES( rmesa );
      r200SetTexImages( rmesa, tObj );
      r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, 0 );
      if ( t->base.memBlock == NULL ) {
         return GL_FALSE;
      }
   }

   set_re_cntl_d3d( ctx, unit, GL_TRUE );

   return GL_TRUE;
}
#endif
1569
1570static GLboolean enable_tex_cube( GLcontext *ctx, int unit )
1571{
1572   r200ContextPtr rmesa = R200_CONTEXT(ctx);
1573   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
1574   struct gl_texture_object *tObj = texUnit->_Current;
1575   r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
1576   GLuint face;
1577
1578   /* Need to load the 2d images associated with this unit.
1579    */
1580   if (t->pp_txformat & R200_TXFORMAT_NON_POWER2) {
1581      t->pp_txformat &= ~R200_TXFORMAT_NON_POWER2;
1582      for (face = 0; face < 6; face++)
1583         t->base.dirty_images[face] = ~0;
1584   }
1585
1586   ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP);
1587
1588   if ( t->base.dirty_images[0] || t->base.dirty_images[1] ||
1589        t->base.dirty_images[2] || t->base.dirty_images[3] ||
1590        t->base.dirty_images[4] || t->base.dirty_images[5] ) {
1591      /* flush */
1592      R200_FIREVERTICES( rmesa );
1593      /* layout memory space, once for all faces */
1594      r200SetTexImages( rmesa, tObj );
1595   }
1596
1597   /* upload (per face) */
1598   for (face = 0; face < 6; face++) {
1599      if (t->base.dirty_images[face]) {
1600         r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, face );
1601      }
1602   }
1603
1604   if ( !t->base.memBlock ) {
1605      /* texmem alloc failed, use s/w fallback */
1606      return GL_FALSE;
1607   }
1608
1609   set_re_cntl_d3d( ctx, unit, GL_TRUE );
1610
1611   return GL_TRUE;
1612}
1613
1614static GLboolean enable_tex_rect( GLcontext *ctx, int unit )
1615{
1616   r200ContextPtr rmesa = R200_CONTEXT(ctx);
1617   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
1618   struct gl_texture_object *tObj = texUnit->_Current;
1619   r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
1620
1621   if (!(t->pp_txformat & R200_TXFORMAT_NON_POWER2)) {
1622      t->pp_txformat |= R200_TXFORMAT_NON_POWER2;
1623      t->base.dirty_images[0] = ~0;
1624   }
1625
1626   ASSERT(tObj->Target == GL_TEXTURE_RECTANGLE_NV);
1627
1628   if ( t->base.dirty_images[0] ) {
1629      R200_FIREVERTICES( rmesa );
1630      r200SetTexImages( rmesa, tObj );
1631      r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, 0 );
1632      if ( !t->base.memBlock && !rmesa->prefer_gart_client_texturing )
1633	 return GL_FALSE;
1634   }
1635
1636   set_re_cntl_d3d( ctx, unit, GL_FALSE );
1637
1638   return GL_TRUE;
1639}
1640
1641
1642static GLboolean update_tex_common( GLcontext *ctx, int unit )
1643{
1644   r200ContextPtr rmesa = R200_CONTEXT(ctx);
1645   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
1646   struct gl_texture_object *tObj = texUnit->_Current;
1647   r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
1648
1649   /* Fallback if there's a texture border */
1650   if ( tObj->Image[0][tObj->BaseLevel]->Border > 0 )
1651       return GL_FALSE;
1652
1653   /* Update state if this is a different texture object to last
1654    * time.
1655    */
1656   if ( rmesa->state.texture.unit[unit].texobj != t ) {
1657      if ( rmesa->state.texture.unit[unit].texobj != NULL ) {
1658	 /* The old texture is no longer bound to this texture unit.
1659	  * Mark it as such.
1660	  */
1661
1662	 rmesa->state.texture.unit[unit].texobj->base.bound &=
1663	     ~(1UL << unit);
1664      }
1665
1666      rmesa->state.texture.unit[unit].texobj = t;
1667      t->base.bound |= (1UL << unit);
1668      t->dirty_state |= 1<<unit;
1669      driUpdateTextureLRU( (driTextureObject *) t ); /* XXX: should be locked! */
1670   }
1671
1672
1673   /* Newly enabled?
1674    */
1675   if ( 1|| !(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (R200_TEX_0_ENABLE<<unit))) {
1676      R200_STATECHANGE( rmesa, ctx );
1677      rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_0_ENABLE << unit;
1678
1679      R200_STATECHANGE( rmesa, vtx );
1680      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] &= ~(7 << (unit * 3));
1681      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] |= 4 << (unit * 3);
1682
1683      rmesa->recheck_texgen[unit] = GL_TRUE;
1684   }
1685
1686   if (t->dirty_state & (1<<unit)) {
1687      import_tex_obj_state( rmesa, unit, t );
1688   }
1689
1690   if (rmesa->recheck_texgen[unit]) {
1691      GLboolean fallback = !r200_validate_texgen( ctx, unit );
1692      TCL_FALLBACK( ctx, (R200_TCL_FALLBACK_TEXGEN_0<<unit), fallback);
1693      rmesa->recheck_texgen[unit] = 0;
1694      rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
1695   }
1696
1697   FALLBACK( rmesa, R200_FALLBACK_BORDER_MODE, t->border_fallback );
1698   return !t->border_fallback;
1699}
1700
1701
1702
1703static GLboolean r200UpdateTextureUnit( GLcontext *ctx, int unit )
1704{
1705   r200ContextPtr rmesa = R200_CONTEXT(ctx);
1706   GLuint unitneeded = rmesa->state.texture.unit[unit].unitneeded;
1707
1708   if ( unitneeded & (TEXTURE_RECT_BIT) ) {
1709      return (enable_tex_rect( ctx, unit ) &&
1710	      update_tex_common( ctx, unit ));
1711   }
1712   else if ( unitneeded & (TEXTURE_1D_BIT | TEXTURE_2D_BIT) ) {
1713      return (enable_tex_2d( ctx, unit ) &&
1714	      update_tex_common( ctx, unit ));
1715   }
1716#if ENABLE_HW_3D_TEXTURE
1717   else if ( unitneeded & (TEXTURE_3D_BIT) ) {
1718      return (enable_tex_3d( ctx, unit ) &&
1719	      update_tex_common( ctx, unit ));
1720   }
1721#endif
1722   else if ( unitneeded & (TEXTURE_CUBE_BIT) ) {
1723      return (enable_tex_cube( ctx, unit ) &&
1724	      update_tex_common( ctx, unit ));
1725   }
1726   else if ( unitneeded ) {
1727      return GL_FALSE;
1728   }
1729   else {
1730      disable_tex( ctx, unit );
1731      return GL_TRUE;
1732   }
1733}
1734
1735
1736void r200UpdateTextureState( GLcontext *ctx )
1737{
1738   r200ContextPtr rmesa = R200_CONTEXT(ctx);
1739   GLboolean ok;
1740   GLuint dbg;
1741
1742   if (ctx->ATIFragmentShader._Enabled) {
1743      GLuint i;
1744      for (i = 0; i < R200_MAX_TEXTURE_UNITS; i++) {
1745	 rmesa->state.texture.unit[i].unitneeded = ctx->Texture.Unit[i]._ReallyEnabled;
1746      }
1747      ok = GL_TRUE;
1748   }
1749   else {
1750      ok = r200UpdateAllTexEnv( ctx );
1751   }
1752   if (ok) {
1753      ok = (r200UpdateTextureUnit( ctx, 0 ) &&
1754	 r200UpdateTextureUnit( ctx, 1 ) &&
1755	 r200UpdateTextureUnit( ctx, 2 ) &&
1756	 r200UpdateTextureUnit( ctx, 3 ) &&
1757	 r200UpdateTextureUnit( ctx, 4 ) &&
1758	 r200UpdateTextureUnit( ctx, 5 ));
1759   }
1760
1761   if (ok && ctx->ATIFragmentShader._Enabled) {
1762      r200UpdateFragmentShader(ctx);
1763   }
1764
1765   FALLBACK( rmesa, R200_FALLBACK_TEXTURE, !ok );
1766
1767   if (rmesa->TclFallback)
1768      r200ChooseVertexState( ctx );
1769
1770
1771   if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200) {
1772
1773      /*
1774       * T0 hang workaround -------------
1775       * not needed for r200 derivatives
1776        */
1777      if ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_ENABLE_MASK) == R200_TEX_0_ENABLE &&
1778	 (rmesa->hw.tex[0].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK) > R200_MIN_FILTER_LINEAR) {
1779
1780	 R200_STATECHANGE(rmesa, ctx);
1781	 R200_STATECHANGE(rmesa, tex[1]);
1782	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_1_ENABLE;
1783	 if (!(rmesa->hw.cst.cmd[CST_PP_CNTL_X] & R200_PPX_TEX_1_ENABLE))
1784	    rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
1785	 rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] |= R200_TXFORMAT_LOOKUP_DISABLE;
1786      }
1787      else if (!ctx->ATIFragmentShader._Enabled) {
1788	 if ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_1_ENABLE) &&
1789	    (rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] & R200_TXFORMAT_LOOKUP_DISABLE)) {
1790	    R200_STATECHANGE(rmesa, tex[1]);
1791	    rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~R200_TXFORMAT_LOOKUP_DISABLE;
1792         }
1793      }
1794      /* do the same workaround for the first pass of a fragment shader.
1795       * completely unknown if necessary / sufficient.
1796       */
1797      if ((rmesa->hw.cst.cmd[CST_PP_CNTL_X] & R200_PPX_TEX_ENABLE_MASK) == R200_PPX_TEX_0_ENABLE &&
1798	 (rmesa->hw.tex[0].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK) > R200_MIN_FILTER_LINEAR) {
1799
1800	 R200_STATECHANGE(rmesa, cst);
1801	 R200_STATECHANGE(rmesa, tex[1]);
1802	 rmesa->hw.cst.cmd[CST_PP_CNTL_X] |= R200_PPX_TEX_1_ENABLE;
1803	 if (!(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_1_ENABLE))
1804	    rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
1805	 rmesa->hw.tex[1].cmd[TEX_PP_TXMULTI_CTL] |= R200_PASS1_TXFORMAT_LOOKUP_DISABLE;
1806      }
1807
1808      /* maybe needs to be done pairwise due to 2 parallel (physical) tex units ?
1809         looks like that's not the case, if 8500/9100 owners don't complain remove this...
1810      for ( i = 0; i < ctx->Const.MaxTextureUnits; i += 2) {
1811         if (((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & ((R200_TEX_0_ENABLE |
1812            R200_TEX_1_ENABLE ) << i)) == (R200_TEX_0_ENABLE << i)) &&
1813            ((rmesa->hw.tex[i].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK) >
1814            R200_MIN_FILTER_LINEAR)) {
1815            R200_STATECHANGE(rmesa, ctx);
1816            R200_STATECHANGE(rmesa, tex[i+1]);
1817            rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= (R200_TEX_1_ENABLE << i);
1818            rmesa->hw.tex[i+1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
1819            rmesa->hw.tex[i+1].cmd[TEX_PP_TXFORMAT] |= 0x08000000;
1820         }
1821         else {
1822            if ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (R200_TEX_1_ENABLE << i)) &&
1823               (rmesa->hw.tex[i+1].cmd[TEX_PP_TXFORMAT] & 0x08000000)) {
1824               R200_STATECHANGE(rmesa, tex[i+1]);
1825               rmesa->hw.tex[i+1].cmd[TEX_PP_TXFORMAT] &= ~0x08000000;
1826            }
1827         }
1828      } */
1829
1830      /*
1831       * Texture cache LRU hang workaround -------------
1832       * not needed for r200 derivatives
1833       * hopefully this covers first pass of a shader as well
1834       */
1835
1836      /* While the cases below attempt to only enable the workaround in the
1837       * specific cases necessary, they were insufficient.  See bugzilla #1519,
1838       * #729, #814.  Tests with quake3 showed no impact on performance.
1839       */
1840      dbg = 0x6;
1841
1842      /*
1843      if (((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (R200_TEX_0_ENABLE )) &&
1844         ((((rmesa->hw.tex[0].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) &
1845         0x04) == 0)) ||
1846         ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_2_ENABLE) &&
1847         ((((rmesa->hw.tex[2].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) &
1848         0x04) == 0)) ||
1849         ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_4_ENABLE) &&
1850         ((((rmesa->hw.tex[4].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) &
1851         0x04) == 0)))
1852      {
1853         dbg |= 0x02;
1854      }
1855
1856      if (((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (R200_TEX_1_ENABLE )) &&
1857         ((((rmesa->hw.tex[1].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) &
1858         0x04) == 0)) ||
1859         ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_3_ENABLE) &&
1860         ((((rmesa->hw.tex[3].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) &
1861         0x04) == 0)) ||
1862         ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_5_ENABLE) &&
1863         ((((rmesa->hw.tex[5].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) &
1864         0x04) == 0)))
1865      {
1866         dbg |= 0x04;
1867      }*/
1868
1869      if (dbg != rmesa->hw.tam.cmd[TAM_DEBUG3]) {
1870         R200_STATECHANGE( rmesa, tam );
1871         rmesa->hw.tam.cmd[TAM_DEBUG3] = dbg;
1872         if (0) printf("TEXCACHE LRU HANG WORKAROUND %x\n", dbg);
1873      }
1874   }
1875}
1876