t_vp_build.c revision f31448f3c86697275bffe5363d473dd128cbd2ac
1/*
2 * Mesa 3-D graphics library
3 * Version:  6.5
4 *
5 * Copyright (C) 2005  Tungsten Graphics   All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20 * TUNGSTEN GRAPHICS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
21 * WHETHER IN
22 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26/**
27 * \file t_vp_build.c
28 * Create a vertex program to execute the current fixed function T&L pipeline.
29 * \author Keith Whitwell
30 */
31
32
33#include "glheader.h"
34#include "macros.h"
35#include "enums.h"
36#include "t_context.h"
37#include "t_vp_build.h"
38
39#include "shader/program.h"
40#include "shader/program_instruction.h"
41#include "shader/arbvertparse.h"
42
/**
 * Snapshot of the fixed-function state recorded by make_state_key().
 *
 * Every member is a bitfield; the order and widths below are kept
 * exactly as they were.
 */
struct state_key {
   /* Global lighting state (only filled in when lighting is enabled). */
   unsigned light_global_enabled:1;
   unsigned light_local_viewer:1;
   unsigned light_twoside:1;
   unsigned light_color_material:1;
   unsigned light_color_material_mask:12;  /* Light.ColorMaterialBitmask */
   unsigned light_material_mask:12;        /* materials with non-zero VB stride */

   /* Normal handling, fog, points and texturing. */
   unsigned normalize:1;
   unsigned rescale_normals:1;
   unsigned fog_source_is_depth:1;
   unsigned tnl_do_vertex_fog:1;
   unsigned separate_specular:1;
   unsigned fog_mode:2;                    /* one of the FOG_* codes */
   unsigned point_attenuated:1;
   unsigned texture_enabled_global:1;
   unsigned fragprog_inputs_read:12;       /* from the current fragment program */

   /* Per-unit state.  make_state_key() indexes this array both by
    * light number (light_* fields) and by texture unit (tex* fields).
    */
   struct {
      unsigned light_enabled:1;
      unsigned light_eyepos3_is_zero:1;
      unsigned light_spotcutoff_is_180:1;
      unsigned light_attenuated:1;
      unsigned texunit_really_enabled:1;
      unsigned texmat_enabled:1;
      unsigned texgen_enabled:4;
      unsigned texgen_mode0:4;             /* TXG_* code for S */
      unsigned texgen_mode1:4;             /* TXG_* code for T */
      unsigned texgen_mode2:4;             /* TXG_* code for R */
      unsigned texgen_mode3:4;             /* TXG_* code for Q */
   } unit[8];
};
75
76
77
78#define FOG_NONE   0
79#define FOG_LINEAR 1
80#define FOG_EXP    2
81#define FOG_EXP2   3
82
83static GLuint translate_fog_mode( GLenum mode )
84{
85   switch (mode) {
86   case GL_LINEAR: return FOG_LINEAR;
87   case GL_EXP: return FOG_EXP;
88   case GL_EXP2: return FOG_EXP2;
89   default: return FOG_NONE;
90   }
91}
92
93#define TXG_NONE           0
94#define TXG_OBJ_LINEAR     1
95#define TXG_EYE_LINEAR     2
96#define TXG_SPHERE_MAP     3
97#define TXG_REFLECTION_MAP 4
98#define TXG_NORMAL_MAP     5
99
100static GLuint translate_texgen( GLboolean enabled, GLenum mode )
101{
102   if (!enabled)
103      return TXG_NONE;
104
105   switch (mode) {
106   case GL_OBJECT_LINEAR: return TXG_OBJ_LINEAR;
107   case GL_EYE_LINEAR: return TXG_EYE_LINEAR;
108   case GL_SPHERE_MAP: return TXG_SPHERE_MAP;
109   case GL_REFLECTION_MAP_NV: return TXG_REFLECTION_MAP;
110   case GL_NORMAL_MAP_NV: return TXG_NORMAL_MAP;
111   default: return TXG_NONE;
112   }
113}
114
115static struct state_key *make_state_key( GLcontext *ctx )
116{
117   TNLcontext *tnl = TNL_CONTEXT(ctx);
118   struct vertex_buffer *VB = &tnl->vb;
119   struct fragment_program *fp = ctx->FragmentProgram._Current;
120   struct state_key *key = CALLOC_STRUCT(state_key);
121   GLuint i;
122
123   /* This now relies on texenvprogram.c being active:
124    */
125   assert(fp);
126
127   key->fragprog_inputs_read = fp->Base.InputsRead;
128
129   key->separate_specular = (ctx->Light.Model.ColorControl ==
130			     GL_SEPARATE_SPECULAR_COLOR);
131
132   if (ctx->Light.Enabled) {
133      key->light_global_enabled = 1;
134
135      if (ctx->Light.Model.LocalViewer)
136	 key->light_local_viewer = 1;
137
138      if (ctx->Light.Model.TwoSide)
139	 key->light_twoside = 1;
140
141      if (ctx->Light.ColorMaterialEnabled) {
142	 key->light_color_material = 1;
143	 key->light_color_material_mask = ctx->Light.ColorMaterialBitmask;
144      }
145
146      for (i = _TNL_ATTRIB_MAT_FRONT_AMBIENT ; i < _TNL_ATTRIB_INDEX ; i++)
147	 if (VB->AttribPtr[i]->stride)
148	    key->light_material_mask |= 1<<(i-_TNL_ATTRIB_MAT_FRONT_AMBIENT);
149
150      for (i = 0; i < MAX_LIGHTS; i++) {
151	 struct gl_light *light = &ctx->Light.Light[i];
152
153	 if (light->Enabled) {
154	    key->unit[i].light_enabled = 1;
155
156	    if (light->EyePosition[3] == 0.0)
157	       key->unit[i].light_eyepos3_is_zero = 1;
158
159	    if (light->SpotCutoff == 180.0)
160	       key->unit[i].light_spotcutoff_is_180 = 1;
161
162	    if (light->ConstantAttenuation != 1.0 ||
163		light->LinearAttenuation != 0.0 ||
164		light->QuadraticAttenuation != 0.0)
165	       key->unit[i].light_attenuated = 1;
166	 }
167      }
168   }
169
170   if (ctx->Transform.Normalize)
171      key->normalize = 1;
172
173   if (ctx->Transform.RescaleNormals)
174      key->rescale_normals = 1;
175
176   key->fog_mode = translate_fog_mode(fp->FogOption);
177
178   if (ctx->Fog.FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT)
179      key->fog_source_is_depth = 1;
180
181   if (tnl->_DoVertexFog)
182      key->tnl_do_vertex_fog = 1;
183
184   if (ctx->Point._Attenuated)
185      key->point_attenuated = 1;
186
187   if (ctx->Texture._TexGenEnabled ||
188       ctx->Texture._TexMatEnabled ||
189       ctx->Texture._EnabledUnits)
190      key->texture_enabled_global = 1;
191
192   for (i = 0; i < MAX_TEXTURE_UNITS; i++) {
193      struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];
194
195      if (texUnit->_ReallyEnabled)
196	 key->unit[i].texunit_really_enabled = 1;
197
198      if (ctx->Texture._TexMatEnabled & ENABLE_TEXMAT(i))
199	 key->unit[i].texmat_enabled = 1;
200
201      if (texUnit->TexGenEnabled) {
202	 key->unit[i].texgen_enabled = 1;
203
204	 key->unit[i].texgen_mode0 =
205	    translate_texgen( texUnit->TexGenEnabled & (1<<0),
206			      texUnit->GenModeS );
207	 key->unit[i].texgen_mode1 =
208	    translate_texgen( texUnit->TexGenEnabled & (1<<1),
209			      texUnit->GenModeT );
210	 key->unit[i].texgen_mode2 =
211	    translate_texgen( texUnit->TexGenEnabled & (1<<2),
212			      texUnit->GenModeR );
213	 key->unit[i].texgen_mode3 =
214	    translate_texgen( texUnit->TexGenEnabled & (1<<3),
215			      texUnit->GenModeQ );
216      }
217   }
218
219   return key;
220}
221
222
223
/* Debugging aid: when set, debug_insn() prints each generated
 * instruction together with the function and line in this file that
 * emitted it.
 */
#define DISASSEM (MESA_VERBOSE & VERBOSE_DISASSEM)

/* Selects between DP4-based matrix multiplication and the MUL/MAD
 * form.  SSE works better with the latter; drivers may differ, so
 * this should eventually be tunable by the driver.
 */
#define PREFER_DP4 0

/* Instruction count limit checked by emit_op3fn(). */
#define MAX_INSN 256
237
238/* Use uregs to represent registers internally, translate to Mesa's
239 * expected formats on emit.
240 *
241 * NOTE: These are passed by value extensively in this file rather
242 * than as usual by pointer reference.  If this disturbs you, try
243 * remembering they are just 32bits in size.
244 *
245 * GCC is smart enough to deal with these dword-sized structures in
246 * much the same way as if I had defined them as dwords and was using
247 * macros to access and set the fields.  This is much nicer and easier
248 * to evolve.
249 */
250struct ureg {
251   GLuint file:4;
252   GLint idx:8;      /* relative addressing may be negative */
253   GLuint negate:1;
254   GLuint swz:12;
255   GLuint pad:7;
256};
257
258
259struct tnl_program {
260   const struct state_key *state;
261   struct vertex_program *program;
262
263   GLuint temp_in_use;
264   GLuint temp_reserved;
265
266   struct ureg eye_position;
267   struct ureg eye_position_normalized;
268   struct ureg eye_normal;
269   struct ureg identity;
270
271   GLuint materials;
272   GLuint color_materials;
273};
274
275
276const static struct ureg undef = {
277   ~0,
278   ~0,
279   0,
280   0,
281   0
282};
283
/* Short local names for the four swizzle components. */
#define X    SWIZZLE_X
#define Y    SWIZZLE_Y
#define Z    SWIZZLE_Z
#define W    SWIZZLE_W
290
291
292/* Construct a ureg:
293 */
294static struct ureg make_ureg(GLuint file, GLint idx)
295{
296   struct ureg reg;
297   reg.file = file;
298   reg.idx = idx;
299   reg.negate = 0;
300   reg.swz = SWIZZLE_NOOP;
301   reg.pad = 0;
302   return reg;
303}
304
305
306
307static struct ureg negate( struct ureg reg )
308{
309   reg.negate ^= 1;
310   return reg;
311}
312
313
314static struct ureg swizzle( struct ureg reg, int x, int y, int z, int w )
315{
316   reg.swz = MAKE_SWIZZLE4(GET_SWZ(reg.swz, x),
317			   GET_SWZ(reg.swz, y),
318			   GET_SWZ(reg.swz, z),
319			   GET_SWZ(reg.swz, w));
320
321   return reg;
322}
323
324static struct ureg swizzle1( struct ureg reg, int x )
325{
326   return swizzle(reg, x, x, x, x);
327}
328
329static struct ureg get_temp( struct tnl_program *p )
330{
331   int bit = _mesa_ffs( ~p->temp_in_use );
332   if (!bit) {
333      _mesa_problem(NULL, "%s: out of temporaries\n", __FILE__);
334      _mesa_exit(1);
335   }
336
337   if (bit > p->program->Base.NumTemporaries)
338      p->program->Base.NumTemporaries = bit;
339
340   p->temp_in_use |= 1<<(bit-1);
341   return make_ureg(PROGRAM_TEMPORARY, bit-1);
342}
343
344static struct ureg reserve_temp( struct tnl_program *p )
345{
346   struct ureg temp = get_temp( p );
347   p->temp_reserved |= 1<<temp.idx;
348   return temp;
349}
350
351static void release_temp( struct tnl_program *p, struct ureg reg )
352{
353   if (reg.file == PROGRAM_TEMPORARY) {
354      p->temp_in_use &= ~(1<<reg.idx);
355      p->temp_in_use |= p->temp_reserved; /* can't release reserved temps */
356   }
357}
358
359static void release_temps( struct tnl_program *p )
360{
361   p->temp_in_use = p->temp_reserved;
362}
363
364
365
366static struct ureg register_input( struct tnl_program *p, GLuint input )
367{
368   p->program->Base.InputsRead |= (1<<input);
369   return make_ureg(PROGRAM_INPUT, input);
370}
371
372static struct ureg register_output( struct tnl_program *p, GLuint output )
373{
374   p->program->Base.OutputsWritten |= (1<<output);
375   return make_ureg(PROGRAM_OUTPUT, output);
376}
377
378static struct ureg register_const4f( struct tnl_program *p,
379			      GLfloat s0,
380			      GLfloat s1,
381			      GLfloat s2,
382			      GLfloat s3)
383{
384   GLfloat values[4];
385   GLint idx;
386   values[0] = s0;
387   values[1] = s1;
388   values[2] = s2;
389   values[3] = s3;
390   idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values );
391   return make_ureg(PROGRAM_STATE_VAR, idx);
392}
393
394#define register_const1f(p, s0)         register_const4f(p, s0, 0, 0, 1)
395#define register_scalar_const(p, s0)    register_const4f(p, s0, s0, s0, s0)
396#define register_const2f(p, s0, s1)     register_const4f(p, s0, s1, 0, 1)
397#define register_const3f(p, s0, s1, s2) register_const4f(p, s0, s1, s2, 1)
398
399static GLboolean is_undef( struct ureg reg )
400{
401   return reg.file == 0xf;
402}
403
404static struct ureg get_identity_param( struct tnl_program *p )
405{
406   if (is_undef(p->identity))
407      p->identity = register_const4f(p, 0,0,0,1);
408
409   return p->identity;
410}
411
412static struct ureg register_param6( struct tnl_program *p,
413				   GLint s0,
414				   GLint s1,
415				   GLint s2,
416				   GLint s3,
417				   GLint s4,
418				   GLint s5)
419{
420   GLint tokens[6];
421   GLint idx;
422   tokens[0] = s0;
423   tokens[1] = s1;
424   tokens[2] = s2;
425   tokens[3] = s3;
426   tokens[4] = s4;
427   tokens[5] = s5;
428   idx = _mesa_add_state_reference( p->program->Base.Parameters, tokens );
429   return make_ureg(PROGRAM_STATE_VAR, idx);
430}
431
432
433#define register_param1(p,s0)          register_param6(p,s0,0,0,0,0,0)
434#define register_param2(p,s0,s1)       register_param6(p,s0,s1,0,0,0,0)
435#define register_param3(p,s0,s1,s2)    register_param6(p,s0,s1,s2,0,0,0)
436#define register_param4(p,s0,s1,s2,s3) register_param6(p,s0,s1,s2,s3,0,0)
437
438
439static void register_matrix_param6( struct tnl_program *p,
440				    GLint s0,
441				    GLint s1,
442				    GLint s2,
443				    GLint s3,
444				    GLint s4,
445				    GLint s5,
446				    struct ureg *matrix )
447{
448   GLuint i;
449
450   /* This is a bit sad as the support is there to pull the whole
451    * matrix out in one go:
452    */
453   for (i = 0; i <= s4 - s3; i++)
454      matrix[i] = register_param6( p, s0, s1, s2, i, i, s5 );
455}
456
457
458static void emit_arg( struct prog_src_register *src,
459		      struct ureg reg )
460{
461   src->File = reg.file;
462   src->Index = reg.idx;
463   src->Swizzle = reg.swz;
464   src->NegateBase = reg.negate;
465   src->Abs = 0;
466   src->NegateAbs = 0;
467   src->RelAddr = 0;
468}
469
470static void emit_dst( struct prog_dst_register *dst,
471		      struct ureg reg, GLuint mask )
472{
473   dst->File = reg.file;
474   dst->Index = reg.idx;
475   /* allow zero as a shorthand for xyzw */
476   dst->WriteMask = mask ? mask : WRITEMASK_XYZW;
477   dst->CondMask = COND_TR;
478   dst->CondSwizzle = 0;
479   dst->CondSrc = 0;
480   dst->pad = 0;
481}
482
483static void debug_insn( struct prog_instruction *inst, const char *fn,
484			GLuint line )
485{
486   if (DISASSEM) {
487      static const char *last_fn;
488
489      if (fn != last_fn) {
490	 last_fn = fn;
491	 _mesa_printf("%s:\n", fn);
492      }
493
494      _mesa_printf("%d:\t", line);
495      _mesa_print_instruction(inst);
496   }
497}
498
499
500static void emit_op3fn(struct tnl_program *p,
501		       GLuint op,
502		       struct ureg dest,
503		       GLuint mask,
504		       struct ureg src0,
505		       struct ureg src1,
506		       struct ureg src2,
507		       const char *fn,
508		       GLuint line)
509{
510   GLuint nr = p->program->Base.NumInstructions++;
511   struct prog_instruction *inst = &p->program->Base.Instructions[nr];
512
513   if (p->program->Base.NumInstructions > MAX_INSN) {
514      _mesa_problem(0, "Out of instructions in emit_op3fn\n");
515      return;
516   }
517
518   inst->Opcode = op;
519   inst->StringPos = 0;
520   inst->Data = 0;
521
522   emit_arg( &inst->SrcReg[0], src0 );
523   emit_arg( &inst->SrcReg[1], src1 );
524   emit_arg( &inst->SrcReg[2], src2 );
525
526   emit_dst( &inst->DstReg, dest, mask );
527
528   debug_insn(inst, fn, line);
529}
530
531
532#define emit_op3(p, op, dst, mask, src0, src1, src2) \
533   emit_op3fn(p, op, dst, mask, src0, src1, src2, __FUNCTION__, __LINE__)
534
535#define emit_op2(p, op, dst, mask, src0, src1) \
536    emit_op3fn(p, op, dst, mask, src0, src1, undef, __FUNCTION__, __LINE__)
537
538#define emit_op1(p, op, dst, mask, src0) \
539    emit_op3fn(p, op, dst, mask, src0, undef, undef, __FUNCTION__, __LINE__)
540
541
542static struct ureg make_temp( struct tnl_program *p, struct ureg reg )
543{
544   if (reg.file == PROGRAM_TEMPORARY &&
545       !(p->temp_reserved & (1<<reg.idx)))
546      return reg;
547   else {
548      struct ureg temp = get_temp(p);
549      emit_op1(p, OPCODE_MOV, temp, 0, reg);
550      return temp;
551   }
552}
553
554
555/* Currently no tracking performed of input/output/register size or
556 * active elements.  Could be used to reduce these operations, as
557 * could the matrix type.
558 */
559static void emit_matrix_transform_vec4( struct tnl_program *p,
560					struct ureg dest,
561					const struct ureg *mat,
562					struct ureg src)
563{
564   emit_op2(p, OPCODE_DP4, dest, WRITEMASK_X, src, mat[0]);
565   emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Y, src, mat[1]);
566   emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Z, src, mat[2]);
567   emit_op2(p, OPCODE_DP4, dest, WRITEMASK_W, src, mat[3]);
568}
569
570/* This version is much easier to implement if writemasks are not
571 * supported natively on the target or (like SSE), the target doesn't
572 * have a clean/obvious dotproduct implementation.
573 */
574static void emit_transpose_matrix_transform_vec4( struct tnl_program *p,
575						  struct ureg dest,
576						  const struct ureg *mat,
577						  struct ureg src)
578{
579   struct ureg tmp;
580
581   if (dest.file != PROGRAM_TEMPORARY)
582      tmp = get_temp(p);
583   else
584      tmp = dest;
585
586   emit_op2(p, OPCODE_MUL, tmp, 0, swizzle1(src,X), mat[0]);
587   emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Y), mat[1], tmp);
588   emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Z), mat[2], tmp);
589   emit_op3(p, OPCODE_MAD, dest, 0, swizzle1(src,W), mat[3], tmp);
590
591   if (dest.file != PROGRAM_TEMPORARY)
592      release_temp(p, tmp);
593}
594
595static void emit_matrix_transform_vec3( struct tnl_program *p,
596					struct ureg dest,
597					const struct ureg *mat,
598					struct ureg src)
599{
600   emit_op2(p, OPCODE_DP3, dest, WRITEMASK_X, src, mat[0]);
601   emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Y, src, mat[1]);
602   emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Z, src, mat[2]);
603}
604
605
606static void emit_normalize_vec3( struct tnl_program *p,
607				 struct ureg dest,
608				 struct ureg src )
609{
610   struct ureg tmp = get_temp(p);
611   emit_op2(p, OPCODE_DP3, tmp, 0, src, src);
612   emit_op1(p, OPCODE_RSQ, tmp, 0, tmp);
613   emit_op2(p, OPCODE_MUL, dest, 0, src, tmp);
614   release_temp(p, tmp);
615}
616
617static void emit_passthrough( struct tnl_program *p,
618			      GLuint input,
619			      GLuint output )
620{
621   struct ureg out = register_output(p, output);
622   emit_op1(p, OPCODE_MOV, out, 0, register_input(p, input));
623}
624
625static struct ureg get_eye_position( struct tnl_program *p )
626{
627   if (is_undef(p->eye_position)) {
628      struct ureg pos = register_input( p, VERT_ATTRIB_POS );
629      struct ureg modelview[4];
630
631      p->eye_position = reserve_temp(p);
632
633      if (PREFER_DP4) {
634	 register_matrix_param6( p, STATE_MATRIX, STATE_MODELVIEW, 0, 0, 3,
635				 STATE_MATRIX, modelview );
636
637	 emit_matrix_transform_vec4(p, p->eye_position, modelview, pos);
638      }
639      else {
640	 register_matrix_param6( p, STATE_MATRIX, STATE_MODELVIEW, 0, 0, 3,
641				 STATE_MATRIX_TRANSPOSE, modelview );
642
643	 emit_transpose_matrix_transform_vec4(p, p->eye_position, modelview, pos);
644      }
645   }
646
647   return p->eye_position;
648}
649
650
651static struct ureg get_eye_position_normalized( struct tnl_program *p )
652{
653   if (is_undef(p->eye_position_normalized)) {
654      struct ureg eye = get_eye_position(p);
655      p->eye_position_normalized = reserve_temp(p);
656      emit_normalize_vec3(p, p->eye_position_normalized, eye);
657   }
658
659   return p->eye_position_normalized;
660}
661
662
663static struct ureg get_eye_normal( struct tnl_program *p )
664{
665   if (is_undef(p->eye_normal)) {
666      struct ureg normal = register_input(p, VERT_ATTRIB_NORMAL );
667      struct ureg mvinv[3];
668
669      register_matrix_param6( p, STATE_MATRIX, STATE_MODELVIEW, 0, 0, 2,
670			      STATE_MATRIX_INVTRANS, mvinv );
671
672      p->eye_normal = reserve_temp(p);
673
674      /* Transform to eye space:
675       */
676      emit_matrix_transform_vec3( p, p->eye_normal, mvinv, normal );
677
678      /* Normalize/Rescale:
679       */
680      if (p->state->normalize) {
681	 emit_normalize_vec3( p, p->eye_normal, p->eye_normal );
682      }
683      else if (p->state->rescale_normals) {
684	 struct ureg rescale = register_param2(p, STATE_INTERNAL,
685					       STATE_NORMAL_SCALE);
686
687	 emit_op2( p, OPCODE_MUL, p->eye_normal, 0, normal,
688		   swizzle1(rescale, X));
689      }
690   }
691
692   return p->eye_normal;
693}
694
695
696
697static void build_hpos( struct tnl_program *p )
698{
699   struct ureg pos = register_input( p, VERT_ATTRIB_POS );
700   struct ureg hpos = register_output( p, VERT_RESULT_HPOS );
701   struct ureg mvp[4];
702
703   if (PREFER_DP4) {
704      register_matrix_param6( p, STATE_MATRIX, STATE_MVP, 0, 0, 3,
705			      STATE_MATRIX, mvp );
706      emit_matrix_transform_vec4( p, hpos, mvp, pos );
707   }
708   else {
709      register_matrix_param6( p, STATE_MATRIX, STATE_MVP, 0, 0, 3,
710			      STATE_MATRIX_TRANSPOSE, mvp );
711      emit_transpose_matrix_transform_vec4( p, hpos, mvp, pos );
712   }
713}
714
715
716static GLuint material_attrib( GLuint side, GLuint property )
717{
718   return ((property - STATE_AMBIENT) * 2 +
719	   side);
720}
721
722/* Get a bitmask of which material values vary on a per-vertex basis.
723 */
724static void set_material_flags( struct tnl_program *p )
725{
726   p->color_materials = 0;
727   p->materials = 0;
728
729   if (p->state->light_color_material) {
730      p->materials =
731	 p->color_materials = p->state->light_color_material_mask;
732   }
733
734   p->materials |= p->state->light_material_mask;
735}
736
737
738static struct ureg get_material( struct tnl_program *p, GLuint side,
739				 GLuint property )
740{
741   GLuint attrib = material_attrib(side, property);
742
743   if (p->color_materials & (1<<attrib))
744      return register_input(p, VERT_ATTRIB_COLOR0);
745   else if (p->materials & (1<<attrib))
746      return register_input( p, attrib + _TNL_ATTRIB_MAT_FRONT_AMBIENT );
747   else
748      return register_param3( p, STATE_MATERIAL, side, property );
749}
750
751#define SCENE_COLOR_BITS(side) (( MAT_BIT_FRONT_EMISSION | \
752				   MAT_BIT_FRONT_AMBIENT | \
753				   MAT_BIT_FRONT_DIFFUSE) << (side))
754
755/* Either return a precalculated constant value or emit code to
756 * calculate these values dynamically in the case where material calls
757 * are present between begin/end pairs.
758 *
759 * Probably want to shift this to the program compilation phase - if
760 * we always emitted the calculation here, a smart compiler could
761 * detect that it was constant (given a certain set of inputs), and
762 * lift it out of the main loop.  That way the programs created here
763 * would be independent of the vertex_buffer details.
764 */
765static struct ureg get_scenecolor( struct tnl_program *p, GLuint side )
766{
767   if (p->materials & SCENE_COLOR_BITS(side)) {
768      struct ureg lm_ambient = register_param1(p, STATE_LIGHTMODEL_AMBIENT);
769      struct ureg material_emission = get_material(p, side, STATE_EMISSION);
770      struct ureg material_ambient = get_material(p, side, STATE_AMBIENT);
771      struct ureg material_diffuse = get_material(p, side, STATE_DIFFUSE);
772      struct ureg tmp = make_temp(p, material_diffuse);
773      emit_op3(p, OPCODE_MAD, tmp,  WRITEMASK_XYZ, lm_ambient,
774	       material_ambient, material_emission);
775      return tmp;
776   }
777   else
778      return register_param2( p, STATE_LIGHTMODEL_SCENECOLOR, side );
779}
780
781
782static struct ureg get_lightprod( struct tnl_program *p, GLuint light,
783				  GLuint side, GLuint property )
784{
785   GLuint attrib = material_attrib(side, property);
786   if (p->materials & (1<<attrib)) {
787      struct ureg light_value =
788	 register_param3(p, STATE_LIGHT, light, property);
789      struct ureg material_value = get_material(p, side, property);
790      struct ureg tmp = get_temp(p);
791      emit_op2(p, OPCODE_MUL, tmp,  0, light_value, material_value);
792      return tmp;
793   }
794   else
795      return register_param4(p, STATE_LIGHTPROD, light, side, property);
796}
797
798static struct ureg calculate_light_attenuation( struct tnl_program *p,
799						GLuint i,
800						struct ureg VPpli,
801						struct ureg dist )
802{
803   struct ureg attenuation = register_param3(p, STATE_LIGHT, i,
804					     STATE_ATTENUATION);
805   struct ureg att = get_temp(p);
806
807   /* Calculate spot attenuation:
808    */
809   if (!p->state->unit[i].light_spotcutoff_is_180) {
810      struct ureg spot_dir = register_param3(p, STATE_LIGHT, i,
811					     STATE_SPOT_DIRECTION);
812      struct ureg spot = get_temp(p);
813      struct ureg slt = get_temp(p);
814
815      emit_normalize_vec3( p, spot, spot_dir ); /* XXX: precompute! */
816      emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot);
817      emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir,W), spot);
818      emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W));
819      emit_op2(p, OPCODE_MUL, att, 0, slt, spot);
820
821      release_temp(p, spot);
822      release_temp(p, slt);
823   }
824
825   /* Calculate distance attenuation:
826    */
827   if (p->state->unit[i].light_attenuated) {
828
829      /* 1/d,d,d,1/d */
830      emit_op1(p, OPCODE_RCP, dist, WRITEMASK_YZ, dist);
831      /* 1,d,d*d,1/d */
832      emit_op2(p, OPCODE_MUL, dist, WRITEMASK_XZ, dist, swizzle1(dist,Y));
833      /* 1/dist-atten */
834      emit_op2(p, OPCODE_DP3, dist, 0, attenuation, dist);
835
836      if (!p->state->unit[i].light_spotcutoff_is_180) {
837	 /* dist-atten */
838	 emit_op1(p, OPCODE_RCP, dist, 0, dist);
839	 /* spot-atten * dist-atten */
840	 emit_op2(p, OPCODE_MUL, att, 0, dist, att);
841      } else {
842	 /* dist-atten */
843	 emit_op1(p, OPCODE_RCP, att, 0, dist);
844      }
845   }
846
847   return att;
848}
849
850
851
852
853
854/* Need to add some addtional parameters to allow lighting in object
855 * space - STATE_SPOT_DIRECTION and STATE_HALF implicitly assume eye
856 * space lighting.
857 */
858static void build_lighting( struct tnl_program *p )
859{
860   const GLboolean twoside = p->state->light_twoside;
861   const GLboolean separate = p->state->separate_specular;
862   GLuint nr_lights = 0, count = 0;
863   struct ureg normal = get_eye_normal(p);
864   struct ureg lit = get_temp(p);
865   struct ureg dots = get_temp(p);
866   struct ureg _col0 = undef, _col1 = undef;
867   struct ureg _bfc0 = undef, _bfc1 = undef;
868   GLuint i;
869
870   for (i = 0; i < MAX_LIGHTS; i++)
871      if (p->state->unit[i].light_enabled)
872	 nr_lights++;
873
874   set_material_flags(p);
875
876   {
877      struct ureg shininess = get_material(p, 0, STATE_SHININESS);
878      emit_op1(p, OPCODE_MOV, dots,  WRITEMASK_W, swizzle1(shininess,X));
879      release_temp(p, shininess);
880
881      _col0 = make_temp(p, get_scenecolor(p, 0));
882      if (separate)
883	 _col1 = make_temp(p, get_identity_param(p));
884      else
885	 _col1 = _col0;
886
887   }
888
889   if (twoside) {
890      struct ureg shininess = get_material(p, 1, STATE_SHININESS);
891      emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z,
892	       negate(swizzle1(shininess,X)));
893      release_temp(p, shininess);
894
895      _bfc0 = make_temp(p, get_scenecolor(p, 1));
896      if (separate)
897	 _bfc1 = make_temp(p, get_identity_param(p));
898      else
899	 _bfc1 = _bfc0;
900   }
901
902
903   /* If no lights, still need to emit the scenecolor.
904    */
905      {
906	 struct ureg res0 = register_output( p, VERT_RESULT_COL0 );
907	 emit_op1(p, OPCODE_MOV, res0, 0, _col0);
908      }
909
910      if (separate) {
911	 struct ureg res1 = register_output( p, VERT_RESULT_COL1 );
912	 emit_op1(p, OPCODE_MOV, res1, 0, _col1);
913      }
914
915      if (twoside) {
916	 struct ureg res0 = register_output( p, VERT_RESULT_BFC0 );
917	 emit_op1(p, OPCODE_MOV, res0, 0, _bfc0);
918      }
919
920      if (twoside && separate) {
921	 struct ureg res1 = register_output( p, VERT_RESULT_BFC1 );
922	 emit_op1(p, OPCODE_MOV, res1, 0, _bfc1);
923      }
924
925   if (nr_lights == 0) {
926      release_temps(p);
927      return;
928   }
929
930
931   for (i = 0; i < MAX_LIGHTS; i++) {
932      if (p->state->unit[i].light_enabled) {
933	 struct ureg half = undef;
934	 struct ureg att = undef, VPpli = undef;
935
936	 count++;
937
938	 if (p->state->unit[i].light_eyepos3_is_zero) {
939	    /* Can used precomputed constants in this case.
940	     * Attenuation never applies to infinite lights.
941	     */
942	    VPpli = register_param3(p, STATE_LIGHT, i,
943				    STATE_POSITION_NORMALIZED);
944	    half = register_param3(p, STATE_LIGHT, i, STATE_HALF);
945	 }
946	 else {
947	    struct ureg Ppli = register_param3(p, STATE_LIGHT, i,
948					       STATE_POSITION);
949	    struct ureg V = get_eye_position(p);
950	    struct ureg dist = get_temp(p);
951
952	    VPpli = get_temp(p);
953	    half = get_temp(p);
954
955	    /* Calulate VPpli vector
956	     */
957	    emit_op2(p, OPCODE_SUB, VPpli, 0, Ppli, V);
958
959	    /* Normalize VPpli.  The dist value also used in
960	     * attenuation below.
961	     */
962	    emit_op2(p, OPCODE_DP3, dist, 0, VPpli, VPpli);
963	    emit_op1(p, OPCODE_RSQ, dist, 0, dist);
964	    emit_op2(p, OPCODE_MUL, VPpli, 0, VPpli, dist);
965
966
967	    /* Calculate  attenuation:
968	     */
969	    if (!p->state->unit[i].light_spotcutoff_is_180 ||
970		p->state->unit[i].light_attenuated) {
971	       att = calculate_light_attenuation(p, i, VPpli, dist);
972	    }
973
974
975	    /* Calculate viewer direction, or use infinite viewer:
976	     */
977	    if (p->state->light_local_viewer) {
978	       struct ureg eye_hat = get_eye_position_normalized(p);
979	       emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
980	    }
981	    else {
982	       struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z);
983	       emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir);
984	    }
985
986	    emit_normalize_vec3(p, half, half);
987
988	    release_temp(p, dist);
989	 }
990
991	 /* Calculate dot products:
992	  */
993	 emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli);
994	 emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half);
995
996
997	 /* Front face lighting:
998	  */
999	 {
1000	    struct ureg ambient = get_lightprod(p, i, 0, STATE_AMBIENT);
1001	    struct ureg diffuse = get_lightprod(p, i, 0, STATE_DIFFUSE);
1002	    struct ureg specular = get_lightprod(p, i, 0, STATE_SPECULAR);
1003	    struct ureg res0, res1;
1004	    GLuint mask0, mask1;
1005
1006	    emit_op1(p, OPCODE_LIT, lit, 0, dots);
1007
1008	    if (!is_undef(att))
1009	       emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
1010
1011
1012	    if (count == nr_lights) {
1013	       if (separate) {
1014		  mask0 = WRITEMASK_XYZ;
1015		  mask1 = WRITEMASK_XYZ;
1016		  res0 = register_output( p, VERT_RESULT_COL0 );
1017		  res1 = register_output( p, VERT_RESULT_COL1 );
1018	       }
1019	       else {
1020		  mask0 = 0;
1021		  mask1 = WRITEMASK_XYZ;
1022		  res0 = _col0;
1023		  res1 = register_output( p, VERT_RESULT_COL0 );
1024	       }
1025	    } else {
1026	       mask0 = 0;
1027	       mask1 = 0;
1028	       res0 = _col0;
1029	       res1 = _col1;
1030	    }
1031
1032	    emit_op3(p, OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0);
1033	    emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _col0);
1034	    emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _col1);
1035
1036	    release_temp(p, ambient);
1037	    release_temp(p, diffuse);
1038	    release_temp(p, specular);
1039	 }
1040
1041	 /* Back face lighting:
1042	  */
1043	 if (twoside) {
1044	    struct ureg ambient = get_lightprod(p, i, 1, STATE_AMBIENT);
1045	    struct ureg diffuse = get_lightprod(p, i, 1, STATE_DIFFUSE);
1046	    struct ureg specular = get_lightprod(p, i, 1, STATE_SPECULAR);
1047	    struct ureg res0, res1;
1048	    GLuint mask0, mask1;
1049
1050	    emit_op1(p, OPCODE_LIT, lit, 0, negate(swizzle(dots,X,Y,W,Z)));
1051
1052	    if (!is_undef(att))
1053	       emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
1054
1055	    if (count == nr_lights) {
1056	       if (separate) {
1057		  mask0 = WRITEMASK_XYZ;
1058		  mask1 = WRITEMASK_XYZ;
1059		  res0 = register_output( p, VERT_RESULT_BFC0 );
1060		  res1 = register_output( p, VERT_RESULT_BFC1 );
1061	       }
1062	       else {
1063		  mask0 = 0;
1064		  mask1 = WRITEMASK_XYZ;
1065		  res0 = _bfc0;
1066		  res1 = register_output( p, VERT_RESULT_BFC0 );
1067	       }
1068	    } else {
1069	       res0 = _bfc0;
1070	       res1 = _bfc1;
1071	       mask0 = 0;
1072	       mask1 = 0;
1073	    }
1074
1075	    emit_op3(p, OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0);
1076	    emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _bfc0);
1077	    emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _bfc1);
1078
1079	    release_temp(p, ambient);
1080	    release_temp(p, diffuse);
1081	    release_temp(p, specular);
1082	 }
1083
1084	 release_temp(p, half);
1085	 release_temp(p, VPpli);
1086	 release_temp(p, att);
1087      }
1088   }
1089
1090   release_temps( p );
1091}
1092
1093
1094static void build_fog( struct tnl_program *p )
1095{
1096   struct ureg fog = register_output(p, VERT_RESULT_FOGC);
1097   struct ureg input;
1098
1099   if (p->state->fog_source_is_depth) {
1100      input = swizzle1(get_eye_position(p), Z);
1101   }
1102   else {
1103      input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X);
1104   }
1105
1106   if (p->state->tnl_do_vertex_fog) {
1107      struct ureg params = register_param1(p, STATE_FOG_PARAMS);
1108      struct ureg tmp = get_temp(p);
1109
1110      switch (p->state->fog_mode) {
1111      case FOG_LINEAR: {
1112	 struct ureg id = get_identity_param(p);
1113	 emit_op2(p, OPCODE_SUB, tmp, 0, swizzle1(params,Z), input);
1114	 emit_op2(p, OPCODE_MUL, tmp, 0, tmp, swizzle1(params,W));
1115	 emit_op2(p, OPCODE_MAX, tmp, 0, tmp, swizzle1(id,X)); /* saturate */
1116	 emit_op2(p, OPCODE_MIN, fog, WRITEMASK_X, tmp, swizzle1(id,W));
1117	 break;
1118      }
1119      case FOG_EXP:
1120	 emit_op1(p, OPCODE_ABS, tmp, 0, input);
1121	 emit_op2(p, OPCODE_MUL, tmp, 0, tmp, swizzle1(params,X));
1122	 emit_op2(p, OPCODE_POW, fog, WRITEMASK_X,
1123		  register_const1f(p, M_E), negate(tmp));
1124	 break;
1125      case FOG_EXP2:
1126	 emit_op2(p, OPCODE_MUL, tmp, 0, input, swizzle1(params,X));
1127	 emit_op2(p, OPCODE_MUL, tmp, 0, tmp, tmp);
1128	 emit_op2(p, OPCODE_POW, fog, WRITEMASK_X,
1129		  register_const1f(p, M_E), negate(tmp));
1130	 break;
1131      }
1132
1133      release_temp(p, tmp);
1134   }
1135   else {
1136      /* results = incoming fog coords (compute fog per-fragment later)
1137       *
1138       * KW:  Is it really necessary to do anything in this case?
1139       */
1140      emit_op1(p, OPCODE_MOV, fog, WRITEMASK_X, input);
1141   }
1142}
1143
1144static void build_reflect_texgen( struct tnl_program *p,
1145				  struct ureg dest,
1146				  GLuint writemask )
1147{
1148   struct ureg normal = get_eye_normal(p);
1149   struct ureg eye_hat = get_eye_position_normalized(p);
1150   struct ureg tmp = get_temp(p);
1151
1152   /* n.u */
1153   emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat);
1154   /* 2n.u */
1155   emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp);
1156   /* (-2n.u)n + u */
1157   emit_op3(p, OPCODE_MAD, dest, writemask, negate(tmp), normal, eye_hat);
1158}
1159
1160static void build_sphere_texgen( struct tnl_program *p,
1161				 struct ureg dest,
1162				 GLuint writemask )
1163{
1164   struct ureg normal = get_eye_normal(p);
1165   struct ureg eye_hat = get_eye_position_normalized(p);
1166   struct ureg tmp = get_temp(p);
1167   struct ureg half = register_scalar_const(p, .5);
1168   struct ureg r = get_temp(p);
1169   struct ureg inv_m = get_temp(p);
1170   struct ureg id = get_identity_param(p);
1171
1172   /* Could share the above calculations, but it would be
1173    * a fairly odd state for someone to set (both sphere and
1174    * reflection active for different texture coordinate
1175    * components.  Of course - if two texture units enable
1176    * reflect and/or sphere, things start to tilt in favour
1177    * of seperating this out:
1178    */
1179
1180   /* n.u */
1181   emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat);
1182   /* 2n.u */
1183   emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp);
1184   /* (-2n.u)n + u */
1185   emit_op3(p, OPCODE_MAD, r, 0, negate(tmp), normal, eye_hat);
1186   /* r + 0,0,1 */
1187   emit_op2(p, OPCODE_ADD, tmp, 0, r, swizzle(id,X,Y,W,Z));
1188   /* rx^2 + ry^2 + (rz+1)^2 */
1189   emit_op2(p, OPCODE_DP3, tmp, 0, tmp, tmp);
1190   /* 2/m */
1191   emit_op1(p, OPCODE_RSQ, tmp, 0, tmp);
1192   /* 1/m */
1193   emit_op2(p, OPCODE_MUL, inv_m, 0, tmp, half);
1194   /* r/m + 1/2 */
1195   emit_op3(p, OPCODE_MAD, dest, writemask, r, inv_m, half);
1196
1197   release_temp(p, tmp);
1198   release_temp(p, r);
1199   release_temp(p, inv_m);
1200}
1201
1202
1203static void build_texture_transform( struct tnl_program *p )
1204{
1205   GLuint i, j;
1206
1207   for (i = 0; i < MAX_TEXTURE_UNITS; i++) {
1208
1209      if (!(p->state->fragprog_inputs_read & (FRAG_BIT_TEX0<<i)))
1210	 continue;
1211
1212      if (p->state->unit[i].texgen_enabled ||
1213	  p->state->unit[i].texmat_enabled) {
1214
1215	 GLuint texmat_enabled = p->state->unit[i].texmat_enabled;
1216	 struct ureg out = register_output(p, VERT_RESULT_TEX0 + i);
1217	 struct ureg out_texgen = undef;
1218
1219	 if (p->state->unit[i].texgen_enabled) {
1220	    GLuint copy_mask = 0;
1221	    GLuint sphere_mask = 0;
1222	    GLuint reflect_mask = 0;
1223	    GLuint normal_mask = 0;
1224	    GLuint modes[4];
1225
1226	    if (texmat_enabled)
1227	       out_texgen = get_temp(p);
1228	    else
1229	       out_texgen = out;
1230
1231	    modes[0] = p->state->unit[i].texgen_mode0;
1232	    modes[1] = p->state->unit[i].texgen_mode1;
1233	    modes[2] = p->state->unit[i].texgen_mode2;
1234	    modes[3] = p->state->unit[i].texgen_mode3;
1235
1236	    for (j = 0; j < 4; j++) {
1237	       switch (modes[j]) {
1238	       case TXG_OBJ_LINEAR: {
1239		  struct ureg obj = register_input(p, VERT_ATTRIB_POS);
1240		  struct ureg plane =
1241		     register_param3(p, STATE_TEXGEN, i,
1242				     STATE_TEXGEN_OBJECT_S + j);
1243
1244		  emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j,
1245			   obj, plane );
1246		  break;
1247	       }
1248	       case TXG_EYE_LINEAR: {
1249		  struct ureg eye = get_eye_position(p);
1250		  struct ureg plane =
1251		     register_param3(p, STATE_TEXGEN, i,
1252				     STATE_TEXGEN_EYE_S + j);
1253
1254		  emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j,
1255			   eye, plane );
1256		  break;
1257	       }
1258	       case TXG_SPHERE_MAP:
1259		  sphere_mask |= WRITEMASK_X << j;
1260		  break;
1261	       case TXG_REFLECTION_MAP:
1262		  reflect_mask |= WRITEMASK_X << j;
1263		  break;
1264	       case TXG_NORMAL_MAP:
1265		  normal_mask |= WRITEMASK_X << j;
1266		  break;
1267	       case TXG_NONE:
1268		  copy_mask |= WRITEMASK_X << j;
1269	       }
1270
1271	    }
1272
1273
1274	    if (sphere_mask) {
1275	       build_sphere_texgen(p, out_texgen, sphere_mask);
1276	    }
1277
1278	    if (reflect_mask) {
1279	       build_reflect_texgen(p, out_texgen, reflect_mask);
1280	    }
1281
1282	    if (normal_mask) {
1283	       struct ureg normal = get_eye_normal(p);
1284	       emit_op1(p, OPCODE_MOV, out_texgen, normal_mask, normal );
1285	    }
1286
1287	    if (copy_mask) {
1288	       struct ureg in = register_input(p, VERT_ATTRIB_TEX0+i);
1289	       emit_op1(p, OPCODE_MOV, out_texgen, copy_mask, in );
1290	    }
1291	 }
1292
1293	 if (texmat_enabled) {
1294	    struct ureg texmat[4];
1295	    struct ureg in = (!is_undef(out_texgen) ?
1296			      out_texgen :
1297			      register_input(p, VERT_ATTRIB_TEX0+i));
1298	    if (PREFER_DP4) {
1299	       register_matrix_param6( p, STATE_MATRIX, STATE_TEXTURE, i,
1300				       0, 3, STATE_MATRIX, texmat );
1301	       emit_matrix_transform_vec4( p, out, texmat, in );
1302	    }
1303	    else {
1304	       register_matrix_param6( p, STATE_MATRIX, STATE_TEXTURE, i,
1305				       0, 3, STATE_MATRIX_TRANSPOSE, texmat );
1306	       emit_transpose_matrix_transform_vec4( p, out, texmat, in );
1307	    }
1308	 }
1309
1310	 release_temps(p);
1311      }
1312      else {
1313	 emit_passthrough(p, VERT_ATTRIB_TEX0+i, VERT_RESULT_TEX0+i);
1314      }
1315   }
1316}
1317
1318
1319/* Seems like it could be tighter:
1320 */
1321static void build_pointsize( struct tnl_program *p )
1322{
1323   struct ureg eye = get_eye_position(p);
1324   struct ureg state_size = register_param1(p, STATE_POINT_SIZE);
1325   struct ureg state_attenuation = register_param1(p, STATE_POINT_ATTENUATION);
1326   struct ureg out = register_output(p, VERT_RESULT_PSIZ);
1327   struct ureg ut = get_temp(p);
1328
1329   /* 1, -Z, Z * Z, 1 */
1330   emit_op1(p, OPCODE_MOV, ut, 0, swizzle1(get_identity_param(p), W));
1331   emit_op2(p, OPCODE_MUL, ut, WRITEMASK_YZ, ut, negate(swizzle1(eye, Z)));
1332   emit_op2(p, OPCODE_MUL, ut, WRITEMASK_Z, ut, negate(swizzle1(eye, Z)));
1333
1334
1335   /* p1 +  p2 * dist + p3 * dist * dist, 0 */
1336   emit_op2(p, OPCODE_DP3, ut, 0, ut, state_attenuation);
1337
1338   /* 1 / factor */
1339   emit_op1(p, OPCODE_RCP, ut, 0, ut );
1340
1341   /* out = pointSize / factor */
1342   emit_op2(p, OPCODE_MUL, out, WRITEMASK_X, ut, state_size);
1343
1344   release_temp(p, ut);
1345}
1346
1347static void build_tnl_program( struct tnl_program *p )
1348{   /* Emit the program, starting with modelviewproject:
1349    */
1350   build_hpos(p);
1351
1352   /* Lighting calculations:
1353    */
1354   if (p->state->fragprog_inputs_read & (FRAG_BIT_COL0|FRAG_BIT_COL1)) {
1355      if (p->state->light_global_enabled)
1356	 build_lighting(p);
1357      else {
1358	 if (p->state->fragprog_inputs_read & FRAG_BIT_COL0)
1359	    emit_passthrough(p, VERT_ATTRIB_COLOR0, VERT_RESULT_COL0);
1360
1361	 if (p->state->fragprog_inputs_read & FRAG_BIT_COL1)
1362	    emit_passthrough(p, VERT_ATTRIB_COLOR1, VERT_RESULT_COL1);
1363      }
1364   }
1365
1366   if ((p->state->fragprog_inputs_read & FRAG_BIT_FOGC) ||
1367       p->state->fog_mode != FOG_NONE)
1368      build_fog(p);
1369
1370   if (p->state->fragprog_inputs_read & FRAG_BITS_TEX_ANY)
1371      build_texture_transform(p);
1372
1373   if (p->state->point_attenuated)
1374      build_pointsize(p);
1375
1376   /* Finish up:
1377    */
1378   emit_op1(p, OPCODE_END, undef, 0, undef);
1379
1380   /* Disassemble:
1381    */
1382   if (DISASSEM) {
1383      _mesa_printf ("\n");
1384   }
1385}
1386
1387
1388static void
1389create_new_program( const struct state_key *key,
1390                    struct vertex_program *program,
1391                    GLuint max_temps)
1392{
1393   struct tnl_program p;
1394
1395   _mesa_memset(&p, 0, sizeof(p));
1396   p.state = key;
1397   p.program = program;
1398   p.eye_position = undef;
1399   p.eye_position_normalized = undef;
1400   p.eye_normal = undef;
1401   p.identity = undef;
1402   p.temp_in_use = 0;
1403
1404   if (max_temps >= sizeof(int) * 8)
1405      p.temp_reserved = 0;
1406   else
1407      p.temp_reserved = ~((1<<max_temps)-1);
1408
1409   p.program->Base.Instructions
1410      = MALLOC(sizeof(struct prog_instruction) * MAX_INSN);
1411   p.program->Base.String = 0;
1412   p.program->Base.NumInstructions =
1413   p.program->Base.NumTemporaries =
1414   p.program->Base.NumParameters =
1415   p.program->Base.NumAttributes = p.program->Base.NumAddressRegs = 0;
1416   p.program->Base.Parameters = _mesa_new_parameter_list();
1417   p.program->Base.InputsRead = 0;
1418   p.program->Base.OutputsWritten = 0;
1419
1420   build_tnl_program( &p );
1421}
1422
1423static void *search_cache( struct tnl_cache *cache,
1424			   GLuint hash,
1425			   const void *key,
1426			   GLuint keysize)
1427{
1428   struct tnl_cache_item *c;
1429
1430   for (c = cache->items[hash % cache->size]; c; c = c->next) {
1431      if (c->hash == hash && _mesa_memcmp(c->key, key, keysize) == 0)
1432	 return c->data;
1433   }
1434
1435   return NULL;
1436}
1437
1438static void rehash( struct tnl_cache *cache )
1439{
1440   struct tnl_cache_item **items;
1441   struct tnl_cache_item *c, *next;
1442   GLuint size, i;
1443
1444   size = cache->size * 3;
1445   items = MALLOC(size * sizeof(*items));
1446   _mesa_memset(items, 0, size * sizeof(*items));
1447
1448   for (i = 0; i < cache->size; i++)
1449      for (c = cache->items[i]; c; c = next) {
1450	 next = c->next;
1451	 c->next = items[c->hash % size];
1452	 items[c->hash % size] = c;
1453      }
1454
1455   FREE(cache->items);
1456   cache->items = items;
1457   cache->size = size;
1458}
1459
1460static void cache_item( struct tnl_cache *cache,
1461			GLuint hash,
1462			void *key,
1463			void *data )
1464{
1465   struct tnl_cache_item *c = MALLOC(sizeof(*c));
1466   c->hash = hash;
1467   c->key = key;
1468   c->data = data;
1469
1470   if (++cache->n_items > cache->size * 1.5)
1471      rehash(cache);
1472
1473   c->next = cache->items[hash % cache->size];
1474   cache->items[hash % cache->size] = c;
1475}
1476
1477static GLuint hash_key( struct state_key *key )
1478{
1479   GLuint *ikey = (GLuint *)key;
1480   GLuint hash = 0, i;
1481
1482   /* I'm sure this can be improved on, but speed is important:
1483    */
1484   for (i = 0; i < sizeof(*key)/sizeof(GLuint); i++)
1485      hash ^= ikey[i];
1486
1487   return hash;
1488}
1489
1490void _tnl_UpdateFixedFunctionProgram( GLcontext *ctx )
1491{
1492   TNLcontext *tnl = TNL_CONTEXT(ctx);
1493   struct state_key *key;
1494   GLuint hash;
1495   struct vertex_program *prev = ctx->VertexProgram._Current;
1496
1497   if (ctx->VertexProgram._Enabled == GL_FALSE) {
1498      /* Grab all the relevent state and put it in a single structure:
1499       */
1500      key = make_state_key(ctx);
1501      hash = hash_key(key);
1502
1503      /* Look for an already-prepared program for this state:
1504       */
1505      ctx->_TnlProgram = (struct vertex_program *)
1506	 search_cache( tnl->vp_cache, hash, key, sizeof(*key) );
1507
1508      /* OK, we'll have to build a new one:
1509       */
1510      if (!ctx->_TnlProgram) {
1511	 if (0)
1512	    _mesa_printf("Build new TNL program\n");
1513
1514	 ctx->_TnlProgram = (struct vertex_program *)
1515	    ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 0);
1516
1517	 create_new_program( key, ctx->_TnlProgram,
1518			     ctx->Const.VertexProgram.MaxTemps );
1519
1520
1521	 cache_item(tnl->vp_cache, hash, key, ctx->_TnlProgram );
1522      }
1523      else {
1524	 FREE(key);
1525	 if (0)
1526	    _mesa_printf("Found existing TNL program for key %x\n", hash);
1527      }
1528      ctx->VertexProgram._Current = ctx->_TnlProgram;
1529   }
1530   else {
1531      ctx->VertexProgram._Current = ctx->VertexProgram.Current;
1532   }
1533
1534   /* Tell the driver about the change.  Could define a new target for
1535    * this?
1536    */
1537   if (ctx->VertexProgram._Current != prev)
1538      ctx->Driver.BindProgram(ctx, GL_VERTEX_PROGRAM_ARB, (struct program *)
1539			      ctx->VertexProgram._Current);
1540}
1541
1542
1543void _tnl_ProgramCacheDestroy( GLcontext *ctx )
1544{
1545   TNLcontext *tnl = TNL_CONTEXT(ctx);
1546   struct tnl_cache_item *c, *next;
1547   GLuint i;
1548
1549   for (i = 0; i < tnl->vp_cache->size; i++)
1550      for (c = tnl->vp_cache->items[i]; c; c = next) {
1551	 next = c->next;
1552	 FREE(c->key);
1553	 FREE(c->data);
1554	 FREE(c);
1555      }
1556
1557   FREE(tnl->vp_cache->items);
1558   FREE(tnl->vp_cache);
1559}
1560