i915_fragprog.c revision 122629f27925a9dc50029bebc5079f87f416a7e1
1/**************************************************************************
2 *
3 * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28#include "glheader.h"
29#include "macros.h"
30#include "enums.h"
31
32#include "tnl/t_context.h"
33#include "intel_batchbuffer.h"
34
35#include "i915_reg.h"
36#include "i915_context.h"
37#include "i915_program.h"
38
39#include "program_instruction.h"
40#include "program.h"
41
42
43
44/* 1, -1/3!, 1/5!, -1/7! */
45static const GLfloat sin_constants[4] = {  1.0,
46					   -1.0/(3*2*1),
47					   1.0/(5*4*3*2*1),
48					   -1.0/(7*6*5*4*3*2*1) };
49
50/* 1, -1/2!, 1/4!, -1/6! */
51static const GLfloat cos_constants[4] = {  1.0,
52					   -1.0/(2*1),
53					   1.0/(4*3*2*1),
54					   -1.0/(6*5*4*3*2*1) };
55
56/**
57 * Retrieve a ureg for the given source register.  Will emit
58 * constants, apply swizzling and negation as needed.
59 */
60static GLuint src_vector( struct i915_fragment_program *p,
61			  const struct prog_src_register *source,
62			  const struct gl_fragment_program *program )
63{
64   GLuint src;
65
66   switch (source->File) {
67
68      /* Registers:
69       */
70      case PROGRAM_TEMPORARY:
71	 if (source->Index >= I915_MAX_TEMPORARY) {
72	    i915_program_error( p, "Exceeded max temporary reg" );
73	    return 0;
74	 }
75	 src = UREG( REG_TYPE_R, source->Index );
76         break;
77      case PROGRAM_INPUT:
78	 switch (source->Index) {
79	 case FRAG_ATTRIB_WPOS:
80	    src = i915_emit_decl( p,  REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL );
81	    break;
82	 case FRAG_ATTRIB_COL0:
83	    src = i915_emit_decl( p,  REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL );
84	    break;
85	 case FRAG_ATTRIB_COL1:
86	    src = i915_emit_decl( p,  REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ );
87	    src = swizzle( src, X, Y, Z, ONE );
88	    break;
89	 case FRAG_ATTRIB_FOGC:
90	    src = i915_emit_decl( p,  REG_TYPE_T, T_FOG_W, D0_CHANNEL_W );
91	    src = swizzle( src, W, W, W, W );
92	    break;
93	 case FRAG_ATTRIB_TEX0:
94	 case FRAG_ATTRIB_TEX1:
95	 case FRAG_ATTRIB_TEX2:
96	 case FRAG_ATTRIB_TEX3:
97	 case FRAG_ATTRIB_TEX4:
98	 case FRAG_ATTRIB_TEX5:
99	 case FRAG_ATTRIB_TEX6:
100	 case FRAG_ATTRIB_TEX7:
101	    src = i915_emit_decl( p,  REG_TYPE_T,
102				 T_TEX0 + (source->Index - FRAG_ATTRIB_TEX0),
103				 D0_CHANNEL_ALL );
104	    break;
105
106	 default:
107	    i915_program_error( p, "Bad source->Index" );
108	    return 0;
109	 }
110         break;
111
112	 /* Various paramters and env values.  All emitted to
113	  * hardware as program constants.
114	  */
115      case PROGRAM_LOCAL_PARAM:
116         src = i915_emit_param4fv(
117	    p, program->Base.LocalParams[source->Index]);
118	 break;
119
120      case PROGRAM_ENV_PARAM:
121         src = i915_emit_param4fv(
122	    p, p->ctx->FragmentProgram.Parameters[source->Index]);
123	 break;
124
125      case PROGRAM_STATE_VAR:
126      case PROGRAM_NAMED_PARAM:
127         src = i915_emit_param4fv(
128	    p, program->Base.Parameters->ParameterValues[source->Index] );
129	 break;
130
131      default:
132	 i915_program_error( p, "Bad source->File" );
133	 return 0;
134   }
135
136   src = swizzle(src,
137		 GET_SWZ(source->Swizzle, 0),
138		 GET_SWZ(source->Swizzle, 1),
139		 GET_SWZ(source->Swizzle, 2),
140		 GET_SWZ(source->Swizzle, 3));
141
142   if (source->NegateBase)
143      src = negate( src,
144		    GET_BIT(source->NegateBase, 0),
145		    GET_BIT(source->NegateBase, 1),
146		    GET_BIT(source->NegateBase, 2),
147		    GET_BIT(source->NegateBase, 3));
148
149   return src;
150}
151
152
153static GLuint get_result_vector( struct i915_fragment_program *p,
154				 const struct prog_instruction *inst )
155{
156   switch (inst->DstReg.File) {
157   case PROGRAM_OUTPUT:
158      switch (inst->DstReg.Index) {
159      case FRAG_RESULT_COLR:
160	 return UREG(REG_TYPE_OC, 0);
161      case FRAG_RESULT_DEPR:
162	 p->depth_written = 1;
163	 return UREG(REG_TYPE_OD, 0);
164      default:
165	 i915_program_error( p, "Bad inst->DstReg.Index" );
166	 return 0;
167      }
168   case PROGRAM_TEMPORARY:
169      return UREG(REG_TYPE_R, inst->DstReg.Index);
170   default:
171      i915_program_error( p, "Bad inst->DstReg.File" );
172      return 0;
173   }
174}
175
176static GLuint get_result_flags( const struct prog_instruction *inst )
177{
178   GLuint flags = 0;
179
180   if (inst->SaturateMode == SATURATE_ZERO_ONE) flags |= A0_DEST_SATURATE;
181   if (inst->DstReg.WriteMask & WRITEMASK_X) flags |= A0_DEST_CHANNEL_X;
182   if (inst->DstReg.WriteMask & WRITEMASK_Y) flags |= A0_DEST_CHANNEL_Y;
183   if (inst->DstReg.WriteMask & WRITEMASK_Z) flags |= A0_DEST_CHANNEL_Z;
184   if (inst->DstReg.WriteMask & WRITEMASK_W) flags |= A0_DEST_CHANNEL_W;
185
186   return flags;
187}
188
189static GLuint translate_tex_src_target( struct i915_fragment_program *p,
190				     GLubyte bit )
191{
192   switch (bit) {
193   case TEXTURE_1D_INDEX:   return D0_SAMPLE_TYPE_2D;
194   case TEXTURE_2D_INDEX:   return D0_SAMPLE_TYPE_2D;
195   case TEXTURE_RECT_INDEX: return D0_SAMPLE_TYPE_2D;
196   case TEXTURE_3D_INDEX:   return D0_SAMPLE_TYPE_VOLUME;
197   case TEXTURE_CUBE_INDEX: return D0_SAMPLE_TYPE_CUBE;
198   default: i915_program_error(p, "TexSrcBit"); return 0;
199   }
200}
201
/* Emit a texture instruction of kind OP for the current instruction.
 *
 * Must be expanded where `p` (program being built), `inst` (current
 * Mesa instruction) and `program` (owning Mesa program) are in scope.
 * The sampler is declared first, then the coordinate operand is
 * translated, then the texld itself is emitted.
 */
#define EMIT_TEX( OP )                                                  \
do {                                                                    \
   GLuint tex_dim = translate_tex_src_target( p, inst->TexSrcTarget );  \
   GLuint tex_sampler = i915_emit_decl( p, REG_TYPE_S,                  \
                                        inst->TexSrcUnit, tex_dim );    \
   GLuint tex_coord = src_vector( p, &inst->SrcReg[0], program );       \
                                                                        \
   i915_emit_texld( p,                                                  \
                    get_result_vector( p, inst ),                       \
                    get_result_flags( inst ),                           \
                    tex_sampler,                                        \
                    tex_coord,                                          \
                    OP );                                               \
} while (0)
217
/* Emit arithmetic opcode OP for the current instruction, translating
 * the first N source operands (N is 1, 2 or 3) and passing 0 for the
 * unused ones.
 *
 * Must be expanded where `p`, `inst` and `program` are in scope.
 */
#define EMIT_ARITH( OP, N )                                             \
do {                                                                    \
   i915_emit_arith( p,                                                  \
                    OP,                                                 \
                    get_result_vector( p, inst ),                       \
                    get_result_flags( inst ), 0,                        \
                    ((N) >= 1) ? src_vector( p, &inst->SrcReg[0], program ) : 0, \
                    ((N) >= 2) ? src_vector( p, &inst->SrcReg[1], program ) : 0, \
                    ((N) >= 3) ? src_vector( p, &inst->SrcReg[2], program ) : 0 ); \
} while (0)

/* Fixed-arity shorthands for EMIT_ARITH. */
#define EMIT_1ARG_ARITH( OP ) EMIT_ARITH( OP, 1 )
#define EMIT_2ARG_ARITH( OP ) EMIT_ARITH( OP, 2 )
#define EMIT_3ARG_ARITH( OP ) EMIT_ARITH( OP, 3 )
232
233
234/* Possible concerns:
235 *
236 * SIN, COS -- could use another taylor step?
237 * LIT      -- results seem a little different to sw mesa
238 * LOG      -- different to mesa on negative numbers, but this is conformant.
239 *
240 * Parse failures -- Mesa doesn't currently give a good indication
241 * internally whether a particular program string parsed or not.  This
242 * can lead to confusion -- hopefully we cope with it ok now.
243 *
244 */
245static void upload_program( struct i915_fragment_program *p )
246{
247   const struct gl_fragment_program *program = p->ctx->FragmentProgram._Current;
248   const struct prog_instruction *inst = program->Base.Instructions;
249
250/*    _mesa_debug_fp_inst(program->Base.NumInstructions, inst); */
251
252   /* Is this a parse-failed program?  Ensure a valid program is
253    * loaded, as the flagging of an error isn't sufficient to stop
254    * this being uploaded to hardware.
255    */
256   if (inst[0].Opcode == OPCODE_END) {
257      GLuint tmp = i915_get_utemp( p );
258      i915_emit_arith( p,
259		      A0_MOV,
260		      UREG(REG_TYPE_OC, 0),
261		      A0_DEST_CHANNEL_ALL, 0,
262		      swizzle(tmp,ONE,ZERO,ONE,ONE), 0, 0);
263      return;
264   }
265
266   while (1) {
267      GLuint src0, src1, src2, flags;
268      GLuint tmp = 0;
269
270      switch (inst->Opcode) {
271      case OPCODE_ABS:
272	 src0 = src_vector( p, &inst->SrcReg[0], program);
273	 i915_emit_arith( p,
274			 A0_MAX,
275			 get_result_vector( p, inst ),
276			 get_result_flags( inst ), 0,
277			 src0, negate(src0, 1,1,1,1), 0);
278	 break;
279
280      case OPCODE_ADD:
281	 EMIT_2ARG_ARITH( A0_ADD );
282	 break;
283
284      case OPCODE_CMP:
285	 src0 = src_vector( p, &inst->SrcReg[0], program);
286	 src1 = src_vector( p, &inst->SrcReg[1], program);
287	 src2 = src_vector( p, &inst->SrcReg[2], program);
288	 i915_emit_arith( p,
289			 A0_CMP,
290			 get_result_vector( p, inst ),
291			 get_result_flags( inst ), 0,
292			 src0, src2, src1);	/* NOTE: order of src2, src1 */
293	 break;
294
295      case OPCODE_COS:
296	 src0 = src_vector( p, &inst->SrcReg[0], program);
297	 tmp = i915_get_utemp( p );
298
299	 i915_emit_arith( p,
300			 A0_MUL,
301			 tmp, A0_DEST_CHANNEL_X, 0,
302			 src0,
303			 i915_emit_const1f(p, 1.0/(M_PI * 2)),
304			 0);
305
306	 i915_emit_arith( p,
307			 A0_MOD,
308			 tmp, A0_DEST_CHANNEL_X, 0,
309			 tmp,
310			 0, 0 );
311
312	 /* By choosing different taylor constants, could get rid of this mul:
313	  */
314	 i915_emit_arith( p,
315			 A0_MUL,
316			 tmp, A0_DEST_CHANNEL_X, 0,
317			 tmp,
318			 i915_emit_const1f(p, (M_PI * 2)),
319			 0);
320
321	 /*
322	  * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1
323	  * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1
324	  * t0 = MUL t0.xxz1 t0.z111    ; x^6 x^4 x^2 1
325	  * result = DP4 t0, cos_constants
326	  */
327	 i915_emit_arith( p,
328			 A0_MUL,
329			 tmp, A0_DEST_CHANNEL_XY, 0,
330			 swizzle(tmp, X,X,ONE,ONE),
331			 swizzle(tmp, X,ONE,ONE,ONE), 0);
332
333	 i915_emit_arith( p,
334			 A0_MUL,
335			 tmp, A0_DEST_CHANNEL_XYZ, 0,
336			 swizzle(tmp, X,Y,X,ONE),
337			 swizzle(tmp, X,X,ONE,ONE), 0);
338
339	 i915_emit_arith( p,
340			 A0_MUL,
341			 tmp, A0_DEST_CHANNEL_XYZ, 0,
342			 swizzle(tmp, X,X,Z,ONE),
343			 swizzle(tmp, Z,ONE,ONE,ONE), 0);
344
345	 i915_emit_arith( p,
346			 A0_DP4,
347			 get_result_vector( p, inst ),
348			 get_result_flags( inst ), 0,
349			 swizzle(tmp, ONE,Z,Y,X),
350			 i915_emit_const4fv( p, cos_constants ), 0);
351
352	 break;
353
354      case OPCODE_DP3:
355	 EMIT_2ARG_ARITH( A0_DP3 );
356	 break;
357
358      case OPCODE_DP4:
359	 EMIT_2ARG_ARITH( A0_DP4 );
360	 break;
361
362      case OPCODE_DPH:
363	 src0 = src_vector( p, &inst->SrcReg[0], program);
364	 src1 = src_vector( p, &inst->SrcReg[1], program);
365
366	 i915_emit_arith( p,
367			 A0_DP4,
368			 get_result_vector( p, inst ),
369			 get_result_flags( inst ), 0,
370			 swizzle(src0, X,Y,Z,ONE), src1, 0);
371	 break;
372
373      case OPCODE_DST:
374	 src0 = src_vector( p, &inst->SrcReg[0], program);
375	 src1 = src_vector( p, &inst->SrcReg[1], program);
376
377	 /* result[0] = 1    * 1;
378	  * result[1] = a[1] * b[1];
379	  * result[2] = a[2] * 1;
380	  * result[3] = 1    * b[3];
381	  */
382	 i915_emit_arith( p,
383			 A0_MUL,
384			 get_result_vector( p, inst ),
385			 get_result_flags( inst ), 0,
386			 swizzle(src0, ONE, Y, Z,   ONE),
387			 swizzle(src1, ONE, Y, ONE, W  ),
388			 0);
389	 break;
390
391      case OPCODE_EX2:
392	 src0 = src_vector( p, &inst->SrcReg[0], program);
393
394	 i915_emit_arith( p,
395			 A0_EXP,
396			 get_result_vector( p, inst ),
397			 get_result_flags( inst ), 0,
398			 swizzle(src0,X,X,X,X), 0, 0);
399	 break;
400
401      case OPCODE_FLR:
402	 EMIT_1ARG_ARITH( A0_FLR );
403	 break;
404
405      case OPCODE_FRC:
406	 EMIT_1ARG_ARITH( A0_FRC );
407	 break;
408
409      case OPCODE_KIL:
410	 src0 = src_vector( p, &inst->SrcReg[0], program);
411	 tmp = i915_get_utemp( p );
412
413	 i915_emit_texld( p,
414			 tmp, A0_DEST_CHANNEL_ALL, /* use a dummy dest reg */
415			 0,
416			 src0,
417			 T0_TEXKILL );
418	 break;
419
420      case OPCODE_LG2:
421	 src0 = src_vector( p, &inst->SrcReg[0], program);
422
423	 i915_emit_arith( p,
424			 A0_LOG,
425			 get_result_vector( p, inst ),
426			 get_result_flags( inst ), 0,
427			 swizzle(src0,X,X,X,X), 0, 0);
428	 break;
429
430      case OPCODE_LIT:
431	 src0 = src_vector( p, &inst->SrcReg[0], program);
432	 tmp = i915_get_utemp( p );
433
434	 /* tmp = max( a.xyzw, a.00zw )
435	  * XXX: Clamp tmp.w to -128..128
436	  * tmp.y = log(tmp.y)
437	  * tmp.y = tmp.w * tmp.y
438	  * tmp.y = exp(tmp.y)
439	  * result = cmp (a.11-x1, a.1x01, a.1xy1 )
440	  */
441	 i915_emit_arith( p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0,
442			 src0, swizzle(src0, ZERO, ZERO, Z, W), 0 );
443
444	 i915_emit_arith( p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0,
445			 swizzle(tmp, Y, Y, Y, Y), 0, 0 );
446
447	 i915_emit_arith( p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0,
448			 swizzle(tmp, ZERO, Y, ZERO, ZERO),
449			 swizzle(tmp, ZERO, W, ZERO, ZERO), 0 );
450
451	 i915_emit_arith( p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0,
452			 swizzle(tmp, Y, Y, Y, Y), 0, 0 );
453
454	 i915_emit_arith( p, A0_CMP,
455			 get_result_vector( p, inst ),
456			 get_result_flags( inst ), 0,
457			 negate(swizzle(tmp, ONE, ONE, X, ONE),0,0,1,0),
458			 swizzle(tmp, ONE, X, ZERO, ONE),
459			 swizzle(tmp, ONE, X, Y, ONE));
460
461	 break;
462
463      case OPCODE_LRP:
464	 src0 = src_vector( p, &inst->SrcReg[0], program);
465	 src1 = src_vector( p, &inst->SrcReg[1], program);
466	 src2 = src_vector( p, &inst->SrcReg[2], program);
467	 flags = get_result_flags( inst );
468	 tmp = i915_get_utemp( p );
469
470	 /* b*a + c*(1-a)
471	  *
472	  * b*a + c - ca
473	  *
474	  * tmp = b*a + c,
475	  * result = (-c)*a + tmp
476	  */
477	 i915_emit_arith( p, A0_MAD, tmp,
478			 flags & A0_DEST_CHANNEL_ALL, 0,
479			 src1, src0, src2 );
480
481	 i915_emit_arith( p, A0_MAD,
482			 get_result_vector( p, inst ),
483			 flags, 0,
484			 negate(src2, 1,1,1,1), src0, tmp );
485	 break;
486
487      case OPCODE_MAD:
488	 EMIT_3ARG_ARITH( A0_MAD );
489	 break;
490
491      case OPCODE_MAX:
492	 EMIT_2ARG_ARITH( A0_MAX );
493	 break;
494
495      case OPCODE_MIN:
496	 src0 = src_vector( p, &inst->SrcReg[0], program);
497	 src1 = src_vector( p, &inst->SrcReg[1], program);
498	 tmp = i915_get_utemp( p );
499	 flags = get_result_flags( inst );
500
501	 i915_emit_arith( p,
502			 A0_MAX,
503			 tmp, flags & A0_DEST_CHANNEL_ALL, 0,
504			 negate(src0,1,1,1,1),
505			 negate(src1,1,1,1,1), 0);
506
507	 i915_emit_arith( p,
508			 A0_MOV,
509			 get_result_vector( p, inst ),
510			 flags, 0,
511			 negate(tmp, 1,1,1,1), 0, 0);
512	 break;
513
514      case OPCODE_MOV:
515	 EMIT_1ARG_ARITH( A0_MOV );
516	 break;
517
518      case OPCODE_MUL:
519	 EMIT_2ARG_ARITH( A0_MUL );
520	 break;
521
522      case OPCODE_POW:
523	 src0 = src_vector( p, &inst->SrcReg[0], program);
524	 src1 = src_vector( p, &inst->SrcReg[1], program);
525	 tmp = i915_get_utemp( p );
526	 flags = get_result_flags( inst );
527
528	 /* XXX: masking on intermediate values, here and elsewhere.
529	  */
530	 i915_emit_arith( p,
531			 A0_LOG,
532			 tmp, A0_DEST_CHANNEL_X, 0,
533			 swizzle(src0,X,X,X,X), 0, 0);
534
535	 i915_emit_arith( p,
536			 A0_MUL,
537			 tmp, A0_DEST_CHANNEL_X, 0,
538			 tmp, src1, 0);
539
540
541	 i915_emit_arith( p,
542			 A0_EXP,
543			 get_result_vector( p, inst ),
544			 flags, 0,
545			 swizzle(tmp,X,X,X,X), 0, 0);
546
547	 break;
548
549      case OPCODE_RCP:
550	 src0 = src_vector( p, &inst->SrcReg[0], program);
551
552	 i915_emit_arith( p,
553			 A0_RCP,
554			 get_result_vector( p, inst ),
555			 get_result_flags( inst ), 0,
556			 swizzle(src0,X,X,X,X), 0, 0);
557	 break;
558
559      case OPCODE_RSQ:
560
561	 src0 = src_vector( p, &inst->SrcReg[0], program);
562
563	 i915_emit_arith( p,
564			 A0_RSQ,
565			 get_result_vector( p, inst ),
566			 get_result_flags( inst ), 0,
567			 swizzle(src0,X,X,X,X), 0, 0);
568	 break;
569
570      case OPCODE_SCS:
571	 src0 = src_vector( p, &inst->SrcReg[0], program);
572	 tmp = i915_get_utemp( p );
573
574	 /*
575	  * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1
576	  * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
577	  * t1 = MUL t0.xyyw t0.yz11    ; x^7 x^5 x^3 x
578	  * scs.x = DP4 t1, sin_constants
579	  * t1 = MUL t0.xxz1 t0.z111    ; x^6 x^4 x^2 1
580	  * scs.y = DP4 t1, cos_constants
581	  */
582	 i915_emit_arith( p,
583			 A0_MUL,
584			 tmp, A0_DEST_CHANNEL_XY, 0,
585			 swizzle(src0, X,X,ONE,ONE),
586			 swizzle(src0, X,ONE,ONE,ONE), 0);
587
588	 i915_emit_arith( p,
589			 A0_MUL,
590			 tmp, A0_DEST_CHANNEL_ALL, 0,
591			 swizzle(tmp, X,Y,X,Y),
592			 swizzle(tmp, X,X,ONE,ONE), 0);
593
594	 if (inst->DstReg.WriteMask & WRITEMASK_Y) {
595	    GLuint tmp1;
596
597	    if (inst->DstReg.WriteMask & WRITEMASK_X)
598	       tmp1 = i915_get_utemp( p );
599	    else
600	       tmp1 = tmp;
601
602	    i915_emit_arith( p,
603			    A0_MUL,
604			    tmp1, A0_DEST_CHANNEL_ALL, 0,
605			    swizzle(tmp, X,Y,Y,W),
606			    swizzle(tmp, X,Z,ONE,ONE), 0);
607
608	    i915_emit_arith( p,
609			    A0_DP4,
610			    get_result_vector( p, inst ),
611			    A0_DEST_CHANNEL_Y, 0,
612			    swizzle(tmp1, W,Z,Y,X),
613			    i915_emit_const4fv( p, sin_constants ), 0);
614	 }
615
616	 if (inst->DstReg.WriteMask & WRITEMASK_X) {
617	    i915_emit_arith( p,
618			    A0_MUL,
619			    tmp, A0_DEST_CHANNEL_XYZ, 0,
620			    swizzle(tmp, X,X,Z,ONE),
621			    swizzle(tmp, Z,ONE,ONE,ONE), 0);
622
623	    i915_emit_arith( p,
624			    A0_DP4,
625			    get_result_vector( p, inst ),
626			    A0_DEST_CHANNEL_X, 0,
627			    swizzle(tmp, ONE,Z,Y,X),
628			    i915_emit_const4fv( p, cos_constants ), 0);
629	 }
630	 break;
631
632      case OPCODE_SGE:
633	 EMIT_2ARG_ARITH( A0_SGE );
634	 break;
635
636      case OPCODE_SIN:
637	 src0 = src_vector( p, &inst->SrcReg[0], program);
638	 tmp = i915_get_utemp( p );
639
640	 i915_emit_arith( p,
641			 A0_MUL,
642			 tmp, A0_DEST_CHANNEL_X, 0,
643			 src0,
644			 i915_emit_const1f(p, 1.0/(M_PI * 2)),
645			 0);
646
647	 i915_emit_arith( p,
648			 A0_MOD,
649			 tmp, A0_DEST_CHANNEL_X, 0,
650			 tmp,
651			 0, 0 );
652
653	 /* By choosing different taylor constants, could get rid of this mul:
654	  */
655	 i915_emit_arith( p,
656			 A0_MUL,
657			 tmp, A0_DEST_CHANNEL_X, 0,
658			 tmp,
659			 i915_emit_const1f(p, (M_PI * 2)),
660			 0);
661
662	 /*
663	  * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1
664	  * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
665	  * t1 = MUL t0.xyyw t0.yz11    ; x^7 x^5 x^3 x
666	  * result = DP4 t1.wzyx, sin_constants
667	  */
668	 i915_emit_arith( p,
669			 A0_MUL,
670			 tmp, A0_DEST_CHANNEL_XY, 0,
671			 swizzle(tmp, X,X,ONE,ONE),
672			 swizzle(tmp, X,ONE,ONE,ONE), 0);
673
674	 i915_emit_arith( p,
675			 A0_MUL,
676			 tmp, A0_DEST_CHANNEL_ALL, 0,
677			 swizzle(tmp, X,Y,X,Y),
678			 swizzle(tmp, X,X,ONE,ONE), 0);
679
680	 i915_emit_arith( p,
681			 A0_MUL,
682			 tmp, A0_DEST_CHANNEL_ALL, 0,
683			 swizzle(tmp, X,Y,Y,W),
684			 swizzle(tmp, X,Z,ONE,ONE), 0);
685
686	 i915_emit_arith( p,
687			 A0_DP4,
688			 get_result_vector( p, inst ),
689			 get_result_flags( inst ), 0,
690			 swizzle(tmp, W, Z, Y, X ),
691			 i915_emit_const4fv( p, sin_constants ), 0);
692	 break;
693
694      case OPCODE_SLT:
695	 EMIT_2ARG_ARITH( A0_SLT );
696	 break;
697
698      case OPCODE_SUB:
699	 src0 = src_vector( p, &inst->SrcReg[0], program);
700	 src1 = src_vector( p, &inst->SrcReg[1], program);
701
702	 i915_emit_arith( p,
703			 A0_ADD,
704			 get_result_vector( p, inst ),
705			 get_result_flags( inst ), 0,
706			 src0, negate(src1, 1,1,1,1), 0);
707	 break;
708
709      case OPCODE_SWZ:
710	 EMIT_1ARG_ARITH( A0_MOV ); /* extended swizzle handled natively */
711	 break;
712
713      case OPCODE_TEX:
714	 EMIT_TEX( T0_TEXLD );
715	 break;
716
717      case OPCODE_TXB:
718	 EMIT_TEX( T0_TEXLDB );
719	 break;
720
721      case OPCODE_TXP:
722	 EMIT_TEX( T0_TEXLDP );
723	 break;
724
725      case OPCODE_XPD:
726	 /* Cross product:
727	  *      result.x = src0.y * src1.z - src0.z * src1.y;
728	  *      result.y = src0.z * src1.x - src0.x * src1.z;
729	  *      result.z = src0.x * src1.y - src0.y * src1.x;
730	  *      result.w = undef;
731	  */
732	 src0 = src_vector( p, &inst->SrcReg[0], program);
733	 src1 = src_vector( p, &inst->SrcReg[1], program);
734	 tmp = i915_get_utemp( p );
735
736	 i915_emit_arith( p,
737			 A0_MUL,
738			 tmp, A0_DEST_CHANNEL_ALL, 0,
739			 swizzle(src0,Z,X,Y,ONE),
740			 swizzle(src1,Y,Z,X,ONE), 0);
741
742	 i915_emit_arith( p,
743			 A0_MAD,
744			 get_result_vector( p, inst ),
745			 get_result_flags( inst ), 0,
746			 swizzle(src0,Y,Z,X,ONE),
747			 swizzle(src1,Z,X,Y,ONE),
748			 negate(tmp,1,1,1,0));
749	 break;
750
751      case OPCODE_END:
752	 return;
753
754      default:
755	 i915_program_error( p, "bad opcode" );
756	 return;
757      }
758
759      inst++;
760      i915_release_utemps( p );
761   }
762}
763
764/* Rather than trying to intercept and jiggle depth writes during
765 * emit, just move the value into its correct position at the end of
766 * the program:
767 */
768static void fixup_depth_write( struct i915_fragment_program *p )
769{
770   if (p->depth_written) {
771      GLuint depth = UREG(REG_TYPE_OD, 0);
772
773      i915_emit_arith( p,
774		      A0_MOV,
775		      depth, A0_DEST_CHANNEL_W, 0,
776		      swizzle(depth,X,Y,Z,Z),
777		      0, 0);
778   }
779}
780
781
782#define FRAG_BIT_TEX(n)  (FRAG_BIT_TEX0 << (n))
783
784
785static void check_wpos( struct i915_fragment_program *p )
786{
787   GLuint inputs = p->FragProg.Base.InputsRead;
788   GLint i;
789
790   p->wpos_tex = -1;
791
792   for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
793      if (inputs & FRAG_BIT_TEX(i))
794	 continue;
795      else if (inputs & FRAG_BIT_WPOS) {
796	 p->wpos_tex = i;
797	 inputs &= ~FRAG_BIT_WPOS;
798      }
799   }
800
801   if (inputs & FRAG_BIT_WPOS) {
802      i915_program_error(p, "No free texcoord for wpos value");
803   }
804}
805
806
807static void translate_program( struct i915_fragment_program *p )
808{
809   i915ContextPtr i915 = I915_CONTEXT(p->ctx);
810
811   i915_init_program( i915, p );
812   check_wpos( p );
813   upload_program( p );
814   fixup_depth_write( p );
815   i915_fini_program( p );
816
817   p->translated = 1;
818}
819
820
821static void track_params( struct i915_fragment_program *p )
822{
823   GLint i;
824
825   if (p->nr_params)
826      _mesa_load_state_parameters(p->ctx, p->FragProg.Base.Parameters);
827
828   for (i = 0; i < p->nr_params; i++) {
829      GLint reg = p->param[i].reg;
830      COPY_4V( p->constant[reg], p->param[i].values );
831   }
832
833   p->params_uptodate = 1;
834   p->on_hardware = 0;		/* overkill */
835}
836
837
838static void i915BindProgram( GLcontext *ctx,
839			    GLenum target,
840			    struct gl_program *prog )
841{
842   if (target == GL_FRAGMENT_PROGRAM_ARB) {
843      i915ContextPtr i915 = I915_CONTEXT(ctx);
844      struct i915_fragment_program *p = (struct i915_fragment_program *)prog;
845
846      if (i915->current_program == p)
847	 return;
848
849      if (i915->current_program) {
850	 i915->current_program->on_hardware = 0;
851	 i915->current_program->params_uptodate = 0;
852      }
853
854      i915->current_program = p;
855
856      assert(p->on_hardware == 0);
857      assert(p->params_uptodate == 0);
858
859      /* Hack: make sure fog is correctly enabled according to this
860       * fragment program's fog options.
861       */
862      ctx->Driver.Enable( ctx, GL_FRAGMENT_PROGRAM_ARB,
863			  ctx->FragmentProgram.Enabled );
864   }
865}
866
867static struct gl_program *i915NewProgram( GLcontext *ctx,
868				      GLenum target,
869				      GLuint id )
870{
871   switch (target) {
872   case GL_VERTEX_PROGRAM_ARB:
873      return _mesa_init_vertex_program( ctx, CALLOC_STRUCT(gl_vertex_program),
874					target, id );
875
876   case GL_FRAGMENT_PROGRAM_ARB: {
877      struct i915_fragment_program *prog = CALLOC_STRUCT(i915_fragment_program);
878      if (prog) {
879	 i915_init_program( I915_CONTEXT(ctx), prog );
880
881	 return _mesa_init_fragment_program( ctx, &prog->FragProg,
882					     target, id );
883      }
884      else
885	 return NULL;
886   }
887
888   default:
889      /* Just fallback:
890       */
891      return _mesa_new_program( ctx, target, id );
892   }
893}
894
895static void i915DeleteProgram( GLcontext *ctx,
896			      struct gl_program *prog )
897{
898   if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
899      i915ContextPtr i915 = I915_CONTEXT(ctx);
900      struct i915_fragment_program *p = (struct i915_fragment_program *)prog;
901
902      if (i915->current_program == p)
903	 i915->current_program = 0;
904   }
905
906   _mesa_delete_program( ctx, prog );
907}
908
909
910static GLboolean i915IsProgramNative( GLcontext *ctx,
911				     GLenum target,
912				     struct gl_program *prog )
913{
914   if (target == GL_FRAGMENT_PROGRAM_ARB) {
915      struct i915_fragment_program *p = (struct i915_fragment_program *)prog;
916
917      if (!p->translated)
918	 translate_program( p );
919
920      return !p->error;
921   }
922   else
923      return GL_TRUE;
924}
925
926static void i915ProgramStringNotify( GLcontext *ctx,
927				    GLenum target,
928				    struct gl_program *prog )
929{
930   if (target == GL_FRAGMENT_PROGRAM_ARB) {
931      struct i915_fragment_program *p = (struct i915_fragment_program *)prog;
932      p->translated = 0;
933
934      /* Hack: make sure fog is correctly enabled according to this
935       * fragment program's fog options.
936       */
937      ctx->Driver.Enable( ctx, GL_FRAGMENT_PROGRAM_ARB,
938			  ctx->FragmentProgram.Enabled );
939   }
940}
941
942
/**
 * Bring the vertex layout and fragment program state in line with the
 * current fragment program (ctx->FragmentProgram._Current).
 *
 * Steps, in order:
 *  - translate the program if that has not been done yet;
 *  - rebuild the vertex attribute list from the inputs the program
 *    reads, accumulating the LIS2 (texcoord formats) and LIS4 (vertex
 *    format) values;
 *  - if either value differs from the stored context state, flag a
 *    context upload, reinstall the TNL attributes and store the new
 *    values;
 *  - refresh tracked parameters and upload the program when stale.
 *
 * NOTE(review): EMIT_ATTR and EMIT_PAD are defined outside this file
 * section.  `offset` and `s4` are never assigned directly below yet are
 * read after those macros run, so the macros presumably update them (and
 * intel->vertex_attrs / vertex_attr_count) by name -- confirm before
 * renaming any local here.
 */
void i915ValidateFragmentProgram( i915ContextPtr i915 )
{
   GLcontext *ctx = &i915->intel.ctx;
   intelContextPtr intel = INTEL_CONTEXT(ctx);
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   struct vertex_buffer *VB = &tnl->vb;

   struct i915_fragment_program *p =
      (struct i915_fragment_program *)ctx->FragmentProgram._Current;

   const GLuint inputsRead = p->FragProg.Base.InputsRead;
   /* Start from the current LIS4 with the vertex-format bits cleared. */
   GLuint s4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_VFMT_MASK;
   GLuint s2 = S2_TEXCOORD_NONE;
   int i, offset = 0;

   /* Important:
    */
   VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;

   if (!p->translated)
      translate_program( p );

   /* Reset the vertex layout bookkeeping before rebuilding it. */
   intel->vertex_attr_count = 0;
   intel->wpos_offset = 0;
   intel->wpos_size = 0;
   intel->coloroffset = 0;
   intel->specoffset = 0;

   /* Position: XYZW when any texture coordinate is read, XYZ otherwise. */
   if (inputsRead & FRAG_BITS_TEX_ANY) {
      EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, S4_VFMT_XYZW, 16 );
   }
   else {
      EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, S4_VFMT_XYZ, 12 );
   }

   /* Primary colour, only when the program reads it. */
   if (inputsRead & FRAG_BIT_COL0) {
      intel->coloroffset = offset / 4;
      EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, S4_VFMT_COLOR, 4 );
   }

   /* Secondary colour and fog are emitted as a 3 + 1 pair: whichever
    * half is not needed is replaced by padding of the same size.
    */
   if ((inputsRead & (FRAG_BIT_COL1|FRAG_BIT_FOGC)) ||
       i915->vertex_fog != I915_FOG_NONE) {

      if (inputsRead & FRAG_BIT_COL1) {
	 intel->specoffset = offset / 4;
	 EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR, S4_VFMT_SPEC_FOG, 3 );
      }
      else
	 EMIT_PAD(3);

      if ((inputsRead & FRAG_BIT_FOGC) || i915->vertex_fog != I915_FOG_NONE)
	 EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F, S4_VFMT_SPEC_FOG, 1 );
      else
	 EMIT_PAD( 1 );
   }

   /* Disabled alternative that would emit fog as a separate float
    * attribute.
    */
#if 0
   if ((inputsRead & FRAG_BIT_FOGC) || i915->vertex_fog != I915_FOG_NONE) {
      EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1F, S4_VFMT_FOG_PARAM, 4 );
   }
#endif

   /* Texture coordinates: one attribute per set the program reads,
    * sized from the vertex buffer's current coordinate size.
    */
   for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
      if (inputsRead & FRAG_BIT_TEX(i)) {
	 int sz = VB->TexCoordPtr[i]->size;

	 s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
	 s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz));

	 EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_SZ(sz), 0, sz * 4 );
      }
      else if (i == p->wpos_tex) {

	 /* If WPOS is required, duplicate the XYZ position data in an
	  * unused texture coordinate:
	  */
	 s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
	 s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(3));

	 /* Only space is reserved here; the location and size are
	  * recorded so the position can be copied in elsewhere.
	  */
	 intel->wpos_offset = offset;
	 intel->wpos_size = 3 * sizeof(GLuint);

	 EMIT_PAD( intel->wpos_size );
      }
   }

   /* Reinstall the vertex format only when it actually changed. */
   if (s2 != i915->state.Ctx[I915_CTXREG_LIS2] ||
       s4 != i915->state.Ctx[I915_CTXREG_LIS4]) {

      I915_STATECHANGE( i915, I915_UPLOAD_CTX );

      /* Must do this *after* statechange, so as not to affect
       * buffered vertices reliant on the old state:
       */
      intel->vertex_size = _tnl_install_attrs( &intel->ctx,
					       intel->vertex_attrs,
					       intel->vertex_attr_count,
					       intel->ViewportMatrix.m, 0 );

      /* Divide by four -- presumably bytes to dwords; confirm against
       * _tnl_install_attrs().
       */
      intel->vertex_size >>= 2;

      i915->state.Ctx[I915_CTXREG_LIS2] = s2;
      i915->state.Ctx[I915_CTXREG_LIS4] = s4;

      /* NOTE: check_vertex_size() is only called inside this assert,
       * so the check disappears in NDEBUG builds.
       */
      assert(intel->vtbl.check_vertex_size( intel, intel->vertex_size ));
   }

   if (!p->params_uptodate)
      track_params( p );

   if (!p->on_hardware)
      i915_upload_program( i915, p );
}
1056
1057void i915InitFragProgFuncs( struct dd_function_table *functions )
1058{
1059   functions->BindProgram = i915BindProgram;
1060   functions->NewProgram = i915NewProgram;
1061   functions->DeleteProgram = i915DeleteProgram;
1062   functions->IsProgramNative = i915IsProgramNative;
1063   functions->ProgramStringNotify = i915ProgramStringNotify;
1064}
1065