i915_fragprog.c revision ab02552cdddf9322bfaf874f85d74e7c174a0f3b
1/**************************************************************************
2 *
3 * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28#include "glheader.h"
29#include "macros.h"
30#include "enums.h"
31
32#include "tnl/tnl.h"
33#include "tnl/t_context.h"
34#include "intel_batchbuffer.h"
35
36#include "i915_reg.h"
37#include "i915_context.h"
38#include "i915_program.h"
39
40#include "prog_instruction.h"
41#include "prog_parameter.h"
42#include "program.h"
43#include "programopt.h"
44
45
46
47/* 1, -1/3!, 1/5!, -1/7! */
48static const GLfloat sin_constants[4] = {  1.0,
49					   -1.0/(3*2*1),
50					   1.0/(5*4*3*2*1),
51					   -1.0/(7*6*5*4*3*2*1) };
52
53/* 1, -1/2!, 1/4!, -1/6! */
54static const GLfloat cos_constants[4] = {  1.0,
55					   -1.0/(2*1),
56					   1.0/(4*3*2*1),
57					   -1.0/(6*5*4*3*2*1) };
58
59/**
60 * Retrieve a ureg for the given source register.  Will emit
61 * constants, apply swizzling and negation as needed.
62 */
63static GLuint src_vector( struct i915_fragment_program *p,
64			  const struct prog_src_register *source,
65			  const struct gl_fragment_program *program )
66{
67   GLuint src;
68
69   switch (source->File) {
70
71      /* Registers:
72       */
73      case PROGRAM_TEMPORARY:
74	 if (source->Index >= I915_MAX_TEMPORARY) {
75	    i915_program_error( p, "Exceeded max temporary reg" );
76	    return 0;
77	 }
78	 src = UREG( REG_TYPE_R, source->Index );
79         break;
80      case PROGRAM_INPUT:
81	 switch (source->Index) {
82	 case FRAG_ATTRIB_WPOS:
83	    src = i915_emit_decl( p,  REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL );
84	    break;
85	 case FRAG_ATTRIB_COL0:
86	    src = i915_emit_decl( p,  REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL );
87	    break;
88	 case FRAG_ATTRIB_COL1:
89	    src = i915_emit_decl( p,  REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ );
90	    src = swizzle( src, X, Y, Z, ONE );
91	    break;
92	 case FRAG_ATTRIB_FOGC:
93	    src = i915_emit_decl( p,  REG_TYPE_T, T_FOG_W, D0_CHANNEL_W );
94	    src = swizzle( src, W, W, W, W );
95	    break;
96	 case FRAG_ATTRIB_TEX0:
97	 case FRAG_ATTRIB_TEX1:
98	 case FRAG_ATTRIB_TEX2:
99	 case FRAG_ATTRIB_TEX3:
100	 case FRAG_ATTRIB_TEX4:
101	 case FRAG_ATTRIB_TEX5:
102	 case FRAG_ATTRIB_TEX6:
103	 case FRAG_ATTRIB_TEX7:
104	    src = i915_emit_decl( p,  REG_TYPE_T,
105				 T_TEX0 + (source->Index - FRAG_ATTRIB_TEX0),
106				 D0_CHANNEL_ALL );
107	    break;
108
109	 default:
110	    i915_program_error( p, "Bad source->Index" );
111	    return 0;
112	 }
113         break;
114
115	 /* Various paramters and env values.  All emitted to
116	  * hardware as program constants.
117	  */
118      case PROGRAM_LOCAL_PARAM:
119         src = i915_emit_param4fv(
120	    p, program->Base.LocalParams[source->Index]);
121	 break;
122
123      case PROGRAM_ENV_PARAM:
124         src = i915_emit_param4fv(
125	    p, p->ctx->FragmentProgram.Parameters[source->Index]);
126	 break;
127
128      case PROGRAM_CONSTANT:
129      case PROGRAM_STATE_VAR:
130      case PROGRAM_NAMED_PARAM:
131         src = i915_emit_param4fv(
132	    p, program->Base.Parameters->ParameterValues[source->Index] );
133	 break;
134
135      default:
136	 i915_program_error( p, "Bad source->File" );
137	 return 0;
138   }
139
140   src = swizzle(src,
141		 GET_SWZ(source->Swizzle, 0),
142		 GET_SWZ(source->Swizzle, 1),
143		 GET_SWZ(source->Swizzle, 2),
144		 GET_SWZ(source->Swizzle, 3));
145
146   if (source->NegateBase)
147      src = negate( src,
148		    GET_BIT(source->NegateBase, 0),
149		    GET_BIT(source->NegateBase, 1),
150		    GET_BIT(source->NegateBase, 2),
151		    GET_BIT(source->NegateBase, 3));
152
153   return src;
154}
155
156
157static GLuint get_result_vector( struct i915_fragment_program *p,
158				 const struct prog_instruction *inst )
159{
160   switch (inst->DstReg.File) {
161   case PROGRAM_OUTPUT:
162      switch (inst->DstReg.Index) {
163      case FRAG_RESULT_COLR:
164	 return UREG(REG_TYPE_OC, 0);
165      case FRAG_RESULT_DEPR:
166	 p->depth_written = 1;
167	 return UREG(REG_TYPE_OD, 0);
168      default:
169	 i915_program_error( p, "Bad inst->DstReg.Index" );
170	 return 0;
171      }
172   case PROGRAM_TEMPORARY:
173      return UREG(REG_TYPE_R, inst->DstReg.Index);
174   default:
175      i915_program_error( p, "Bad inst->DstReg.File" );
176      return 0;
177   }
178}
179
180static GLuint get_result_flags( const struct prog_instruction *inst )
181{
182   GLuint flags = 0;
183
184   if (inst->SaturateMode == SATURATE_ZERO_ONE) flags |= A0_DEST_SATURATE;
185   if (inst->DstReg.WriteMask & WRITEMASK_X) flags |= A0_DEST_CHANNEL_X;
186   if (inst->DstReg.WriteMask & WRITEMASK_Y) flags |= A0_DEST_CHANNEL_Y;
187   if (inst->DstReg.WriteMask & WRITEMASK_Z) flags |= A0_DEST_CHANNEL_Z;
188   if (inst->DstReg.WriteMask & WRITEMASK_W) flags |= A0_DEST_CHANNEL_W;
189
190   return flags;
191}
192
193static GLuint translate_tex_src_target( struct i915_fragment_program *p,
194				     GLubyte bit )
195{
196   switch (bit) {
197   case TEXTURE_1D_INDEX:   return D0_SAMPLE_TYPE_2D;
198   case TEXTURE_2D_INDEX:   return D0_SAMPLE_TYPE_2D;
199   case TEXTURE_RECT_INDEX: return D0_SAMPLE_TYPE_2D;
200   case TEXTURE_3D_INDEX:   return D0_SAMPLE_TYPE_VOLUME;
201   case TEXTURE_CUBE_INDEX: return D0_SAMPLE_TYPE_CUBE;
202   default: i915_program_error(p, "TexSrcBit"); return 0;
203   }
204}
205
/* Emit one texture instruction of kind OP for the current instruction.
 * Expects `p', `inst' and `program' to be in scope at the point of use.
 * The sampler is declared first, then the coordinate operand is built,
 * and finally the lookup itself is emitted.
 */
#define EMIT_TEX( OP )                                                  \
do {                                                                    \
   const GLuint sample_type =                                           \
      translate_tex_src_target( p, inst->TexSrcTarget );                \
   const GLuint sampler_reg =                                           \
      i915_emit_decl( p, REG_TYPE_S, inst->TexSrcUnit, sample_type );   \
   const GLuint texcoord =                                              \
      src_vector( p, &inst->SrcReg[0], program );                       \
                                                                        \
   i915_emit_texld( p,                                                  \
                    get_result_vector( p, inst ),                       \
                    get_result_flags( inst ),                           \
                    sampler_reg,                                        \
                    texcoord,                                           \
                    OP );                                               \
} while (0)
221
/* Emit one arithmetic instruction OP that maps directly onto a Mesa
 * opcode taking N (1..3) source operands.  Unused source slots are
 * passed as 0.  Expects `p', `inst' and `program' to be in scope at the
 * point of use.
 *
 * N is parenthesised so that an expression argument cannot change the
 * meaning of the comparisons.
 */
#define EMIT_ARITH( OP, N )                                             \
do {                                                                    \
   i915_emit_arith( p,                                                  \
               OP,                                                      \
               get_result_vector( p, inst ),                            \
               get_result_flags( inst ), 0,                             \
               ((N) < 1) ? 0 : src_vector( p, &inst->SrcReg[0], program), \
               ((N) < 2) ? 0 : src_vector( p, &inst->SrcReg[1], program), \
               ((N) < 3) ? 0 : src_vector( p, &inst->SrcReg[2], program)); \
} while (0)

#define EMIT_1ARG_ARITH( OP ) EMIT_ARITH( OP, 1 )
#define EMIT_2ARG_ARITH( OP ) EMIT_ARITH( OP, 2 )
#define EMIT_3ARG_ARITH( OP ) EMIT_ARITH( OP, 3 )
236
237
238/* Possible concerns:
239 *
240 * SIN, COS -- could use another taylor step?
241 * LIT      -- results seem a little different to sw mesa
242 * LOG      -- different to mesa on negative numbers, but this is conformant.
243 *
244 * Parse failures -- Mesa doesn't currently give a good indication
245 * internally whether a particular program string parsed or not.  This
246 * can lead to confusion -- hopefully we cope with it ok now.
247 *
248 */
249static void upload_program( struct i915_fragment_program *p )
250{
251   const struct gl_fragment_program *program = p->ctx->FragmentProgram._Current;
252   const struct prog_instruction *inst = program->Base.Instructions;
253
254/*    _mesa_debug_fp_inst(program->Base.NumInstructions, inst); */
255
256   /* Is this a parse-failed program?  Ensure a valid program is
257    * loaded, as the flagging of an error isn't sufficient to stop
258    * this being uploaded to hardware.
259    */
260   if (inst[0].Opcode == OPCODE_END) {
261      GLuint tmp = i915_get_utemp( p );
262      i915_emit_arith( p,
263		      A0_MOV,
264		      UREG(REG_TYPE_OC, 0),
265		      A0_DEST_CHANNEL_ALL, 0,
266		      swizzle(tmp,ONE,ZERO,ONE,ONE), 0, 0);
267      return;
268   }
269
270   while (1) {
271      GLuint src0, src1, src2, flags;
272      GLuint tmp = 0;
273
274      switch (inst->Opcode) {
275      case OPCODE_ABS:
276	 src0 = src_vector( p, &inst->SrcReg[0], program);
277	 i915_emit_arith( p,
278			 A0_MAX,
279			 get_result_vector( p, inst ),
280			 get_result_flags( inst ), 0,
281			 src0, negate(src0, 1,1,1,1), 0);
282	 break;
283
284      case OPCODE_ADD:
285	 EMIT_2ARG_ARITH( A0_ADD );
286	 break;
287
288      case OPCODE_CMP:
289	 src0 = src_vector( p, &inst->SrcReg[0], program);
290	 src1 = src_vector( p, &inst->SrcReg[1], program);
291	 src2 = src_vector( p, &inst->SrcReg[2], program);
292	 i915_emit_arith( p,
293			 A0_CMP,
294			 get_result_vector( p, inst ),
295			 get_result_flags( inst ), 0,
296			 src0, src2, src1);	/* NOTE: order of src2, src1 */
297	 break;
298
299      case OPCODE_COS:
300	 src0 = src_vector( p, &inst->SrcReg[0], program);
301	 tmp = i915_get_utemp( p );
302
303	 i915_emit_arith( p,
304			 A0_MUL,
305			 tmp, A0_DEST_CHANNEL_X, 0,
306			 src0,
307			 i915_emit_const1f(p, 1.0/(M_PI)),
308			 0);
309
310	 i915_emit_arith( p,
311			 A0_MOD,
312			 tmp, A0_DEST_CHANNEL_X, 0,
313			 tmp,
314			 0, 0 );
315
316	 /* By choosing different taylor constants, could get rid of this mul:
317	  */
318	 i915_emit_arith( p,
319			 A0_MUL,
320			 tmp, A0_DEST_CHANNEL_X, 0,
321			 tmp,
322			 i915_emit_const1f(p, (M_PI)),
323			 0);
324
325	 /*
326	  * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1
327	  * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1
328	  * t0 = MUL t0.xxz1 t0.z111    ; x^6 x^4 x^2 1
329	  * result = DP4 t0, cos_constants
330	  */
331	 i915_emit_arith( p,
332			 A0_MUL,
333			 tmp, A0_DEST_CHANNEL_XY, 0,
334			 swizzle(tmp, X,X,ONE,ONE),
335			 swizzle(tmp, X,ONE,ONE,ONE), 0);
336
337	 i915_emit_arith( p,
338			 A0_MUL,
339			 tmp, A0_DEST_CHANNEL_XYZ, 0,
340			 swizzle(tmp, X,Y,X,ONE),
341			 swizzle(tmp, X,X,ONE,ONE), 0);
342
343	 i915_emit_arith( p,
344			 A0_MUL,
345			 tmp, A0_DEST_CHANNEL_XYZ, 0,
346			 swizzle(tmp, X,X,Z,ONE),
347			 swizzle(tmp, Z,ONE,ONE,ONE), 0);
348
349	 i915_emit_arith( p,
350			 A0_DP4,
351			 get_result_vector( p, inst ),
352			 get_result_flags( inst ), 0,
353			 swizzle(tmp, ONE,Z,Y,X),
354			 i915_emit_const4fv( p, cos_constants ), 0);
355
356	 break;
357
358      case OPCODE_DP3:
359	 EMIT_2ARG_ARITH( A0_DP3 );
360	 break;
361
362      case OPCODE_DP4:
363	 EMIT_2ARG_ARITH( A0_DP4 );
364	 break;
365
366      case OPCODE_DPH:
367	 src0 = src_vector( p, &inst->SrcReg[0], program);
368	 src1 = src_vector( p, &inst->SrcReg[1], program);
369
370	 i915_emit_arith( p,
371			 A0_DP4,
372			 get_result_vector( p, inst ),
373			 get_result_flags( inst ), 0,
374			 swizzle(src0, X,Y,Z,ONE), src1, 0);
375	 break;
376
377      case OPCODE_DST:
378	 src0 = src_vector( p, &inst->SrcReg[0], program);
379	 src1 = src_vector( p, &inst->SrcReg[1], program);
380
381	 /* result[0] = 1    * 1;
382	  * result[1] = a[1] * b[1];
383	  * result[2] = a[2] * 1;
384	  * result[3] = 1    * b[3];
385	  */
386	 i915_emit_arith( p,
387			 A0_MUL,
388			 get_result_vector( p, inst ),
389			 get_result_flags( inst ), 0,
390			 swizzle(src0, ONE, Y, Z,   ONE),
391			 swizzle(src1, ONE, Y, ONE, W  ),
392			 0);
393	 break;
394
395      case OPCODE_EX2:
396	 src0 = src_vector( p, &inst->SrcReg[0], program);
397
398	 i915_emit_arith( p,
399			 A0_EXP,
400			 get_result_vector( p, inst ),
401			 get_result_flags( inst ), 0,
402			 swizzle(src0,X,X,X,X), 0, 0);
403	 break;
404
405      case OPCODE_FLR:
406	 EMIT_1ARG_ARITH( A0_FLR );
407	 break;
408
409      case OPCODE_FRC:
410	 EMIT_1ARG_ARITH( A0_FRC );
411	 break;
412
413      case OPCODE_KIL:
414	 src0 = src_vector( p, &inst->SrcReg[0], program);
415	 tmp = i915_get_utemp( p );
416
417	 i915_emit_texld( p,
418			 tmp, A0_DEST_CHANNEL_ALL, /* use a dummy dest reg */
419			 0,
420			 src0,
421			 T0_TEXKILL );
422	 break;
423
424      case OPCODE_LG2:
425	 src0 = src_vector( p, &inst->SrcReg[0], program);
426
427	 i915_emit_arith( p,
428			 A0_LOG,
429			 get_result_vector( p, inst ),
430			 get_result_flags( inst ), 0,
431			 swizzle(src0,X,X,X,X), 0, 0);
432	 break;
433
434      case OPCODE_LIT:
435	 src0 = src_vector( p, &inst->SrcReg[0], program);
436	 tmp = i915_get_utemp( p );
437
438	 /* tmp = max( a.xyzw, a.00zw )
439	  * XXX: Clamp tmp.w to -128..128
440	  * tmp.y = log(tmp.y)
441	  * tmp.y = tmp.w * tmp.y
442	  * tmp.y = exp(tmp.y)
443	  * result = cmp (a.11-x1, a.1x01, a.1xy1 )
444	  */
445	 i915_emit_arith( p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0,
446			 src0, swizzle(src0, ZERO, ZERO, Z, W), 0 );
447
448	 i915_emit_arith( p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0,
449			 swizzle(tmp, Y, Y, Y, Y), 0, 0 );
450
451	 i915_emit_arith( p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0,
452			 swizzle(tmp, ZERO, Y, ZERO, ZERO),
453			 swizzle(tmp, ZERO, W, ZERO, ZERO), 0 );
454
455	 i915_emit_arith( p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0,
456			 swizzle(tmp, Y, Y, Y, Y), 0, 0 );
457
458	 i915_emit_arith( p, A0_CMP,
459			 get_result_vector( p, inst ),
460			 get_result_flags( inst ), 0,
461			 negate(swizzle(tmp, ONE, ONE, X, ONE),0,0,1,0),
462			 swizzle(tmp, ONE, X, ZERO, ONE),
463			 swizzle(tmp, ONE, X, Y, ONE));
464
465	 break;
466
467      case OPCODE_LRP:
468	 src0 = src_vector( p, &inst->SrcReg[0], program);
469	 src1 = src_vector( p, &inst->SrcReg[1], program);
470	 src2 = src_vector( p, &inst->SrcReg[2], program);
471	 flags = get_result_flags( inst );
472	 tmp = i915_get_utemp( p );
473
474	 /* b*a + c*(1-a)
475	  *
476	  * b*a + c - ca
477	  *
478	  * tmp = b*a + c,
479	  * result = (-c)*a + tmp
480	  */
481	 i915_emit_arith( p, A0_MAD, tmp,
482			 flags & A0_DEST_CHANNEL_ALL, 0,
483			 src1, src0, src2 );
484
485	 i915_emit_arith( p, A0_MAD,
486			 get_result_vector( p, inst ),
487			 flags, 0,
488			 negate(src2, 1,1,1,1), src0, tmp );
489	 break;
490
491      case OPCODE_MAD:
492	 EMIT_3ARG_ARITH( A0_MAD );
493	 break;
494
495      case OPCODE_MAX:
496	 EMIT_2ARG_ARITH( A0_MAX );
497	 break;
498
499      case OPCODE_MIN:
500	 src0 = src_vector( p, &inst->SrcReg[0], program);
501	 src1 = src_vector( p, &inst->SrcReg[1], program);
502	 tmp = i915_get_utemp( p );
503	 flags = get_result_flags( inst );
504
505	 i915_emit_arith( p,
506			 A0_MAX,
507			 tmp, flags & A0_DEST_CHANNEL_ALL, 0,
508			 negate(src0,1,1,1,1),
509			 negate(src1,1,1,1,1), 0);
510
511	 i915_emit_arith( p,
512			 A0_MOV,
513			 get_result_vector( p, inst ),
514			 flags, 0,
515			 negate(tmp, 1,1,1,1), 0, 0);
516	 break;
517
518      case OPCODE_MOV:
519	 EMIT_1ARG_ARITH( A0_MOV );
520	 break;
521
522      case OPCODE_MUL:
523	 EMIT_2ARG_ARITH( A0_MUL );
524	 break;
525
526      case OPCODE_POW:
527	 src0 = src_vector( p, &inst->SrcReg[0], program);
528	 src1 = src_vector( p, &inst->SrcReg[1], program);
529	 tmp = i915_get_utemp( p );
530	 flags = get_result_flags( inst );
531
532	 /* XXX: masking on intermediate values, here and elsewhere.
533	  */
534	 i915_emit_arith( p,
535			 A0_LOG,
536			 tmp, A0_DEST_CHANNEL_X, 0,
537			 swizzle(src0,X,X,X,X), 0, 0);
538
539	 i915_emit_arith( p,
540			 A0_MUL,
541			 tmp, A0_DEST_CHANNEL_X, 0,
542			 tmp, src1, 0);
543
544
545	 i915_emit_arith( p,
546			 A0_EXP,
547			 get_result_vector( p, inst ),
548			 flags, 0,
549			 swizzle(tmp,X,X,X,X), 0, 0);
550
551	 break;
552
553      case OPCODE_RCP:
554	 src0 = src_vector( p, &inst->SrcReg[0], program);
555
556	 i915_emit_arith( p,
557			 A0_RCP,
558			 get_result_vector( p, inst ),
559			 get_result_flags( inst ), 0,
560			 swizzle(src0,X,X,X,X), 0, 0);
561	 break;
562
563      case OPCODE_RSQ:
564
565	 src0 = src_vector( p, &inst->SrcReg[0], program);
566
567	 i915_emit_arith( p,
568			 A0_RSQ,
569			 get_result_vector( p, inst ),
570			 get_result_flags( inst ), 0,
571			 swizzle(src0,X,X,X,X), 0, 0);
572	 break;
573
574      case OPCODE_SCS:
575	 src0 = src_vector( p, &inst->SrcReg[0], program);
576	 tmp = i915_get_utemp( p );
577
578	 /*
579	  * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1
580	  * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
581	  * t1 = MUL t0.xyyw t0.yz11    ; x^7 x^5 x^3 x
582	  * scs.x = DP4 t1, sin_constants
583	  * t1 = MUL t0.xxz1 t0.z111    ; x^6 x^4 x^2 1
584	  * scs.y = DP4 t1, cos_constants
585	  */
586	 i915_emit_arith( p,
587			 A0_MUL,
588			 tmp, A0_DEST_CHANNEL_XY, 0,
589			 swizzle(src0, X,X,ONE,ONE),
590			 swizzle(src0, X,ONE,ONE,ONE), 0);
591
592	 i915_emit_arith( p,
593			 A0_MUL,
594			 tmp, A0_DEST_CHANNEL_ALL, 0,
595			 swizzle(tmp, X,Y,X,Y),
596			 swizzle(tmp, X,X,ONE,ONE), 0);
597
598	 if (inst->DstReg.WriteMask & WRITEMASK_Y) {
599	    GLuint tmp1;
600
601	    if (inst->DstReg.WriteMask & WRITEMASK_X)
602	       tmp1 = i915_get_utemp( p );
603	    else
604	       tmp1 = tmp;
605
606	    i915_emit_arith( p,
607			    A0_MUL,
608			    tmp1, A0_DEST_CHANNEL_ALL, 0,
609			    swizzle(tmp, X,Y,Y,W),
610			    swizzle(tmp, X,Z,ONE,ONE), 0);
611
612	    i915_emit_arith( p,
613			    A0_DP4,
614			    get_result_vector( p, inst ),
615			    A0_DEST_CHANNEL_Y, 0,
616			    swizzle(tmp1, W,Z,Y,X),
617			    i915_emit_const4fv( p, sin_constants ), 0);
618	 }
619
620	 if (inst->DstReg.WriteMask & WRITEMASK_X) {
621	    i915_emit_arith( p,
622			    A0_MUL,
623			    tmp, A0_DEST_CHANNEL_XYZ, 0,
624			    swizzle(tmp, X,X,Z,ONE),
625			    swizzle(tmp, Z,ONE,ONE,ONE), 0);
626
627	    i915_emit_arith( p,
628			    A0_DP4,
629			    get_result_vector( p, inst ),
630			    A0_DEST_CHANNEL_X, 0,
631			    swizzle(tmp, ONE,Z,Y,X),
632			    i915_emit_const4fv( p, cos_constants ), 0);
633	 }
634	 break;
635
636      case OPCODE_SGE:
637	 EMIT_2ARG_ARITH( A0_SGE );
638	 break;
639
640      case OPCODE_SIN:
641	 src0 = src_vector( p, &inst->SrcReg[0], program);
642	 tmp = i915_get_utemp( p );
643
644	 i915_emit_arith( p,
645			 A0_MUL,
646			 tmp, A0_DEST_CHANNEL_X, 0,
647			 src0,
648			 i915_emit_const1f(p, 1.0/(M_PI)),
649			 0);
650
651	 i915_emit_arith( p,
652			 A0_MOD,
653			 tmp, A0_DEST_CHANNEL_X, 0,
654			 tmp,
655			 0, 0 );
656
657	 /* By choosing different taylor constants, could get rid of this mul:
658	  */
659	 i915_emit_arith( p,
660			 A0_MUL,
661			 tmp, A0_DEST_CHANNEL_X, 0,
662			 tmp,
663			 i915_emit_const1f(p, (M_PI)),
664			 0);
665
666	 /*
667	  * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1
668	  * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
669	  * t1 = MUL t0.xyyw t0.yz11    ; x^7 x^5 x^3 x
670	  * result = DP4 t1.wzyx, sin_constants
671	  */
672	 i915_emit_arith( p,
673			 A0_MUL,
674			 tmp, A0_DEST_CHANNEL_XY, 0,
675			 swizzle(tmp, X,X,ONE,ONE),
676			 swizzle(tmp, X,ONE,ONE,ONE), 0);
677
678	 i915_emit_arith( p,
679			 A0_MUL,
680			 tmp, A0_DEST_CHANNEL_ALL, 0,
681			 swizzle(tmp, X,Y,X,Y),
682			 swizzle(tmp, X,X,ONE,ONE), 0);
683
684	 i915_emit_arith( p,
685			 A0_MUL,
686			 tmp, A0_DEST_CHANNEL_ALL, 0,
687			 swizzle(tmp, X,Y,Y,W),
688			 swizzle(tmp, X,Z,ONE,ONE), 0);
689
690	 i915_emit_arith( p,
691			 A0_DP4,
692			 get_result_vector( p, inst ),
693			 get_result_flags( inst ), 0,
694			 swizzle(tmp, W, Z, Y, X ),
695			 i915_emit_const4fv( p, sin_constants ), 0);
696	 break;
697
698      case OPCODE_SLT:
699	 EMIT_2ARG_ARITH( A0_SLT );
700	 break;
701
702      case OPCODE_SUB:
703	 src0 = src_vector( p, &inst->SrcReg[0], program);
704	 src1 = src_vector( p, &inst->SrcReg[1], program);
705
706	 i915_emit_arith( p,
707			 A0_ADD,
708			 get_result_vector( p, inst ),
709			 get_result_flags( inst ), 0,
710			 src0, negate(src1, 1,1,1,1), 0);
711	 break;
712
713      case OPCODE_SWZ:
714	 EMIT_1ARG_ARITH( A0_MOV ); /* extended swizzle handled natively */
715	 break;
716
717      case OPCODE_TEX:
718	 EMIT_TEX( T0_TEXLD );
719	 break;
720
721      case OPCODE_TXB:
722	 EMIT_TEX( T0_TEXLDB );
723	 break;
724
725      case OPCODE_TXP:
726	 EMIT_TEX( T0_TEXLDP );
727	 break;
728
729      case OPCODE_XPD:
730	 /* Cross product:
731	  *      result.x = src0.y * src1.z - src0.z * src1.y;
732	  *      result.y = src0.z * src1.x - src0.x * src1.z;
733	  *      result.z = src0.x * src1.y - src0.y * src1.x;
734	  *      result.w = undef;
735	  */
736	 src0 = src_vector( p, &inst->SrcReg[0], program);
737	 src1 = src_vector( p, &inst->SrcReg[1], program);
738	 tmp = i915_get_utemp( p );
739
740	 i915_emit_arith( p,
741			 A0_MUL,
742			 tmp, A0_DEST_CHANNEL_ALL, 0,
743			 swizzle(src0,Z,X,Y,ONE),
744			 swizzle(src1,Y,Z,X,ONE), 0);
745
746	 i915_emit_arith( p,
747			 A0_MAD,
748			 get_result_vector( p, inst ),
749			 get_result_flags( inst ), 0,
750			 swizzle(src0,Y,Z,X,ONE),
751			 swizzle(src1,Z,X,Y,ONE),
752			 negate(tmp,1,1,1,0));
753	 break;
754
755      case OPCODE_END:
756	 return;
757
758      default:
759	 i915_program_error( p, "bad opcode" );
760	 return;
761      }
762
763      inst++;
764      i915_release_utemps( p );
765   }
766}
767
768/* Rather than trying to intercept and jiggle depth writes during
769 * emit, just move the value into its correct position at the end of
770 * the program:
771 */
772static void fixup_depth_write( struct i915_fragment_program *p )
773{
774   if (p->depth_written) {
775      GLuint depth = UREG(REG_TYPE_OD, 0);
776
777      i915_emit_arith( p,
778		      A0_MOV,
779		      depth, A0_DEST_CHANNEL_W, 0,
780		      swizzle(depth,X,Y,Z,Z),
781		      0, 0);
782   }
783}
784
785
786static void check_wpos( struct i915_fragment_program *p )
787{
788   GLuint inputs = p->FragProg.Base.InputsRead;
789   GLint i;
790
791   p->wpos_tex = -1;
792
793   for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
794      if (inputs & FRAG_BIT_TEX(i))
795	 continue;
796      else if (inputs & FRAG_BIT_WPOS) {
797	 p->wpos_tex = i;
798	 inputs &= ~FRAG_BIT_WPOS;
799      }
800   }
801
802   if (inputs & FRAG_BIT_WPOS) {
803      i915_program_error(p, "No free texcoord for wpos value");
804   }
805}
806
807
808static void translate_program( struct i915_fragment_program *p )
809{
810   i915ContextPtr i915 = I915_CONTEXT(p->ctx);
811
812   i915_init_program( i915, p );
813   check_wpos( p );
814   upload_program( p );
815   fixup_depth_write( p );
816   i915_fini_program( p );
817
818   p->translated = 1;
819}
820
821
822static void track_params( struct i915_fragment_program *p )
823{
824   GLint i;
825
826   if (p->nr_params)
827      _mesa_load_state_parameters(p->ctx, p->FragProg.Base.Parameters);
828
829   for (i = 0; i < p->nr_params; i++) {
830      GLint reg = p->param[i].reg;
831      COPY_4V( p->constant[reg], p->param[i].values );
832   }
833
834   p->params_uptodate = 1;
835   p->on_hardware = 0;		/* overkill */
836}
837
838
839static void i915BindProgram( GLcontext *ctx,
840			    GLenum target,
841			    struct gl_program *prog )
842{
843   if (target == GL_FRAGMENT_PROGRAM_ARB) {
844      i915ContextPtr i915 = I915_CONTEXT(ctx);
845      struct i915_fragment_program *p = (struct i915_fragment_program *)prog;
846
847      if (i915->current_program == p)
848	 return;
849
850      if (i915->current_program) {
851	 i915->current_program->on_hardware = 0;
852	 i915->current_program->params_uptodate = 0;
853      }
854
855      i915->current_program = p;
856
857      assert(p->on_hardware == 0);
858      assert(p->params_uptodate == 0);
859
860   }
861}
862
863static struct gl_program *i915NewProgram( GLcontext *ctx,
864				      GLenum target,
865				      GLuint id )
866{
867   switch (target) {
868   case GL_VERTEX_PROGRAM_ARB:
869      return _mesa_init_vertex_program( ctx, CALLOC_STRUCT(gl_vertex_program),
870					target, id );
871
872   case GL_FRAGMENT_PROGRAM_ARB: {
873      struct i915_fragment_program *prog = CALLOC_STRUCT(i915_fragment_program);
874      if (prog) {
875	 i915_init_program( I915_CONTEXT(ctx), prog );
876
877	 return _mesa_init_fragment_program( ctx, &prog->FragProg,
878					     target, id );
879      }
880      else
881	 return NULL;
882   }
883
884   default:
885      /* Just fallback:
886       */
887      return _mesa_new_program( ctx, target, id );
888   }
889}
890
891static void i915DeleteProgram( GLcontext *ctx,
892			      struct gl_program *prog )
893{
894   if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
895      i915ContextPtr i915 = I915_CONTEXT(ctx);
896      struct i915_fragment_program *p = (struct i915_fragment_program *)prog;
897
898      if (i915->current_program == p)
899	 i915->current_program = 0;
900   }
901
902   _mesa_delete_program( ctx, prog );
903}
904
905
906static GLboolean i915IsProgramNative( GLcontext *ctx,
907				     GLenum target,
908				     struct gl_program *prog )
909{
910   if (target == GL_FRAGMENT_PROGRAM_ARB) {
911      struct i915_fragment_program *p = (struct i915_fragment_program *)prog;
912
913      if (!p->translated)
914	 translate_program( p );
915
916      return !p->error;
917   }
918   else
919      return GL_TRUE;
920}
921
922static void i915ProgramStringNotify( GLcontext *ctx,
923				    GLenum target,
924				    struct gl_program *prog )
925{
926   if (target == GL_FRAGMENT_PROGRAM_ARB) {
927      struct i915_fragment_program *p = (struct i915_fragment_program *)prog;
928      p->translated = 0;
929
930      /* Hack: make sure fog is correctly enabled according to this
931       * fragment program's fog options.
932       */
933      if (p->FragProg.FogOption) {
934         /* add extra instructions to do fog, then turn off FogOption field */
935         _mesa_append_fog_code(ctx, &p->FragProg);
936         p->FragProg.FogOption = GL_NONE;
937      }
938   }
939
940   _tnl_program_string(ctx, target, prog);
941}
942
943
944void i915ValidateFragmentProgram( i915ContextPtr i915 )
945{
946   GLcontext *ctx = &i915->intel.ctx;
947   intelContextPtr intel = INTEL_CONTEXT(ctx);
948   TNLcontext *tnl = TNL_CONTEXT(ctx);
949   struct vertex_buffer *VB = &tnl->vb;
950
951   struct i915_fragment_program *p =
952      (struct i915_fragment_program *)ctx->FragmentProgram._Current;
953
954   const GLuint inputsRead = p->FragProg.Base.InputsRead;
955   GLuint s4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_VFMT_MASK;
956   GLuint s2 = S2_TEXCOORD_NONE;
957   int i, offset = 0;
958
959   if (i915->current_program != p)
960   {
961      if (i915->current_program) {
962	 i915->current_program->on_hardware = 0;
963	 i915->current_program->params_uptodate = 0;
964      }
965
966      i915->current_program = p;
967   }
968
969
970   /* Important:
971    */
972   VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
973
974   if (!p->translated)
975      translate_program( p );
976
977   intel->vertex_attr_count = 0;
978   intel->wpos_offset = 0;
979   intel->wpos_size = 0;
980   intel->coloroffset = 0;
981   intel->specoffset = 0;
982
983   if (inputsRead & FRAG_BITS_TEX_ANY) {
984      EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, S4_VFMT_XYZW, 16 );
985   }
986   else {
987      EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, S4_VFMT_XYZ, 12 );
988   }
989
990   if (inputsRead & FRAG_BIT_COL0) {
991      intel->coloroffset = offset / 4;
992      EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, S4_VFMT_COLOR, 4 );
993   }
994
995   if ((inputsRead & (FRAG_BIT_COL1|FRAG_BIT_FOGC)) ||
996       i915->vertex_fog != I915_FOG_NONE) {
997
998      if (inputsRead & FRAG_BIT_COL1) {
999	 intel->specoffset = offset / 4;
1000	 EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR, S4_VFMT_SPEC_FOG, 3 );
1001      }
1002      else
1003	 EMIT_PAD(3);
1004
1005      if ((inputsRead & FRAG_BIT_FOGC) || i915->vertex_fog != I915_FOG_NONE)
1006	 EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F, S4_VFMT_SPEC_FOG, 1 );
1007      else
1008	 EMIT_PAD( 1 );
1009   }
1010
1011   /* XXX this was disabled, but enabling this code helped fix the Glean
1012    * tfragprog1 fog tests.
1013    */
1014#if 1
1015   if ((inputsRead & FRAG_BIT_FOGC) || i915->vertex_fog != I915_FOG_NONE) {
1016      EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1F, S4_VFMT_FOG_PARAM, 4 );
1017   }
1018#endif
1019
1020   for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
1021      if (inputsRead & FRAG_BIT_TEX(i)) {
1022	 int sz = VB->TexCoordPtr[i]->size;
1023
1024	 s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
1025	 s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz));
1026
1027	 EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_SZ(sz), 0, sz * 4 );
1028      }
1029      else if (i == p->wpos_tex) {
1030
1031	 /* If WPOS is required, duplicate the XYZ position data in an
1032	  * unused texture coordinate:
1033	  */
1034	 s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
1035	 s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(3));
1036
1037	 intel->wpos_offset = offset;
1038	 intel->wpos_size = 3 * sizeof(GLuint);
1039
1040	 EMIT_PAD( intel->wpos_size );
1041      }
1042   }
1043
1044   if (s2 != i915->state.Ctx[I915_CTXREG_LIS2] ||
1045       s4 != i915->state.Ctx[I915_CTXREG_LIS4]) {
1046
1047      I915_STATECHANGE( i915, I915_UPLOAD_CTX );
1048
1049      /* Must do this *after* statechange, so as not to affect
1050       * buffered vertices reliant on the old state:
1051       */
1052      intel->vertex_size = _tnl_install_attrs( &intel->ctx,
1053					       intel->vertex_attrs,
1054					       intel->vertex_attr_count,
1055					       intel->ViewportMatrix.m, 0 );
1056
1057      intel->vertex_size >>= 2;
1058
1059      i915->state.Ctx[I915_CTXREG_LIS2] = s2;
1060      i915->state.Ctx[I915_CTXREG_LIS4] = s4;
1061
1062      assert(intel->vtbl.check_vertex_size( intel, intel->vertex_size ));
1063   }
1064
1065   if (!p->params_uptodate)
1066      track_params( p );
1067
1068   if (!p->on_hardware)
1069      i915_upload_program( i915, p );
1070}
1071
1072void i915InitFragProgFuncs( struct dd_function_table *functions )
1073{
1074   functions->BindProgram = i915BindProgram;
1075   functions->NewProgram = i915NewProgram;
1076   functions->DeleteProgram = i915DeleteProgram;
1077   functions->IsProgramNative = i915IsProgramNative;
1078   functions->ProgramStringNotify = i915ProgramStringNotify;
1079}
1080