i915_fragprog.c revision 594c3f67ac8fceb061e47b090ec4d149c55a1940
1/**************************************************************************
2 *
3 * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28#include "glheader.h"
29#include "macros.h"
30#include "enums.h"
31
32#include "tnl/t_context.h"
33#include "intel_batchbuffer.h"
34
35#include "i915_reg.h"
36#include "i915_context.h"
37#include "i915_program.h"
38
39#include "nvfragprog.h"
40#include "program.h"
41#include "arbfragparse.h"
42
43
44
45
/* Approximation of pi (six decimal places).  The SIN and COS
 * translations in this file combine it into the 1/(2*pi) and 2*pi scale
 * factors emitted as program constants.
 */
#define PI 3.141592


/* Coefficients of the first four terms of the sine Taylor series, in
 * ascending order of power:  1, -1/3!, 1/5!, -1/7!
 * They are emitted as a 4-float constant and combined with a vector of
 * odd powers of x by a DP4.
 */
static const GLfloat sin_constants[4] = {  1.0,
					   -1.0/(3*2*1),
					   1.0/(5*4*3*2*1),
					   -1.0/(7*6*5*4*3*2*1) };

/* Coefficients of the first four terms of the cosine Taylor series, in
 * ascending order of power:  1, -1/2!, 1/4!, -1/6!
 * They are emitted as a 4-float constant and combined with a vector of
 * even powers of x by a DP4.
 */
static const GLfloat cos_constants[4] = {  1.0,
					   -1.0/(2*1),
					   1.0/(4*3*2*1),
					   -1.0/(6*5*4*3*2*1) };
60
61/**
62 * Retrieve a ureg for the given source register.  Will emit
63 * constants, apply swizzling and negation as needed.
64 */
65static GLuint src_vector( struct i915_fragment_program *p,
66			  const struct fp_src_register *source,
67			  const struct fragment_program *program )
68{
69   GLuint src;
70
71   switch (source->File) {
72
73      /* Registers:
74       */
75      case PROGRAM_TEMPORARY:
76	 if (source->Index >= I915_MAX_TEMPORARY) {
77	    i915_program_error( p, "Exceeded max temporary reg" );
78	    return 0;
79	 }
80	 src = UREG( REG_TYPE_R, source->Index );
81         break;
82      case PROGRAM_INPUT:
83	 switch (source->Index) {
84	 case FRAG_ATTRIB_WPOS:
85	    src = i915_emit_decl( p,  REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL );
86	    break;
87	 case FRAG_ATTRIB_COL0:
88	    src = i915_emit_decl( p,  REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL );
89	    break;
90	 case FRAG_ATTRIB_COL1:
91	    src = i915_emit_decl( p,  REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ );
92	    src = swizzle( src, X, Y, Z, ONE );
93	    break;
94	 case FRAG_ATTRIB_FOGC:
95	    src = i915_emit_decl( p,  REG_TYPE_T, T_FOG_W, D0_CHANNEL_W );
96	    src = swizzle( src, W, W, W, W );
97	    break;
98	 case FRAG_ATTRIB_TEX0:
99	 case FRAG_ATTRIB_TEX1:
100	 case FRAG_ATTRIB_TEX2:
101	 case FRAG_ATTRIB_TEX3:
102	 case FRAG_ATTRIB_TEX4:
103	 case FRAG_ATTRIB_TEX5:
104	 case FRAG_ATTRIB_TEX6:
105	 case FRAG_ATTRIB_TEX7:
106	    src = i915_emit_decl( p,  REG_TYPE_T,
107				 T_TEX0 + (source->Index - FRAG_ATTRIB_TEX0),
108				 D0_CHANNEL_ALL );
109	    break;
110
111	 default:
112	    i915_program_error( p, "Bad source->Index" );
113	    return 0;
114	 }
115         break;
116
117	 /* Various paramters and env values.  All emitted to
118	  * hardware as program constants.
119	  */
120      case PROGRAM_LOCAL_PARAM:
121         src = i915_emit_param4fv(
122	    p, program->Base.LocalParams[source->Index]);
123	 break;
124
125      case PROGRAM_ENV_PARAM:
126         src = i915_emit_param4fv(
127	    p, p->ctx->FragmentProgram.Parameters[source->Index]);
128	 break;
129
130      case PROGRAM_STATE_VAR:
131      case PROGRAM_NAMED_PARAM:
132         src = i915_emit_param4fv(
133	    p, program->Parameters->Parameters[source->Index].Values );
134	 break;
135
136      default:
137	 i915_program_error( p, "Bad source->File" );
138	 return 0;
139   }
140
141   src = swizzle(src,
142		 source->Swizzle[0],
143		 source->Swizzle[1],
144		 source->Swizzle[2],
145		 source->Swizzle[3]);
146
147   if (source->NegateBase)
148      src = negate( src, 1,1,1,1 );
149
150   return src;
151}
152
153
154static GLuint get_result_vector( struct i915_fragment_program *p,
155				 const struct fp_instruction *inst )
156{
157   switch (inst->DstReg.File) {
158   case PROGRAM_OUTPUT:
159      switch (inst->DstReg.Index) {
160      case 0:
161	 return UREG(REG_TYPE_OC, 0);
162      case 1:
163	 p->depth_written = 1;
164	 return UREG(REG_TYPE_OD, 0);
165      default:
166	 i915_program_error( p, "Bad inst->DstReg.Index" );
167	 return 0;
168      }
169   case PROGRAM_TEMPORARY:
170      return UREG(REG_TYPE_R, inst->DstReg.Index);
171   default:
172      i915_program_error( p, "Bad inst->DstReg.File" );
173      return 0;
174   }
175}
176
177static GLuint get_result_flags( const struct fp_instruction *inst )
178{
179   GLuint flags = 0;
180
181   if (inst->Saturate) flags |= A0_DEST_SATURATE;
182   if (inst->DstReg.WriteMask[0]) flags |= A0_DEST_CHANNEL_X;
183   if (inst->DstReg.WriteMask[1]) flags |= A0_DEST_CHANNEL_Y;
184   if (inst->DstReg.WriteMask[2]) flags |= A0_DEST_CHANNEL_Z;
185   if (inst->DstReg.WriteMask[3]) flags |= A0_DEST_CHANNEL_W;
186
187   return flags;
188}
189
190static GLuint translate_tex_src_bit( struct i915_fragment_program *p,
191				     GLubyte bit )
192{
193   switch (bit) {
194   case TEXTURE_1D_BIT:   return D0_SAMPLE_TYPE_2D;
195   case TEXTURE_2D_BIT:   return D0_SAMPLE_TYPE_2D;
196   case TEXTURE_RECT_BIT: return D0_SAMPLE_TYPE_2D;
197   case TEXTURE_3D_BIT:   return D0_SAMPLE_TYPE_VOLUME;
198   case TEXTURE_CUBE_BIT: return D0_SAMPLE_TYPE_CUBE;
199   default: i915_program_error(p, "TexSrcBit"); return 0;
200   }
201}
202
/* Emit one texture instruction with hardware opcode OP.
 *
 * Declares the sampler for the instruction's texture unit/target,
 * translates SrcReg[0] as the coordinate and writes to the
 * instruction's destination.
 *
 * Not hygienic: expects the locals `p`, `inst` and `program` to be in
 * scope at the point of expansion.
 */
#define EMIT_TEX( OP )						\
do {								\
   GLuint dim = translate_tex_src_bit( p, inst->TexSrcBit );	\
   GLuint sampler = i915_emit_decl(p, REG_TYPE_S,		\
				  inst->TexSrcUnit, dim);	\
   GLuint coord = src_vector( p, &inst->SrcReg[0], program);	\
   /* Texel lookup */						\
								\
   i915_emit_texld( p,						\
	       get_result_vector( p, inst ),			\
	       get_result_flags( inst ),			\
	       sampler,						\
	       coord,						\
	       OP);						\
} while (0)

/* Emit one arithmetic instruction with hardware opcode OP that maps
 * directly onto the Mesa instruction.
 *
 * N is the number of source operands (1..3); only the first N entries
 * of inst->SrcReg are translated, the remaining operand slots are
 * passed as 0.
 *
 * Not hygienic: expects the locals `p`, `inst` and `program` to be in
 * scope at the point of expansion.
 */
#define EMIT_ARITH( OP, N )						\
do {									\
   i915_emit_arith( p,							\
	       OP,							\
	       get_result_vector( p, inst ), 				\
	       get_result_flags( inst ), 0,			\
	       (N<1)?0:src_vector( p, &inst->SrcReg[0], program),	\
	       (N<2)?0:src_vector( p, &inst->SrcReg[1], program),	\
	       (N<3)?0:src_vector( p, &inst->SrcReg[2], program));	\
} while (0)

/* Convenience wrappers fixing the operand count of EMIT_ARITH. */
#define EMIT_1ARG_ARITH( OP ) EMIT_ARITH( OP, 1 )
#define EMIT_2ARG_ARITH( OP ) EMIT_ARITH( OP, 2 )
#define EMIT_3ARG_ARITH( OP ) EMIT_ARITH( OP, 3 )
233
234
235/* Possible concerns:
236 *
237 * SIN, COS -- could use another taylor step?
238 * LIT      -- results seem a little different to sw mesa
239 * LOG      -- different to mesa on negative numbers, but this is conformant.
240 *
241 * Parse failures -- Mesa doesn't currently give a good indication
242 * internally whether a particular program string parsed or not.  This
243 * can lead to confusion -- hopefully we cope with it ok now.
244 *
245 */
246static void upload_program( struct i915_fragment_program *p )
247{
248   const struct fragment_program *program = p->ctx->FragmentProgram.Current;
249   const struct fp_instruction *inst = program->Instructions;
250
251/*    _mesa_debug_fp_inst(program->Base.NumInstructions, inst); */
252
253   /* Is this a parse-failed program?  Ensure a valid program is
254    * loaded, as the flagging of an error isn't sufficient to stop
255    * this being uploaded to hardware.
256    */
257   if (inst[0].Opcode == FP_OPCODE_END) {
258      GLuint tmp = i915_get_utemp( p );
259      i915_emit_arith( p,
260		      A0_MOV,
261		      UREG(REG_TYPE_OC, 0),
262		      A0_DEST_CHANNEL_ALL, 0,
263		      swizzle(tmp,ONE,ZERO,ONE,ONE), 0, 0);
264      return;
265   }
266
267   while (1) {
268      GLuint src0, src1, src2, flags;
269      GLuint tmp = 0;
270
271      switch (inst->Opcode) {
272      case FP_OPCODE_ABS:
273	 src0 = src_vector( p, &inst->SrcReg[0], program);
274	 i915_emit_arith( p,
275			 A0_MAX,
276			 get_result_vector( p, inst ),
277			 get_result_flags( inst ), 0,
278			 src0, negate(src0, 1,1,1,1), 0);
279	 break;
280
281      case FP_OPCODE_ADD:
282	 EMIT_2ARG_ARITH( A0_ADD );
283	 break;
284
285      case FP_OPCODE_CMP:
286	 src0 = src_vector( p, &inst->SrcReg[0], program);
287	 src1 = src_vector( p, &inst->SrcReg[1], program);
288	 src2 = src_vector( p, &inst->SrcReg[2], program);
289	 i915_emit_arith( p,
290			 A0_CMP,
291			 get_result_vector( p, inst ),
292			 get_result_flags( inst ), 0,
293			 src0, src2, src1);	/* NOTE: order of src2, src1 */
294	 break;
295
296      case FP_OPCODE_COS:
297	 src0 = src_vector( p, &inst->SrcReg[0], program);
298	 tmp = i915_get_utemp( p );
299
300	 i915_emit_arith( p,
301			 A0_MUL,
302			 tmp, A0_DEST_CHANNEL_X, 0,
303			 src0,
304			 i915_emit_const1f(p, 1.0/(PI * 2)),
305			 0);
306
307	 i915_emit_arith( p,
308			 A0_MOD,
309			 tmp, A0_DEST_CHANNEL_X, 0,
310			 tmp,
311			 0, 0 );
312
313	 /* By choosing different taylor constants, could get rid of this mul:
314	  */
315	 i915_emit_arith( p,
316			 A0_MUL,
317			 tmp, A0_DEST_CHANNEL_X, 0,
318			 tmp,
319			 i915_emit_const1f(p, (PI * 2)),
320			 0);
321
322	 /*
323	  * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1
324	  * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1
325	  * t0 = MUL t0.xxz1 t0.z111    ; x^6 x^4 x^2 1
326	  * result = DP4 t0, cos_constants
327	  */
328	 i915_emit_arith( p,
329			 A0_MUL,
330			 tmp, A0_DEST_CHANNEL_XY, 0,
331			 swizzle(tmp, X,X,ONE,ONE),
332			 swizzle(tmp, X,ONE,ONE,ONE), 0);
333
334	 i915_emit_arith( p,
335			 A0_MUL,
336			 tmp, A0_DEST_CHANNEL_XYZ, 0,
337			 swizzle(tmp, X,Y,X,ONE),
338			 swizzle(tmp, X,X,ONE,ONE), 0);
339
340	 i915_emit_arith( p,
341			 A0_MUL,
342			 tmp, A0_DEST_CHANNEL_XYZ, 0,
343			 swizzle(tmp, X,X,Z,ONE),
344			 swizzle(tmp, Z,ONE,ONE,ONE), 0);
345
346	 i915_emit_arith( p,
347			 A0_DP4,
348			 get_result_vector( p, inst ),
349			 get_result_flags( inst ), 0,
350			 swizzle(tmp, ONE,Z,Y,X),
351			 i915_emit_const4fv( p, cos_constants ), 0);
352
353	 break;
354
355      case FP_OPCODE_DP3:
356	 EMIT_2ARG_ARITH( A0_DP3 );
357	 break;
358
359      case FP_OPCODE_DP4:
360	 EMIT_2ARG_ARITH( A0_DP4 );
361	 break;
362
363      case FP_OPCODE_DPH:
364	 src0 = src_vector( p, &inst->SrcReg[0], program);
365	 src1 = src_vector( p, &inst->SrcReg[1], program);
366
367	 i915_emit_arith( p,
368			 A0_DP4,
369			 get_result_vector( p, inst ),
370			 get_result_flags( inst ), 0,
371			 swizzle(src0, X,Y,Z,ONE), src1, 0);
372	 break;
373
374      case FP_OPCODE_DST:
375	 src0 = src_vector( p, &inst->SrcReg[0], program);
376	 src1 = src_vector( p, &inst->SrcReg[1], program);
377
378	 /* result[0] = 1    * 1;
379	  * result[1] = a[1] * b[1];
380	  * result[2] = a[2] * 1;
381	  * result[3] = 1    * b[3];
382	  */
383	 i915_emit_arith( p,
384			 A0_MUL,
385			 get_result_vector( p, inst ),
386			 get_result_flags( inst ), 0,
387			 swizzle(src0, ONE, Y, Z,   ONE),
388			 swizzle(src1, ONE, Y, ONE, W  ),
389			 0);
390	 break;
391
392      case FP_OPCODE_EX2:
393	 src0 = src_vector( p, &inst->SrcReg[0], program);
394
395	 i915_emit_arith( p,
396			 A0_EXP,
397			 get_result_vector( p, inst ),
398			 get_result_flags( inst ), 0,
399			 swizzle(src0,X,X,X,X), 0, 0);
400	 break;
401
402      case FP_OPCODE_FLR:
403	 EMIT_1ARG_ARITH( A0_FLR );
404	 break;
405
406      case FP_OPCODE_FRC:
407	 EMIT_1ARG_ARITH( A0_FRC );
408	 break;
409
410      case FP_OPCODE_KIL:
411	 src0 = src_vector( p, &inst->SrcReg[0], program);
412	 tmp = i915_get_utemp( p );
413
414	 i915_emit_texld( p,
415			 tmp, A0_DEST_CHANNEL_ALL, /* use a dummy dest reg */
416			 0,
417			 src0,
418			 T0_TEXKILL );
419	 break;
420
421      case FP_OPCODE_LG2:
422	 src0 = src_vector( p, &inst->SrcReg[0], program);
423
424	 i915_emit_arith( p,
425			 A0_LOG,
426			 get_result_vector( p, inst ),
427			 get_result_flags( inst ), 0,
428			 swizzle(src0,X,X,X,X), 0, 0);
429	 break;
430
431      case FP_OPCODE_LIT:
432	 src0 = src_vector( p, &inst->SrcReg[0], program);
433	 tmp = i915_get_utemp( p );
434
435	 /* tmp = max( a.xyzw, a.00zw )
436	  * XXX: Clamp tmp.w to -128..128
437	  * tmp.y = log(tmp.y)
438	  * tmp.y = tmp.w * tmp.y
439	  * tmp.y = exp(tmp.y)
440	  * result = cmp (a.11-x1, a.1x01, a.1xy1 )
441	  */
442	 i915_emit_arith( p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0,
443			 src0, swizzle(src0, ZERO, ZERO, Z, W), 0 );
444
445	 i915_emit_arith( p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0,
446			 swizzle(tmp, Y, Y, Y, Y), 0, 0 );
447
448	 i915_emit_arith( p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0,
449			 swizzle(tmp, ZERO, Y, ZERO, ZERO),
450			 swizzle(tmp, ZERO, W, ZERO, ZERO), 0 );
451
452	 i915_emit_arith( p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0,
453			 swizzle(tmp, Y, Y, Y, Y), 0, 0 );
454
455	 i915_emit_arith( p, A0_CMP,
456			 get_result_vector( p, inst ),
457			 get_result_flags( inst ), 0,
458			 negate(swizzle(tmp, ONE, ONE, X, ONE),0,0,1,0),
459			 swizzle(tmp, ONE, X, ZERO, ONE),
460			 swizzle(tmp, ONE, X, Y, ONE));
461
462	 break;
463
464      case FP_OPCODE_LRP:
465	 src0 = src_vector( p, &inst->SrcReg[0], program);
466	 src1 = src_vector( p, &inst->SrcReg[1], program);
467	 src2 = src_vector( p, &inst->SrcReg[2], program);
468	 flags = get_result_flags( inst );
469	 tmp = i915_get_utemp( p );
470
471	 /* b*a + c*(1-a)
472	  *
473	  * b*a + c - ca
474	  *
475	  * tmp = b*a + c,
476	  * result = (-c)*a + tmp
477	  */
478	 i915_emit_arith( p, A0_MAD, tmp,
479			 flags & A0_DEST_CHANNEL_ALL, 0,
480			 src1, src0, src2 );
481
482	 i915_emit_arith( p, A0_MAD,
483			 get_result_vector( p, inst ),
484			 flags, 0,
485			 negate(src2, 1,1,1,1), src0, tmp );
486	 break;
487
488      case FP_OPCODE_MAD:
489	 EMIT_3ARG_ARITH( A0_MAD );
490	 break;
491
492      case FP_OPCODE_MAX:
493	 EMIT_2ARG_ARITH( A0_MAX );
494	 break;
495
496      case FP_OPCODE_MIN:
497	 src0 = src_vector( p, &inst->SrcReg[0], program);
498	 src1 = src_vector( p, &inst->SrcReg[1], program);
499	 tmp = i915_get_utemp( p );
500	 flags = get_result_flags( inst );
501
502	 i915_emit_arith( p,
503			 A0_MAX,
504			 tmp, flags & A0_DEST_CHANNEL_ALL, 0,
505			 negate(src0,1,1,1,1),
506			 negate(src1,1,1,1,1), 0);
507
508	 i915_emit_arith( p,
509			 A0_MOV,
510			 get_result_vector( p, inst ),
511			 flags, 0,
512			 negate(tmp, 1,1,1,1), 0, 0);
513	 break;
514
515      case FP_OPCODE_MOV:
516	 EMIT_1ARG_ARITH( A0_MOV );
517	 break;
518
519      case FP_OPCODE_MUL:
520	 EMIT_2ARG_ARITH( A0_MUL );
521	 break;
522
523      case FP_OPCODE_POW:
524	 src0 = src_vector( p, &inst->SrcReg[0], program);
525	 src1 = src_vector( p, &inst->SrcReg[1], program);
526	 tmp = i915_get_utemp( p );
527	 flags = get_result_flags( inst );
528
529	 /* XXX: masking on intermediate values, here and elsewhere.
530	  */
531	 i915_emit_arith( p,
532			 A0_LOG,
533			 tmp, A0_DEST_CHANNEL_X, 0,
534			 swizzle(src0,X,X,X,X), 0, 0);
535
536	 i915_emit_arith( p,
537			 A0_MUL,
538			 tmp, A0_DEST_CHANNEL_X, 0,
539			 tmp, src1, 0);
540
541
542	 i915_emit_arith( p,
543			 A0_EXP,
544			 get_result_vector( p, inst ),
545			 flags, 0,
546			 swizzle(tmp,X,X,X,X), 0, 0);
547
548	 break;
549
550      case FP_OPCODE_RCP:
551	 src0 = src_vector( p, &inst->SrcReg[0], program);
552
553	 i915_emit_arith( p,
554			 A0_RCP,
555			 get_result_vector( p, inst ),
556			 get_result_flags( inst ), 0,
557			 swizzle(src0,X,X,X,X), 0, 0);
558	 break;
559
560      case FP_OPCODE_RSQ:
561
562	 src0 = src_vector( p, &inst->SrcReg[0], program);
563
564	 i915_emit_arith( p,
565			 A0_RSQ,
566			 get_result_vector( p, inst ),
567			 get_result_flags( inst ), 0,
568			 swizzle(src0,X,X,X,X), 0, 0);
569	 break;
570
571      case FP_OPCODE_SCS:
572	 src0 = src_vector( p, &inst->SrcReg[0], program);
573	 tmp = i915_get_utemp( p );
574
575	 /*
576	  * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1
577	  * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
578	  * t1 = MUL t0.xyyw t0.yz11    ; x^7 x^5 x^3 x
579	  * scs.x = DP4 t1, sin_constants
580	  * t1 = MUL t0.xxz1 t0.z111    ; x^6 x^4 x^2 1
581	  * scs.y = DP4 t1, cos_constants
582	  */
583	 i915_emit_arith( p,
584			 A0_MUL,
585			 tmp, A0_DEST_CHANNEL_XY, 0,
586			 swizzle(src0, X,X,ONE,ONE),
587			 swizzle(src0, X,ONE,ONE,ONE), 0);
588
589	 i915_emit_arith( p,
590			 A0_MUL,
591			 tmp, A0_DEST_CHANNEL_ALL, 0,
592			 swizzle(tmp, X,Y,X,Y),
593			 swizzle(tmp, X,X,ONE,ONE), 0);
594
595	 if (inst->DstReg.WriteMask[1]) {
596	    GLuint tmp1;
597
598	    if (inst->DstReg.WriteMask[0])
599	       tmp1 = i915_get_utemp( p );
600	    else
601	       tmp1 = tmp;
602
603	    i915_emit_arith( p,
604			    A0_MUL,
605			    tmp1, A0_DEST_CHANNEL_ALL, 0,
606			    swizzle(tmp, X,Y,Y,W),
607			    swizzle(tmp, X,Z,ONE,ONE), 0);
608
609	    i915_emit_arith( p,
610			    A0_DP4,
611			    get_result_vector( p, inst ),
612			    A0_DEST_CHANNEL_Y, 0,
613			    swizzle(tmp1, W,Z,Y,X),
614			    i915_emit_const4fv( p, sin_constants ), 0);
615	 }
616
617	 if (inst->DstReg.WriteMask[0]) {
618	    i915_emit_arith( p,
619			    A0_MUL,
620			    tmp, A0_DEST_CHANNEL_XYZ, 0,
621			    swizzle(tmp, X,X,Z,ONE),
622			    swizzle(tmp, Z,ONE,ONE,ONE), 0);
623
624	    i915_emit_arith( p,
625			    A0_DP4,
626			    get_result_vector( p, inst ),
627			    A0_DEST_CHANNEL_X, 0,
628			    swizzle(tmp, ONE,Z,Y,X),
629			    i915_emit_const4fv( p, cos_constants ), 0);
630	 }
631	 break;
632
633      case FP_OPCODE_SGE:
634	 EMIT_2ARG_ARITH( A0_SGE );
635	 break;
636
637      case FP_OPCODE_SIN:
638	 src0 = src_vector( p, &inst->SrcReg[0], program);
639	 tmp = i915_get_utemp( p );
640
641	 i915_emit_arith( p,
642			 A0_MUL,
643			 tmp, A0_DEST_CHANNEL_X, 0,
644			 src0,
645			 i915_emit_const1f(p, 1.0/(PI * 2)),
646			 0);
647
648	 i915_emit_arith( p,
649			 A0_MOD,
650			 tmp, A0_DEST_CHANNEL_X, 0,
651			 tmp,
652			 0, 0 );
653
654	 /* By choosing different taylor constants, could get rid of this mul:
655	  */
656	 i915_emit_arith( p,
657			 A0_MUL,
658			 tmp, A0_DEST_CHANNEL_X, 0,
659			 tmp,
660			 i915_emit_const1f(p, (PI * 2)),
661			 0);
662
663	 /*
664	  * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1
665	  * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
666	  * t1 = MUL t0.xyyw t0.yz11    ; x^7 x^5 x^3 x
667	  * result = DP4 t1.wzyx, sin_constants
668	  */
669	 i915_emit_arith( p,
670			 A0_MUL,
671			 tmp, A0_DEST_CHANNEL_XY, 0,
672			 swizzle(tmp, X,X,ONE,ONE),
673			 swizzle(tmp, X,ONE,ONE,ONE), 0);
674
675	 i915_emit_arith( p,
676			 A0_MUL,
677			 tmp, A0_DEST_CHANNEL_ALL, 0,
678			 swizzle(tmp, X,Y,X,Y),
679			 swizzle(tmp, X,X,ONE,ONE), 0);
680
681	 i915_emit_arith( p,
682			 A0_MUL,
683			 tmp, A0_DEST_CHANNEL_ALL, 0,
684			 swizzle(tmp, X,Y,Y,W),
685			 swizzle(tmp, X,Z,ONE,ONE), 0);
686
687	 i915_emit_arith( p,
688			 A0_DP4,
689			 get_result_vector( p, inst ),
690			 get_result_flags( inst ), 0,
691			 swizzle(tmp, W, Z, Y, X ),
692			 i915_emit_const4fv( p, sin_constants ), 0);
693	 break;
694
695      case FP_OPCODE_SLT:
696	 EMIT_2ARG_ARITH( A0_SLT );
697	 break;
698
699      case FP_OPCODE_SUB:
700	 src0 = src_vector( p, &inst->SrcReg[0], program);
701	 src1 = src_vector( p, &inst->SrcReg[1], program);
702
703	 i915_emit_arith( p,
704			 A0_ADD,
705			 get_result_vector( p, inst ),
706			 get_result_flags( inst ), 0,
707			 src0, negate(src1, 1,1,1,1), 0);
708	 break;
709
710      case FP_OPCODE_SWZ:
711	 EMIT_1ARG_ARITH( A0_MOV ); /* extended swizzle handled natively */
712	 break;
713
714      case FP_OPCODE_TEX:
715	 EMIT_TEX( T0_TEXLD );
716	 break;
717
718      case FP_OPCODE_TXB:
719	 EMIT_TEX( T0_TEXLDB );
720	 break;
721
722      case FP_OPCODE_TXP:
723	 EMIT_TEX( T0_TEXLDP );
724	 break;
725
726      case FP_OPCODE_XPD:
727	 /* Cross product:
728	  *      result.x = src0.y * src1.z - src0.z * src1.y;
729	  *      result.y = src0.z * src1.x - src0.x * src1.z;
730	  *      result.z = src0.x * src1.y - src0.y * src1.x;
731	  *      result.w = undef;
732	  */
733	 src0 = src_vector( p, &inst->SrcReg[0], program);
734	 src1 = src_vector( p, &inst->SrcReg[1], program);
735	 tmp = i915_get_utemp( p );
736
737	 i915_emit_arith( p,
738			 A0_MUL,
739			 tmp, A0_DEST_CHANNEL_ALL, 0,
740			 swizzle(src0,Z,X,Y,ONE),
741			 swizzle(src1,Y,Z,X,ONE), 0);
742
743	 i915_emit_arith( p,
744			 A0_MAD,
745			 get_result_vector( p, inst ),
746			 get_result_flags( inst ), 0,
747			 swizzle(src0,Y,Z,X,ONE),
748			 swizzle(src1,Z,X,Y,ONE),
749			 negate(tmp,1,1,1,0));
750	 break;
751
752      case FP_OPCODE_END:
753	 return;
754
755      default:
756	 i915_program_error( p, "bad opcode" );
757	 return;
758      }
759
760      inst++;
761      i915_release_utemps( p );
762   }
763}
764
765/* Rather than trying to intercept and jiggle depth writes during
766 * emit, just move the value into its correct position at the end of
767 * the program:
768 */
769static void fixup_depth_write( struct i915_fragment_program *p )
770{
771   if (p->depth_written) {
772      GLuint depth = UREG(REG_TYPE_OD, 0);
773
774      i915_emit_arith( p,
775		      A0_MOV,
776		      depth, A0_DEST_CHANNEL_W, 0,
777		      swizzle(depth,X,Y,Z,Z),
778		      0, 0);
779   }
780}
781
782
/* InputsRead bit for texture coordinate set n, derived by shifting
 * FRAG_BIT_TEX0 left by n.
 */
#define FRAG_BIT_TEX(n)  (FRAG_BIT_TEX0 << (n))
784
785
786static void check_wpos( struct i915_fragment_program *p )
787{
788   GLuint inputs = p->FragProg.InputsRead;
789   GLint i;
790
791   p->wpos_tex = 0;
792
793   for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
794      if (inputs & FRAG_BIT_TEX(i))
795	 continue;
796      else if (inputs & FRAG_BIT_WPOS) {
797	 p->wpos_tex = i;
798	 inputs &= ~FRAG_BIT_WPOS;
799      }
800   }
801
802   if (inputs & FRAG_BIT_WPOS) {
803      i915_program_error(p, "No free texcoord for wpos value");
804   }
805}
806
807
808static void translate_program( struct i915_fragment_program *p )
809{
810   i915ContextPtr i915 = I915_CONTEXT(p->ctx);
811
812   i915_init_program( i915, p );
813   check_wpos( p );
814   upload_program( p );
815   fixup_depth_write( p );
816   i915_fini_program( p );
817
818   p->translated = 1;
819}
820
821
822static void track_params( struct i915_fragment_program *p )
823{
824   GLint i;
825
826   if (p->nr_params)
827      _mesa_load_state_parameters(p->ctx, p->FragProg.Parameters);
828
829   for (i = 0; i < p->nr_params; i++) {
830      GLint reg = p->param[i].reg;
831      COPY_4V( p->constant[reg], p->param[i].values );
832   }
833
834   p->params_uptodate = 1;
835   p->on_hardware = 0;		/* overkill */
836}
837
838
839static void i915BindProgram( GLcontext *ctx,
840			    GLenum target,
841			    struct program *prog )
842{
843   if (target == GL_FRAGMENT_PROGRAM_ARB) {
844      i915ContextPtr i915 = I915_CONTEXT(ctx);
845      struct i915_fragment_program *p = (struct i915_fragment_program *)prog;
846
847      if (i915->current_program == p)
848	 return;
849
850      if (i915->current_program) {
851	 i915->current_program->on_hardware = 0;
852	 i915->current_program->params_uptodate = 0;
853      }
854
855      i915->current_program = p;
856
857      assert(p->on_hardware == 0);
858      assert(p->params_uptodate == 0);
859
860      /* Hack: make sure fog is correctly enabled according to this
861       * fragment program's fog options.
862       */
863      ctx->Driver.Enable( ctx, GL_FRAGMENT_PROGRAM_ARB,
864			  ctx->FragmentProgram.Enabled );
865   }
866}
867
868static struct program *i915NewProgram( GLcontext *ctx,
869				      GLenum target,
870				      GLuint id )
871{
872   switch (target) {
873   case GL_VERTEX_PROGRAM_ARB:
874      return _mesa_init_vertex_program( ctx, CALLOC_STRUCT(vertex_program),
875					target, id );
876
877   case GL_FRAGMENT_PROGRAM_ARB: {
878      struct i915_fragment_program *prog = CALLOC_STRUCT(i915_fragment_program);
879      if (prog) {
880	 i915_init_program( I915_CONTEXT(ctx), prog );
881
882	 return _mesa_init_fragment_program( ctx, &prog->FragProg,
883					     target, id );
884      }
885      else
886	 return NULL;
887   }
888
889   case GL_FRAGMENT_PROGRAM_NV:
890   default:
891      _mesa_problem(ctx, "bad target in _mesa_new_program");
892      return NULL;
893   }
894}
895
896static void i915DeleteProgram( GLcontext *ctx,
897			      struct program *prog )
898{
899   if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
900      i915ContextPtr i915 = I915_CONTEXT(ctx);
901      struct i915_fragment_program *p = (struct i915_fragment_program *)prog;
902
903      if (i915->current_program == p)
904	 i915->current_program = 0;
905   }
906
907   _mesa_delete_program( ctx, prog );
908}
909
910
911static GLboolean i915IsProgramNative( GLcontext *ctx,
912				     GLenum target,
913				     struct program *prog )
914{
915   if (target == GL_FRAGMENT_PROGRAM_ARB) {
916      struct i915_fragment_program *p = (struct i915_fragment_program *)prog;
917
918      if (!p->translated)
919	 translate_program( p );
920
921      return !p->error;
922   }
923   else
924      return GL_TRUE;
925}
926
927static void i915ProgramStringNotify( GLcontext *ctx,
928				    GLenum target,
929				    struct program *prog )
930{
931   if (target == GL_FRAGMENT_PROGRAM_ARB) {
932      struct i915_fragment_program *p = (struct i915_fragment_program *)prog;
933      p->translated = 0;
934
935      /* Hack: make sure fog is correctly enabled according to this
936       * fragment program's fog options.
937       */
938      ctx->Driver.Enable( ctx, GL_FRAGMENT_PROGRAM_ARB,
939			  ctx->FragmentProgram.Enabled );
940   }
941}
942
943
944void i915ValidateFragmentProgram( i915ContextPtr i915 )
945{
946   GLcontext *ctx = &i915->intel.ctx;
947   intelContextPtr intel = INTEL_CONTEXT(ctx);
948   TNLcontext *tnl = TNL_CONTEXT(ctx);
949   struct vertex_buffer *VB = &tnl->vb;
950
951   struct i915_fragment_program *p =
952      (struct i915_fragment_program *)ctx->FragmentProgram.Current;
953
954   GLuint inputsRead = p->FragProg.InputsRead;
955   GLuint s4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_VFMT_MASK;
956   GLuint s2 = S2_TEXCOORD_NONE;
957   int i, offset = 0;
958
959   /* Important:
960    */
961   VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
962
963   if (!p->translated)
964      translate_program( p );
965
966   intel->vertex_attr_count = 0;
967   intel->wpos_offset = 0;
968   intel->wpos_size = 0;
969   intel->coloroffset = 0;
970   intel->specoffset = 0;
971
972   if (inputsRead & FRAG_BITS_TEX_ANY) {
973      EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, S4_VFMT_XYZW, 16 );
974   }
975   else {
976      EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, S4_VFMT_XYZ, 12 );
977   }
978
979   if (inputsRead & FRAG_BIT_COL0) {
980      intel->coloroffset = offset / 4;
981      EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_RGBA, S4_VFMT_COLOR, 4 );
982   }
983
984   if (inputsRead & FRAG_BIT_COL1) {
985      intel->specoffset = offset / 4;
986      EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_RGB, S4_VFMT_SPEC_FOG, 3 );
987      EMIT_PAD( 1 );
988   }
989
990   if (inputsRead & FRAG_BIT_FOGC) {
991      EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1F, S4_VFMT_FOG_PARAM, 4 );
992   }
993
994   for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
995      if (inputsRead & FRAG_BIT_TEX(i)) {
996	 int sz = VB->TexCoordPtr[i]->size;
997
998	 s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
999	 s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz));
1000
1001	 EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_SZ(sz), 0, sz * 4 );
1002      }
1003      else if (i == p->wpos_tex) {
1004
1005	 /* If WPOS is required, duplicate the XYZ position data in an
1006	  * unused texture coordinate:
1007	  */
1008	 s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
1009	 s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(3));
1010
1011	 intel->wpos_offset = offset;
1012	 intel->wpos_size = 3 * sizeof(GLuint);
1013
1014	 EMIT_PAD( intel->wpos_size );
1015      }
1016   }
1017
1018   if (s2 != i915->state.Ctx[I915_CTXREG_LIS2] ||
1019       s4 != i915->state.Ctx[I915_CTXREG_LIS4]) {
1020
1021      I915_STATECHANGE( i915, I915_UPLOAD_CTX );
1022
1023      /* Must do this *after* statechange, so as not to affect
1024       * buffered vertices reliant on the old state:
1025       */
1026      intel->vertex_size = _tnl_install_attrs( &intel->ctx,
1027					       intel->vertex_attrs,
1028					       intel->vertex_attr_count,
1029					       intel->ViewportMatrix.m, 0 );
1030
1031      intel->vertex_size >>= 2;
1032
1033      i915->state.Ctx[I915_CTXREG_LIS2] = s2;
1034      i915->state.Ctx[I915_CTXREG_LIS4] = s4;
1035
1036      assert(intel->vtbl.check_vertex_size( intel, intel->vertex_size ));
1037   }
1038
1039   if (!p->params_uptodate)
1040      track_params( p );
1041
1042   if (!p->on_hardware)
1043      i915_upload_program( i915, p );
1044}
1045
1046void i915InitFragProgFuncs( struct dd_function_table *functions )
1047{
1048   functions->BindProgram = i915BindProgram;
1049   functions->NewProgram = i915NewProgram;
1050   functions->DeleteProgram = i915DeleteProgram;
1051   functions->IsProgramNative = i915IsProgramNative;
1052   functions->ProgramStringNotify = i915ProgramStringNotify;
1053}
1054