1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29#include <stdarg.h>
30
31#include "i915_reg.h"
32#include "i915_context.h"
33#include "i915_fpc.h"
34
35#include "pipe/p_shader_tokens.h"
36#include "util/u_math.h"
37#include "util/u_memory.h"
38#include "util/u_string.h"
39#include "tgsi/tgsi_parse.h"
40#include "tgsi/tgsi_dump.h"
41
42#include "draw/draw_vertex.h"
43
44#ifndef M_PI
45#define M_PI 3.14159265358979323846
46#endif
47
48/**
49 * Simple pass-through fragment shader to use when we don't have
50 * a real shader (or it fails to compile for some reason).
51 */
52static unsigned passthrough_decl[] =
53{
54   _3DSTATE_PIXEL_SHADER_PROGRAM | ((2*3)-1),
55
56   /* declare input color:
57    */
58   (D0_DCL |
59    (REG_TYPE_T << D0_TYPE_SHIFT) |
60    (T_DIFFUSE << D0_NR_SHIFT) |
61    D0_CHANNEL_ALL),
62   0,
63   0,
64};
65
66static unsigned passthrough_program[] =
67{
68   /* move to output color:
69    */
70   (A0_MOV |
71    (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) |
72    A0_DEST_CHANNEL_ALL |
73    (REG_TYPE_T << A0_SRC0_TYPE_SHIFT) |
74    (T_DIFFUSE << A0_SRC0_NR_SHIFT)),
75   0x01230000,			/* .xyzw */
76   0
77};
78
79
/*
 * Series coefficients used for the sine half of SCS, lowest power first:
 *   1, -1/3!, 1/5!, -1/7!
 */
static const float scs_sin_constants[4] = {
   1.0f,
   -1.0f / 6,      /* 3! */
   1.0f / 120,     /* 5! */
   -1.0f / 5040    /* 7! */
};
86
/*
 * Series coefficients used for the cosine half of SCS, lowest power first:
 *   1, -1/2!, 1/4!, -1/6!
 */
static const float scs_cos_constants[4] = {
   1.0f,
   -1.0f / 2,      /* 2! */
   1.0f / 24,      /* 4! */
   -1.0f / 720     /* 6! */
};
93
/*
 * Series coefficients for SIN with the 2*pi scale folded in, lowest
 * power first:
 *   2*pi, -(2*pi)^3/3!, (2*pi)^5/5!, -(2*pi)^7/7!
 */
static const float sin_constants[4] = {
   2.0 * M_PI,
   -8.0f * M_PI * M_PI * M_PI / 6,                                   /* 3! */
   32.0f * M_PI * M_PI * M_PI * M_PI * M_PI / 120,                   /* 5! */
   -128.0f * M_PI * M_PI * M_PI * M_PI * M_PI * M_PI * M_PI / 5040   /* 7! */
};
100
/*
 * Series coefficients for COS with the 2*pi scale folded in, lowest
 * power first:
 *   1, -(2*pi)^2/2!, (2*pi)^4/4!, -(2*pi)^6/6!
 */
static const float cos_constants[4] = {
   1.0f,
   -4.0f * M_PI * M_PI / 2,                                    /* 2! */
   16.0f * M_PI * M_PI * M_PI * M_PI / 24,                     /* 4! */
   -64.0f * M_PI * M_PI * M_PI * M_PI * M_PI * M_PI / 720      /* 6! */
};
107
108
109
110/**
111 * component-wise negation of ureg
112 */
113static INLINE int
114negate(int reg, int x, int y, int z, int w)
115{
116   /* Another neat thing about the UREG representation */
117   return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) |
118                 ((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) |
119                 ((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) |
120                 ((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT));
121}
122
123
124/**
125 * In the event of a translation failure, we'll generate a simple color
126 * pass-through program.
127 */
128static void
129i915_use_passthrough_shader(struct i915_fragment_shader *fs)
130{
131   fs->program = (uint *) MALLOC(sizeof(passthrough_program));
132   fs->decl = (uint *) MALLOC(sizeof(passthrough_decl));
133   if (fs->program) {
134      memcpy(fs->program, passthrough_program, sizeof(passthrough_program));
135      memcpy(fs->decl, passthrough_decl, sizeof(passthrough_decl));
136      fs->program_len = Elements(passthrough_program);
137      fs->decl_len = Elements(passthrough_decl);
138   }
139   fs->num_constants = 0;
140}
141
142
143void
144i915_program_error(struct i915_fp_compile *p, const char *msg, ...)
145{
146   va_list args;
147   char buffer[1024];
148
149   debug_printf("i915_program_error: ");
150   va_start( args, msg );
151   util_vsnprintf( buffer, sizeof(buffer), msg, args );
152   va_end( args );
153   debug_printf("%s", buffer);
154   debug_printf("\n");
155
156   p->error = 1;
157}
158
159static uint get_mapping(struct i915_fragment_shader* fs, int unit)
160{
161   int i;
162   for (i = 0; i < I915_TEX_UNITS; i++)
163   {
164      if (fs->generic_mapping[i] == -1) {
165         fs->generic_mapping[i] = unit;
166         return i;
167      }
168      if (fs->generic_mapping[i] == unit)
169         return i;
170   }
171   debug_printf("Exceeded max generics\n");
172   return 0;
173}
174
175/**
176 * Construct a ureg for the given source register.  Will emit
177 * constants, apply swizzling and negation as needed.
178 */
179static uint
180src_vector(struct i915_fp_compile *p,
181           const struct i915_full_src_register *source,
182           struct i915_fragment_shader* fs)
183{
184   uint index = source->Register.Index;
185   uint src = 0, sem_name, sem_ind;
186
187   switch (source->Register.File) {
188   case TGSI_FILE_TEMPORARY:
189      if (source->Register.Index >= I915_MAX_TEMPORARY) {
190         i915_program_error(p, "Exceeded max temporary reg");
191         return 0;
192      }
193      src = UREG(REG_TYPE_R, index);
194      break;
195   case TGSI_FILE_INPUT:
196      /* XXX: Packing COL1, FOGC into a single attribute works for
197       * texenv programs, but will fail for real fragment programs
198       * that use these attributes and expect them to be a full 4
199       * components wide.  Could use a texcoord to pass these
200       * attributes if necessary, but that won't work in the general
201       * case.
202       *
203       * We also use a texture coordinate to pass wpos when possible.
204       */
205
206      sem_name = p->shader->info.input_semantic_name[index];
207      sem_ind = p->shader->info.input_semantic_index[index];
208
209      switch (sem_name) {
210      case TGSI_SEMANTIC_POSITION:
211         {
212            /* for fragcoord */
213            int real_tex_unit = get_mapping(fs, I915_SEMANTIC_POS);
214            src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_ALL);
215            break;
216         }
217      case TGSI_SEMANTIC_COLOR:
218         if (sem_ind == 0) {
219            src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL);
220         }
221         else {
222            /* secondary color */
223            assert(sem_ind == 1);
224            src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ);
225            src = swizzle(src, X, Y, Z, ONE);
226         }
227         break;
228      case TGSI_SEMANTIC_FOG:
229         src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W);
230         src = swizzle(src, W, W, W, W);
231         break;
232      case TGSI_SEMANTIC_GENERIC:
233         {
234            int real_tex_unit = get_mapping(fs, sem_ind);
235            src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_ALL);
236            break;
237         }
238      case TGSI_SEMANTIC_FACE:
239         {
240            /* for back/front faces */
241            int real_tex_unit = get_mapping(fs, I915_SEMANTIC_FACE);
242            src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_X);
243            break;
244         }
245      default:
246         i915_program_error(p, "Bad source->Index");
247         return 0;
248      }
249      break;
250
251   case TGSI_FILE_IMMEDIATE:
252      assert(index < p->num_immediates);
253      index = p->immediates_map[index];
254      /* fall-through */
255   case TGSI_FILE_CONSTANT:
256      src = UREG(REG_TYPE_CONST, index);
257      break;
258
259   default:
260      i915_program_error(p, "Bad source->File");
261      return 0;
262   }
263
264   src = swizzle(src,
265		 source->Register.SwizzleX,
266		 source->Register.SwizzleY,
267		 source->Register.SwizzleZ,
268		 source->Register.SwizzleW);
269
270   /* There's both negate-all-components and per-component negation.
271    * Try to handle both here.
272    */
273   {
274      int n = source->Register.Negate;
275      src = negate(src, n, n, n, n);
276   }
277
278   /* no abs() */
279#if 0
280   /* XXX assertions disabled to allow arbfplight.c to run */
281   /* XXX enable these assertions, or fix things */
282   assert(!source->Register.Absolute);
283#endif
284   if (source->Register.Absolute)
285      debug_printf("Unhandled absolute value\n");
286
287   return src;
288}
289
290
291/**
292 * Construct a ureg for a destination register.
293 */
294static uint
295get_result_vector(struct i915_fp_compile *p,
296                  const struct i915_full_dst_register *dest)
297{
298   switch (dest->Register.File) {
299   case TGSI_FILE_OUTPUT:
300      {
301         uint sem_name = p->shader->info.output_semantic_name[dest->Register.Index];
302         switch (sem_name) {
303         case TGSI_SEMANTIC_POSITION:
304            return UREG(REG_TYPE_OD, 0);
305         case TGSI_SEMANTIC_COLOR:
306            return UREG(REG_TYPE_OC, 0);
307         default:
308            i915_program_error(p, "Bad inst->DstReg.Index/semantics");
309            return 0;
310         }
311      }
312   case TGSI_FILE_TEMPORARY:
313      return UREG(REG_TYPE_R, dest->Register.Index);
314   default:
315      i915_program_error(p, "Bad inst->DstReg.File");
316      return 0;
317   }
318}
319
320
321/**
322 * Compute flags for saturation and writemask.
323 */
324static uint
325get_result_flags(const struct i915_full_instruction *inst)
326{
327   const uint writeMask
328      = inst->Dst[0].Register.WriteMask;
329   uint flags = 0x0;
330
331   if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE)
332      flags |= A0_DEST_SATURATE;
333
334   if (writeMask & TGSI_WRITEMASK_X)
335      flags |= A0_DEST_CHANNEL_X;
336   if (writeMask & TGSI_WRITEMASK_Y)
337      flags |= A0_DEST_CHANNEL_Y;
338   if (writeMask & TGSI_WRITEMASK_Z)
339      flags |= A0_DEST_CHANNEL_Z;
340   if (writeMask & TGSI_WRITEMASK_W)
341      flags |= A0_DEST_CHANNEL_W;
342
343   return flags;
344}
345
346
347/**
348 * Convert TGSI_TEXTURE_x token to DO_SAMPLE_TYPE_x token
349 */
350static uint
351translate_tex_src_target(struct i915_fp_compile *p, uint tex)
352{
353   switch (tex) {
354   case TGSI_TEXTURE_SHADOW1D:
355      /* fall-through */
356   case TGSI_TEXTURE_1D:
357      return D0_SAMPLE_TYPE_2D;
358
359   case TGSI_TEXTURE_SHADOW2D:
360      /* fall-through */
361   case TGSI_TEXTURE_2D:
362      return D0_SAMPLE_TYPE_2D;
363
364   case TGSI_TEXTURE_SHADOWRECT:
365      /* fall-through */
366   case TGSI_TEXTURE_RECT:
367      return D0_SAMPLE_TYPE_2D;
368
369   case TGSI_TEXTURE_3D:
370      return D0_SAMPLE_TYPE_VOLUME;
371
372   case TGSI_TEXTURE_CUBE:
373      return D0_SAMPLE_TYPE_CUBE;
374
375   default:
376      i915_program_error(p, "TexSrc type");
377      return 0;
378   }
379}
380
381/**
382 * Return the number of coords needed to access a given TGSI_TEXTURE_*
383 */
384static uint
385texture_num_coords(struct i915_fp_compile *p, uint tex)
386{
387   switch (tex) {
388   case TGSI_TEXTURE_SHADOW1D:
389   case TGSI_TEXTURE_1D:
390      return 1;
391
392   case TGSI_TEXTURE_SHADOW2D:
393   case TGSI_TEXTURE_2D:
394   case TGSI_TEXTURE_SHADOWRECT:
395   case TGSI_TEXTURE_RECT:
396      return 2;
397
398   case TGSI_TEXTURE_3D:
399   case TGSI_TEXTURE_CUBE:
400      return 3;
401
402   default:
403      i915_program_error(p, "Num coords");
404      return 2;
405   }
406}
407
408
409/**
410 * Generate texel lookup instruction.
411 */
412static void
413emit_tex(struct i915_fp_compile *p,
414         const struct i915_full_instruction *inst,
415         uint opcode,
416         struct i915_fragment_shader* fs)
417{
418   uint texture = inst->Texture.Texture;
419   uint unit = inst->Src[1].Register.Index;
420   uint tex = translate_tex_src_target( p, texture );
421   uint sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex);
422   uint coord = src_vector( p, &inst->Src[0], fs);
423
424   i915_emit_texld( p,
425                    get_result_vector( p, &inst->Dst[0] ),
426                    get_result_flags( inst ),
427                    sampler,
428                    coord,
429                    opcode,
430                    texture_num_coords(p, texture) );
431}
432
433
434/**
435 * Generate a simple arithmetic instruction
436 * \param opcode  the i915 opcode
437 * \param numArgs  the number of input/src arguments
438 */
439static void
440emit_simple_arith(struct i915_fp_compile *p,
441                  const struct i915_full_instruction *inst,
442                  uint opcode, uint numArgs,
443                  struct i915_fragment_shader* fs)
444{
445   uint arg1, arg2, arg3;
446
447   assert(numArgs <= 3);
448
449   arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->Src[0], fs );
450   arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->Src[1], fs );
451   arg3 = (numArgs < 3) ? 0 : src_vector( p, &inst->Src[2], fs );
452
453   i915_emit_arith( p,
454                    opcode,
455                    get_result_vector( p, &inst->Dst[0]),
456                    get_result_flags( inst ), 0,
457                    arg1,
458                    arg2,
459                    arg3 );
460}
461
462
463/** As above, but swap the first two src regs */
464static void
465emit_simple_arith_swap2(struct i915_fp_compile *p,
466                        const struct i915_full_instruction *inst,
467                        uint opcode, uint numArgs,
468                        struct i915_fragment_shader* fs)
469{
470   struct i915_full_instruction inst2;
471
472   assert(numArgs == 2);
473
474   /* transpose first two registers */
475   inst2 = *inst;
476   inst2.Src[0] = inst->Src[1];
477   inst2.Src[1] = inst->Src[0];
478
479   emit_simple_arith(p, &inst2, opcode, numArgs, fs);
480}
481
482/*
483 * Translate TGSI instruction to i915 instruction.
484 *
485 * Possible concerns:
486 *
487 * DDX, DDY -- return 0
488 * SIN, COS -- could use another taylor step?
489 * LIT      -- results seem a little different to sw mesa
490 * LOG      -- different to mesa on negative numbers, but this is conformant.
491 */
492static void
493i915_translate_instruction(struct i915_fp_compile *p,
494                           const struct i915_full_instruction *inst,
495                           struct i915_fragment_shader *fs)
496{
497   uint writemask;
498   uint src0, src1, src2, flags;
499   uint tmp = 0;
500
501   switch (inst->Instruction.Opcode) {
502   case TGSI_OPCODE_ABS:
503      src0 = src_vector(p, &inst->Src[0], fs);
504      i915_emit_arith(p,
505                      A0_MAX,
506                      get_result_vector(p, &inst->Dst[0]),
507                      get_result_flags(inst), 0,
508                      src0, negate(src0, 1, 1, 1, 1), 0);
509      break;
510
511   case TGSI_OPCODE_ADD:
512      emit_simple_arith(p, inst, A0_ADD, 2, fs);
513      break;
514
515   case TGSI_OPCODE_CEIL:
516      src0 = src_vector(p, &inst->Src[0], fs);
517      tmp = i915_get_utemp(p);
518      flags = get_result_flags(inst);
519      i915_emit_arith(p,
520                      A0_FLR,
521                      tmp,
522                      flags & A0_DEST_CHANNEL_ALL, 0,
523                      negate(src0, 1, 1, 1, 1), 0, 0);
524      i915_emit_arith(p,
525                      A0_MOV,
526                      get_result_vector(p, &inst->Dst[0]),
527                      flags, 0,
528                      negate(tmp, 1, 1, 1, 1), 0, 0);
529      break;
530
531   case TGSI_OPCODE_CMP:
532      src0 = src_vector(p, &inst->Src[0], fs);
533      src1 = src_vector(p, &inst->Src[1], fs);
534      src2 = src_vector(p, &inst->Src[2], fs);
535      i915_emit_arith(p, A0_CMP,
536                      get_result_vector(p, &inst->Dst[0]),
537                      get_result_flags(inst),
538                      0, src0, src2, src1);   /* NOTE: order of src2, src1 */
539      break;
540
541   case TGSI_OPCODE_COS:
542      src0 = src_vector(p, &inst->Src[0], fs);
543      tmp = i915_get_utemp(p);
544
545      i915_emit_arith(p,
546                      A0_MUL,
547                      tmp, A0_DEST_CHANNEL_X, 0,
548                      src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0);
549
550      i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
551
552      /*
553       * t0.xy = MUL x.xx11, x.x111  ; x^2, x, 1, 1
554       * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1
555       * t0 = MUL t0.xxz1 t0.z111    ; x^6 x^4 x^2 1
556       * result = DP4 t0, cos_constants
557       */
558      i915_emit_arith(p,
559                      A0_MUL,
560                      tmp, A0_DEST_CHANNEL_XY, 0,
561                      swizzle(tmp, X, X, ONE, ONE),
562                      swizzle(tmp, X, ONE, ONE, ONE), 0);
563
564      i915_emit_arith(p,
565                      A0_MUL,
566                      tmp, A0_DEST_CHANNEL_XYZ, 0,
567                      swizzle(tmp, X, Y, X, ONE),
568                      swizzle(tmp, X, X, ONE, ONE), 0);
569
570      i915_emit_arith(p,
571                      A0_MUL,
572                      tmp, A0_DEST_CHANNEL_XYZ, 0,
573                      swizzle(tmp, X, X, Z, ONE),
574                      swizzle(tmp, Z, ONE, ONE, ONE), 0);
575
576      i915_emit_arith(p,
577                      A0_DP4,
578                      get_result_vector(p, &inst->Dst[0]),
579                      get_result_flags(inst), 0,
580                      swizzle(tmp, ONE, Z, Y, X),
581                      i915_emit_const4fv(p, cos_constants), 0);
582      break;
583
584  case TGSI_OPCODE_DDX:
585  case TGSI_OPCODE_DDY:
586      /* XXX We just output 0 here */
587      debug_printf("Punting DDX/DDX\n");
588      src0 = get_result_vector(p, &inst->Dst[0]);
589      i915_emit_arith(p,
590                      A0_MOV,
591                      get_result_vector(p, &inst->Dst[0]),
592                      get_result_flags(inst), 0,
593                      swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0, 0);
594      break;
595
596  case TGSI_OPCODE_DP2:
597      src0 = src_vector(p, &inst->Src[0], fs);
598      src1 = src_vector(p, &inst->Src[1], fs);
599
600      i915_emit_arith(p,
601                      A0_DP3,
602                      get_result_vector(p, &inst->Dst[0]),
603                      get_result_flags(inst), 0,
604                      swizzle(src0, X, Y, ZERO, ZERO), src1, 0);
605      break;
606
607   case TGSI_OPCODE_DP3:
608      emit_simple_arith(p, inst, A0_DP3, 2, fs);
609      break;
610
611   case TGSI_OPCODE_DP4:
612      emit_simple_arith(p, inst, A0_DP4, 2, fs);
613      break;
614
615   case TGSI_OPCODE_DPH:
616      src0 = src_vector(p, &inst->Src[0], fs);
617      src1 = src_vector(p, &inst->Src[1], fs);
618
619      i915_emit_arith(p,
620                      A0_DP4,
621                      get_result_vector(p, &inst->Dst[0]),
622                      get_result_flags(inst), 0,
623                      swizzle(src0, X, Y, Z, ONE), src1, 0);
624      break;
625
626   case TGSI_OPCODE_DST:
627      src0 = src_vector(p, &inst->Src[0], fs);
628      src1 = src_vector(p, &inst->Src[1], fs);
629
630      /* result[0] = 1    * 1;
631       * result[1] = a[1] * b[1];
632       * result[2] = a[2] * 1;
633       * result[3] = 1    * b[3];
634       */
635      i915_emit_arith(p,
636                      A0_MUL,
637                      get_result_vector(p, &inst->Dst[0]),
638                      get_result_flags(inst), 0,
639                      swizzle(src0, ONE, Y, Z, ONE),
640                      swizzle(src1, ONE, Y, ONE, W), 0);
641      break;
642
643   case TGSI_OPCODE_END:
644      /* no-op */
645      break;
646
647   case TGSI_OPCODE_EX2:
648      src0 = src_vector(p, &inst->Src[0], fs);
649
650      i915_emit_arith(p,
651                      A0_EXP,
652                      get_result_vector(p, &inst->Dst[0]),
653                      get_result_flags(inst), 0,
654                      swizzle(src0, X, X, X, X), 0, 0);
655      break;
656
657   case TGSI_OPCODE_FLR:
658      emit_simple_arith(p, inst, A0_FLR, 1, fs);
659      break;
660
661   case TGSI_OPCODE_FRC:
662      emit_simple_arith(p, inst, A0_FRC, 1, fs);
663      break;
664
665   case TGSI_OPCODE_KIL:
666      /* kill if src[0].x < 0 || src[0].y < 0 ... */
667      src0 = src_vector(p, &inst->Src[0], fs);
668      tmp = i915_get_utemp(p);
669
670      i915_emit_texld(p,
671                      tmp,                   /* dest reg: a dummy reg */
672                      A0_DEST_CHANNEL_ALL,   /* dest writemask */
673                      0,                     /* sampler */
674                      src0,                  /* coord*/
675                      T0_TEXKILL,            /* opcode */
676                      1);                    /* num_coord */
677      break;
678
679   case TGSI_OPCODE_KILP:
680      /* We emit an unconditional kill; we may want to revisit
681       * if we ever implement conditionals.
682       */
683      tmp = i915_get_utemp(p);
684
685      i915_emit_texld(p,
686                      tmp,                                   /* dest reg: a dummy reg */
687                      A0_DEST_CHANNEL_ALL,                   /* dest writemask */
688                      0,                                     /* sampler */
689                      negate(swizzle(0, ONE, ONE, ONE, ONE), 1, 1, 1, 1), /* coord */
690                      T0_TEXKILL,                            /* opcode */
691                      1);                                    /* num_coord */
692      break;
693
694   case TGSI_OPCODE_LG2:
695      src0 = src_vector(p, &inst->Src[0], fs);
696
697      i915_emit_arith(p,
698                      A0_LOG,
699                      get_result_vector(p, &inst->Dst[0]),
700                      get_result_flags(inst), 0,
701                      swizzle(src0, X, X, X, X), 0, 0);
702      break;
703
704   case TGSI_OPCODE_LIT:
705      src0 = src_vector(p, &inst->Src[0], fs);
706      tmp = i915_get_utemp(p);
707
708      /* tmp = max( a.xyzw, a.00zw )
709       * XXX: Clamp tmp.w to -128..128
710       * tmp.y = log(tmp.y)
711       * tmp.y = tmp.w * tmp.y
712       * tmp.y = exp(tmp.y)
713       * result = cmp (a.11-x1, a.1x01, a.1xy1 )
714       */
715      i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0,
716                      src0, swizzle(src0, ZERO, ZERO, Z, W), 0);
717
718      i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0,
719                      swizzle(tmp, Y, Y, Y, Y), 0, 0);
720
721      i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0,
722                      swizzle(tmp, ZERO, Y, ZERO, ZERO),
723                      swizzle(tmp, ZERO, W, ZERO, ZERO), 0);
724
725      i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0,
726                      swizzle(tmp, Y, Y, Y, Y), 0, 0);
727
728      i915_emit_arith(p, A0_CMP,
729                      get_result_vector(p, &inst->Dst[0]),
730                      get_result_flags(inst), 0,
731                      negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0),
732                      swizzle(tmp, ONE, X, ZERO, ONE),
733                      swizzle(tmp, ONE, X, Y, ONE));
734
735      break;
736
737   case TGSI_OPCODE_LRP:
738      src0 = src_vector(p, &inst->Src[0], fs);
739      src1 = src_vector(p, &inst->Src[1], fs);
740      src2 = src_vector(p, &inst->Src[2], fs);
741      flags = get_result_flags(inst);
742      tmp = i915_get_utemp(p);
743
744      /* b*a + c*(1-a)
745       *
746       * b*a + c - ca
747       *
748       * tmp = b*a + c,
749       * result = (-c)*a + tmp
750       */
751      i915_emit_arith(p, A0_MAD, tmp,
752                      flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2);
753
754      i915_emit_arith(p, A0_MAD,
755                      get_result_vector(p, &inst->Dst[0]),
756                      flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp);
757      break;
758
759   case TGSI_OPCODE_MAD:
760      emit_simple_arith(p, inst, A0_MAD, 3, fs);
761      break;
762
763   case TGSI_OPCODE_MAX:
764      emit_simple_arith(p, inst, A0_MAX, 2, fs);
765      break;
766
767   case TGSI_OPCODE_MIN:
768      src0 = src_vector(p, &inst->Src[0], fs);
769      src1 = src_vector(p, &inst->Src[1], fs);
770      tmp = i915_get_utemp(p);
771      flags = get_result_flags(inst);
772
773      i915_emit_arith(p,
774                      A0_MAX,
775                      tmp, flags & A0_DEST_CHANNEL_ALL, 0,
776                      negate(src0, 1, 1, 1, 1),
777                      negate(src1, 1, 1, 1, 1), 0);
778
779      i915_emit_arith(p,
780                      A0_MOV,
781                      get_result_vector(p, &inst->Dst[0]),
782                      flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0);
783      break;
784
785   case TGSI_OPCODE_MOV:
786      emit_simple_arith(p, inst, A0_MOV, 1, fs);
787      break;
788
789   case TGSI_OPCODE_MUL:
790      emit_simple_arith(p, inst, A0_MUL, 2, fs);
791      break;
792
793   case TGSI_OPCODE_NOP:
794      break;
795
796   case TGSI_OPCODE_POW:
797      src0 = src_vector(p, &inst->Src[0], fs);
798      src1 = src_vector(p, &inst->Src[1], fs);
799      tmp = i915_get_utemp(p);
800      flags = get_result_flags(inst);
801
802      /* XXX: masking on intermediate values, here and elsewhere.
803       */
804      i915_emit_arith(p,
805                      A0_LOG,
806                      tmp, A0_DEST_CHANNEL_X, 0,
807                      swizzle(src0, X, X, X, X), 0, 0);
808
809      i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0);
810
811      i915_emit_arith(p,
812                      A0_EXP,
813                      get_result_vector(p, &inst->Dst[0]),
814                      flags, 0, swizzle(tmp, X, X, X, X), 0, 0);
815      break;
816
817   case TGSI_OPCODE_RET:
818      /* XXX: no-op? */
819      break;
820
821   case TGSI_OPCODE_RCP:
822      src0 = src_vector(p, &inst->Src[0], fs);
823
824      i915_emit_arith(p,
825                      A0_RCP,
826                      get_result_vector(p, &inst->Dst[0]),
827                      get_result_flags(inst), 0,
828                      swizzle(src0, X, X, X, X), 0, 0);
829      break;
830
831   case TGSI_OPCODE_RSQ:
832      src0 = src_vector(p, &inst->Src[0], fs);
833
834      i915_emit_arith(p,
835                      A0_RSQ,
836                      get_result_vector(p, &inst->Dst[0]),
837                      get_result_flags(inst), 0,
838                      swizzle(src0, X, X, X, X), 0, 0);
839      break;
840
841   case TGSI_OPCODE_SCS:
842      src0 = src_vector(p, &inst->Src[0], fs);
843      tmp = i915_get_utemp(p);
844
845      /*
846       * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1
847       * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
848       * t1 = MUL t0.xyyw t0.yz11    ; x^7 x^5 x^3 x
849       * scs.x = DP4 t1, scs_sin_constants
850       * t1 = MUL t0.xxz1 t0.z111    ; x^6 x^4 x^2 1
851       * scs.y = DP4 t1, scs_cos_constants
852       */
853      i915_emit_arith(p,
854                      A0_MUL,
855                      tmp, A0_DEST_CHANNEL_XY, 0,
856                      swizzle(src0, X, X, ONE, ONE),
857                      swizzle(src0, X, ONE, ONE, ONE), 0);
858
859      i915_emit_arith(p,
860                      A0_MUL,
861                      tmp, A0_DEST_CHANNEL_ALL, 0,
862                      swizzle(tmp, X, Y, X, Y),
863                      swizzle(tmp, X, X, ONE, ONE), 0);
864
865      writemask = inst->Dst[0].Register.WriteMask;
866
867      if (writemask & TGSI_WRITEMASK_Y) {
868         uint tmp1;
869
870         if (writemask & TGSI_WRITEMASK_X)
871            tmp1 = i915_get_utemp(p);
872         else
873            tmp1 = tmp;
874
875         i915_emit_arith(p,
876                         A0_MUL,
877                         tmp1, A0_DEST_CHANNEL_ALL, 0,
878                         swizzle(tmp, X, Y, Y, W),
879                         swizzle(tmp, X, Z, ONE, ONE), 0);
880
881         i915_emit_arith(p,
882                         A0_DP4,
883                         get_result_vector(p, &inst->Dst[0]),
884                         A0_DEST_CHANNEL_Y, 0,
885                         swizzle(tmp1, W, Z, Y, X),
886                         i915_emit_const4fv(p, scs_sin_constants), 0);
887      }
888
889      if (writemask & TGSI_WRITEMASK_X) {
890         i915_emit_arith(p,
891                         A0_MUL,
892                         tmp, A0_DEST_CHANNEL_XYZ, 0,
893                         swizzle(tmp, X, X, Z, ONE),
894                         swizzle(tmp, Z, ONE, ONE, ONE), 0);
895
896         i915_emit_arith(p,
897                         A0_DP4,
898                         get_result_vector(p, &inst->Dst[0]),
899                         A0_DEST_CHANNEL_X, 0,
900                         swizzle(tmp, ONE, Z, Y, X),
901                         i915_emit_const4fv(p, scs_cos_constants), 0);
902      }
903      break;
904
905   case TGSI_OPCODE_SEQ:
906      /* if we're both >= and <= then we're == */
907      src0 = src_vector(p, &inst->Src[0], fs);
908      src1 = src_vector(p, &inst->Src[1], fs);
909      tmp = i915_get_utemp(p);
910
911      i915_emit_arith(p,
912                      A0_SGE,
913                      tmp, A0_DEST_CHANNEL_ALL, 0,
914                      src0,
915                      src1, 0);
916
917      i915_emit_arith(p,
918                      A0_SGE,
919                      get_result_vector(p, &inst->Dst[0]),
920                      A0_DEST_CHANNEL_ALL, 0,
921                      src1,
922                      src0, 0);
923
924      i915_emit_arith(p,
925                      A0_MUL,
926                      get_result_vector(p, &inst->Dst[0]),
927                      A0_DEST_CHANNEL_ALL, 0,
928                      get_result_vector(p, &inst->Dst[0]),
929                      tmp, 0);
930
931      break;
932
933   case TGSI_OPCODE_SGE:
934      emit_simple_arith(p, inst, A0_SGE, 2, fs);
935      break;
936
937   case TGSI_OPCODE_SIN:
938      src0 = src_vector(p, &inst->Src[0], fs);
939      tmp = i915_get_utemp(p);
940
941      i915_emit_arith(p,
942                      A0_MUL,
943                      tmp, A0_DEST_CHANNEL_X, 0,
944                      src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0);
945
946      i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
947
948      /*
949       * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1
950       * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
951       * t1 = MUL t0.xyyw t0.yz11    ; x^7 x^5 x^3 x
952       * result = DP4 t1.wzyx, sin_constants
953       */
954      i915_emit_arith(p,
955                      A0_MUL,
956                      tmp, A0_DEST_CHANNEL_XY, 0,
957                      swizzle(tmp, X, X, ONE, ONE),
958                      swizzle(tmp, X, ONE, ONE, ONE), 0);
959
960      i915_emit_arith(p,
961                      A0_MUL,
962                      tmp, A0_DEST_CHANNEL_ALL, 0,
963                      swizzle(tmp, X, Y, X, Y),
964                      swizzle(tmp, X, X, ONE, ONE), 0);
965
966      i915_emit_arith(p,
967                      A0_MUL,
968                      tmp, A0_DEST_CHANNEL_ALL, 0,
969                      swizzle(tmp, X, Y, Y, W),
970                      swizzle(tmp, X, Z, ONE, ONE), 0);
971
972      i915_emit_arith(p,
973                      A0_DP4,
974                      get_result_vector(p, &inst->Dst[0]),
975                      get_result_flags(inst), 0,
976                      swizzle(tmp, W, Z, Y, X),
977                      i915_emit_const4fv(p, sin_constants), 0);
978      break;
979
980   case TGSI_OPCODE_SLE:
981      /* like SGE, but swap reg0, reg1 */
982      emit_simple_arith_swap2(p, inst, A0_SGE, 2, fs);
983      break;
984
985   case TGSI_OPCODE_SLT:
986      emit_simple_arith(p, inst, A0_SLT, 2, fs);
987      break;
988
989   case TGSI_OPCODE_SGT:
990      /* like SLT, but swap reg0, reg1 */
991      emit_simple_arith_swap2(p, inst, A0_SLT, 2, fs);
992      break;
993
994   case TGSI_OPCODE_SNE:
995      /* if we're < or > then we're != */
996      src0 = src_vector(p, &inst->Src[0], fs);
997      src1 = src_vector(p, &inst->Src[1], fs);
998      tmp = i915_get_utemp(p);
999
1000      i915_emit_arith(p,
1001                      A0_SLT,
1002                      tmp,
1003                      A0_DEST_CHANNEL_ALL, 0,
1004                      src0,
1005                      src1, 0);
1006
1007      i915_emit_arith(p,
1008                      A0_SLT,
1009                      get_result_vector(p, &inst->Dst[0]),
1010                      A0_DEST_CHANNEL_ALL, 0,
1011                      src1,
1012                      src0, 0);
1013
1014      i915_emit_arith(p,
1015                      A0_ADD,
1016                      get_result_vector(p, &inst->Dst[0]),
1017                      A0_DEST_CHANNEL_ALL, 0,
1018                      get_result_vector(p, &inst->Dst[0]),
1019                      tmp, 0);
1020      break;
1021
1022   case TGSI_OPCODE_SSG:
1023      /* compute (src>0) - (src<0) */
1024      src0 = src_vector(p, &inst->Src[0], fs);
1025      tmp = i915_get_utemp(p);
1026
1027      i915_emit_arith(p,
1028                      A0_SLT,
1029                      tmp,
1030                      A0_DEST_CHANNEL_ALL, 0,
1031                      src0,
1032                      swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0);
1033
1034      i915_emit_arith(p,
1035                      A0_SLT,
1036                      get_result_vector(p, &inst->Dst[0]),
1037                      A0_DEST_CHANNEL_ALL, 0,
1038                      swizzle(src0, ZERO, ZERO, ZERO, ZERO),
1039                      src0, 0);
1040
1041      i915_emit_arith(p,
1042                      A0_ADD,
1043                      get_result_vector(p, &inst->Dst[0]),
1044                      A0_DEST_CHANNEL_ALL, 0,
1045                      get_result_vector(p, &inst->Dst[0]),
1046                      negate(tmp, 1, 1, 1, 1), 0);
1047      break;
1048
1049   case TGSI_OPCODE_SUB:
1050      src0 = src_vector(p, &inst->Src[0], fs);
1051      src1 = src_vector(p, &inst->Src[1], fs);
1052
1053      i915_emit_arith(p,
1054                      A0_ADD,
1055                      get_result_vector(p, &inst->Dst[0]),
1056                      get_result_flags(inst), 0,
1057                      src0, negate(src1, 1, 1, 1, 1), 0);
1058      break;
1059
1060   case TGSI_OPCODE_TEX:
1061      emit_tex(p, inst, T0_TEXLD, fs);
1062      break;
1063
1064   case TGSI_OPCODE_TRUNC:
1065      emit_simple_arith(p, inst, A0_TRC, 1, fs);
1066      break;
1067
1068   case TGSI_OPCODE_TXB:
1069      emit_tex(p, inst, T0_TEXLDB, fs);
1070      break;
1071
1072   case TGSI_OPCODE_TXP:
1073      emit_tex(p, inst, T0_TEXLDP, fs);
1074      break;
1075
1076   case TGSI_OPCODE_XPD:
1077      /* Cross product:
1078       *      result.x = src0.y * src1.z - src0.z * src1.y;
1079       *      result.y = src0.z * src1.x - src0.x * src1.z;
1080       *      result.z = src0.x * src1.y - src0.y * src1.x;
1081       *      result.w = undef;
1082       */
1083      src0 = src_vector(p, &inst->Src[0], fs);
1084      src1 = src_vector(p, &inst->Src[1], fs);
1085      tmp = i915_get_utemp(p);
1086
1087      i915_emit_arith(p,
1088                      A0_MUL,
1089                      tmp, A0_DEST_CHANNEL_ALL, 0,
1090                      swizzle(src0, Z, X, Y, ONE),
1091                      swizzle(src1, Y, Z, X, ONE), 0);
1092
1093      i915_emit_arith(p,
1094                      A0_MAD,
1095                      get_result_vector(p, &inst->Dst[0]),
1096                      get_result_flags(inst), 0,
1097                      swizzle(src0, Y, Z, X, ONE),
1098                      swizzle(src1, Z, X, Y, ONE),
1099                      negate(tmp, 1, 1, 1, 0));
1100      break;
1101
1102   default:
1103      i915_program_error(p, "bad opcode %d", inst->Instruction.Opcode);
1104      p->error = 1;
1105      return;
1106   }
1107
1108   i915_release_utemps(p);
1109}
1110
1111
1112static void i915_translate_token(struct i915_fp_compile *p,
1113                                 const union i915_full_token* token,
1114                                 struct i915_fragment_shader *fs)
1115{
1116   struct i915_fragment_shader *ifs = p->shader;
1117   switch( token->Token.Type ) {
1118   case TGSI_TOKEN_TYPE_PROPERTY:
1119      /*
1120       * We only support one cbuf, but we still need to ignore the property
1121       * correctly so we don't hit the assert at the end of the switch case.
1122       */
1123      assert(token->FullProperty.Property.PropertyName ==
1124             TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS);
1125      break;
1126
1127   case TGSI_TOKEN_TYPE_DECLARATION:
1128      if (token->FullDeclaration.Declaration.File
1129               == TGSI_FILE_CONSTANT) {
1130         uint i;
1131         for (i = token->FullDeclaration.Range.First;
1132              i <= token->FullDeclaration.Range.Last;
1133              i++) {
1134            assert(ifs->constant_flags[i] == 0x0);
1135            ifs->constant_flags[i] = I915_CONSTFLAG_USER;
1136            ifs->num_constants = MAX2(ifs->num_constants, i + 1);
1137         }
1138      }
1139      else if (token->FullDeclaration.Declaration.File
1140               == TGSI_FILE_TEMPORARY) {
1141         uint i;
1142         for (i = token->FullDeclaration.Range.First;
1143              i <= token->FullDeclaration.Range.Last;
1144              i++) {
1145            if (i >= I915_MAX_TEMPORARY)
1146               debug_printf("Too many temps (%d)\n",i);
1147            else
1148               /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */
1149               p->temp_flag |= (1 << i); /* mark temp as used */
1150         }
1151      }
1152      break;
1153
1154   case TGSI_TOKEN_TYPE_IMMEDIATE:
1155      {
1156         const struct tgsi_full_immediate *imm
1157            = &token->FullImmediate;
1158         const uint pos = p->num_immediates++;
1159         uint j;
1160         assert( imm->Immediate.NrTokens <= 4 + 1 );
1161         for (j = 0; j < imm->Immediate.NrTokens - 1; j++) {
1162            p->immediates[pos][j] = imm->u[j].Float;
1163         }
1164      }
1165      break;
1166
1167   case TGSI_TOKEN_TYPE_INSTRUCTION:
1168      if (p->first_instruction) {
1169         /* resolve location of immediates */
1170         uint i, j;
1171         for (i = 0; i < p->num_immediates; i++) {
1172            /* find constant slot for this immediate */
1173            for (j = 0; j < I915_MAX_CONSTANT; j++) {
1174               if (ifs->constant_flags[j] == 0x0) {
1175                  memcpy(ifs->constants[j],
1176                         p->immediates[i],
1177                         4 * sizeof(float));
1178                  /*printf("immediate %d maps to const %d\n", i, j);*/
1179                  ifs->constant_flags[j] = 0xf;  /* all four comps used */
1180                  p->immediates_map[i] = j;
1181                  ifs->num_constants = MAX2(ifs->num_constants, j + 1);
1182                  break;
1183               }
1184            }
1185         }
1186
1187         p->first_instruction = FALSE;
1188      }
1189
1190      i915_translate_instruction(p, &token->FullInstruction, fs);
1191      break;
1192
1193   default:
1194      assert( 0 );
1195   }
1196
1197}
1198
1199/**
1200 * Translate TGSI fragment shader into i915 hardware instructions.
1201 * \param p  the translation state
1202 * \param tokens  the TGSI token array
1203 */
1204static void
1205i915_translate_instructions(struct i915_fp_compile *p,
1206                            const struct i915_token_list *tokens,
1207                            struct i915_fragment_shader *fs)
1208{
1209   int i;
1210   for(i = 0; i<tokens->NumTokens; i++) {
1211      i915_translate_token(p, &tokens->Tokens[i], fs);
1212   }
1213}
1214
1215
1216static struct i915_fp_compile *
1217i915_init_compile(struct i915_context *i915,
1218                  struct i915_fragment_shader *ifs)
1219{
1220   struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile);
1221   int i;
1222
1223   p->shader = ifs;
1224
1225   /* Put new constants at end of const buffer, growing downward.
1226    * The problem is we don't know how many user-defined constants might
1227    * be specified with pipe->set_constant_buffer().
1228    * Should pre-scan the user's program to determine the highest-numbered
1229    * constant referenced.
1230    */
1231   ifs->num_constants = 0;
1232   memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags));
1233
1234   memset(&p->register_phases, 0, sizeof(p->register_phases));
1235
1236   for (i = 0; i < I915_TEX_UNITS; i++)
1237      ifs->generic_mapping[i] = -1;
1238
1239   p->first_instruction = TRUE;
1240
1241   p->nr_tex_indirect = 1;      /* correct? */
1242   p->nr_tex_insn = 0;
1243   p->nr_alu_insn = 0;
1244   p->nr_decl_insn = 0;
1245
1246   p->csr = p->program;
1247   p->decl = p->declarations;
1248   p->decl_s = 0;
1249   p->decl_t = 0;
1250   p->temp_flag = ~0x0 << I915_MAX_TEMPORARY;
1251   p->utemp_flag = ~0x7;
1252
1253   /* initialize the first program word */
1254   *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM;
1255
1256   return p;
1257}
1258
1259
1260/* Copy compile results to the fragment program struct and destroy the
1261 * compilation context.
1262 */
1263static void
1264i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p)
1265{
1266   struct i915_fragment_shader *ifs = p->shader;
1267   unsigned long program_size = (unsigned long) (p->csr - p->program);
1268   unsigned long decl_size = (unsigned long) (p->decl - p->declarations);
1269
1270   if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT)
1271      debug_printf("Exceeded max nr indirect texture lookups\n");
1272
1273   if (p->nr_tex_insn > I915_MAX_TEX_INSN)
1274      i915_program_error(p, "Exceeded max TEX instructions");
1275
1276   if (p->nr_alu_insn > I915_MAX_ALU_INSN)
1277      i915_program_error(p, "Exceeded max ALU instructions");
1278
1279   if (p->nr_decl_insn > I915_MAX_DECL_INSN)
1280      i915_program_error(p, "Exceeded max DECL instructions");
1281
1282   if (p->error) {
1283      p->NumNativeInstructions = 0;
1284      p->NumNativeAluInstructions = 0;
1285      p->NumNativeTexInstructions = 0;
1286      p->NumNativeTexIndirections = 0;
1287
1288      i915_use_passthrough_shader(ifs);
1289   }
1290   else {
1291      p->NumNativeInstructions
1292         = p->nr_alu_insn + p->nr_tex_insn + p->nr_decl_insn;
1293      p->NumNativeAluInstructions = p->nr_alu_insn;
1294      p->NumNativeTexInstructions = p->nr_tex_insn;
1295      p->NumNativeTexIndirections = p->nr_tex_indirect;
1296
1297      /* patch in the program length */
1298      p->declarations[0] |= program_size + decl_size - 2;
1299
1300      /* Copy compilation results to fragment program struct:
1301       */
1302      assert(!ifs->decl);
1303      assert(!ifs->program);
1304
1305      ifs->decl
1306         = (uint *) MALLOC(decl_size * sizeof(uint));
1307      ifs->program
1308         = (uint *) MALLOC(program_size * sizeof(uint));
1309
1310      if (ifs->decl) {
1311         ifs->decl_len = decl_size;
1312
1313         memcpy(ifs->decl,
1314                p->declarations,
1315                decl_size * sizeof(uint));
1316      }
1317
1318      if (ifs->program) {
1319         ifs->program_len = program_size;
1320
1321         memcpy(ifs->program,
1322                p->program,
1323                program_size * sizeof(uint));
1324      }
1325   }
1326
1327   /* Release the compilation struct:
1328    */
1329   FREE(p);
1330}
1331
1332
1333
1334
1335
1336/**
1337 * Rather than trying to intercept and jiggle depth writes during
1338 * emit, just move the value into its correct position at the end of
1339 * the program:
1340 */
1341static void
1342i915_fixup_depth_write(struct i915_fp_compile *p)
1343{
1344   /* XXX assuming pos/depth is always in output[0] */
1345   if (p->shader->info.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) {
1346      const uint depth = UREG(REG_TYPE_OD, 0);
1347
1348      i915_emit_arith(p,
1349                      A0_MOV,                     /* opcode */
1350                      depth,                      /* dest reg */
1351                      A0_DEST_CHANNEL_W,          /* write mask */
1352                      0,                          /* saturate? */
1353                      swizzle(depth, X, Y, Z, Z), /* src0 */
1354                      0, 0 /* src1, src2 */);
1355   }
1356}
1357
1358
1359void
1360i915_translate_fragment_program( struct i915_context *i915,
1361                                 struct i915_fragment_shader *fs)
1362{
1363   struct i915_fp_compile *p;
1364   const struct tgsi_token *tokens = fs->state.tokens;
1365   struct i915_token_list* i_tokens;
1366
1367#if 0
1368   tgsi_dump(tokens, 0);
1369#endif
1370
1371   /* hw doesn't seem to like empty frag programs, even when the depth write
1372    * fixup gets emitted below - may that one is fishy, too? */
1373   if (fs->info.num_instructions == 1) {
1374      i915_use_passthrough_shader(fs);
1375
1376      return;
1377   }
1378
1379   p = i915_init_compile(i915, fs);
1380
1381   i_tokens = i915_optimize(tokens);
1382   i915_translate_instructions(p, i_tokens, fs);
1383   i915_fixup_depth_write(p);
1384
1385   i915_fini_compile(i915, p);
1386   i915_optimize_free(i_tokens);
1387
1388#if 0
1389   i915_disassemble_program(NULL, fs->program, fs->program_len);
1390#endif
1391}
1392