brw_fs.cpp revision ff0eb45f47ebf2fcc1af06a8b6b934c79dff1d41
1/*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 *    Eric Anholt <eric@anholt.net>
25 *
26 */
27
28extern "C" {
29
30#include <sys/types.h>
31
32#include "main/macros.h"
33#include "main/shaderobj.h"
34#include "program/prog_parameter.h"
35#include "program/prog_print.h"
36#include "program/prog_optimize.h"
37#include "program/sampler.h"
38#include "program/hash_table.h"
39#include "brw_context.h"
40#include "brw_eu.h"
41#include "brw_wm.h"
42#include "talloc.h"
43}
44#include "../glsl/glsl_types.h"
45#include "../glsl/ir_optimization.h"
46#include "../glsl/ir_print_visitor.h"
47
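/* Register files for the new FS backend's virtual registers.  The first
 * four values alias the hardware register-file encodings; FIXED_HW_REG,
 * UNIFORM and BAD_FILE are backend-internal.
 */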
48enum register_file {
49   ARF = BRW_ARCHITECTURE_REGISTER_FILE,
50   GRF = BRW_GENERAL_REGISTER_FILE,
51   MRF = BRW_MESSAGE_REGISTER_FILE,
52   IMM = BRW_IMMEDIATE_VALUE,
53   FIXED_HW_REG, /* a struct brw_reg */
54   UNIFORM, /* prog_data->params[hw_reg] */
55   BAD_FILE
56};
57
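/* Backend-specific opcodes used alongside the hardware BRW_OPCODE_*
 * values.  Starting at 256 keeps them out of the hardware opcode range.
 */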
58enum fs_opcodes {
59   FS_OPCODE_FB_WRITE = 256,
60   FS_OPCODE_RCP,
61   FS_OPCODE_RSQ,
62   FS_OPCODE_SQRT,
63   FS_OPCODE_EXP2,
64   FS_OPCODE_LOG2,
65   FS_OPCODE_POW,
66   FS_OPCODE_SIN,
67   FS_OPCODE_COS,
68   FS_OPCODE_DDX,
69   FS_OPCODE_DDY,
70   FS_OPCODE_LINTERP,
71   FS_OPCODE_TEX,
72   FS_OPCODE_TXB,
73   FS_OPCODE_TXL,
74   FS_OPCODE_DISCARD,
75};
76
77static int using_new_fs = -1;
78static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg);
79
80struct gl_shader *
81brw_new_shader(GLcontext *ctx, GLuint name, GLuint type)
82{
83   struct brw_shader *shader;
84
85   shader = talloc_zero(NULL, struct brw_shader);
86   if (shader) {
87      shader->base.Type = type;
88      shader->base.Name = name;
89      _mesa_init_shader(ctx, &shader->base);
90   }
91
92   return &shader->base;
93}
94
95struct gl_shader_program *
96brw_new_shader_program(GLcontext *ctx, GLuint name)
97{
98   struct brw_shader_program *prog;
99   prog = talloc_zero(NULL, struct brw_shader_program);
100   if (prog) {
101      prog->base.Name = name;
102      _mesa_init_shader_program(ctx, &prog->base);
103   }
104   return &prog->base;
105}
106
107GLboolean
108brw_compile_shader(GLcontext *ctx, struct gl_shader *shader)
109{
110   if (!_mesa_ir_compile_shader(ctx, shader))
111      return GL_FALSE;
112
113   return GL_TRUE;
114}
115
116GLboolean
117brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
118{
119   if (using_new_fs == -1)
120      using_new_fs = getenv("INTEL_NEW_FS") != NULL;
121
122   for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) {
123      struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[i];
124
125      if (using_new_fs && shader->base.Type == GL_FRAGMENT_SHADER) {
126	 void *mem_ctx = talloc_new(NULL);
127	 bool progress;
128
129	 if (shader->ir)
130	    talloc_free(shader->ir);
131	 shader->ir = new(shader) exec_list;
132	 clone_ir_list(mem_ctx, shader->ir, shader->base.ir);
133
134	 do_mat_op_to_vec(shader->ir);
135	 do_mod_to_fract(shader->ir);
136	 do_div_to_mul_rcp(shader->ir);
137	 do_sub_to_add_neg(shader->ir);
138	 do_explog_to_explog2(shader->ir);
139
140	 do {
141	    progress = false;
142
143	    brw_do_channel_expressions(shader->ir);
144	    brw_do_vector_splitting(shader->ir);
145
146	    progress = do_lower_jumps(shader->ir, true, true,
147				      true, /* main return */
148				      false, /* continue */
149				      false /* loops */
150				      ) || progress;
151
152	    progress = do_common_optimization(shader->ir, true, 32) || progress;
153
154	    progress = lower_noise(shader->ir) || progress;
155	    progress =
156	       lower_variable_index_to_cond_assign(shader->ir,
157						   GL_TRUE, /* input */
158						   GL_TRUE, /* output */
159						   GL_TRUE, /* temp */
160						   GL_TRUE /* uniform */
161						   ) || progress;
162	 } while (progress);
163
164	 validate_ir_tree(shader->ir);
165
166	 reparent_ir(shader->ir, shader->ir);
167	 talloc_free(mem_ctx);
168      }
169   }
170
171   if (!_mesa_ir_link_shader(ctx, prog))
172      return GL_FALSE;
173
174   return GL_TRUE;
175}
176
177static int
178type_size(const struct glsl_type *type)
179{
180   unsigned int size, i;
181
182   switch (type->base_type) {
183   case GLSL_TYPE_UINT:
184   case GLSL_TYPE_INT:
185   case GLSL_TYPE_FLOAT:
186   case GLSL_TYPE_BOOL:
187      return type->components();
188   case GLSL_TYPE_ARRAY:
189      /* FINISHME: uniform/varying arrays. */
190      return type_size(type->fields.array) * type->length;
191   case GLSL_TYPE_STRUCT:
192      size = 0;
193      for (i = 0; i < type->length; i++) {
194	 size += type_size(type->fields.structure[i].type);
195      }
196      return size;
197   case GLSL_TYPE_SAMPLER:
198      /* Samplers take up no register space, since they're baked in at
199       * link time.
200       */
201      return 0;
202   default:
203      assert(!"not reached");
204      return 0;
205   }
206}
207
208class fs_reg {
209public:
210   /* Callers of this talloc-based new need not call delete. It's
211    * easier to just talloc_free 'ctx' (or any of its ancestors). */
212   static void* operator new(size_t size, void *ctx)
213   {
214      void *node;
215
216      node = talloc_size(ctx, size);
217      assert(node != NULL);
218
219      return node;
220   }
221
222   void init()
223   {
224      this->reg = 0;
225      this->reg_offset = 0;
226      this->negate = 0;
227      this->abs = 0;
228      this->hw_reg = -1;
229   }
230
231   /** Generic unset register constructor. */
232   fs_reg()
233   {
234      init();
235      this->file = BAD_FILE;
236   }
237
238   /** Immediate value constructor. */
239   fs_reg(float f)
240   {
241      init();
242      this->file = IMM;
243      this->type = BRW_REGISTER_TYPE_F;
244      this->imm.f = f;
245   }
246
247   /** Immediate value constructor. */
248   fs_reg(int32_t i)
249   {
250      init();
251      this->file = IMM;
252      this->type = BRW_REGISTER_TYPE_D;
253      this->imm.i = i;
254   }
255
256   /** Immediate value constructor. */
257   fs_reg(uint32_t u)
258   {
259      init();
260      this->file = IMM;
261      this->type = BRW_REGISTER_TYPE_UD;
262      this->imm.u = u;
263   }
264
265   /** Fixed hardware brw_reg constructor (also used for brw immediates). */
266   fs_reg(struct brw_reg fixed_hw_reg)
267   {
268      init();
269      this->file = FIXED_HW_REG;
270      this->fixed_hw_reg = fixed_hw_reg;
271      this->type = fixed_hw_reg.type;
272   }
273
274   fs_reg(enum register_file file, int hw_reg);
275   fs_reg(class fs_visitor *v, const struct glsl_type *type);
276
277   /** Register file: ARF, GRF, MRF, IMM. */
278   enum register_file file;
279   /** Abstract register number.  0 = fixed hw reg */
280   int reg;
281   /** Offset within the abstract register. */
282   int reg_offset;
283   /** HW register number.  Generally unset until register allocation. */
284   int hw_reg;
285   /** Register type.  BRW_REGISTER_TYPE_* */
286   int type;
287   bool negate;
288   bool abs;
289   struct brw_reg fixed_hw_reg;
290
291   /** Value for file == IMM. */
292   union {
293      int32_t i;
294      uint32_t u;
295      float f;
296   } imm;
297};
298
299static const fs_reg reg_undef;
300static const fs_reg reg_null(ARF, BRW_ARF_NULL);
301
302class fs_inst : public exec_node {
303public:
304   /* Callers of this talloc-based new need not call delete. It's
305    * easier to just talloc_free 'ctx' (or any of its ancestors). */
306   static void* operator new(size_t size, void *ctx)
307   {
308      void *node;
309
310      node = talloc_zero_size(ctx, size);
311      assert(node != NULL);
312
313      return node;
314   }
315
316   void init()
317   {
318      this->opcode = BRW_OPCODE_NOP;
319      this->saturate = false;
320      this->conditional_mod = BRW_CONDITIONAL_NONE;
321      this->predicated = false;
322      this->sampler = 0;
323      this->target = 0;
324      this->eot = false;
325      this->shadow_compare = false;
326   }
327
328   fs_inst()
329   {
330      init();
331   }
332
333   fs_inst(int opcode)
334   {
335      init();
336      this->opcode = opcode;
337   }
338
339   fs_inst(int opcode, fs_reg dst, fs_reg src0)
340   {
341      init();
342      this->opcode = opcode;
343      this->dst = dst;
344      this->src[0] = src0;
345   }
346
347   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1)
348   {
349      init();
350      this->opcode = opcode;
351      this->dst = dst;
352      this->src[0] = src0;
353      this->src[1] = src1;
354   }
355
356   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)
357   {
358      init();
359      this->opcode = opcode;
360      this->dst = dst;
361      this->src[0] = src0;
362      this->src[1] = src1;
363      this->src[2] = src2;
364   }
365
366   int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
367   fs_reg dst;
368   fs_reg src[3];
369   bool saturate;
370   bool predicated;
371   int conditional_mod; /**< BRW_CONDITIONAL_* */
372
373   int mlen; /**< SEND message length */
374   int sampler;
375   int target; /**< MRT target. */
376   bool eot;
377   bool shadow_compare;
378
379   /** @{
380    * Annotation for the generated IR.  One of the two can be set.
381    */
382   ir_instruction *ir;
383   const char *annotation;
384   /** @} */
385};
386
387class fs_visitor : public ir_visitor
388{
389public:
390
391   fs_visitor(struct brw_wm_compile *c, struct brw_shader *shader)
392   {
393      this->c = c;
394      this->p = &c->func;
395      this->brw = p->brw;
396      this->fp = brw->fragment_program;
397      this->intel = &brw->intel;
398      this->ctx = &intel->ctx;
399      this->mem_ctx = talloc_new(NULL);
400      this->shader = shader;
401      this->fail = false;
402      this->next_abstract_grf = 1;
403      this->variable_ht = hash_table_ctor(0,
404					  hash_table_pointer_hash,
405					  hash_table_pointer_compare);
406
407      this->frag_color = NULL;
408      this->frag_data = NULL;
409      this->frag_depth = NULL;
410      this->first_non_payload_grf = 0;
411
412      this->current_annotation = NULL;
413      this->annotation_string = NULL;
414      this->annotation_ir = NULL;
415      this->base_ir = NULL;
416   }
417   ~fs_visitor()
418   {
419      talloc_free(this->mem_ctx);
420      hash_table_dtor(this->variable_ht);
421   }
422
423   fs_reg *variable_storage(ir_variable *var);
424
425   void visit(ir_variable *ir);
426   void visit(ir_assignment *ir);
427   void visit(ir_dereference_variable *ir);
428   void visit(ir_dereference_record *ir);
429   void visit(ir_dereference_array *ir);
430   void visit(ir_expression *ir);
431   void visit(ir_texture *ir);
432   void visit(ir_if *ir);
433   void visit(ir_constant *ir);
434   void visit(ir_swizzle *ir);
435   void visit(ir_return *ir);
436   void visit(ir_loop *ir);
437   void visit(ir_loop_jump *ir);
438   void visit(ir_discard *ir);
439   void visit(ir_call *ir);
440   void visit(ir_function *ir);
441   void visit(ir_function_signature *ir);
442
443   fs_inst *emit(fs_inst inst);
444   void assign_curb_setup();
445   void assign_urb_setup();
446   void assign_regs();
447   void generate_code();
448   void generate_fb_write(fs_inst *inst);
449   void generate_linterp(fs_inst *inst, struct brw_reg dst,
450			 struct brw_reg *src);
451   void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
452   void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src);
453   void generate_discard(fs_inst *inst);
454   void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
455   void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
456
457   void emit_dummy_fs();
458   void emit_fragcoord_interpolation(ir_variable *ir);
459   void emit_general_interpolation(ir_variable *ir);
460   void emit_interpolation_setup();
461   void emit_fb_writes();
462
463   struct brw_reg interp_reg(int location, int channel);
464   int setup_uniform_values(int loc, const glsl_type *type);
465
466   struct brw_context *brw;
467   const struct gl_fragment_program *fp;
468   struct intel_context *intel;
469   GLcontext *ctx;
470   struct brw_wm_compile *c;
471   struct brw_compile *p;
472   struct brw_shader *shader;
473   void *mem_ctx;
474   exec_list instructions;
475   int next_abstract_grf;
476   struct hash_table *variable_ht;
477   ir_variable *frag_color, *frag_data, *frag_depth;
478   int first_non_payload_grf;
479
480   /** @{ debug annotation info */
481   const char *current_annotation;
482   ir_instruction *base_ir;
483   const char **annotation_string;
484   ir_instruction **annotation_ir;
485   /** @} */
486
487   bool fail;
488
489   /* Result of last visit() method. */
490   fs_reg result;
491
492   fs_reg pixel_x;
493   fs_reg pixel_y;
494   fs_reg wpos_w;
495   fs_reg pixel_w;
496   fs_reg delta_x;
497   fs_reg delta_y;
498
499   int grf_used;
500
501};
502
503/** Fixed HW reg constructor. */
504fs_reg::fs_reg(enum register_file file, int hw_reg)
505{
506   init();
507   this->file = file;
508   this->hw_reg = hw_reg;
509   this->type = BRW_REGISTER_TYPE_F;
510}
511
512int
513brw_type_for_base_type(const struct glsl_type *type)
514{
515   switch (type->base_type) {
516   case GLSL_TYPE_FLOAT:
517      return BRW_REGISTER_TYPE_F;
518   case GLSL_TYPE_INT:
519   case GLSL_TYPE_BOOL:
520      return BRW_REGISTER_TYPE_D;
521   case GLSL_TYPE_UINT:
522      return BRW_REGISTER_TYPE_UD;
523   case GLSL_TYPE_ARRAY:
524   case GLSL_TYPE_STRUCT:
525      /* These should be overridden with the type of the member when
526       * dereferenced into.  BRW_REGISTER_TYPE_UD seems like a likely
527       * way to trip up if we don't.
528       */
529      return BRW_REGISTER_TYPE_UD;
530   default:
531      assert(!"not reached");
532      return BRW_REGISTER_TYPE_F;
533   }
534}
535
536/** Automatic reg constructor. */
537fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type)
538{
539   init();
540
541   this->file = GRF;
542   this->reg = v->next_abstract_grf;
543   this->reg_offset = 0;
544   v->next_abstract_grf += type_size(type);
545   this->type = brw_type_for_base_type(type);
546}
547
548fs_reg *
549fs_visitor::variable_storage(ir_variable *var)
550{
551   return (fs_reg *)hash_table_find(this->variable_ht, var);
552}
553
554/* Our support for uniforms is piggy-backed on the struct
555 * gl_fragment_program, because that's where the values actually
556 * get stored, rather than in some global gl_shader_program uniform
557 * store.
558 */
559int
560fs_visitor::setup_uniform_values(int loc, const glsl_type *type)
561{
562   unsigned int offset = 0;
563   float *vec_values;
564
565   if (type->is_matrix()) {
566      const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
567							type->vector_elements,
568							1);
569
570      for (unsigned int i = 0; i < type->matrix_columns; i++) {
571	 offset += setup_uniform_values(loc + offset, column);
572      }
573
574      return offset;
575   }
576
577   switch (type->base_type) {
578   case GLSL_TYPE_FLOAT:
579   case GLSL_TYPE_UINT:
580   case GLSL_TYPE_INT:
581   case GLSL_TYPE_BOOL:
582      vec_values = fp->Base.Parameters->ParameterValues[loc];
583      for (unsigned int i = 0; i < type->vector_elements; i++) {
584	 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i];
585      }
586      return 1;
587
588   case GLSL_TYPE_STRUCT:
589      for (unsigned int i = 0; i < type->length; i++) {
590	 offset += setup_uniform_values(loc + offset,
591					type->fields.structure[i].type);
592      }
593      return offset;
594
595   case GLSL_TYPE_ARRAY:
596      for (unsigned int i = 0; i < type->length; i++) {
597	 offset += setup_uniform_values(loc + offset, type->fields.array);
598      }
599      return offset;
600
601   case GLSL_TYPE_SAMPLER:
602      /* The sampler takes up a slot, but we don't use any values from it. */
603      return 1;
604
605   default:
606      assert(!"not reached");
607      return 0;
608   }
609}
610
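/* Set up gl_FragCoord: x/y come from the integer pixel centers computed in
 * emit_interpolation_setup() (adjusted for the pixel-center and origin
 * layout qualifiers), z is interpolated from the WPOS setup data, and w is
 * copied from the previously computed wpos_w.
 */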
611void
612fs_visitor::emit_fragcoord_interpolation(ir_variable *ir)
613{
614   fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
615   fs_reg wpos = *reg;
616   fs_reg neg_y = this->pixel_y;
617   neg_y.negate = true;
618
619   /* gl_FragCoord.x */
620   if (ir->pixel_center_integer) {
621      emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x));
622   } else {
623      emit(fs_inst(BRW_OPCODE_ADD, wpos, this->pixel_x, fs_reg(0.5f)));
624   }
625   wpos.reg_offset++;
626
627   /* gl_FragCoord.y */
628   if (ir->origin_upper_left && ir->pixel_center_integer) {
629      emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y));
630   } else {
631      fs_reg pixel_y = this->pixel_y;
632      float offset = (ir->pixel_center_integer ? 0.0 : 0.5);
633
634      if (!ir->origin_upper_left) {
635	 pixel_y.negate = true;
636	 offset += c->key.drawable_height - 1.0;
637      }
638
639      emit(fs_inst(BRW_OPCODE_ADD, wpos, pixel_y, fs_reg(offset)));
640   }
641   wpos.reg_offset++;
642
643   /* gl_FragCoord.z */
644   emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
645		interp_reg(FRAG_ATTRIB_WPOS, 2)));
646   wpos.reg_offset++;
647
648   /* gl_FragCoord.w: Already set up in emit_interpolation_setup() */
649   emit(fs_inst(BRW_OPCODE_MOV, wpos, this->wpos_w));
650
651   hash_table_insert(this->variable_ht, reg, ir);
652}
653
654
655void
656fs_visitor::emit_general_interpolation(ir_variable *ir)
657{
658   fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
659   /* Interpolation is always in floating point regs. */
660   reg->type = BRW_REGISTER_TYPE_F;
661   fs_reg attr = *reg;
662
663   unsigned int array_elements;
664   const glsl_type *type;
665
666   if (ir->type->is_array()) {
667      array_elements = ir->type->length;
668      if (array_elements == 0) {
669	 this->fail = true;
670      }
671      type = ir->type->fields.array;
672   } else {
673      array_elements = 1;
674      type = ir->type;
675   }
676
677   int location = ir->location;
678   for (unsigned int i = 0; i < array_elements; i++) {
679      for (unsigned int j = 0; j < type->matrix_columns; j++) {
680	 if (!(fp->Base.InputsRead & BITFIELD64_BIT(location))) {
681	    /* If there's no incoming setup data for this slot, don't
682	     * emit interpolation for it (since it's not used, and
683	     * we'd fall over later trying to find the setup data).
684	     */
685	    attr.reg_offset += type->vector_elements;
686	    continue;
687	 }
688
689	 for (unsigned int c = 0; c < type->vector_elements; c++) {
690	    struct brw_reg interp = interp_reg(location, c);
691	    emit(fs_inst(FS_OPCODE_LINTERP,
692			 attr,
693			 this->delta_x,
694			 this->delta_y,
695			 fs_reg(interp)));
696	    attr.reg_offset++;
697	 }
698	 attr.reg_offset -= type->vector_elements;
699
700	 for (unsigned int c = 0; c < type->vector_elements; c++) {
701	    emit(fs_inst(BRW_OPCODE_MUL,
702			 attr,
703			 attr,
704			 this->pixel_w));
705	    attr.reg_offset++;
706	 }
707	 location++;
708      }
709   }
710
711   hash_table_insert(this->variable_ht, reg, ir);
712}
713
714void
715fs_visitor::visit(ir_variable *ir)
716{
717   fs_reg *reg = NULL;
718
719   if (variable_storage(ir))
720      return;
721
722   if (strcmp(ir->name, "gl_FragColor") == 0) {
723      this->frag_color = ir;
724   } else if (strcmp(ir->name, "gl_FragData") == 0) {
725      this->frag_data = ir;
726   } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
727      this->frag_depth = ir;
728   }
729
730   if (ir->mode == ir_var_in) {
731      if (!strcmp(ir->name, "gl_FragCoord")) {
732	 emit_fragcoord_interpolation(ir);
733	 return;
734      } else if (!strcmp(ir->name, "gl_FrontFacing")) {
735	 reg = new(this->mem_ctx) fs_reg(this, ir->type);
736	 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
737	 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
738	  * us front face
739	  */
740	 fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP,
741				      *reg,
742				      fs_reg(r1_6ud),
743				      fs_reg(1u << 31)));
744	 inst->conditional_mod = BRW_CONDITIONAL_L;
745	 emit(fs_inst(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u)));
746      } else {
747	 emit_general_interpolation(ir);
748	 return;
749      }
750   }
751
752   if (ir->mode == ir_var_uniform) {
753      int param_index = c->prog_data.nr_params;
754
755      setup_uniform_values(ir->location, ir->type);
756
757      reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index);
758   }
759
760   if (!reg)
761      reg = new(this->mem_ctx) fs_reg(this, ir->type);
762
763   hash_table_insert(this->variable_ht, reg, ir);
764}
765
766void
767fs_visitor::visit(ir_dereference_variable *ir)
768{
769   fs_reg *reg = variable_storage(ir->var);
770   this->result = *reg;
771}
772
773void
774fs_visitor::visit(ir_dereference_record *ir)
775{
776   const glsl_type *struct_type = ir->record->type;
777
778   ir->record->accept(this);
779
780   unsigned int offset = 0;
781   for (unsigned int i = 0; i < struct_type->length; i++) {
782      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
783	 break;
784      offset += type_size(struct_type->fields.structure[i].type);
785   }
786   this->result.reg_offset += offset;
787   this->result.type = brw_type_for_base_type(ir->type);
788}
789
790void
791fs_visitor::visit(ir_dereference_array *ir)
792{
793   ir_constant *index;
794   int element_size;
795
796   ir->array->accept(this);
797   index = ir->array_index->as_constant();
798
799   element_size = type_size(ir->type);
800   this->result.type = brw_type_for_base_type(ir->type);
801
802   if (index) {
803      assert(this->result.file == UNIFORM ||
804	     (this->result.file == GRF &&
805	      this->result.reg != 0));
806      this->result.reg_offset += index->value.i[0] * element_size;
807   } else {
808      assert(!"FINISHME: non-constant array element");
809   }
810}
811
812void
813fs_visitor::visit(ir_expression *ir)
814{
815   unsigned int operand;
816   fs_reg op[2], temp;
817   fs_reg result;
818   fs_inst *inst;
819
820   for (operand = 0; operand < ir->get_num_operands(); operand++) {
821      ir->operands[operand]->accept(this);
822      if (this->result.file == BAD_FILE) {
823	 ir_print_visitor v;
824	 printf("Failed to get tree for expression operand:\n");
825	 ir->operands[operand]->accept(&v);
826	 this->fail = true;
827      }
828      op[operand] = this->result;
829
830      /* Matrix expression operands should have been broken down to vector
831       * operations already.
832       */
833      assert(!ir->operands[operand]->type->is_matrix());
834      /* And then those vector operands should have been broken down to scalar.
835       */
836      assert(!ir->operands[operand]->type->is_vector());
837   }
838
839   /* Storage for our result.  If our result goes into an assignment, it will
840    * just get copy-propagated out, so no worries.
841    */
842   this->result = fs_reg(this, ir->type);
843
844   switch (ir->operation) {
845   case ir_unop_logic_not:
846      emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], fs_reg(-1)));
847      break;
848   case ir_unop_neg:
849      op[0].negate = !op[0].negate;
850      this->result = op[0];
851      break;
852   case ir_unop_abs:
853      op[0].abs = true;
854      this->result = op[0];
855      break;
856   case ir_unop_sign:
857      temp = fs_reg(this, ir->type);
858
859      emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(0.0f)));
860
861      inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f)));
862      inst->conditional_mod = BRW_CONDITIONAL_G;
863      inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(1.0f)));
864      inst->predicated = true;
865
866      inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f)));
867      inst->conditional_mod = BRW_CONDITIONAL_L;
868      inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f)));
869      inst->predicated = true;
870
871      break;
872   case ir_unop_rcp:
873      emit(fs_inst(FS_OPCODE_RCP, this->result, op[0]));
874      break;
875
876   case ir_unop_exp2:
877      emit(fs_inst(FS_OPCODE_EXP2, this->result, op[0]));
878      break;
879   case ir_unop_log2:
880      emit(fs_inst(FS_OPCODE_LOG2, this->result, op[0]));
881      break;
882   case ir_unop_exp:
883   case ir_unop_log:
884      assert(!"not reached: should be handled by ir_explog_to_explog2");
885      break;
886   case ir_unop_sin:
887      emit(fs_inst(FS_OPCODE_SIN, this->result, op[0]));
888      break;
889   case ir_unop_cos:
890      emit(fs_inst(FS_OPCODE_COS, this->result, op[0]));
891      break;
892
893   case ir_unop_dFdx:
894      emit(fs_inst(FS_OPCODE_DDX, this->result, op[0]));
895      break;
896   case ir_unop_dFdy:
897      emit(fs_inst(FS_OPCODE_DDY, this->result, op[0]));
898      break;
899
900   case ir_binop_add:
901      emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1]));
902      break;
903   case ir_binop_sub:
904      assert(!"not reached: should be handled by ir_sub_to_add_neg");
905      break;
906
907   case ir_binop_mul:
908      emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1]));
909      break;
910   case ir_binop_div:
911      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
912      break;
913   case ir_binop_mod:
914      assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
915      break;
916
917   case ir_binop_less:
918      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
919      inst->conditional_mod = BRW_CONDITIONAL_L;
920      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
921      break;
922   case ir_binop_greater:
923      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
924      inst->conditional_mod = BRW_CONDITIONAL_G;
925      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
926      break;
927   case ir_binop_lequal:
928      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
929      inst->conditional_mod = BRW_CONDITIONAL_LE;
930      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
931      break;
932   case ir_binop_gequal:
933      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
934      inst->conditional_mod = BRW_CONDITIONAL_GE;
935      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
936      break;
937   case ir_binop_equal:
938   case ir_binop_all_equal: /* same as equal for scalars */
939      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
940      inst->conditional_mod = BRW_CONDITIONAL_Z;
941      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
942      break;
943   case ir_binop_nequal:
944   case ir_binop_any_nequal: /* same as nequal for scalars */
945      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
946      inst->conditional_mod = BRW_CONDITIONAL_NZ;
947      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
948      break;
949
950   case ir_binop_logic_xor:
951      emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1]));
952      break;
953
954   case ir_binop_logic_or:
955      emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1]));
956      break;
957
958   case ir_binop_logic_and:
959      emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1]));
960      break;
961
962   case ir_binop_dot:
963   case ir_binop_cross:
964   case ir_unop_any:
965      assert(!"not reached: should be handled by brw_fs_channel_expressions");
966      break;
967
968   case ir_unop_noise:
969      assert(!"not reached: should be handled by lower_noise");
970      break;
971
972   case ir_unop_sqrt:
973      emit(fs_inst(FS_OPCODE_SQRT, this->result, op[0]));
974      break;
975
976   case ir_unop_rsq:
977      emit(fs_inst(FS_OPCODE_RSQ, this->result, op[0]));
978      break;
979
980   case ir_unop_i2f:
981   case ir_unop_b2f:
982   case ir_unop_b2i:
983      emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
984      break;
985   case ir_unop_f2i:
986      emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
987      break;
988   case ir_unop_f2b:
989   case ir_unop_i2b:
990      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f)));
991      inst->conditional_mod = BRW_CONDITIONAL_NZ;
      break;
992
993   case ir_unop_trunc:
994      emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
995      break;
996   case ir_unop_ceil:
997      op[0].negate = !op[0].negate;
998      inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
999      this->result.negate = true;
1000      break;
1001   case ir_unop_floor:
1002      inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
1003      break;
1004   case ir_unop_fract:
1005      inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0]));
1006      break;
1007
1008   case ir_binop_min:
1009      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
1010      inst->conditional_mod = BRW_CONDITIONAL_L;
1011
1012      inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
1013      inst->predicated = true;
1014      break;
1015   case ir_binop_max:
1016      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
1017      inst->conditional_mod = BRW_CONDITIONAL_G;
1018
1019      inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
1020      inst->predicated = true;
1021      break;
1022
1023   case ir_binop_pow:
1024      inst = emit(fs_inst(FS_OPCODE_POW, this->result, op[0], op[1]));
1025      break;
1026
1027   case ir_unop_bit_not:
1028   case ir_unop_u2f:
1029   case ir_binop_lshift:
1030   case ir_binop_rshift:
1031   case ir_binop_bit_and:
1032   case ir_binop_bit_xor:
1033   case ir_binop_bit_or:
1034      assert(!"GLSL 1.30 features unsupported");
1035      break;
1036   }
1037}
1038
1039void
1040fs_visitor::visit(ir_assignment *ir)
1041{
1042   struct fs_reg l, r;
1043   int i;
1044   int write_mask;
1045   fs_inst *inst;
1046
1047   /* FINISHME: arrays on the lhs */
1048   ir->lhs->accept(this);
1049   l = this->result;
1050
1051   ir->rhs->accept(this);
1052   r = this->result;
1053
1054   /* FINISHME: This should really be set to the correct maximal writemask for each
1055    * FINISHME: component written (in the loops below).  This case can only
1056    * FINISHME: occur for matrices, arrays, and structures.
1057    */
1058   if (ir->write_mask == 0) {
1059      assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
1060      write_mask = WRITEMASK_XYZW;
1061   } else {
1062      assert(ir->lhs->type->is_vector() || ir->lhs->type->is_scalar());
1063      write_mask = ir->write_mask;
1064   }
1065
1066   assert(l.file != BAD_FILE);
1067   assert(r.file != BAD_FILE);
1068
1069   if (ir->condition) {
1070      /* Get the condition bool into the predicate. */
1071      ir->condition->accept(this);
1072      inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, this->result, fs_reg(0)));
1073      inst->conditional_mod = BRW_CONDITIONAL_NZ;
1074   }
1075
1076   for (i = 0; i < type_size(ir->lhs->type); i++) {
1077      if (i >= 4 || (write_mask & (1 << i))) {
1078	 inst = emit(fs_inst(BRW_OPCODE_MOV, l, r));
1079	 if (ir->condition)
1080	    inst->predicated = true;
1081	 r.reg_offset++;
1082      }
1083      l.reg_offset++;
1084   }
1085}
1086
1087void
1088fs_visitor::visit(ir_texture *ir)
1089{
1090   int base_mrf = 2;
1091   fs_inst *inst = NULL;
1092   unsigned int mlen = 0;
1093
1094   ir->coordinate->accept(this);
1095   fs_reg coordinate = this->result;
1096
1097   if (ir->projector) {
1098      fs_reg inv_proj = fs_reg(this, glsl_type::float_type);
1099
1100      ir->projector->accept(this);
1101      emit(fs_inst(FS_OPCODE_RCP, inv_proj, this->result));
1102
1103      fs_reg proj_coordinate = fs_reg(this, ir->coordinate->type);
1104      for (unsigned int i = 0; i < ir->coordinate->type->vector_elements; i++) {
1105	 emit(fs_inst(BRW_OPCODE_MUL, proj_coordinate, coordinate, inv_proj));
1106	 coordinate.reg_offset++;
1107	 proj_coordinate.reg_offset++;
1108      }
1109      proj_coordinate.reg_offset = 0;
1110
1111      coordinate = proj_coordinate;
1112   }
1113
1114   for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) {
1115      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate));
1116      coordinate.reg_offset++;
1117   }
1118
1119   /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
1120   if (intel->gen < 5)
1121      mlen = 3;
1122
1123   if (ir->shadow_comparitor) {
1124      /* For shadow comparisons, we have to supply u,v,r. */
1125      mlen = 3;
1126
1127      ir->shadow_comparitor->accept(this);
1128      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
1129      mlen++;
1130   }
1131
1132   /* Do we ever want to handle writemasking on texture samples?  Is it
1133    * performance relevant?
1134    */
1135   fs_reg dst = fs_reg(this, glsl_type::vec4_type);
1136
1137   switch (ir->op) {
1138   case ir_tex:
1139      inst = emit(fs_inst(FS_OPCODE_TEX, dst, fs_reg(MRF, base_mrf)));
1140      break;
1141   case ir_txb:
1142      ir->lod_info.bias->accept(this);
1143      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
1144      mlen++;
1145
1146      inst = emit(fs_inst(FS_OPCODE_TXB, dst, fs_reg(MRF, base_mrf)));
1147      break;
1148   case ir_txl:
1149      ir->lod_info.lod->accept(this);
1150      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
1151      mlen++;
1152
1153      inst = emit(fs_inst(FS_OPCODE_TXL, dst, fs_reg(MRF, base_mrf)));
1154      break;
1155   case ir_txd:
1156   case ir_txf:
1157      assert(!"GLSL 1.30 features unsupported");
1158      break;
1159   }
1160
1161   inst->sampler =
1162      _mesa_get_sampler_uniform_value(ir->sampler,
1163				      ctx->Shader.CurrentProgram,
1164				      &brw->fragment_program->Base);
1165   inst->sampler = c->fp->program.Base.SamplerUnits[inst->sampler];
1166
1167   this->result = dst;
1168
1169   if (ir->shadow_comparitor)
1170      inst->shadow_compare = true;
1171   inst->mlen = mlen;
1172}
1173
1174void
1175fs_visitor::visit(ir_swizzle *ir)
1176{
1177   ir->val->accept(this);
1178   fs_reg val = this->result;
1179
1180   fs_reg result = fs_reg(this, ir->type);
1181   this->result = result;
1182
1183   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
1184      fs_reg channel = val;
1185      int swiz = 0;
1186
1187      switch (i) {
1188      case 0:
1189	 swiz = ir->mask.x;
1190	 break;
1191      case 1:
1192	 swiz = ir->mask.y;
1193	 break;
1194      case 2:
1195	 swiz = ir->mask.z;
1196	 break;
1197      case 3:
1198	 swiz = ir->mask.w;
1199	 break;
1200      }
1201
1202      channel.reg_offset += swiz;
1203      emit(fs_inst(BRW_OPCODE_MOV, result, channel));
1204      result.reg_offset++;
1205   }
1206}
1207
1208void
1209fs_visitor::visit(ir_discard *ir)
1210{
1211   assert(ir->condition == NULL); /* FINISHME */
1212
1213   emit(fs_inst(FS_OPCODE_DISCARD));
1214}
1215
1216void
1217fs_visitor::visit(ir_constant *ir)
1218{
1219   fs_reg reg(this, ir->type);
1220   this->result = reg;
1221
1222   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
1223      switch (ir->type->base_type) {
1224      case GLSL_TYPE_FLOAT:
1225	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i])));
1226	 break;
1227      case GLSL_TYPE_UINT:
1228	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i])));
1229	 break;
1230      case GLSL_TYPE_INT:
1231	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i])));
1232	 break;
1233      case GLSL_TYPE_BOOL:
1234	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i])));
1235	 break;
1236      default:
1237	 assert(!"Non-float/uint/int/bool constant");
1238      }
1239      reg.reg_offset++;
1240   }
1241}
1242
1243void
1244fs_visitor::visit(ir_if *ir)
1245{
1246   fs_inst *inst;
1247
1248   /* Don't point the annotation at the if statement, because then it plus
1249    * the then and else blocks get printed.
1250    */
1251   this->base_ir = ir->condition;
1252
1253   /* Generate the condition into the condition code. */
1254   ir->condition->accept(this);
1255   inst = emit(fs_inst(BRW_OPCODE_MOV, fs_reg(brw_null_reg()), this->result));
1256   inst->conditional_mod = BRW_CONDITIONAL_NZ;
1257
1258   inst = emit(fs_inst(BRW_OPCODE_IF));
1259   inst->predicated = true;
1260
1261   foreach_iter(exec_list_iterator, iter, ir->then_instructions) {
1262      ir_instruction *ir = (ir_instruction *)iter.get();
1263      this->base_ir = ir;
1264
1265      ir->accept(this);
1266   }
1267
1268   if (!ir->else_instructions.is_empty()) {
1269      emit(fs_inst(BRW_OPCODE_ELSE));
1270
1271      foreach_iter(exec_list_iterator, iter, ir->else_instructions) {
1272	 ir_instruction *ir = (ir_instruction *)iter.get();
1273	 this->base_ir = ir;
1274
1275	 ir->accept(this);
1276      }
1277   }
1278
1279   emit(fs_inst(BRW_OPCODE_ENDIF));
1280}
1281
1282void
1283fs_visitor::visit(ir_loop *ir)
1284{
1285   fs_reg counter = reg_undef;
1286
1287   if (ir->counter) {
1288      this->base_ir = ir->counter;
1289      ir->counter->accept(this);
1290      counter = *(variable_storage(ir->counter));
1291
1292      if (ir->from) {
1293	 this->base_ir = ir->from;
1294	 ir->from->accept(this);
1295
1296	 emit(fs_inst(BRW_OPCODE_MOV, counter, this->result));
1297      }
1298   }
1299
1300   /* Start a safety counter.  If the user messed up their loop
1301    * counting, we don't want to hang the GPU.
1302    */
1303   fs_reg max_iter = fs_reg(this, glsl_type::int_type);
1304   emit(fs_inst(BRW_OPCODE_MOV, max_iter, fs_reg(10000)));
1305
1306   emit(fs_inst(BRW_OPCODE_DO));
1307
1308   if (ir->to) {
1309      this->base_ir = ir->to;
1310      ir->to->accept(this);
1311
1312      fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null,
1313				   counter, this->result));
1314      switch (ir->cmp) {
1315      case ir_binop_equal:
1316	 inst->conditional_mod = BRW_CONDITIONAL_Z;
1317	 break;
1318      case ir_binop_nequal:
1319	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
1320	 break;
1321      case ir_binop_gequal:
1322	 inst->conditional_mod = BRW_CONDITIONAL_GE;
1323	 break;
1324      case ir_binop_lequal:
1325	 inst->conditional_mod = BRW_CONDITIONAL_LE;
1326	 break;
1327      case ir_binop_greater:
1328	 inst->conditional_mod = BRW_CONDITIONAL_G;
1329	 break;
1330      case ir_binop_less:
1331	 inst->conditional_mod = BRW_CONDITIONAL_L;
1332	 break;
1333      default:
1334	 assert(!"not reached: unknown loop condition");
1335	 this->fail = true;
1336	 break;
1337      }
1338
1339      inst = emit(fs_inst(BRW_OPCODE_BREAK));
1340      inst->predicated = true;
1341   }
1342
1343   foreach_iter(exec_list_iterator, iter, ir->body_instructions) {
1344      ir_instruction *ir = (ir_instruction *)iter.get();
1345      fs_inst *inst;
1346
1347      this->base_ir = ir;
1348      ir->accept(this);
1349
1350      /* Check the maximum loop iters counter. */
1351      inst = emit(fs_inst(BRW_OPCODE_ADD, max_iter, max_iter, fs_reg(-1)));
1352      inst->conditional_mod = BRW_CONDITIONAL_Z;
1353
1354      inst = emit(fs_inst(BRW_OPCODE_BREAK));
1355      inst->predicated = true;
1356   }
1357
1358   if (ir->increment) {
1359      this->base_ir = ir->increment;
1360      ir->increment->accept(this);
1361      emit(fs_inst(BRW_OPCODE_ADD, counter, counter, this->result));
1362   }
1363
1364   emit(fs_inst(BRW_OPCODE_WHILE));
1365}
1366
1367void
1368fs_visitor::visit(ir_loop_jump *ir)
1369{
1370   switch (ir->mode) {
1371   case ir_loop_jump::jump_break:
1372      emit(fs_inst(BRW_OPCODE_BREAK));
1373      break;
1374   case ir_loop_jump::jump_continue:
1375      emit(fs_inst(BRW_OPCODE_CONTINUE));
1376      break;
1377   }
1378}
1379
1380void
1381fs_visitor::visit(ir_call *ir)
1382{
1383   assert(!"FINISHME");
1384}
1385
1386void
1387fs_visitor::visit(ir_return *ir)
1388{
1389   assert(!"FINISHME");
1390}
1391
1392void
1393fs_visitor::visit(ir_function *ir)
1394{
1395   /* Ignore function bodies other than main() -- we shouldn't see calls to
1396    * them since they should all be inlined before we get here.
1397    */
1398   if (strcmp(ir->name, "main") == 0) {
1399      const ir_function_signature *sig;
1400      exec_list empty;
1401
1402      sig = ir->matching_signature(&empty);
1403
1404      assert(sig);
1405
1406      foreach_iter(exec_list_iterator, iter, sig->body) {
1407	 ir_instruction *ir = (ir_instruction *)iter.get();
1408	 this->base_ir = ir;
1409
1410	 ir->accept(this);
1411      }
1412   }
1413}
1414
1415void
1416fs_visitor::visit(ir_function_signature *ir)
1417{
1418   assert(!"not reached");
1419   (void)ir;
1420}
1421
1422fs_inst *
1423fs_visitor::emit(fs_inst inst)
1424{
1425   fs_inst *list_inst = new(mem_ctx) fs_inst;
1426   *list_inst = inst;
1427
1428   list_inst->annotation = this->current_annotation;
1429   list_inst->ir = this->base_ir;
1430
1431   this->instructions.push_tail(list_inst);
1432
1433   return list_inst;
1434}
1435
1436/** Emits a dummy fragment shader that outputs solid magenta, for bringup purposes. */
1437void
1438fs_visitor::emit_dummy_fs()
1439{
1440   /* Everyone's favorite color. */
1441   emit(fs_inst(BRW_OPCODE_MOV,
1442		fs_reg(MRF, 2),
1443		fs_reg(1.0f)));
1444   emit(fs_inst(BRW_OPCODE_MOV,
1445		fs_reg(MRF, 3),
1446		fs_reg(0.0f)));
1447   emit(fs_inst(BRW_OPCODE_MOV,
1448		fs_reg(MRF, 4),
1449		fs_reg(1.0f)));
1450   emit(fs_inst(BRW_OPCODE_MOV,
1451		fs_reg(MRF, 5),
1452		fs_reg(0.0f)));
1453
1454   fs_inst *write;
1455   write = emit(fs_inst(FS_OPCODE_FB_WRITE,
1456			fs_reg(0),
1457			fs_reg(0)));
1458}
1459
1460/* The register location here is relative to the start of the URB
1461 * data.  It will get adjusted to be a real location before
1462 * generate_code() time.
1463 */
1464struct brw_reg
1465fs_visitor::interp_reg(int location, int channel)
1466{
1467   int regnr = location * 2 + channel / 2;
1468   int stride = (channel & 1) * 4;
1469
1470   return brw_vec1_grf(regnr, stride);
1471}
1472
1473/** Emits the interpolation for the varying inputs. */
1474void
1475fs_visitor::emit_interpolation_setup()
1476{
1477   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
1478
1479   this->current_annotation = "compute pixel centers";
1480   this->pixel_x = fs_reg(this, glsl_type::uint_type);
1481   this->pixel_y = fs_reg(this, glsl_type::uint_type);
1482   this->pixel_x.type = BRW_REGISTER_TYPE_UW;
1483   this->pixel_y.type = BRW_REGISTER_TYPE_UW;
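   /* g1 holds the screen-space X/Y origins of the subspans (per the SIMD8
    * PS payload layout assumed here).  brw_imm_v packs eight 4-bit
    * immediates, so adding 0x10101010 (0,1,0,1,...) and 0x11001100
    * (0,0,1,1,...) produces each pixel's offset within its 2x2 subspan.
    */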
1484   emit(fs_inst(BRW_OPCODE_ADD,
1485		this->pixel_x,
1486		fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
1487		fs_reg(brw_imm_v(0x10101010))));
1488   emit(fs_inst(BRW_OPCODE_ADD,
1489		this->pixel_y,
1490		fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
1491		fs_reg(brw_imm_v(0x11001100))));
1492
1493   this->current_annotation = "compute pixel deltas from v0";
1494   this->delta_x = fs_reg(this, glsl_type::float_type);
1495   this->delta_y = fs_reg(this, glsl_type::float_type);
1496   emit(fs_inst(BRW_OPCODE_ADD,
1497		this->delta_x,
1498		this->pixel_x,
1499		fs_reg(negate(brw_vec1_grf(1, 0)))));
1500   emit(fs_inst(BRW_OPCODE_ADD,
1501		this->delta_y,
1502		this->pixel_y,
1503		fs_reg(negate(brw_vec1_grf(1, 1)))));
1504
1505   this->current_annotation = "compute pos.w and 1/pos.w";
1506   /* Compute wpos.w.  It's always in our setup, since it's needed to
1507    * interpolate the other attributes.
1508    */
1509   this->wpos_w = fs_reg(this, glsl_type::float_type);
1510   emit(fs_inst(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y,
1511		interp_reg(FRAG_ATTRIB_WPOS, 3)));
1512   /* Compute the pixel 1/W value from wpos.w. */
1513   this->pixel_w = fs_reg(this, glsl_type::float_type);
1514   emit(fs_inst(FS_OPCODE_RCP, this->pixel_w, wpos_w));
1515   this->current_annotation = NULL;
1516}
1517
1518void
1519fs_visitor::emit_fb_writes()
1520{
1521   this->current_annotation = "FB write header";
1522   int nr = 0;
1523
1524   /* m0, m1 header */
1525   nr += 2;
1526
1527   if (c->key.aa_dest_stencil_reg) {
1528      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
1529		   fs_reg(brw_vec8_grf(c->key.aa_dest_stencil_reg, 0))));
1530   }
1531
1532   /* Reserve space for color. It'll be filled in per MRT below. */
1533   int color_mrf = nr;
1534   nr += 4;
1535
1536   if (c->key.source_depth_to_render_target) {
1537      if (c->key.computes_depth) {
1538	 /* Hand over gl_FragDepth. */
1539	 assert(this->frag_depth);
1540	 fs_reg depth = *(variable_storage(this->frag_depth));
1541
1542	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), depth));
1543      } else {
1544	 /* Pass through the payload depth. */
1545	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
1546		      fs_reg(brw_vec8_grf(c->key.source_depth_reg, 0))));
1547      }
1548   }
1549
1550   if (c->key.dest_depth_reg) {
1551      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
1552		   fs_reg(brw_vec8_grf(c->key.dest_depth_reg, 0))));
1553   }
1554
1555   fs_reg color = reg_undef;
1556   if (this->frag_color)
1557      color = *(variable_storage(this->frag_color));
1558   else if (this->frag_data)
1559      color = *(variable_storage(this->frag_data));
1560
1561   for (int target = 0; target < c->key.nr_color_regions; target++) {
1562      this->current_annotation = talloc_asprintf(this->mem_ctx,
1563						 "FB write target %d",
1564						 target);
1565      if (this->frag_color || this->frag_data) {
1566	 for (int i = 0; i < 4; i++) {
1567	    emit(fs_inst(BRW_OPCODE_MOV,
1568			 fs_reg(MRF, color_mrf + i),
1569			 color));
1570	    color.reg_offset++;
1571	 }
1572      }
1573
1574      if (this->frag_color)
1575	 color.reg_offset -= 4;
1576
1577      fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
1578				   reg_undef, reg_undef));
1579      inst->target = target;
1580      inst->mlen = nr;
1581      if (target == c->key.nr_color_regions - 1)
1582	 inst->eot = true;
1583   }
1584
1585   if (c->key.nr_color_regions == 0) {
1586      fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
1587				   reg_undef, reg_undef));
1588      inst->mlen = nr;
1589      inst->eot = true;
1590   }
1591
1592   this->current_annotation = NULL;
1593}
1594
1595void
1596fs_visitor::generate_fb_write(fs_inst *inst)
1597{
1598   GLboolean eot = inst->eot;
1599
1600   /* The header is 2 registers; g0 and g1 are its contents.  g0 goes out
1601    * as an implied move, so copy g1 into m1 here.
1602    */
1603   brw_push_insn_state(p);
1604   brw_set_mask_control(p, BRW_MASK_DISABLE);
1605   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1606   brw_MOV(p,
1607	   brw_message_reg(1),
1608	   brw_vec8_grf(1, 0));
1609   brw_pop_insn_state(p);
1610
1611   brw_fb_WRITE(p,
1612		8, /* dispatch_width */
1613		retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
1614		0, /* base MRF */
1615		retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
1616		inst->target,
1617		inst->mlen,
1618		0,
1619		eot);
1620}
1621
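/* Emit linear interpolation of one setup channel.  PLN computes the whole
 * interpolation in one instruction, but needs delta_x/delta_y in an
 * adjacent (and, pre-gen6, even-aligned) register pair; otherwise fall
 * back to the two-instruction LINE + MAC sequence.
 */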
1622void
1623fs_visitor::generate_linterp(fs_inst *inst,
1624			     struct brw_reg dst, struct brw_reg *src)
1625{
1626   struct brw_reg delta_x = src[0];
1627   struct brw_reg delta_y = src[1];
1628   struct brw_reg interp = src[2];
1629
1630   if (brw->has_pln &&
1631       delta_y.nr == delta_x.nr + 1 &&
1632       (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
1633      brw_PLN(p, dst, interp, delta_x);
1634   } else {
1635      brw_LINE(p, brw_null_reg(), interp, delta_x);
1636      brw_MAC(p, dst, suboffset(interp, 1), delta_y);
1637   }
1638}
1639
1640void
1641fs_visitor::generate_math(fs_inst *inst,
1642			  struct brw_reg dst, struct brw_reg *src)
1643{
1644   int op;
1645
1646   switch (inst->opcode) {
1647   case FS_OPCODE_RCP:
1648      op = BRW_MATH_FUNCTION_INV;
1649      break;
1650   case FS_OPCODE_RSQ:
1651      op = BRW_MATH_FUNCTION_RSQ;
1652      break;
1653   case FS_OPCODE_SQRT:
1654      op = BRW_MATH_FUNCTION_SQRT;
1655      break;
1656   case FS_OPCODE_EXP2:
1657      op = BRW_MATH_FUNCTION_EXP;
1658      break;
1659   case FS_OPCODE_LOG2:
1660      op = BRW_MATH_FUNCTION_LOG;
1661      break;
1662   case FS_OPCODE_POW:
1663      op = BRW_MATH_FUNCTION_POW;
1664      break;
1665   case FS_OPCODE_SIN:
1666      op = BRW_MATH_FUNCTION_SIN;
1667      break;
1668   case FS_OPCODE_COS:
1669      op = BRW_MATH_FUNCTION_COS;
1670      break;
1671   default:
1672      assert(!"not reached: unknown math function");
1673      op = 0;
1674      break;
1675   }
1676
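   /* POW is the only two-operand math function; its second operand is
    * passed in the next message register (m3, after src[0] in m2).
    */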
1677   if (inst->opcode == FS_OPCODE_POW) {
1678      brw_MOV(p, brw_message_reg(3), src[1]);
1679   }
1680
1681   brw_math(p, dst,
1682	    op,
1683	    inst->saturate ? BRW_MATH_SATURATE_SATURATE :
1684	    BRW_MATH_SATURATE_NONE,
1685	    2, src[0],
1686	    BRW_MATH_DATA_VECTOR,
1687	    BRW_MATH_PRECISION_FULL);
1688}
1689
1690void
1691fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
1692{
1693   int msg_type = -1;
1694   int rlen = 4;
1695
1696   if (intel->gen == 5) {
1697      switch (inst->opcode) {
1698      case FS_OPCODE_TEX:
1699	 if (inst->shadow_compare) {
1700	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5;
1701	 } else {
1702	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5;
1703	 }
1704	 break;
1705      case FS_OPCODE_TXB:
1706	 if (inst->shadow_compare) {
1707	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5;
1708	 } else {
1709	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5;
1710	 }
1711	 break;
1712      }
1713   } else {
1714      switch (inst->opcode) {
1715      case FS_OPCODE_TEX:
1716	 /* Note that G45 and older determine shadow compare and dispatch width
1717	  * from message length for most messages.
1718	  */
1719	 if (inst->shadow_compare) {
1720	    msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
1721	 } else {
1722	    msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
1723	 }
	 break;
1724      case FS_OPCODE_TXB:
1725	 if (inst->shadow_compare) {
1726	    assert(!"FINISHME: shadow compare with bias.");
1727	    msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
1728	 } else {
1729	    msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
1730	    rlen = 8;
1731	 }
1732	 break;
1733      }
1734   }
1735   assert(msg_type != -1);
1736
1737   /* g0 header. */
1738   src.nr--;
1739
1740   brw_SAMPLE(p,
1741	      retype(dst, BRW_REGISTER_TYPE_UW),
1742	      src.nr,
1743	      retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
1744              SURF_INDEX_TEXTURE(inst->sampler),
1745	      inst->sampler,
1746	      WRITEMASK_XYZW,
1747	      msg_type,
1748	      rlen,
1749	      inst->mlen + 1,
1750	      0,
1751	      1,
1752	      BRW_SAMPLER_SIMD_MODE_SIMD8);
1753}
1754
1755
1756/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
1757 * looking like:
1758 *
1759 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
1760 *
1761 * and we're trying to produce:
1762 *
1763 *           DDX                     DDY
1764 * dst: (ss0.tr - ss0.tl)     (ss0.tl - ss0.bl)
1765 *      (ss0.tr - ss0.tl)     (ss0.tr - ss0.br)
1766 *      (ss0.br - ss0.bl)     (ss0.tl - ss0.bl)
1767 *      (ss0.br - ss0.bl)     (ss0.tr - ss0.br)
1768 *      (ss1.tr - ss1.tl)     (ss1.tl - ss1.bl)
1769 *      (ss1.tr - ss1.tl)     (ss1.tr - ss1.br)
1770 *      (ss1.br - ss1.bl)     (ss1.tl - ss1.bl)
1771 *      (ss1.br - ss1.bl)     (ss1.tr - ss1.br)
1772 *
1773 * and add another set of two more subspans if in 16-pixel dispatch mode.
1774 *
1775 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
1776 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
1777 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
1778 * between each other.  We could probably do it like ddx and swizzle the right
1779 * order later, but bail for now and just produce
1780 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
1781 */
1782void
1783fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
1784{
1785   struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
1786				 BRW_REGISTER_TYPE_F,
1787				 BRW_VERTICAL_STRIDE_2,
1788				 BRW_WIDTH_2,
1789				 BRW_HORIZONTAL_STRIDE_0,
1790				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
1791   struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
1792				 BRW_REGISTER_TYPE_F,
1793				 BRW_VERTICAL_STRIDE_2,
1794				 BRW_WIDTH_2,
1795				 BRW_HORIZONTAL_STRIDE_0,
1796				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
1797   brw_ADD(p, dst, src0, negate(src1));
1798}
1799
1800void
1801fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
1802{
1803   struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
1804				 BRW_REGISTER_TYPE_F,
1805				 BRW_VERTICAL_STRIDE_4,
1806				 BRW_WIDTH_4,
1807				 BRW_HORIZONTAL_STRIDE_0,
1808				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
1809   struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
1810				 BRW_REGISTER_TYPE_F,
1811				 BRW_VERTICAL_STRIDE_4,
1812				 BRW_WIDTH_4,
1813				 BRW_HORIZONTAL_STRIDE_0,
1814				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
1815   brw_ADD(p, dst, src0, negate(src1));
1816}
1817
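/* Presumably this implements discard by clearing the killed channels' bits
 * in the g0 header's pixel mask (via the negated instruction mask), so that
 * the later framebuffer write skips those pixels.
 */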
1818void
1819fs_visitor::generate_discard(fs_inst *inst)
1820{
1821   struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
1822   brw_push_insn_state(p);
1823   brw_set_mask_control(p, BRW_MASK_DISABLE);
1824   brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */
1825   brw_AND(p, g0, c->emit_mask_reg, g0);
1826   brw_pop_insn_state(p);
1827}
1828
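/* Trivially map an abstract GRF onto a hardware GRF.  Abstract register
 * numbers start at 1, so register N lands at header_size + N - 1 plus its
 * reg_offset; no liveness analysis or register reuse is done yet.
 */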
1829static void
1830trivial_assign_reg(int header_size, fs_reg *reg)
1831{
1832   if (reg->file == GRF && reg->reg != 0) {
1833      reg->hw_reg = header_size + reg->reg - 1 + reg->reg_offset;
1834      reg->reg = 0;
1835   }
1836}
1837
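/* Map UNIFORM-file values onto the constant (CURB) registers that follow
 * the payload.  Each hardware register holds 8 floats, hence the /8 and %8
 * addressing below.
 */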
1838void
1839fs_visitor::assign_curb_setup()
1840{
1841   c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
1842   c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8;
1843
1844   if (intel->gen == 5 && (c->prog_data.first_curbe_grf +
1845			   c->prog_data.curb_read_length) & 1) {
1846      /* Align the start of the interpolation coefficients so that we can use
1847       * the PLN instruction.
1848       */
1849      c->prog_data.first_curbe_grf++;
1850   }
1851
1852   /* Map the offsets in the UNIFORM file to fixed HW regs. */
1853   foreach_iter(exec_list_iterator, iter, this->instructions) {
1854      fs_inst *inst = (fs_inst *)iter.get();
1855
1856      for (unsigned int i = 0; i < 3; i++) {
1857	 if (inst->src[i].file == UNIFORM) {
1858	    int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
1859	    struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf +
1860						  constant_nr / 8,
1861						  constant_nr % 8);
1862
1863	    inst->src[i].file = FIXED_HW_REG;
1864	    inst->src[i].fixed_hw_reg = brw_reg;
1865	 }
1866      }
1867   }
1868}
1869
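/* Lay out the per-attribute setup data that arrives in the URB after the
 * CURB constants, and patch the FS_OPCODE_LINTERP coefficient source
 * (src[2]) from its URB-relative location to the actual hardware register.
 */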
1870void
1871fs_visitor::assign_urb_setup()
1872{
1873   int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length;
1874   int interp_reg_nr[FRAG_ATTRIB_MAX];
1875
1876   c->prog_data.urb_read_length = 0;
1877
1878   /* Figure out where each of the incoming setup attributes lands. */
1879   for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
1880      interp_reg_nr[i] = -1;
1881
1882      if (i != FRAG_ATTRIB_WPOS &&
1883	  !(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i)))
1884	 continue;
1885
1886      /* Each attribute is 4 setup channels, each of which is half a reg. */
1887      interp_reg_nr[i] = urb_start + c->prog_data.urb_read_length;
1888      c->prog_data.urb_read_length += 2;
1889   }
1890
1891   /* Map the register numbers for FS_OPCODE_LINTERP so that it uses
1892    * the correct setup input.
1893    */
1894   foreach_iter(exec_list_iterator, iter, this->instructions) {
1895      fs_inst *inst = (fs_inst *)iter.get();
1896
1897      if (inst->opcode != FS_OPCODE_LINTERP)
1898	 continue;
1899
1900      assert(inst->src[2].file == FIXED_HW_REG);
1901
1902      int location = inst->src[2].fixed_hw_reg.nr / 2;
1903      assert(interp_reg_nr[location] != -1);
1904      inst->src[2].fixed_hw_reg.nr = (interp_reg_nr[location] +
1905				      (inst->src[2].fixed_hw_reg.nr & 1));
1906   }
1907
1908   this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length;
1909}
1910
1911void
1912fs_visitor::assign_regs()
1913{
1914   int header_size = this->first_non_payload_grf;
1915   int last_grf = 0;
1916
1917   /* FINISHME: trivial assignment of register numbers */
1918   foreach_iter(exec_list_iterator, iter, this->instructions) {
1919      fs_inst *inst = (fs_inst *)iter.get();
1920
1921      trivial_assign_reg(header_size, &inst->dst);
1922      trivial_assign_reg(header_size, &inst->src[0]);
1923      trivial_assign_reg(header_size, &inst->src[1]);
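      /* src[2] needs no assignment here: the only instruction using it at
       * this point appears to be FS_OPCODE_LINTERP, whose third source is
       * already a FIXED_HW_REG (see assign_urb_setup()).
       */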
1924
1925      last_grf = MAX2(last_grf, inst->dst.hw_reg);
1926      last_grf = MAX2(last_grf, inst->src[0].hw_reg);
1927      last_grf = MAX2(last_grf, inst->src[1].hw_reg);
1928   }
1929
1930   this->grf_used = last_grf + 1;
1931}
1932
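/* Convert an fs_reg (after register assignment) into the struct brw_reg
 * form the brw_eu emitter consumes, applying any abs/negate source
 * modifiers.  UNIFORM registers must already have been lowered to
 * FIXED_HW_REG by assign_curb_setup().
 */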
1933static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
1934{
1935   struct brw_reg brw_reg;
1936
1937   switch (reg->file) {
1938   case GRF:
1939   case ARF:
1940   case MRF:
1941      brw_reg = brw_vec8_reg(reg->file,
1942			    reg->hw_reg, 0);
1943      brw_reg = retype(brw_reg, reg->type);
1944      break;
1945   case IMM:
1946      switch (reg->type) {
1947      case BRW_REGISTER_TYPE_F:
1948	 brw_reg = brw_imm_f(reg->imm.f);
1949	 break;
1950      case BRW_REGISTER_TYPE_D:
1951	 brw_reg = brw_imm_d(reg->imm.i);
1952	 break;
1953      case BRW_REGISTER_TYPE_UD:
1954	 brw_reg = brw_imm_ud(reg->imm.u);
1955	 break;
1956      default:
1957	 assert(!"not reached");
1958	 break;
1959      }
1960      break;
1961   case FIXED_HW_REG:
1962      brw_reg = reg->fixed_hw_reg;
1963      break;
1964   case BAD_FILE:
1965      /* Probably unused. */
1966      brw_reg = brw_null_reg();
1967      break;
1968   case UNIFORM:
1969      assert(!"not reached");
1970      brw_reg = brw_null_reg();
1971      break;
1972   }
1973   if (reg->abs)
1974      brw_reg = brw_abs(brw_reg);
1975   if (reg->negate)
1976      brw_reg = negate(brw_reg);
1977
1978   return brw_reg;
1979}
1980
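/* Walk the fs_inst list and emit native instructions through the brw_eu
 * emitter, tracking IF/DO nesting so structured control flow can be
 * patched and recording per-instruction annotations for the debug
 * disassembly in brw_wm_fs_emit().
 */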
1981void
1982fs_visitor::generate_code()
1983{
1984   unsigned int annotation_len = 0;
1985   int last_native_inst = 0;
1986   struct brw_instruction *if_stack[16], *loop_stack[16];
1987   int if_stack_depth = 0, loop_stack_depth = 0;
1988   int if_depth_in_loop[16];
1989
1990   if_depth_in_loop[loop_stack_depth] = 0;
1991
1992   memset(&if_stack, 0, sizeof(if_stack));
1993   foreach_iter(exec_list_iterator, iter, this->instructions) {
1994      fs_inst *inst = (fs_inst *)iter.get();
1995      struct brw_reg src[3], dst;
1996
1997      for (unsigned int i = 0; i < 3; i++) {
1998	 src[i] = brw_reg_from_fs_reg(&inst->src[i]);
1999      }
2000      dst = brw_reg_from_fs_reg(&inst->dst);
2001
2002      brw_set_conditionalmod(p, inst->conditional_mod);
2003      brw_set_predicate_control(p, inst->predicated);
2004
2005      switch (inst->opcode) {
2006      case BRW_OPCODE_MOV:
2007	 brw_MOV(p, dst, src[0]);
2008	 break;
2009      case BRW_OPCODE_ADD:
2010	 brw_ADD(p, dst, src[0], src[1]);
2011	 break;
2012      case BRW_OPCODE_MUL:
2013	 brw_MUL(p, dst, src[0], src[1]);
2014	 break;
2015
2016      case BRW_OPCODE_FRC:
2017	 brw_FRC(p, dst, src[0]);
2018	 break;
2019      case BRW_OPCODE_RNDD:
2020	 brw_RNDD(p, dst, src[0]);
2021	 break;
2022      case BRW_OPCODE_RNDZ:
2023	 brw_RNDZ(p, dst, src[0]);
2024	 break;
2025
2026      case BRW_OPCODE_AND:
2027	 brw_AND(p, dst, src[0], src[1]);
2028	 break;
2029      case BRW_OPCODE_OR:
2030	 brw_OR(p, dst, src[0], src[1]);
2031	 break;
2032      case BRW_OPCODE_XOR:
2033	 brw_XOR(p, dst, src[0], src[1]);
2034	 break;
2035
2036      case BRW_OPCODE_CMP:
2037	 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
2038	 break;
2039      case BRW_OPCODE_SEL:
2040	 brw_SEL(p, dst, src[0], src[1]);
2041	 break;
2042
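      /* Structured control flow: brw_IF() emits an IF with an unresolved
       * jump target, which we push on if_stack so the matching ELSE/ENDIF
       * can patch it later.
       */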
2043      case BRW_OPCODE_IF:
2044	 assert(if_stack_depth < 16);
2045	 if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8);
2046	 if_depth_in_loop[loop_stack_depth]++;
2047	 if_stack_depth++;
2048	 break;
2049      case BRW_OPCODE_ELSE:
2050	 if_stack[if_stack_depth - 1] =
2051	    brw_ELSE(p, if_stack[if_stack_depth - 1]);
2052	 break;
2053      case BRW_OPCODE_ENDIF:
2054	 if_stack_depth--;
2055	 brw_ENDIF(p, if_stack[if_stack_depth]);
2056	 if_depth_in_loop[loop_stack_depth]--;
2057	 break;
2058
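      /* Loops: brw_DO() marks the loop start.  BREAK and CONTINUE are
       * emitted with a zero jump count and patched when the matching WHILE
       * is reached below; if_depth_in_loop gives the number of nested IFs
       * they have to pop on the way out.
       */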
2059      case BRW_OPCODE_DO:
	 assert(loop_stack_depth < 16);
2060	 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
2061	 if_depth_in_loop[loop_stack_depth] = 0;
2062	 break;
2063
2064      case BRW_OPCODE_BREAK:
2065	 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
2066	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2067	 break;
2068      case BRW_OPCODE_CONTINUE:
2069	 brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
2070	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2071	 break;
2072
2073      case BRW_OPCODE_WHILE: {
2074	 struct brw_instruction *inst0, *inst1;
2075	 GLuint br = 1;
2076
2077	 if (intel->gen == 5)
2078	    br = 2;
2079
2080	 assert(loop_stack_depth > 0);
2081	 loop_stack_depth--;
2082	 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
2083	 /* Patch all the BREAK/CONT instructions emitted since the last DO. */
2084	 while (inst0 > loop_stack[loop_stack_depth]) {
2085	    inst0--;
2086	    if (inst0->header.opcode == BRW_OPCODE_BREAK &&
2087		inst0->bits3.if_else.jump_count == 0) {
2088	       inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
2089	    }
2090	    else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
2091		     inst0->bits3.if_else.jump_count == 0) {
2092	       inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
2093	    }
2094	 }
2095      }
2096	 break;
2097
2098      case FS_OPCODE_RCP:
2099      case FS_OPCODE_RSQ:
2100      case FS_OPCODE_SQRT:
2101      case FS_OPCODE_EXP2:
2102      case FS_OPCODE_LOG2:
2103      case FS_OPCODE_POW:
2104      case FS_OPCODE_SIN:
2105      case FS_OPCODE_COS:
2106	 generate_math(inst, dst, src);
2107	 break;
2108      case FS_OPCODE_LINTERP:
2109	 generate_linterp(inst, dst, src);
2110	 break;
2111      case FS_OPCODE_TEX:
2112      case FS_OPCODE_TXB:
2113      case FS_OPCODE_TXL:
2114	 generate_tex(inst, dst, src[0]);
2115	 break;
2116      case FS_OPCODE_DISCARD:
2117	 generate_discard(inst);
2118	 break;
2119      case FS_OPCODE_DDX:
2120	 generate_ddx(inst, dst, src[0]);
2121	 break;
2122      case FS_OPCODE_DDY:
2123	 generate_ddy(inst, dst, src[0]);
2124	 break;
2125      case FS_OPCODE_FB_WRITE:
2126	 generate_fb_write(inst);
2127	 break;
2128      default:
2129	 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
2130	    _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
2131			  brw_opcodes[inst->opcode].name);
2132	 } else {
2133	    _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
2134	 }
2135	 this->fail = true;
2136      }
2137
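      /* Keep the annotation arrays sized to the native instructions emitted
       * so far, and tag each instruction generated for this fs_inst with its
       * annotation string and originating IR for the INTEL_DEBUG=wm output
       * in brw_wm_fs_emit().
       */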
2138      while (annotation_len < p->nr_insn) {
2139	 annotation_len *= 2;
2140	 if (annotation_len < 16)
2141	    annotation_len = 16;
2142
2143	 this->annotation_string = talloc_realloc(this->mem_ctx,
2144						  annotation_string,
2145						  const char *,
2146						  annotation_len);
2147	 this->annotation_ir = talloc_realloc(this->mem_ctx,
2148					      annotation_ir,
2149					      ir_instruction *,
2150					      annotation_len);
2151      }
2152
2153      for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
2154	 this->annotation_string[i] = inst->annotation;
2155	 this->annotation_ir[i] = inst->ir;
2156      }
2157      last_native_inst = p->nr_insn;
2158   }
2159}
2160
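/* Top-level entry point for the new FS backend.  Returns GL_FALSE when
 * there is no GLSL fragment shader or INTEL_NEW_FS is not enabled, so the
 * caller can fall back to the existing WM compiler; otherwise it visits
 * the shader IR, assigns registers, and generates native code into c->func.
 */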
2161GLboolean
2162brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
2163{
2164   struct brw_compile *p = &c->func;
2165   struct intel_context *intel = &brw->intel;
2166   GLcontext *ctx = &intel->ctx;
2167   struct brw_shader *shader = NULL;
2168   struct gl_shader_program *prog = ctx->Shader.CurrentProgram;
2169
2170   if (!prog)
2171      return GL_FALSE;
2172
2173   if (!using_new_fs)
2174      return GL_FALSE;
2175
2176   for (unsigned int i = 0; i < prog->_NumLinkedShaders; i++) {
2177      if (prog->_LinkedShaders[i]->Type == GL_FRAGMENT_SHADER) {
2178	 shader = (struct brw_shader *)prog->_LinkedShaders[i];
2179	 break;
2180      }
2181   }
2182   if (!shader)
2183      return GL_FALSE;
2184
2185   /* We always use 8-wide dispatch, at least for now.  For one thing,
2186    * flow control only works in 8-wide mode.  Also, when we're
2187    * fragment-shader bound we're almost always under register pressure
2188    * as well, so 8-wide keeps us off the performance cliff of spilling
2189    * registers.
2190    */
2191   c->dispatch_width = 8;
2192
2193   if (INTEL_DEBUG & DEBUG_WM) {
2194      printf("GLSL IR for native fragment shader %d:\n", prog->Name);
2195      _mesa_print_ir(shader->ir, NULL);
2196      printf("\n");
2197   }
2198
2199   /* Now the main event: Visit the shader IR and generate our FS IR for it.
2200    */
2201   fs_visitor v(c, shader);
2202
2203   if (0) {
2204      v.emit_dummy_fs();
2205   } else {
2206      v.emit_interpolation_setup();
2207
2208      /* Generate FS IR for main().  (The visitor only descends into
2209       * functions called "main".)
2210       */
2211      foreach_iter(exec_list_iterator, iter, *shader->ir) {
2212	 ir_instruction *ir = (ir_instruction *)iter.get();
2213	 v.base_ir = ir;
2214	 ir->accept(&v);
2215      }
2216
2217      v.emit_fb_writes();
2218      v.assign_curb_setup();
2219      v.assign_urb_setup();
2220      v.assign_regs();
2221   }
2222
2223   v.generate_code();
2224
2225   assert(!v.fail); /* FINISHME: Cleanly fail, tested at link time, etc. */
2226
2227   if (v.fail)
2228      return GL_FALSE;
2229
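   /* Dump the generated native code under INTEL_DEBUG=wm, interleaving each
    * instruction with the IR and annotation recorded in generate_code().
    */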
2230   if (INTEL_DEBUG & DEBUG_WM) {
2231      const char *last_annotation_string = NULL;
2232      ir_instruction *last_annotation_ir = NULL;
2233
2234      printf("Native code for fragment shader %d:\n", prog->Name);
2235      for (unsigned int i = 0; i < p->nr_insn; i++) {
2236	 if (last_annotation_ir != v.annotation_ir[i]) {
2237	    last_annotation_ir = v.annotation_ir[i];
2238	    if (last_annotation_ir) {
2239	       printf("   ");
2240	       last_annotation_ir->print();
2241	       printf("\n");
2242	    }
2243	 }
2244	 if (last_annotation_string != v.annotation_string[i]) {
2245	    last_annotation_string = v.annotation_string[i];
2246	    if (last_annotation_string)
2247	       printf("   %s\n", last_annotation_string);
2248	 }
2249	 brw_disasm(stdout, &p->store[i], intel->gen);
2250      }
2251      printf("\n");
2252   }
2253
2254   c->prog_data.total_grf = v.grf_used;
2255   c->prog_data.total_scratch = 0;
2256
2257   return GL_TRUE;
2258}
2259