brw_fs.cpp revision 21148e1c0a3cf9cf25ded006a3d5ce2b12803ea9
1/*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 *    Eric Anholt <eric@anholt.net>
25 *
26 */
27
28extern "C" {
29
30#include <sys/types.h>
31
32#include "main/macros.h"
33#include "main/shaderobj.h"
34#include "main/uniforms.h"
35#include "program/prog_parameter.h"
36#include "program/prog_print.h"
37#include "program/prog_optimize.h"
38#include "program/sampler.h"
39#include "program/hash_table.h"
40#include "brw_context.h"
41#include "brw_eu.h"
42#include "brw_wm.h"
43#include "talloc.h"
44}
45#include "../glsl/glsl_types.h"
46#include "../glsl/ir_optimization.h"
47#include "../glsl/ir_print_visitor.h"
48
/* Register file classification for an fs_reg.  The first four values alias
 * the hardware register-file encodings so they can be passed straight
 * through to code generation; the remaining values are virtual files that
 * exist only before register allocation.
 */
enum register_file {
   ARF = BRW_ARCHITECTURE_REGISTER_FILE,
   GRF = BRW_GENERAL_REGISTER_FILE,
   MRF = BRW_MESSAGE_REGISTER_FILE,
   IMM = BRW_IMMEDIATE_VALUE,
   FIXED_HW_REG, /* a struct brw_reg */
   UNIFORM, /* prog_data->params[hw_reg] */
   BAD_FILE
};
58
/* Backend-specific opcodes.  These share an instruction's opcode field with
 * the hardware BRW_OPCODE_* values, so they start at 256 to stay clear of
 * the hardware encoding space.  Most are expanded into real instructions in
 * generate_code().
 */
enum fs_opcodes {
   FS_OPCODE_FB_WRITE = 256,
   FS_OPCODE_RCP,
   FS_OPCODE_RSQ,
   FS_OPCODE_SQRT,
   FS_OPCODE_EXP2,
   FS_OPCODE_LOG2,
   FS_OPCODE_POW,
   FS_OPCODE_SIN,
   FS_OPCODE_COS,
   FS_OPCODE_DDX,
   FS_OPCODE_DDY,
   FS_OPCODE_LINTERP,
   FS_OPCODE_TEX,
   FS_OPCODE_TXB,
   FS_OPCODE_TXL,
   FS_OPCODE_DISCARD,
};
77
/* Whether to use this backend instead of the Mesa IR path.  -1 = not yet
 * checked; latched from the INTEL_NEW_FS environment variable on the first
 * call to brw_link_shader().
 */
static int using_new_fs = -1;
static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg);
80
81struct gl_shader *
82brw_new_shader(GLcontext *ctx, GLuint name, GLuint type)
83{
84   struct brw_shader *shader;
85
86   shader = talloc_zero(NULL, struct brw_shader);
87   if (shader) {
88      shader->base.Type = type;
89      shader->base.Name = name;
90      _mesa_init_shader(ctx, &shader->base);
91   }
92
93   return &shader->base;
94}
95
96struct gl_shader_program *
97brw_new_shader_program(GLcontext *ctx, GLuint name)
98{
99   struct brw_shader_program *prog;
100   prog = talloc_zero(NULL, struct brw_shader_program);
101   if (prog) {
102      prog->base.Name = name;
103      _mesa_init_shader_program(ctx, &prog->base);
104   }
105   return &prog->base;
106}
107
108GLboolean
109brw_compile_shader(GLcontext *ctx, struct gl_shader *shader)
110{
111   if (!_mesa_ir_compile_shader(ctx, shader))
112      return GL_FALSE;
113
114   return GL_TRUE;
115}
116
/**
 * Links the program, first running the new-FS lowering passes on each
 * linked fragment shader when INTEL_NEW_FS is set.
 *
 * The linked IR is cloned into shader->ir and lowered (matrix ops, mod,
 * division, subtraction, exp/log), then channel-expression splitting,
 * vector splitting, jump lowering and common optimizations are iterated
 * to a fixed point before handing off to the shared Mesa linker.
 */
GLboolean
brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
{
   /* Latch the environment check once, on first link. */
   if (using_new_fs == -1)
      using_new_fs = getenv("INTEL_NEW_FS") != NULL;

   for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) {
      struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[i];

      if (using_new_fs && shader->base.Type == GL_FRAGMENT_SHADER) {
	 void *mem_ctx = talloc_new(NULL);
	 bool progress;

	 /* Replace any clone left over from a previous link. */
	 if (shader->ir)
	    talloc_free(shader->ir);
	 shader->ir = new(shader) exec_list;
	 clone_ir_list(mem_ctx, shader->ir, shader->base.ir);

	 /* One-shot lowering passes that reduce operations to forms this
	  * scalar backend can emit.
	  */
	 do_mat_op_to_vec(shader->ir);
	 do_mod_to_fract(shader->ir);
	 do_div_to_mul_rcp(shader->ir);
	 do_sub_to_add_neg(shader->ir);
	 do_explog_to_explog2(shader->ir);

	 /* Iterate splitting + optimization until nothing changes. */
	 do {
	    progress = false;

	    brw_do_channel_expressions(shader->ir);
	    brw_do_vector_splitting(shader->ir);

	    progress = do_lower_jumps(shader->ir, true, true,
				      true, /* main return */
				      false, /* continue */
				      false /* loops */
				      ) || progress;

	    progress = do_common_optimization(shader->ir, true, 32) || progress;

	    progress = lower_noise(shader->ir) || progress;
	    progress =
	       lower_variable_index_to_cond_assign(shader->ir,
						   GL_TRUE, /* input */
						   GL_TRUE, /* output */
						   GL_TRUE, /* temp */
						   GL_TRUE /* uniform */
						   ) || progress;
	 } while (progress);

	 validate_ir_tree(shader->ir);

	 /* Move surviving IR nodes onto the shader's talloc context before
	  * freeing the temporary context used during cloning/optimization.
	  */
	 reparent_ir(shader->ir, shader->ir);
	 talloc_free(mem_ctx);
      }
   }

   if (!_mesa_ir_link_shader(ctx, prog))
      return GL_FALSE;

   return GL_TRUE;
}
177
178static int
179type_size(const struct glsl_type *type)
180{
181   unsigned int size, i;
182
183   switch (type->base_type) {
184   case GLSL_TYPE_UINT:
185   case GLSL_TYPE_INT:
186   case GLSL_TYPE_FLOAT:
187   case GLSL_TYPE_BOOL:
188      return type->components();
189   case GLSL_TYPE_ARRAY:
190      return type_size(type->fields.array) * type->length;
191   case GLSL_TYPE_STRUCT:
192      size = 0;
193      for (i = 0; i < type->length; i++) {
194	 size += type_size(type->fields.structure[i].type);
195      }
196      return size;
197   case GLSL_TYPE_SAMPLER:
198      /* Samplers take up no register space, since they're baked in at
199       * link time.
200       */
201      return 0;
202   default:
203      assert(!"not reached");
204      return 0;
205   }
206}
207
/**
 * A source or destination operand of an fs_inst.
 *
 * Before register allocation most registers live in the virtual GRF file,
 * identified by (reg, reg_offset); hw_reg is assigned later.  The IMM,
 * UNIFORM and FIXED_HW_REG files carry their payload in imm, hw_reg and
 * fixed_hw_reg respectively.
 */
class fs_reg {
public:
   /* Callers of this talloc-based new need not call delete. It's
    * easier to just talloc_free 'ctx' (or any of its ancestors). */
   static void* operator new(size_t size, void *ctx)
   {
      void *node;

      node = talloc_size(ctx, size);
      assert(node != NULL);

      return node;
   }

   /* Common field setup shared by all constructors.  Note that file and
    * type are NOT set here; every constructor assigns them itself.
    */
   void init()
   {
      this->reg = 0;
      this->reg_offset = 0;
      this->negate = 0;
      this->abs = 0;
      this->hw_reg = -1;
   }

   /** Generic unset register constructor. */
   fs_reg()
   {
      init();
      this->file = BAD_FILE;
   }

   /** Immediate value constructor. */
   fs_reg(float f)
   {
      init();
      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_F;
      this->imm.f = f;
   }

   /** Immediate value constructor. */
   fs_reg(int32_t i)
   {
      init();
      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_D;
      this->imm.i = i;
   }

   /** Immediate value constructor. */
   fs_reg(uint32_t u)
   {
      init();
      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_UD;
      this->imm.u = u;
   }

   /** Fixed brw_reg Immediate value constructor. */
   fs_reg(struct brw_reg fixed_hw_reg)
   {
      init();
      this->file = FIXED_HW_REG;
      this->fixed_hw_reg = fixed_hw_reg;
      this->type = fixed_hw_reg.type;
   }

   /* Defined out of line, below. */
   fs_reg(enum register_file file, int hw_reg);
   fs_reg(class fs_visitor *v, const struct glsl_type *type);

   /** Register file: ARF, GRF, MRF, IMM. */
   enum register_file file;
   /** virtual register number.  0 = fixed hw reg */
   int reg;
   /** Offset within the virtual register. */
   int reg_offset;
   /** HW register number.  Generally unset until register allocation. */
   int hw_reg;
   /** Register type.  BRW_REGISTER_TYPE_* */
   int type;
   bool negate;
   bool abs;
   /** Payload for file == FIXED_HW_REG. */
   struct brw_reg fixed_hw_reg;

   /** Value for file == BRW_IMMMEDIATE_FILE */
   union {
      int32_t i;
      uint32_t u;
      float f;
   } imm;
};
298
/* Shared convenience registers: an unset (BAD_FILE) register, and the
 * hardware null register for instructions whose result is discarded.
 */
static const fs_reg reg_undef;
static const fs_reg reg_null(ARF, BRW_ARF_NULL);
301
/**
 * A single instruction in the backend's pre-register-allocation IR:
 * an opcode (BRW_OPCODE_* or FS_OPCODE_*), a destination, and up to three
 * sources, plus SEND-message and annotation metadata.
 *
 * NOTE(review): init() does not set mlen, ir or annotation.  Instances
 * allocated through the talloc operator new below are zero-filled, but
 * fs_inst values constructed on the stack (as passed to emit()) rely on
 * the relevant code paths setting these before use — confirm.
 */
class fs_inst : public exec_node {
public:
   /* Callers of this talloc-based new need not call delete. It's
    * easier to just talloc_free 'ctx' (or any of its ancestors). */
   static void* operator new(size_t size, void *ctx)
   {
      void *node;

      node = talloc_zero_size(ctx, size);
      assert(node != NULL);

      return node;
   }

   /* Common field setup shared by all constructors. */
   void init()
   {
      this->opcode = BRW_OPCODE_NOP;
      this->saturate = false;
      this->conditional_mod = BRW_CONDITIONAL_NONE;
      this->predicated = false;
      this->sampler = 0;
      this->target = 0;
      this->eot = false;
      this->shadow_compare = false;
   }

   fs_inst()
   {
      init();
   }

   fs_inst(int opcode)
   {
      init();
      this->opcode = opcode;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
      this->src[1] = src1;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
      this->src[1] = src1;
      this->src[2] = src2;
   }

   int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
   fs_reg dst;
   fs_reg src[3];
   bool saturate;
   bool predicated;
   int conditional_mod; /**< BRW_CONDITIONAL_* */

   int mlen; /**< SEND message length */
   int sampler;
   int target; /**< MRT target. */
   bool eot;
   bool shadow_compare;

   /** @{
    * Annotation for the generated IR.  One of the two can be set.
    */
   ir_instruction *ir;
   const char *annotation;
   /** @} */
};
386
/**
 * IR-to-fs_inst translator for the fragment shader backend.
 *
 * Walks the lowered GLSL IR of one fragment shader, appending fs_inst
 * nodes to `instructions`, then (via the assign_*/generate_* methods)
 * allocates registers and emits the final hardware code through the
 * brw_compile state in `p`.
 */
class fs_visitor : public ir_visitor
{
public:

   /* Wires up the compile context (brw/intel/GL pointers are all derived
    * from the brw_wm_compile) and zeroes the per-shader tracking state.
    */
   fs_visitor(struct brw_wm_compile *c, struct brw_shader *shader)
   {
      this->c = c;
      this->p = &c->func;
      this->brw = p->brw;
      this->fp = brw->fragment_program;
      this->intel = &brw->intel;
      this->ctx = &intel->ctx;
      this->mem_ctx = talloc_new(NULL);
      this->shader = shader;
      this->fail = false;
      this->variable_ht = hash_table_ctor(0,
					  hash_table_pointer_hash,
					  hash_table_pointer_compare);

      this->frag_color = NULL;
      this->frag_data = NULL;
      this->frag_depth = NULL;
      this->first_non_payload_grf = 0;

      this->current_annotation = NULL;
      this->annotation_string = NULL;
      this->annotation_ir = NULL;
      this->base_ir = NULL;

      this->virtual_grf_sizes = NULL;
      /* Virtual GRF 0 is reserved; allocation starts at 1. */
      this->virtual_grf_next = 1;
      this->virtual_grf_array_size = 0;
   }
   ~fs_visitor()
   {
      talloc_free(this->mem_ctx);
      hash_table_dtor(this->variable_ht);
   }

   fs_reg *variable_storage(ir_variable *var);
   int virtual_grf_alloc(int size);

   /* ir_visitor callbacks: each translates one IR node kind, leaving its
    * value (if any) in this->result.
    */
   void visit(ir_variable *ir);
   void visit(ir_assignment *ir);
   void visit(ir_dereference_variable *ir);
   void visit(ir_dereference_record *ir);
   void visit(ir_dereference_array *ir);
   void visit(ir_expression *ir);
   void visit(ir_texture *ir);
   void visit(ir_if *ir);
   void visit(ir_constant *ir);
   void visit(ir_swizzle *ir);
   void visit(ir_return *ir);
   void visit(ir_loop *ir);
   void visit(ir_loop_jump *ir);
   void visit(ir_discard *ir);
   void visit(ir_call *ir);
   void visit(ir_function *ir);
   void visit(ir_function_signature *ir);

   fs_inst *emit(fs_inst inst);
   /* Post-visit phases: payload/register assignment and code emission. */
   void assign_curb_setup();
   void assign_urb_setup();
   void assign_regs();
   void generate_code();
   void generate_fb_write(fs_inst *inst);
   void generate_linterp(fs_inst *inst, struct brw_reg dst,
			 struct brw_reg *src);
   void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
   void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src);
   void generate_discard(fs_inst *inst, struct brw_reg temp);
   void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
   void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src);

   void emit_dummy_fs();
   void emit_fragcoord_interpolation(ir_variable *ir);
   void emit_general_interpolation(ir_variable *ir);
   void emit_interpolation_setup();
   void emit_fb_writes();

   struct brw_reg interp_reg(int location, int channel);
   int setup_uniform_values(int loc, const glsl_type *type);
   void setup_builtin_uniform_values(ir_variable *ir);

   struct brw_context *brw;
   const struct gl_fragment_program *fp;
   struct intel_context *intel;
   GLcontext *ctx;
   struct brw_wm_compile *c;
   struct brw_compile *p;
   struct brw_shader *shader;
   void *mem_ctx;
   /** List of fs_inst generated so far. */
   exec_list instructions;

   /** @{ virtual GRF bookkeeping (size per register, next free number). */
   int *virtual_grf_sizes;
   int virtual_grf_next;
   int virtual_grf_array_size;
   /** @} */

   /** Map from ir_variable* to its fs_reg storage. */
   struct hash_table *variable_ht;
   ir_variable *frag_color, *frag_data, *frag_depth;
   int first_non_payload_grf;

   /** @{ debug annotation info */
   const char *current_annotation;
   ir_instruction *base_ir;
   const char **annotation_string;
   ir_instruction **annotation_ir;
   /** @} */

   /** Set to true when translation hits something unsupported. */
   bool fail;

   /* Result of last visit() method. */
   fs_reg result;

   /** @{ interpolation setup outputs consumed by later emit_* methods. */
   fs_reg pixel_x;
   fs_reg pixel_y;
   fs_reg wpos_w;
   fs_reg pixel_w;
   fs_reg delta_x;
   fs_reg delta_y;
   /** @} */

   int grf_used;

};
511
512int
513fs_visitor::virtual_grf_alloc(int size)
514{
515   if (virtual_grf_array_size <= virtual_grf_next) {
516      if (virtual_grf_array_size == 0)
517	 virtual_grf_array_size = 16;
518      else
519	 virtual_grf_array_size *= 2;
520      virtual_grf_sizes = talloc_realloc(mem_ctx, virtual_grf_sizes,
521					 int, virtual_grf_array_size);
522
523      /* This slot is always unused. */
524      virtual_grf_sizes[0] = 0;
525   }
526   virtual_grf_sizes[virtual_grf_next] = size;
527   return virtual_grf_next++;
528}
529
530/** Fixed HW reg constructor. */
531fs_reg::fs_reg(enum register_file file, int hw_reg)
532{
533   init();
534   this->file = file;
535   this->hw_reg = hw_reg;
536   this->type = BRW_REGISTER_TYPE_F;
537}
538
539int
540brw_type_for_base_type(const struct glsl_type *type)
541{
542   switch (type->base_type) {
543   case GLSL_TYPE_FLOAT:
544      return BRW_REGISTER_TYPE_F;
545   case GLSL_TYPE_INT:
546   case GLSL_TYPE_BOOL:
547      return BRW_REGISTER_TYPE_D;
548   case GLSL_TYPE_UINT:
549      return BRW_REGISTER_TYPE_UD;
550   case GLSL_TYPE_ARRAY:
551   case GLSL_TYPE_STRUCT:
552      /* These should be overridden with the type of the member when
553       * dereferenced into.  BRW_REGISTER_TYPE_UD seems like a likely
554       * way to trip up if we don't.
555       */
556      return BRW_REGISTER_TYPE_UD;
557   default:
558      assert(!"not reached");
559      return BRW_REGISTER_TYPE_F;
560   }
561}
562
563/** Automatic reg constructor. */
564fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type)
565{
566   init();
567
568   this->file = GRF;
569   this->reg = v->virtual_grf_alloc(type_size(type));
570   this->reg_offset = 0;
571   this->type = brw_type_for_base_type(type);
572}
573
574fs_reg *
575fs_visitor::variable_storage(ir_variable *var)
576{
577   return (fs_reg *)hash_table_find(this->variable_ht, var);
578}
579
/* Our support for uniforms is piggy-backed on the struct
 * gl_fragment_program, because that's where the values actually
 * get stored, rather than in some global gl_shader_program uniform
 * store.
 */
/**
 * Recursively appends pointers to the uniform's parameter values to
 * c->prog_data.param, walking matrices column-by-column and structs/arrays
 * member-by-member.
 *
 * \param loc  index into fp->Base.Parameters for this (sub)value.
 * \return the number of parameter slots consumed, so callers can advance
 *         loc for the next member.
 */
int
fs_visitor::setup_uniform_values(int loc, const glsl_type *type)
{
   unsigned int offset = 0;
   float *vec_values;

   if (type->is_matrix()) {
      /* A matrix is handled as matrix_columns consecutive column vectors. */
      const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
							type->vector_elements,
							1);

      for (unsigned int i = 0; i < type->matrix_columns; i++) {
	 offset += setup_uniform_values(loc + offset, column);
      }

      return offset;
   }

   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      /* Store pointers into the parameter list; the values are read at
       * draw time, so updates to the uniform are picked up automatically.
       */
      vec_values = fp->Base.Parameters->ParameterValues[loc];
      for (unsigned int i = 0; i < type->vector_elements; i++) {
	 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i];
      }
      return 1;

   case GLSL_TYPE_STRUCT:
      for (unsigned int i = 0; i < type->length; i++) {
	 offset += setup_uniform_values(loc + offset,
					type->fields.structure[i].type);
      }
      return offset;

   case GLSL_TYPE_ARRAY:
      for (unsigned int i = 0; i < type->length; i++) {
	 offset += setup_uniform_values(loc + offset, type->fields.array);
      }
      return offset;

   case GLSL_TYPE_SAMPLER:
      /* The sampler takes up a slot, but we don't use any values from it. */
      return 1;

   default:
      assert(!"not reached");
      return 0;
   }
}
636
637
638/* Our support for builtin uniforms is even scarier than non-builtin.
639 * It sits on top of the PROG_STATE_VAR parameters that are
640 * automatically updated from GL context state.
641 */
642void
643fs_visitor::setup_builtin_uniform_values(ir_variable *ir)
644{
645   const struct gl_builtin_uniform_desc *statevar = NULL;
646
647   for (unsigned int i = 0; _mesa_builtin_uniform_desc[i].name; i++) {
648      statevar = &_mesa_builtin_uniform_desc[i];
649      if (strcmp(ir->name, _mesa_builtin_uniform_desc[i].name) == 0)
650	 break;
651   }
652
653   if (!statevar->name) {
654      this->fail = true;
655      printf("Failed to find builtin uniform `%s'\n", ir->name);
656      return;
657   }
658
659   int array_count;
660   if (ir->type->is_array()) {
661      array_count = ir->type->length;
662   } else {
663      array_count = 1;
664   }
665
666   for (int a = 0; a < array_count; a++) {
667      for (unsigned int i = 0; i < statevar->num_elements; i++) {
668	 struct gl_builtin_uniform_element *element = &statevar->elements[i];
669	 int tokens[STATE_LENGTH];
670
671	 memcpy(tokens, element->tokens, sizeof(element->tokens));
672	 if (ir->type->is_array()) {
673	    tokens[1] = a;
674	 }
675
676	 /* This state reference has already been setup by ir_to_mesa,
677	  * but we'll get the same index back here.
678	  */
679	 int index = _mesa_add_state_reference(this->fp->Base.Parameters,
680					       (gl_state_index *)tokens);
681	 float *vec_values = this->fp->Base.Parameters->ParameterValues[index];
682
683	 /* Add each of the unique swizzles of the element as a
684	  * parameter.  This'll end up matching the expected layout of
685	  * the array/matrix/structure we're trying to fill in.
686	  */
687	 int last_swiz = -1;
688	 for (unsigned int i = 0; i < 4; i++) {
689	    int this_swiz = GET_SWZ(element->swizzle, i);
690	    if (this_swiz == last_swiz)
691	       break;
692	    last_swiz = this_swiz;
693
694	    c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i];
695	 }
696      }
697   }
698}
699
700void
701fs_visitor::emit_fragcoord_interpolation(ir_variable *ir)
702{
703   fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
704   fs_reg wpos = *reg;
705   fs_reg neg_y = this->pixel_y;
706   neg_y.negate = true;
707
708   /* gl_FragCoord.x */
709   if (ir->pixel_center_integer) {
710      emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x));
711   } else {
712      emit(fs_inst(BRW_OPCODE_ADD, wpos, this->pixel_x, fs_reg(0.5f)));
713   }
714   wpos.reg_offset++;
715
716   /* gl_FragCoord.y */
717   if (ir->origin_upper_left && ir->pixel_center_integer) {
718      emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y));
719   } else {
720      fs_reg pixel_y = this->pixel_y;
721      float offset = (ir->pixel_center_integer ? 0.0 : 0.5);
722
723      if (!ir->origin_upper_left) {
724	 pixel_y.negate = true;
725	 offset += c->key.drawable_height - 1.0;
726      }
727
728      emit(fs_inst(BRW_OPCODE_ADD, wpos, pixel_y, fs_reg(offset)));
729   }
730   wpos.reg_offset++;
731
732   /* gl_FragCoord.z */
733   emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
734		interp_reg(FRAG_ATTRIB_WPOS, 2)));
735   wpos.reg_offset++;
736
737   /* gl_FragCoord.w: Already set up in emit_interpolation */
738   emit(fs_inst(BRW_OPCODE_MOV, wpos, this->wpos_w));
739
740   hash_table_insert(this->variable_ht, reg, ir);
741}
742
743
/**
 * Emits perspective-correct linear interpolation for an ordinary varying
 * input, walking arrays and matrix columns one vec-slot at a time, and
 * registers the result storage in variable_ht.
 */
void
fs_visitor::emit_general_interpolation(ir_variable *ir)
{
   fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
   /* Interpolation is always in floating point regs. */
   reg->type = BRW_REGISTER_TYPE_F;
   fs_reg attr = *reg;

   unsigned int array_elements;
   const glsl_type *type;

   if (ir->type->is_array()) {
      array_elements = ir->type->length;
      if (array_elements == 0) {
	 this->fail = true;
      }
      type = ir->type->fields.array;
   } else {
      array_elements = 1;
      type = ir->type;
   }

   int location = ir->location;
   for (unsigned int i = 0; i < array_elements; i++) {
      for (unsigned int j = 0; j < type->matrix_columns; j++) {
	 if (!(fp->Base.InputsRead & BITFIELD64_BIT(location))) {
	    /* If there's no incoming setup data for this slot, don't
	     * emit interpolation for it (since it's not used, and
	     * we'd fall over later trying to find the setup data.
	     */
	    attr.reg_offset += type->vector_elements;
	    continue;
	 }

	 /* First pass: interpolate each channel of this vec slot. */
	 for (unsigned int c = 0; c < type->vector_elements; c++) {
	    struct brw_reg interp = interp_reg(location, c);
	    emit(fs_inst(FS_OPCODE_LINTERP,
			 attr,
			 this->delta_x,
			 this->delta_y,
			 fs_reg(interp)));
	    attr.reg_offset++;
	 }
	 /* Rewind so the second pass revisits the same channels. */
	 attr.reg_offset -= type->vector_elements;

	 /* Second pass: multiply by pixel W for perspective correction. */
	 for (unsigned int c = 0; c < type->vector_elements; c++) {
	    emit(fs_inst(BRW_OPCODE_MUL,
			 attr,
			 attr,
			 this->pixel_w));
	    attr.reg_offset++;
	 }
	 location++;
      }
   }

   hash_table_insert(this->variable_ht, reg, ir);
}
802
/**
 * Allocates and registers storage for a variable declaration, emitting
 * interpolation for varying inputs and parameter setup for uniforms.
 *
 * Special-cases the builtin fragment outputs (remembered for
 * emit_fb_writes), gl_FragCoord, and gl_FrontFacing.
 */
void
fs_visitor::visit(ir_variable *ir)
{
   fs_reg *reg = NULL;

   if (variable_storage(ir))
      return;

   /* Remember the builtin output variables for emit_fb_writes(). */
   if (strcmp(ir->name, "gl_FragColor") == 0) {
      this->frag_color = ir;
   } else if (strcmp(ir->name, "gl_FragData") == 0) {
      this->frag_data = ir;
   } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
      this->frag_depth = ir;
   }

   if (ir->mode == ir_var_in) {
      if (!strcmp(ir->name, "gl_FragCoord")) {
	 emit_fragcoord_interpolation(ir);
	 return;
      } else if (!strcmp(ir->name, "gl_FrontFacing")) {
	 reg = new(this->mem_ctx) fs_reg(this, ir->type);
	 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
	 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
	  * us front face
	  */
	 fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP,
				      *reg,
				      fs_reg(r1_6ud),
				      fs_reg(1u << 31)));
	 inst->conditional_mod = BRW_CONDITIONAL_L;
	 /* Normalize the CMP's all-ones true value to a 0/1 boolean. */
	 emit(fs_inst(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u)));
      } else {
	 emit_general_interpolation(ir);
	 return;
      }
   }

   if (ir->mode == ir_var_uniform) {
      /* The uniform's values start at the current end of the param list. */
      int param_index = c->prog_data.nr_params;

      if (!strncmp(ir->name, "gl_", 3)) {
	 setup_builtin_uniform_values(ir);
      } else {
	 setup_uniform_values(ir->location, ir->type);
      }

      reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index);
   }

   /* Plain temporaries/locals just get a fresh virtual GRF. */
   if (!reg)
      reg = new(this->mem_ctx) fs_reg(this, ir->type);

   hash_table_insert(this->variable_ht, reg, ir);
}
858
859void
860fs_visitor::visit(ir_dereference_variable *ir)
861{
862   fs_reg *reg = variable_storage(ir->var);
863   this->result = *reg;
864}
865
866void
867fs_visitor::visit(ir_dereference_record *ir)
868{
869   const glsl_type *struct_type = ir->record->type;
870
871   ir->record->accept(this);
872
873   unsigned int offset = 0;
874   for (unsigned int i = 0; i < struct_type->length; i++) {
875      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
876	 break;
877      offset += type_size(struct_type->fields.structure[i].type);
878   }
879   this->result.reg_offset += offset;
880   this->result.type = brw_type_for_base_type(ir->type);
881}
882
/**
 * Advances this->result by a constant array index times the element size.
 * Non-constant indices are not handled yet (they should have been lowered
 * by lower_variable_index_to_cond_assign in brw_link_shader).
 */
void
fs_visitor::visit(ir_dereference_array *ir)
{
   ir_constant *index;
   int element_size;

   ir->array->accept(this);
   index = ir->array_index->as_constant();

   element_size = type_size(ir->type);
   this->result.type = brw_type_for_base_type(ir->type);

   if (index) {
      assert(this->result.file == UNIFORM ||
	     (this->result.file == GRF &&
	      this->result.reg != 0));
      this->result.reg_offset += index->value.i[0] * element_size;
   } else {
      assert(!"FINISHME: non-constant array element");
   }
}
904
905void
906fs_visitor::visit(ir_expression *ir)
907{
908   unsigned int operand;
909   fs_reg op[2], temp;
910   fs_reg result;
911   fs_inst *inst;
912
913   for (operand = 0; operand < ir->get_num_operands(); operand++) {
914      ir->operands[operand]->accept(this);
915      if (this->result.file == BAD_FILE) {
916	 ir_print_visitor v;
917	 printf("Failed to get tree for expression operand:\n");
918	 ir->operands[operand]->accept(&v);
919	 this->fail = true;
920      }
921      op[operand] = this->result;
922
923      /* Matrix expression operands should have been broken down to vector
924       * operations already.
925       */
926      assert(!ir->operands[operand]->type->is_matrix());
927      /* And then those vector operands should have been broken down to scalar.
928       */
929      assert(!ir->operands[operand]->type->is_vector());
930   }
931
932   /* Storage for our result.  If our result goes into an assignment, it will
933    * just get copy-propagated out, so no worries.
934    */
935   this->result = fs_reg(this, ir->type);
936
937   switch (ir->operation) {
938   case ir_unop_logic_not:
939      emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], fs_reg(-1)));
940      break;
941   case ir_unop_neg:
942      op[0].negate = !op[0].negate;
943      this->result = op[0];
944      break;
945   case ir_unop_abs:
946      op[0].abs = true;
947      this->result = op[0];
948      break;
949   case ir_unop_sign:
950      temp = fs_reg(this, ir->type);
951
952      emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(0.0f)));
953
954      inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f)));
955      inst->conditional_mod = BRW_CONDITIONAL_G;
956      inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(1.0f)));
957      inst->predicated = true;
958
959      inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f)));
960      inst->conditional_mod = BRW_CONDITIONAL_L;
961      inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f)));
962      inst->predicated = true;
963
964      break;
965   case ir_unop_rcp:
966      emit(fs_inst(FS_OPCODE_RCP, this->result, op[0]));
967      break;
968
969   case ir_unop_exp2:
970      emit(fs_inst(FS_OPCODE_EXP2, this->result, op[0]));
971      break;
972   case ir_unop_log2:
973      emit(fs_inst(FS_OPCODE_LOG2, this->result, op[0]));
974      break;
975   case ir_unop_exp:
976   case ir_unop_log:
977      assert(!"not reached: should be handled by ir_explog_to_explog2");
978      break;
979   case ir_unop_sin:
980      emit(fs_inst(FS_OPCODE_SIN, this->result, op[0]));
981      break;
982   case ir_unop_cos:
983      emit(fs_inst(FS_OPCODE_COS, this->result, op[0]));
984      break;
985
986   case ir_unop_dFdx:
987      emit(fs_inst(FS_OPCODE_DDX, this->result, op[0]));
988      break;
989   case ir_unop_dFdy:
990      emit(fs_inst(FS_OPCODE_DDY, this->result, op[0]));
991      break;
992
993   case ir_binop_add:
994      emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1]));
995      break;
996   case ir_binop_sub:
997      assert(!"not reached: should be handled by ir_sub_to_add_neg");
998      break;
999
1000   case ir_binop_mul:
1001      emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1]));
1002      break;
1003   case ir_binop_div:
1004      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
1005      break;
1006   case ir_binop_mod:
1007      assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
1008      break;
1009
1010   case ir_binop_less:
1011      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
1012      inst->conditional_mod = BRW_CONDITIONAL_L;
1013      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
1014      break;
1015   case ir_binop_greater:
1016      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
1017      inst->conditional_mod = BRW_CONDITIONAL_G;
1018      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
1019      break;
1020   case ir_binop_lequal:
1021      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
1022      inst->conditional_mod = BRW_CONDITIONAL_LE;
1023      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
1024      break;
1025   case ir_binop_gequal:
1026      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
1027      inst->conditional_mod = BRW_CONDITIONAL_GE;
1028      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
1029      break;
1030   case ir_binop_equal:
1031   case ir_binop_all_equal: /* same as nequal for scalars */
1032      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
1033      inst->conditional_mod = BRW_CONDITIONAL_Z;
1034      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
1035      break;
1036   case ir_binop_nequal:
1037   case ir_binop_any_nequal: /* same as nequal for scalars */
1038      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
1039      inst->conditional_mod = BRW_CONDITIONAL_NZ;
1040      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
1041      break;
1042
1043   case ir_binop_logic_xor:
1044      emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1]));
1045      break;
1046
1047   case ir_binop_logic_or:
1048      emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1]));
1049      break;
1050
1051   case ir_binop_logic_and:
1052      emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1]));
1053      break;
1054
1055   case ir_binop_dot:
1056   case ir_binop_cross:
1057   case ir_unop_any:
1058      assert(!"not reached: should be handled by brw_fs_channel_expressions");
1059      break;
1060
1061   case ir_unop_noise:
1062      assert(!"not reached: should be handled by lower_noise");
1063      break;
1064
1065   case ir_unop_sqrt:
1066      emit(fs_inst(FS_OPCODE_SQRT, this->result, op[0]));
1067      break;
1068
1069   case ir_unop_rsq:
1070      emit(fs_inst(FS_OPCODE_RSQ, this->result, op[0]));
1071      break;
1072
1073   case ir_unop_i2f:
1074   case ir_unop_b2f:
1075   case ir_unop_b2i:
1076      emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
1077      break;
1078   case ir_unop_f2i:
1079      emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
1080      break;
1081   case ir_unop_f2b:
1082   case ir_unop_i2b:
1083      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f)));
1084      inst->conditional_mod = BRW_CONDITIONAL_NZ;
1085
1086   case ir_unop_trunc:
1087      emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
1088      break;
1089   case ir_unop_ceil:
1090      op[0].negate = ~op[0].negate;
1091      inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
1092      this->result.negate = true;
1093      break;
1094   case ir_unop_floor:
1095      inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
1096      break;
1097   case ir_unop_fract:
1098      inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0]));
1099      break;
1100
1101   case ir_binop_min:
1102      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
1103      inst->conditional_mod = BRW_CONDITIONAL_L;
1104
1105      inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
1106      inst->predicated = true;
1107      break;
1108   case ir_binop_max:
1109      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
1110      inst->conditional_mod = BRW_CONDITIONAL_G;
1111
1112      inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
1113      inst->predicated = true;
1114      break;
1115
1116   case ir_binop_pow:
1117      inst = emit(fs_inst(FS_OPCODE_POW, this->result, op[0], op[1]));
1118      break;
1119
1120   case ir_unop_bit_not:
1121   case ir_unop_u2f:
1122   case ir_binop_lshift:
1123   case ir_binop_rshift:
1124   case ir_binop_bit_and:
1125   case ir_binop_bit_xor:
1126   case ir_binop_bit_or:
1127      assert(!"GLSL 1.30 features unsupported");
1128      break;
1129   }
1130}
1131
/**
 * Emits per-component MOV instructions implementing an IR assignment,
 * predicated on ir->condition when present.
 */
void
fs_visitor::visit(ir_assignment *ir)
{
   struct fs_reg l, r;
   int i;
   int write_mask;
   fs_inst *inst;

   /* FINISHME: arrays on the lhs */
   ir->lhs->accept(this);
   l = this->result;

   ir->rhs->accept(this);
   r = this->result;

   /* FINISHME: This should really set to the correct maximal writemask for each
    * FINISHME: component written (in the loops below).  This case can only
    * FINISHME: occur for matrices, arrays, and structures.
    */
   if (ir->write_mask == 0) {
      assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
      write_mask = WRITEMASK_XYZW;
   } else {
      assert(ir->lhs->type->is_vector() || ir->lhs->type->is_scalar());
      write_mask = ir->write_mask;
   }

   assert(l.file != BAD_FILE);
   assert(r.file != BAD_FILE);

   if (ir->condition) {
      /* Get the condition bool into the predicate. */
      ir->condition->accept(this);
      inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, this->result, fs_reg(0)));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   }

   for (i = 0; i < type_size(ir->lhs->type); i++) {
      /* Only the first four components are writemaskable; components
       * beyond that (matrices/arrays/structs) are always written.
       */
      if (i >= 4 || (write_mask & (1 << i))) {
	 inst = emit(fs_inst(BRW_OPCODE_MOV, l, r));
	 if (ir->condition)
	    inst->predicated = true;
	 r.reg_offset++;
      }
      l.reg_offset++;
   }
}
1179
/**
 * Sets up the sampler message payload in MRFs starting at base_mrf and
 * emits the FS_OPCODE_TEX/TXB/TXL instruction for a texture operation.
 */
void
fs_visitor::visit(ir_texture *ir)
{
   int base_mrf = 2;
   fs_inst *inst = NULL;
   unsigned int mlen = 0;

   ir->coordinate->accept(this);
   fs_reg coordinate = this->result;

   if (ir->projector) {
      /* Apply the projective divide by multiplying with RCP(projector). */
      fs_reg inv_proj = fs_reg(this, glsl_type::float_type);

      ir->projector->accept(this);
      emit(fs_inst(FS_OPCODE_RCP, inv_proj, this->result));

      fs_reg proj_coordinate = fs_reg(this, ir->coordinate->type);
      for (unsigned int i = 0; i < ir->coordinate->type->vector_elements; i++) {
	 emit(fs_inst(BRW_OPCODE_MUL, proj_coordinate, coordinate, inv_proj));
	 coordinate.reg_offset++;
	 proj_coordinate.reg_offset++;
      }
      proj_coordinate.reg_offset = 0;

      coordinate = proj_coordinate;
   }

   /* Load the coordinate components into consecutive message registers. */
   for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate));
      coordinate.reg_offset++;
   }

   /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
   if (intel->gen < 5)
      mlen = 3;

   if (ir->shadow_comparitor) {
      /* For shadow comparisons, we have to supply u,v,r. */
      mlen = 3;

      ir->shadow_comparitor->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;
   }

   /* Do we ever want to handle writemasking on texture samples?  Is it
    * performance relevant?
    */
   fs_reg dst = fs_reg(this, glsl_type::vec4_type);

   switch (ir->op) {
   case ir_tex:
      inst = emit(fs_inst(FS_OPCODE_TEX, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txb:
      /* The bias parameter goes in the MRF following the coordinate
       * (and shadow comparitor, if any).
       */
      ir->lod_info.bias->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;

      inst = emit(fs_inst(FS_OPCODE_TXB, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txl:
      ir->lod_info.lod->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;

      inst = emit(fs_inst(FS_OPCODE_TXL, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txd:
   case ir_txf:
      assert(!"GLSL 1.30 features unsupported");
      break;
   }

   /* Translate the GLSL sampler uniform into the texture unit actually
    * bound by the application.
    */
   inst->sampler =
      _mesa_get_sampler_uniform_value(ir->sampler,
				      ctx->Shader.CurrentProgram,
				      &brw->fragment_program->Base);
   inst->sampler = c->fp->program.Base.SamplerUnits[inst->sampler];

   this->result = dst;

   if (ir->shadow_comparitor)
      inst->shadow_compare = true;
   inst->mlen = mlen;
}
1266
1267void
1268fs_visitor::visit(ir_swizzle *ir)
1269{
1270   ir->val->accept(this);
1271   fs_reg val = this->result;
1272
1273   fs_reg result = fs_reg(this, ir->type);
1274   this->result = result;
1275
1276   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
1277      fs_reg channel = val;
1278      int swiz = 0;
1279
1280      switch (i) {
1281      case 0:
1282	 swiz = ir->mask.x;
1283	 break;
1284      case 1:
1285	 swiz = ir->mask.y;
1286	 break;
1287      case 2:
1288	 swiz = ir->mask.z;
1289	 break;
1290      case 3:
1291	 swiz = ir->mask.w;
1292	 break;
1293      }
1294
1295      channel.reg_offset += swiz;
1296      emit(fs_inst(BRW_OPCODE_MOV, result, channel));
1297      result.reg_offset++;
1298   }
1299}
1300
1301void
1302fs_visitor::visit(ir_discard *ir)
1303{
1304   fs_reg temp = fs_reg(this, glsl_type::uint_type);
1305
1306   assert(ir->condition == NULL); /* FINISHME */
1307
1308   emit(fs_inst(FS_OPCODE_DISCARD, temp, temp));
1309}
1310
1311void
1312fs_visitor::visit(ir_constant *ir)
1313{
1314   fs_reg reg(this, ir->type);
1315   this->result = reg;
1316
1317   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
1318      switch (ir->type->base_type) {
1319      case GLSL_TYPE_FLOAT:
1320	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i])));
1321	 break;
1322      case GLSL_TYPE_UINT:
1323	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i])));
1324	 break;
1325      case GLSL_TYPE_INT:
1326	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i])));
1327	 break;
1328      case GLSL_TYPE_BOOL:
1329	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i])));
1330	 break;
1331      default:
1332	 assert(!"Non-float/uint/int/bool constant");
1333      }
1334      reg.reg_offset++;
1335   }
1336}
1337
/**
 * Emits the predicated IF/ELSE/ENDIF sequence for an IR if-statement,
 * generating the condition into the flag register first.
 */
void
fs_visitor::visit(ir_if *ir)
{
   fs_inst *inst;

   /* Don't point the annotation at the if statement, because then it plus
    * the then and else blocks get printed.
    */
   this->base_ir = ir->condition;

   /* Generate the condition into the condition code. */
   ir->condition->accept(this);
   inst = emit(fs_inst(BRW_OPCODE_MOV, fs_reg(brw_null_reg()), this->result));
   inst->conditional_mod = BRW_CONDITIONAL_NZ;

   inst = emit(fs_inst(BRW_OPCODE_IF));
   inst->predicated = true;

   foreach_iter(exec_list_iterator, iter, ir->then_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();
      this->base_ir = ir;

      ir->accept(this);
   }

   if (!ir->else_instructions.is_empty()) {
      emit(fs_inst(BRW_OPCODE_ELSE));

      foreach_iter(exec_list_iterator, iter, ir->else_instructions) {
	 ir_instruction *ir = (ir_instruction *)iter.get();
	 this->base_ir = ir;

	 ir->accept(this);
      }
   }

   emit(fs_inst(BRW_OPCODE_ENDIF));
}
1376
/**
 * Emits a DO/WHILE loop for an IR loop, including the optional counter
 * initialization, termination comparison, and increment, plus a safety
 * counter that breaks out of runaway loops.
 */
void
fs_visitor::visit(ir_loop *ir)
{
   fs_reg counter = reg_undef;

   if (ir->counter) {
      this->base_ir = ir->counter;
      ir->counter->accept(this);
      counter = *(variable_storage(ir->counter));

      if (ir->from) {
	 this->base_ir = ir->from;
	 ir->from->accept(this);

	 emit(fs_inst(BRW_OPCODE_MOV, counter, this->result));
      }
   }

   /* Start a safety counter.  If the user messed up their loop
    * counting, we don't want to hang the GPU.
    */
   fs_reg max_iter = fs_reg(this, glsl_type::int_type);
   emit(fs_inst(BRW_OPCODE_MOV, max_iter, fs_reg(10000)));

   emit(fs_inst(BRW_OPCODE_DO));

   if (ir->to) {
      /* Compare the loop counter against the bound and break on match. */
      this->base_ir = ir->to;
      ir->to->accept(this);

      fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null,
				   counter, this->result));
      switch (ir->cmp) {
      case ir_binop_equal:
	 inst->conditional_mod = BRW_CONDITIONAL_Z;
	 break;
      case ir_binop_nequal:
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 break;
      case ir_binop_gequal:
	 inst->conditional_mod = BRW_CONDITIONAL_GE;
	 break;
      case ir_binop_lequal:
	 inst->conditional_mod = BRW_CONDITIONAL_LE;
	 break;
      case ir_binop_greater:
	 inst->conditional_mod = BRW_CONDITIONAL_G;
	 break;
      case ir_binop_less:
	 inst->conditional_mod = BRW_CONDITIONAL_L;
	 break;
      default:
	 assert(!"not reached: unknown loop condition");
	 this->fail = true;
	 break;
      }

      inst = emit(fs_inst(BRW_OPCODE_BREAK));
      inst->predicated = true;
   }

   foreach_iter(exec_list_iterator, iter, ir->body_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();
      fs_inst *inst;

      this->base_ir = ir;
      ir->accept(this);

      /* Check the maximum loop iters counter. */
      /* NOTE(review): this decrement+break is emitted once per body
       * *statement*, not once per loop iteration, so the effective cap is
       * 10000 / (number of body statements) -- confirm this is intended.
       */
      inst = emit(fs_inst(BRW_OPCODE_ADD, max_iter, max_iter, fs_reg(-1)));
      inst->conditional_mod = BRW_CONDITIONAL_Z;

      inst = emit(fs_inst(BRW_OPCODE_BREAK));
      inst->predicated = true;
   }

   if (ir->increment) {
      this->base_ir = ir->increment;
      ir->increment->accept(this);
      emit(fs_inst(BRW_OPCODE_ADD, counter, counter, this->result));
   }

   emit(fs_inst(BRW_OPCODE_WHILE));
}
1461
1462void
1463fs_visitor::visit(ir_loop_jump *ir)
1464{
1465   switch (ir->mode) {
1466   case ir_loop_jump::jump_break:
1467      emit(fs_inst(BRW_OPCODE_BREAK));
1468      break;
1469   case ir_loop_jump::jump_continue:
1470      emit(fs_inst(BRW_OPCODE_CONTINUE));
1471      break;
1472   }
1473}
1474
1475void
1476fs_visitor::visit(ir_call *ir)
1477{
1478   assert(!"FINISHME");
1479}
1480
1481void
1482fs_visitor::visit(ir_return *ir)
1483{
1484   assert(!"FINISHME");
1485}
1486
1487void
1488fs_visitor::visit(ir_function *ir)
1489{
1490   /* Ignore function bodies other than main() -- we shouldn't see calls to
1491    * them since they should all be inlined before we get to ir_to_mesa.
1492    */
1493   if (strcmp(ir->name, "main") == 0) {
1494      const ir_function_signature *sig;
1495      exec_list empty;
1496
1497      sig = ir->matching_signature(&empty);
1498
1499      assert(sig);
1500
1501      foreach_iter(exec_list_iterator, iter, sig->body) {
1502	 ir_instruction *ir = (ir_instruction *)iter.get();
1503	 this->base_ir = ir;
1504
1505	 ir->accept(this);
1506      }
1507   }
1508}
1509
1510void
1511fs_visitor::visit(ir_function_signature *ir)
1512{
1513   assert(!"not reached");
1514   (void)ir;
1515}
1516
1517fs_inst *
1518fs_visitor::emit(fs_inst inst)
1519{
1520   fs_inst *list_inst = new(mem_ctx) fs_inst;
1521   *list_inst = inst;
1522
1523   list_inst->annotation = this->current_annotation;
1524   list_inst->ir = this->base_ir;
1525
1526   this->instructions.push_tail(list_inst);
1527
1528   return list_inst;
1529}
1530
1531/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
1532void
1533fs_visitor::emit_dummy_fs()
1534{
1535   /* Everyone's favorite color. */
1536   emit(fs_inst(BRW_OPCODE_MOV,
1537		fs_reg(MRF, 2),
1538		fs_reg(1.0f)));
1539   emit(fs_inst(BRW_OPCODE_MOV,
1540		fs_reg(MRF, 3),
1541		fs_reg(0.0f)));
1542   emit(fs_inst(BRW_OPCODE_MOV,
1543		fs_reg(MRF, 4),
1544		fs_reg(1.0f)));
1545   emit(fs_inst(BRW_OPCODE_MOV,
1546		fs_reg(MRF, 5),
1547		fs_reg(0.0f)));
1548
1549   fs_inst *write;
1550   write = emit(fs_inst(FS_OPCODE_FB_WRITE,
1551			fs_reg(0),
1552			fs_reg(0)));
1553}
1554
1555/* The register location here is relative to the start of the URB
1556 * data.  It will get adjusted to be a real location before
1557 * generate_code() time.
1558 */
1559struct brw_reg
1560fs_visitor::interp_reg(int location, int channel)
1561{
1562   int regnr = location * 2 + channel / 2;
1563   int stride = (channel & 1) * 4;
1564
1565   return brw_vec1_grf(regnr, stride);
1566}
1567
/** Emits the interpolation for the varying inputs. */
void
fs_visitor::emit_interpolation_setup()
{
   /* g1 of the thread payload holds the subspan pixel origins. */
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);

   this->current_annotation = "compute pixel centers";
   this->pixel_x = fs_reg(this, glsl_type::uint_type);
   this->pixel_y = fs_reg(this, glsl_type::uint_type);
   this->pixel_x.type = BRW_REGISTER_TYPE_UW;
   this->pixel_y.type = BRW_REGISTER_TYPE_UW;
   /* Add packed per-pixel offsets (brw_imm_v immediate vectors) to the
    * subspan origin X/Y values read from g1.
    */
   emit(fs_inst(BRW_OPCODE_ADD,
		this->pixel_x,
		fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
		fs_reg(brw_imm_v(0x10101010))));
   emit(fs_inst(BRW_OPCODE_ADD,
		this->pixel_y,
		fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
		fs_reg(brw_imm_v(0x11001100))));

   this->current_annotation = "compute pixel deltas from v0";
   this->delta_x = fs_reg(this, glsl_type::float_type);
   this->delta_y = fs_reg(this, glsl_type::float_type);
   /* NOTE(review): g1.0/g1.1 appear to hold the start-vertex X/Y used as
    * the interpolation origin -- confirm against the WM payload layout.
    */
   emit(fs_inst(BRW_OPCODE_ADD,
		this->delta_x,
		this->pixel_x,
		fs_reg(negate(brw_vec1_grf(1, 0)))));
   emit(fs_inst(BRW_OPCODE_ADD,
		this->delta_y,
		this->pixel_y,
		fs_reg(negate(brw_vec1_grf(1, 1)))));

   this->current_annotation = "compute pos.w and 1/pos.w";
   /* Compute wpos.w.  It's always in our setup, since it's needed to
    * interpolate the other attributes.
    */
   this->wpos_w = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y,
		interp_reg(FRAG_ATTRIB_WPOS, 3)));
   /* Compute the pixel 1/W value from wpos.w. */
   this->pixel_w = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(FS_OPCODE_RCP, this->pixel_w, wpos_w));
   this->current_annotation = NULL;
}
1612
/**
 * Builds the framebuffer-write message payload in MRFs (header, optional
 * AA/depth data, then color) and emits one FS_OPCODE_FB_WRITE per color
 * region, marking the last one EOT.
 */
void
fs_visitor::emit_fb_writes()
{
   this->current_annotation = "FB write header";
   int nr = 0;

   /* m0, m1 header */
   nr += 2;

   if (c->key.aa_dest_stencil_reg) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
		   fs_reg(brw_vec8_grf(c->key.aa_dest_stencil_reg, 0))));
   }

   /* Reserve space for color. It'll be filled in per MRT below. */
   int color_mrf = nr;
   nr += 4;

   if (c->key.source_depth_to_render_target) {
      if (c->key.computes_depth) {
	 /* Hand over gl_FragDepth. */
	 assert(this->frag_depth);
	 fs_reg depth = *(variable_storage(this->frag_depth));

	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), depth));
      } else {
	 /* Pass through the payload depth. */
	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
		      fs_reg(brw_vec8_grf(c->key.source_depth_reg, 0))));
      }
   }

   if (c->key.dest_depth_reg) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
		   fs_reg(brw_vec8_grf(c->key.dest_depth_reg, 0))));
   }

   fs_reg color = reg_undef;
   if (this->frag_color)
      color = *(variable_storage(this->frag_color));
   else if (this->frag_data)
      color = *(variable_storage(this->frag_data));

   for (int target = 0; target < c->key.nr_color_regions; target++) {
      this->current_annotation = talloc_asprintf(this->mem_ctx,
						 "FB write target %d",
						 target);
      if (this->frag_color || this->frag_data) {
	 for (int i = 0; i < 4; i++) {
	    emit(fs_inst(BRW_OPCODE_MOV,
			 fs_reg(MRF, color_mrf + i),
			 color));
	    color.reg_offset++;
	 }
      }

      /* gl_FragColor writes the same value to every target, so rewind;
       * gl_FragData keeps advancing to the next target's four components.
       */
      if (this->frag_color)
	 color.reg_offset -= 4;

      fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
				   reg_undef, reg_undef));
      inst->target = target;
      inst->mlen = nr;
      /* Only the last FB write in the thread carries end-of-thread. */
      if (target == c->key.nr_color_regions - 1)
	 inst->eot = true;
   }

   if (c->key.nr_color_regions == 0) {
      /* Even with no color buffers bound, a write must terminate the
       * thread.
       */
      fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
				   reg_undef, reg_undef));
      inst->mlen = nr;
      inst->eot = true;
   }

   this->current_annotation = NULL;
}
1689
/**
 * Generates the actual SEND for a framebuffer write: copies the payload
 * header into m1 and issues brw_fb_WRITE with the recorded target,
 * message length, and EOT flag.
 */
void
fs_visitor::generate_fb_write(fs_inst *inst)
{
   GLboolean eot = inst->eot;

   /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
    * move, here's g1.
    */
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_MOV(p,
	   brw_message_reg(1),
	   brw_vec8_grf(1, 0));
   brw_pop_insn_state(p);

   brw_fb_WRITE(p,
		8, /* dispatch_width */
		retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
		0, /* base MRF */
		retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
		inst->target,
		inst->mlen,
		0,
		eot);
}
1716
/**
 * Generates code for FS_OPCODE_LINTERP: interpolate an attribute using
 * the barycentric deltas in src[0]/src[1] and the setup coefficients in
 * src[2].
 */
void
fs_visitor::generate_linterp(fs_inst *inst,
			     struct brw_reg dst, struct brw_reg *src)
{
   struct brw_reg delta_x = src[0];
   struct brw_reg delta_y = src[1];
   struct brw_reg interp = src[2];

   /* PLN reads delta_x and delta_y as one register pair, so it requires
    * them adjacent and (before gen6) starting on an even register.
    */
   if (brw->has_pln &&
       delta_y.nr == delta_x.nr + 1 &&
       (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
      brw_PLN(p, dst, interp, delta_x);
   } else {
      /* Fall back to the two-instruction LINE + MAC sequence. */
      brw_LINE(p, brw_null_reg(), interp, delta_x);
      brw_MAC(p, dst, suboffset(interp, 1), delta_y);
   }
}
1734
1735void
1736fs_visitor::generate_math(fs_inst *inst,
1737			  struct brw_reg dst, struct brw_reg *src)
1738{
1739   int op;
1740
1741   switch (inst->opcode) {
1742   case FS_OPCODE_RCP:
1743      op = BRW_MATH_FUNCTION_INV;
1744      break;
1745   case FS_OPCODE_RSQ:
1746      op = BRW_MATH_FUNCTION_RSQ;
1747      break;
1748   case FS_OPCODE_SQRT:
1749      op = BRW_MATH_FUNCTION_SQRT;
1750      break;
1751   case FS_OPCODE_EXP2:
1752      op = BRW_MATH_FUNCTION_EXP;
1753      break;
1754   case FS_OPCODE_LOG2:
1755      op = BRW_MATH_FUNCTION_LOG;
1756      break;
1757   case FS_OPCODE_POW:
1758      op = BRW_MATH_FUNCTION_POW;
1759      break;
1760   case FS_OPCODE_SIN:
1761      op = BRW_MATH_FUNCTION_SIN;
1762      break;
1763   case FS_OPCODE_COS:
1764      op = BRW_MATH_FUNCTION_COS;
1765      break;
1766   default:
1767      assert(!"not reached: unknown math function");
1768      op = 0;
1769      break;
1770   }
1771
1772   if (inst->opcode == FS_OPCODE_POW) {
1773      brw_MOV(p, brw_message_reg(3), src[1]);
1774   }
1775
1776   brw_math(p, dst,
1777	    op,
1778	    inst->saturate ? BRW_MATH_SATURATE_SATURATE :
1779	    BRW_MATH_SATURATE_NONE,
1780	    2, src[0],
1781	    BRW_MATH_DATA_VECTOR,
1782	    BRW_MATH_PRECISION_FULL);
1783}
1784
1785void
1786fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
1787{
1788   int msg_type = -1;
1789   int rlen = 4;
1790
1791   if (intel->gen == 5) {
1792      switch (inst->opcode) {
1793      case FS_OPCODE_TEX:
1794	 if (inst->shadow_compare) {
1795	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5;
1796	 } else {
1797	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5;
1798	 }
1799	 break;
1800      case FS_OPCODE_TXB:
1801	 if (inst->shadow_compare) {
1802	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5;
1803	 } else {
1804	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5;
1805	 }
1806	 break;
1807      }
1808   } else {
1809      switch (inst->opcode) {
1810      case FS_OPCODE_TEX:
1811	 /* Note that G45 and older determines shadow compare and dispatch width
1812	  * from message length for most messages.
1813	  */
1814	 if (inst->shadow_compare) {
1815	    msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
1816	 } else {
1817	    msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
1818	 }
1819      case FS_OPCODE_TXB:
1820	 if (inst->shadow_compare) {
1821	    assert(!"FINISHME: shadow compare with bias.");
1822	    msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
1823	 } else {
1824	    msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
1825	    rlen = 8;
1826	 }
1827	 break;
1828      }
1829   }
1830   assert(msg_type != -1);
1831
1832   /* g0 header. */
1833   src.nr--;
1834
1835   brw_SAMPLE(p,
1836	      retype(dst, BRW_REGISTER_TYPE_UW),
1837	      src.nr,
1838	      retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
1839              SURF_INDEX_TEXTURE(inst->sampler),
1840	      inst->sampler,
1841	      WRITEMASK_XYZW,
1842	      msg_type,
1843	      rlen,
1844	      inst->mlen + 1,
1845	      0,
1846	      1,
1847	      BRW_SAMPLER_SIMD_MODE_SIMD8);
1848}
1849
1850
1851/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
1852 * looking like:
1853 *
1854 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
1855 *
1856 * and we're trying to produce:
1857 *
1858 *           DDX                     DDY
1859 * dst: (ss0.tr - ss0.tl)     (ss0.tl - ss0.bl)
1860 *      (ss0.tr - ss0.tl)     (ss0.tr - ss0.br)
1861 *      (ss0.br - ss0.bl)     (ss0.tl - ss0.bl)
1862 *      (ss0.br - ss0.bl)     (ss0.tr - ss0.br)
1863 *      (ss1.tr - ss1.tl)     (ss1.tl - ss1.bl)
1864 *      (ss1.tr - ss1.tl)     (ss1.tr - ss1.br)
1865 *      (ss1.br - ss1.bl)     (ss1.tl - ss1.bl)
1866 *      (ss1.br - ss1.bl)     (ss1.tr - ss1.br)
1867 *
1868 * and add another set of two more subspans if in 16-pixel dispatch mode.
1869 *
1870 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
1871 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
1872 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
1873 * between each other.  We could probably do it like ddx and swizzle the right
1874 * order later, but bail for now and just produce
1875 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
1876 */
void
fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   /* See the block comment above: src0 selects the right (tr/br) pixels
    * of each pair (subnr 1) and src1 the left (tl/bl) pixels (subnr 0),
    * with width-2/vstride-2 regions so each pair repeats its difference.
    */
   struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
				 BRW_REGISTER_TYPE_F,
				 BRW_VERTICAL_STRIDE_2,
				 BRW_WIDTH_2,
				 BRW_HORIZONTAL_STRIDE_0,
				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
				 BRW_REGISTER_TYPE_F,
				 BRW_VERTICAL_STRIDE_2,
				 BRW_WIDTH_2,
				 BRW_HORIZONTAL_STRIDE_0,
				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   brw_ADD(p, dst, src0, negate(src1));
}
1894
void
fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   /* Per the block comment above generate_ddx(): the precise per-pixel
    * swizzle is punted on; this produces (tl - bl) replicated across each
    * subspan, using width-4 regions offset by two elements (subnr 0 vs 2).
    */
   struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
				 BRW_REGISTER_TYPE_F,
				 BRW_VERTICAL_STRIDE_4,
				 BRW_WIDTH_4,
				 BRW_HORIZONTAL_STRIDE_0,
				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
				 BRW_REGISTER_TYPE_F,
				 BRW_VERTICAL_STRIDE_4,
				 BRW_WIDTH_4,
				 BRW_HORIZONTAL_STRIDE_0,
				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   brw_ADD(p, dst, src0, negate(src1));
}
1912
/**
 * Generates code for FS_OPCODE_DISCARD by ANDing the inverted instruction
 * mask (IMASK) into g0.
 *
 * NOTE(review): this assumes g0.0:uw holds the pixel-enable bits consumed
 * by the later FB write header -- confirm against the WM payload layout.
 */
void
fs_visitor::generate_discard(fs_inst *inst, struct brw_reg temp)
{
   struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
   temp = brw_uw1_reg(temp.file, temp.nr, 0);

   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_NOT(p, temp, brw_mask_reg(1)); /* IMASK */
   brw_AND(p, g0, temp, g0);
   brw_pop_insn_state(p);
}
1925
/**
 * Lays out the push-constant (CURB) registers after the payload and
 * rewrites UNIFORM-file operands to the fixed hardware registers they
 * land in.
 */
void
fs_visitor::assign_curb_setup()
{
   c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
   /* Constants are uploaded 8 floats (one register) at a time. */
   c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8;

   if (intel->gen == 5 && (c->prog_data.first_curbe_grf +
			   c->prog_data.curb_read_length) & 1) {
      /* Align the start of the interpolation coefficients so that we can use
       * the PLN instruction.
       */
      c->prog_data.first_curbe_grf++;
   }

   /* Map the offsets in the UNIFORM file to fixed HW regs. */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      for (unsigned int i = 0; i < 3; i++) {
	 if (inst->src[i].file == UNIFORM) {
	    int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
	    struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf +
						  constant_nr / 8,
						  constant_nr % 8);

	    inst->src[i].file = FIXED_HW_REG;
	    inst->src[i].fixed_hw_reg = brw_reg;
	 }
      }
   }
}
1957
/**
 * Lays out the URB setup data (interpolation coefficients) after the
 * CURB registers and patches FS_OPCODE_LINTERP's coefficient source to
 * the register each attribute actually landed in.
 */
void
fs_visitor::assign_urb_setup()
{
   int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length;
   int interp_reg_nr[FRAG_ATTRIB_MAX];

   c->prog_data.urb_read_length = 0;

   /* Figure out where each of the incoming setup attributes lands. */
   for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
      interp_reg_nr[i] = -1;

      /* WPOS is always set up (needed for 1/W); other attributes only
       * when the program reads them.
       */
      if (i != FRAG_ATTRIB_WPOS &&
	  !(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i)))
	 continue;

      /* Each attribute is 4 setup channels, each of which is half a reg. */
      interp_reg_nr[i] = urb_start + c->prog_data.urb_read_length;
      c->prog_data.urb_read_length += 2;
   }

   /* Map the register numbers for FS_OPCODE_LINTERP so that it uses
    * the correct setup input.
    */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      if (inst->opcode != FS_OPCODE_LINTERP)
	 continue;

      assert(inst->src[2].file == FIXED_HW_REG);

      /* interp_reg() encoded location*2 (+ channel parity) in the reg nr;
       * decode it and rebase onto the attribute's real register.
       */
      int location = inst->src[2].fixed_hw_reg.nr / 2;
      assert(interp_reg_nr[location] != -1);
      inst->src[2].fixed_hw_reg.nr = (interp_reg_nr[location] +
				      (inst->src[2].fixed_hw_reg.nr & 1));
   }

   this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length;
}
1998
1999static void
2000trivial_assign_reg(int *reg_hw_locations, fs_reg *reg)
2001{
2002   if (reg->file == GRF && reg->reg != 0) {
2003      reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset;
2004      reg->reg = 0;
2005   }
2006}
2007
2008void
2009fs_visitor::assign_regs()
2010{
2011   int last_grf = 0;
2012   int hw_reg_mapping[this->virtual_grf_next];
2013   int i;
2014
2015   hw_reg_mapping[0] = 0;
2016   hw_reg_mapping[1] = this->first_non_payload_grf;
2017   for (i = 2; i < this->virtual_grf_next; i++) {
2018      hw_reg_mapping[i] = (hw_reg_mapping[i - 1] +
2019			   this->virtual_grf_sizes[i - 1]);
2020   }
2021   last_grf = hw_reg_mapping[i - 1] + this->virtual_grf_sizes[i - 1];
2022
2023   /* FINISHME: trivial assignment of register numbers */
2024   foreach_iter(exec_list_iterator, iter, this->instructions) {
2025      fs_inst *inst = (fs_inst *)iter.get();
2026
2027      trivial_assign_reg(hw_reg_mapping, &inst->dst);
2028      trivial_assign_reg(hw_reg_mapping, &inst->src[0]);
2029      trivial_assign_reg(hw_reg_mapping, &inst->src[1]);
2030   }
2031
2032   this->grf_used = last_grf + 1;
2033}
2034
2035static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
2036{
2037   struct brw_reg brw_reg;
2038
2039   switch (reg->file) {
2040   case GRF:
2041   case ARF:
2042   case MRF:
2043      brw_reg = brw_vec8_reg(reg->file,
2044			    reg->hw_reg, 0);
2045      brw_reg = retype(brw_reg, reg->type);
2046      break;
2047   case IMM:
2048      switch (reg->type) {
2049      case BRW_REGISTER_TYPE_F:
2050	 brw_reg = brw_imm_f(reg->imm.f);
2051	 break;
2052      case BRW_REGISTER_TYPE_D:
2053	 brw_reg = brw_imm_d(reg->imm.i);
2054	 break;
2055      case BRW_REGISTER_TYPE_UD:
2056	 brw_reg = brw_imm_ud(reg->imm.u);
2057	 break;
2058      default:
2059	 assert(!"not reached");
2060	 break;
2061      }
2062      break;
2063   case FIXED_HW_REG:
2064      brw_reg = reg->fixed_hw_reg;
2065      break;
2066   case BAD_FILE:
2067      /* Probably unused. */
2068      brw_reg = brw_null_reg();
2069      break;
2070   case UNIFORM:
2071      assert(!"not reached");
2072      brw_reg = brw_null_reg();
2073      break;
2074   }
2075   if (reg->abs)
2076      brw_reg = brw_abs(brw_reg);
2077   if (reg->negate)
2078      brw_reg = negate(brw_reg);
2079
2080   return brw_reg;
2081}
2082
2083void
2084fs_visitor::generate_code()
2085{
2086   unsigned int annotation_len = 0;
2087   int last_native_inst = 0;
2088   struct brw_instruction *if_stack[16], *loop_stack[16];
2089   int if_stack_depth = 0, loop_stack_depth = 0;
2090   int if_depth_in_loop[16];
2091
2092   if_depth_in_loop[loop_stack_depth] = 0;
2093
2094   memset(&if_stack, 0, sizeof(if_stack));
2095   foreach_iter(exec_list_iterator, iter, this->instructions) {
2096      fs_inst *inst = (fs_inst *)iter.get();
2097      struct brw_reg src[3], dst;
2098
2099      for (unsigned int i = 0; i < 3; i++) {
2100	 src[i] = brw_reg_from_fs_reg(&inst->src[i]);
2101      }
2102      dst = brw_reg_from_fs_reg(&inst->dst);
2103
2104      brw_set_conditionalmod(p, inst->conditional_mod);
2105      brw_set_predicate_control(p, inst->predicated);
2106
2107      switch (inst->opcode) {
2108      case BRW_OPCODE_MOV:
2109	 brw_MOV(p, dst, src[0]);
2110	 break;
2111      case BRW_OPCODE_ADD:
2112	 brw_ADD(p, dst, src[0], src[1]);
2113	 break;
2114      case BRW_OPCODE_MUL:
2115	 brw_MUL(p, dst, src[0], src[1]);
2116	 break;
2117
2118      case BRW_OPCODE_FRC:
2119	 brw_FRC(p, dst, src[0]);
2120	 break;
2121      case BRW_OPCODE_RNDD:
2122	 brw_RNDD(p, dst, src[0]);
2123	 break;
2124      case BRW_OPCODE_RNDZ:
2125	 brw_RNDZ(p, dst, src[0]);
2126	 break;
2127
2128      case BRW_OPCODE_AND:
2129	 brw_AND(p, dst, src[0], src[1]);
2130	 break;
2131      case BRW_OPCODE_OR:
2132	 brw_OR(p, dst, src[0], src[1]);
2133	 break;
2134      case BRW_OPCODE_XOR:
2135	 brw_XOR(p, dst, src[0], src[1]);
2136	 break;
2137
2138      case BRW_OPCODE_CMP:
2139	 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
2140	 break;
2141      case BRW_OPCODE_SEL:
2142	 brw_SEL(p, dst, src[0], src[1]);
2143	 break;
2144
2145      case BRW_OPCODE_IF:
2146	 assert(if_stack_depth < 16);
2147	 if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8);
2148	 if_depth_in_loop[loop_stack_depth]++;
2149	 if_stack_depth++;
2150	 break;
2151      case BRW_OPCODE_ELSE:
2152	 if_stack[if_stack_depth - 1] =
2153	    brw_ELSE(p, if_stack[if_stack_depth - 1]);
2154	 break;
2155      case BRW_OPCODE_ENDIF:
2156	 if_stack_depth--;
2157	 brw_ENDIF(p , if_stack[if_stack_depth]);
2158	 if_depth_in_loop[loop_stack_depth]--;
2159	 break;
2160
2161      case BRW_OPCODE_DO:
2162	 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
2163	 if_depth_in_loop[loop_stack_depth] = 0;
2164	 break;
2165
2166      case BRW_OPCODE_BREAK:
2167	 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
2168	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2169	 break;
2170      case BRW_OPCODE_CONTINUE:
2171	 brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
2172	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2173	 break;
2174
2175      case BRW_OPCODE_WHILE: {
2176	 struct brw_instruction *inst0, *inst1;
2177	 GLuint br = 1;
2178
2179	 if (intel->gen == 5)
2180	    br = 2;
2181
2182	 assert(loop_stack_depth > 0);
2183	 loop_stack_depth--;
2184	 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
2185	 /* patch all the BREAK/CONT instructions from last BGNLOOP */
2186	 while (inst0 > loop_stack[loop_stack_depth]) {
2187	    inst0--;
2188	    if (inst0->header.opcode == BRW_OPCODE_BREAK &&
2189		inst0->bits3.if_else.jump_count == 0) {
2190	       inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
2191	    }
2192	    else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
2193		     inst0->bits3.if_else.jump_count == 0) {
2194	       inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
2195	    }
2196	 }
2197      }
2198	 break;
2199
2200      case FS_OPCODE_RCP:
2201      case FS_OPCODE_RSQ:
2202      case FS_OPCODE_SQRT:
2203      case FS_OPCODE_EXP2:
2204      case FS_OPCODE_LOG2:
2205      case FS_OPCODE_POW:
2206      case FS_OPCODE_SIN:
2207      case FS_OPCODE_COS:
2208	 generate_math(inst, dst, src);
2209	 break;
2210      case FS_OPCODE_LINTERP:
2211	 generate_linterp(inst, dst, src);
2212	 break;
2213      case FS_OPCODE_TEX:
2214      case FS_OPCODE_TXB:
2215      case FS_OPCODE_TXL:
2216	 generate_tex(inst, dst, src[0]);
2217	 break;
2218      case FS_OPCODE_DISCARD:
2219	 generate_discard(inst, dst /* src0 == dst */);
2220	 break;
2221      case FS_OPCODE_DDX:
2222	 generate_ddx(inst, dst, src[0]);
2223	 break;
2224      case FS_OPCODE_DDY:
2225	 generate_ddy(inst, dst, src[0]);
2226	 break;
2227      case FS_OPCODE_FB_WRITE:
2228	 generate_fb_write(inst);
2229	 break;
2230      default:
2231	 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
2232	    _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
2233			  brw_opcodes[inst->opcode].name);
2234	 } else {
2235	    _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
2236	 }
2237	 this->fail = true;
2238      }
2239
2240      if (annotation_len < p->nr_insn) {
2241	 annotation_len *= 2;
2242	 if (annotation_len < 16)
2243	    annotation_len = 16;
2244
2245	 this->annotation_string = talloc_realloc(this->mem_ctx,
2246						  annotation_string,
2247						  const char *,
2248						  annotation_len);
2249	 this->annotation_ir = talloc_realloc(this->mem_ctx,
2250					      annotation_ir,
2251					      ir_instruction *,
2252					      annotation_len);
2253      }
2254
2255      for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
2256	 this->annotation_string[i] = inst->annotation;
2257	 this->annotation_ir[i] = inst->ir;
2258      }
2259      last_native_inst = p->nr_insn;
2260   }
2261}
2262
2263GLboolean
2264brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
2265{
2266   struct brw_compile *p = &c->func;
2267   struct intel_context *intel = &brw->intel;
2268   GLcontext *ctx = &intel->ctx;
2269   struct brw_shader *shader = NULL;
2270   struct gl_shader_program *prog = ctx->Shader.CurrentProgram;
2271
2272   if (!prog)
2273      return GL_FALSE;
2274
2275   if (!using_new_fs)
2276      return GL_FALSE;
2277
2278   for (unsigned int i = 0; i < prog->_NumLinkedShaders; i++) {
2279      if (prog->_LinkedShaders[i]->Type == GL_FRAGMENT_SHADER) {
2280	 shader = (struct brw_shader *)prog->_LinkedShaders[i];
2281	 break;
2282      }
2283   }
2284   if (!shader)
2285      return GL_FALSE;
2286
2287   /* We always use 8-wide mode, at least for now.  For one, flow
2288    * control only works in 8-wide.  Also, when we're fragment shader
2289    * bound, we're almost always under register pressure as well, so
2290    * 8-wide would save us from the performance cliff of spilling
2291    * regs.
2292    */
2293   c->dispatch_width = 8;
2294
2295   if (INTEL_DEBUG & DEBUG_WM) {
2296      printf("GLSL IR for native fragment shader %d:\n", prog->Name);
2297      _mesa_print_ir(shader->ir, NULL);
2298      printf("\n");
2299   }
2300
2301   /* Now the main event: Visit the shader IR and generate our FS IR for it.
2302    */
2303   fs_visitor v(c, shader);
2304
2305   if (0) {
2306      v.emit_dummy_fs();
2307   } else {
2308      v.emit_interpolation_setup();
2309
2310      /* Generate FS IR for main().  (the visitor only descends into
2311       * functions called "main").
2312       */
2313      foreach_iter(exec_list_iterator, iter, *shader->ir) {
2314	 ir_instruction *ir = (ir_instruction *)iter.get();
2315	 v.base_ir = ir;
2316	 ir->accept(&v);
2317      }
2318
2319      v.emit_fb_writes();
2320      v.assign_curb_setup();
2321      v.assign_urb_setup();
2322      v.assign_regs();
2323   }
2324
2325   v.generate_code();
2326
2327   assert(!v.fail); /* FINISHME: Cleanly fail, tested at link time, etc. */
2328
2329   if (v.fail)
2330      return GL_FALSE;
2331
2332   if (INTEL_DEBUG & DEBUG_WM) {
2333      const char *last_annotation_string = NULL;
2334      ir_instruction *last_annotation_ir = NULL;
2335
2336      printf("Native code for fragment shader %d:\n", prog->Name);
2337      for (unsigned int i = 0; i < p->nr_insn; i++) {
2338	 if (last_annotation_ir != v.annotation_ir[i]) {
2339	    last_annotation_ir = v.annotation_ir[i];
2340	    if (last_annotation_ir) {
2341	       printf("   ");
2342	       last_annotation_ir->print();
2343	       printf("\n");
2344	    }
2345	 }
2346	 if (last_annotation_string != v.annotation_string[i]) {
2347	    last_annotation_string = v.annotation_string[i];
2348	    if (last_annotation_string)
2349	       printf("   %s\n", last_annotation_string);
2350	 }
2351	 brw_disasm(stdout, &p->store[i], intel->gen);
2352      }
2353      printf("\n");
2354   }
2355
2356   c->prog_data.total_grf = v.grf_used;
2357   c->prog_data.total_scratch = 0;
2358
2359   return GL_TRUE;
2360}
2361