brw_fs.cpp revision f157812bbbcf9caac1f84988e738fc9d1e051056
/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

extern "C" {

#include <sys/types.h>

#include "main/macros.h"
#include "main/shaderobj.h"
#include "main/uniforms.h"
#include "program/prog_parameter.h"
#include "program/prog_print.h"
#include "program/prog_optimize.h"
#include "program/register_allocate.h"
#include "program/sampler.h"
#include "program/hash_table.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_wm.h"
#include "talloc.h"
}
#include "brw_fs.h"
#include "../glsl/glsl_types.h"
#include "../glsl/ir_optimization.h"
#include "../glsl/ir_print_visitor.h"

static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg);

struct gl_shader *
brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type)
{
   struct brw_shader *shader;

   shader = talloc_zero(NULL, struct brw_shader);
   if (shader) {
      shader->base.Type = type;
      shader->base.Name = name;
      _mesa_init_shader(ctx, &shader->base);
   }

   return &shader->base;
}

struct gl_shader_program *
brw_new_shader_program(struct gl_context *ctx, GLuint name)
{
   struct brw_shader_program *prog;
   prog = talloc_zero(NULL, struct brw_shader_program);
   if (prog) {
      prog->base.Name = name;
      _mesa_init_shader_program(ctx, &prog->base);
   }
   return &prog->base;
}

GLboolean
brw_compile_shader(struct gl_context *ctx, struct gl_shader *shader)
{
   if (!_mesa_ir_compile_shader(ctx, shader))
      return GL_FALSE;

   return GL_TRUE;
}

GLboolean
brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
{
   struct intel_context *intel = intel_context(ctx);

   for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) {
      struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[i];

      if (shader->base.Type == GL_FRAGMENT_SHADER) {
         void *mem_ctx = talloc_new(NULL);
         bool progress;

         if (shader->ir)
            talloc_free(shader->ir);
         shader->ir = new(shader) exec_list;
         clone_ir_list(mem_ctx, shader->ir, shader->base.ir);

         do_mat_op_to_vec(shader->ir);
         do_mod_to_fract(shader->ir);
         do_div_to_mul_rcp(shader->ir);
         do_sub_to_add_neg(shader->ir);
         do_explog_to_explog2(shader->ir);
         do_lower_texture_projection(shader->ir);
         brw_do_cubemap_normalize(shader->ir);

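         /* Iterate the lowering and optimization passes until they stop
          * making progress; each pass can expose new opportunities for
          * the others.
          */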
         do {
            progress = false;

            brw_do_channel_expressions(shader->ir);
            brw_do_vector_splitting(shader->ir);

            progress = do_lower_jumps(shader->ir, true, true,
                                      true, /* main return */
                                      false, /* continue */
                                      false /* loops */
                                      ) || progress;

            progress = do_common_optimization(shader->ir, true, 32) || progress;

            progress = lower_noise(shader->ir) || progress;
            progress =
               lower_variable_index_to_cond_assign(shader->ir,
                                                   GL_TRUE, /* input */
                                                   GL_TRUE, /* output */
                                                   GL_TRUE, /* temp */
                                                   GL_TRUE /* uniform */
                                                   ) || progress;
            if (intel->gen == 6) {
               progress = do_if_to_cond_assign(shader->ir) || progress;
            }
         } while (progress);

         validate_ir_tree(shader->ir);

         reparent_ir(shader->ir, shader->ir);
         talloc_free(mem_ctx);
      }
   }

   if (!_mesa_ir_link_shader(ctx, prog))
      return GL_FALSE;

   return GL_TRUE;
}

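/* Returns the number of scalar slots a value of the given GLSL type
 * occupies in this backend's register layout.
 */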
static int
type_size(const struct glsl_type *type)
{
   unsigned int size, i;

   switch (type->base_type) {
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_BOOL:
      return type->components();
   case GLSL_TYPE_ARRAY:
      return type_size(type->fields.array) * type->length;
   case GLSL_TYPE_STRUCT:
      size = 0;
      for (i = 0; i < type->length; i++) {
         size += type_size(type->fields.structure[i].type);
      }
      return size;
   case GLSL_TYPE_SAMPLER:
      /* Samplers take up no register space, since they're baked in at
       * link time.
       */
      return 0;
   default:
      assert(!"not reached");
      return 0;
   }
}

static const fs_reg reg_undef;
static const fs_reg reg_null(ARF, BRW_ARF_NULL);

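/** Allocates a new virtual GRF of the given size, doubling the size
 * bookkeeping array whenever it fills up.
 */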
int
fs_visitor::virtual_grf_alloc(int size)
{
   if (virtual_grf_array_size <= virtual_grf_next) {
      if (virtual_grf_array_size == 0)
         virtual_grf_array_size = 16;
      else
         virtual_grf_array_size *= 2;
      virtual_grf_sizes = talloc_realloc(mem_ctx, virtual_grf_sizes,
                                         int, virtual_grf_array_size);

      /* This slot is always unused. */
      virtual_grf_sizes[0] = 0;
   }
   virtual_grf_sizes[virtual_grf_next] = size;
   return virtual_grf_next++;
}

/** Fixed HW reg constructor. */
fs_reg::fs_reg(enum register_file file, int hw_reg)
{
   init();
   this->file = file;
   this->hw_reg = hw_reg;
   this->type = BRW_REGISTER_TYPE_F;
}

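/** Maps a GLSL base type to the hardware register type used to hold it. */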
int
brw_type_for_base_type(const struct glsl_type *type)
{
   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
      return BRW_REGISTER_TYPE_F;
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      return BRW_REGISTER_TYPE_D;
   case GLSL_TYPE_UINT:
      return BRW_REGISTER_TYPE_UD;
   case GLSL_TYPE_ARRAY:
   case GLSL_TYPE_STRUCT:
      /* These should be overridden with the type of the member when
       * dereferenced into.  BRW_REGISTER_TYPE_UD seems like a likely
       * way to trip up if we don't.
       */
      return BRW_REGISTER_TYPE_UD;
   default:
      assert(!"not reached");
      return BRW_REGISTER_TYPE_F;
   }
}

/** Automatic reg constructor. */
fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type)
{
   init();

   this->file = GRF;
   this->reg = v->virtual_grf_alloc(type_size(type));
   this->reg_offset = 0;
   this->type = brw_type_for_base_type(type);
}

fs_reg *
fs_visitor::variable_storage(ir_variable *var)
{
   return (fs_reg *)hash_table_find(this->variable_ht, var);
}

/* Our support for uniforms is piggy-backed on the struct
 * gl_fragment_program, because that's where the values actually
 * get stored, rather than in some global gl_shader_program uniform
 * store.
 */
int
fs_visitor::setup_uniform_values(int loc, const glsl_type *type)
{
   unsigned int offset = 0;
   float *vec_values;

   if (type->is_matrix()) {
      const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
                                                        type->vector_elements,
                                                        1);

      for (unsigned int i = 0; i < type->matrix_columns; i++) {
         offset += setup_uniform_values(loc + offset, column);
      }

      return offset;
   }

   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      vec_values = fp->Base.Parameters->ParameterValues[loc];
      for (unsigned int i = 0; i < type->vector_elements; i++) {
         c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i];
      }
      return 1;

   case GLSL_TYPE_STRUCT:
      for (unsigned int i = 0; i < type->length; i++) {
         offset += setup_uniform_values(loc + offset,
                                        type->fields.structure[i].type);
      }
      return offset;

   case GLSL_TYPE_ARRAY:
      for (unsigned int i = 0; i < type->length; i++) {
         offset += setup_uniform_values(loc + offset, type->fields.array);
      }
      return offset;

   case GLSL_TYPE_SAMPLER:
      /* The sampler takes up a slot, but we don't use any values from it. */
      return 1;

   default:
      assert(!"not reached");
      return 0;
   }
}


/* Our support for builtin uniforms is even scarier than non-builtin.
 * It sits on top of the PROG_STATE_VAR parameters that are
 * automatically updated from GL context state.
 */
void
fs_visitor::setup_builtin_uniform_values(ir_variable *ir)
{
   const struct gl_builtin_uniform_desc *statevar = NULL;

   for (unsigned int i = 0; _mesa_builtin_uniform_desc[i].name; i++) {
      statevar = &_mesa_builtin_uniform_desc[i];
      if (strcmp(ir->name, _mesa_builtin_uniform_desc[i].name) == 0)
         break;
   }

   if (!statevar->name) {
      this->fail = true;
      printf("Failed to find builtin uniform `%s'\n", ir->name);
      return;
   }

   int array_count;
   if (ir->type->is_array()) {
      array_count = ir->type->length;
   } else {
      array_count = 1;
   }

   for (int a = 0; a < array_count; a++) {
      for (unsigned int i = 0; i < statevar->num_elements; i++) {
         struct gl_builtin_uniform_element *element = &statevar->elements[i];
         int tokens[STATE_LENGTH];

         memcpy(tokens, element->tokens, sizeof(element->tokens));
         if (ir->type->is_array()) {
            tokens[1] = a;
         }

         /* This state reference has already been setup by ir_to_mesa,
          * but we'll get the same index back here.
          */
         int index = _mesa_add_state_reference(this->fp->Base.Parameters,
                                               (gl_state_index *)tokens);
         float *vec_values = this->fp->Base.Parameters->ParameterValues[index];

         /* Add each of the unique swizzles of the element as a
          * parameter.  This'll end up matching the expected layout of
          * the array/matrix/structure we're trying to fill in.
          */
         int last_swiz = -1;
         for (unsigned int i = 0; i < 4; i++) {
            int swiz = GET_SWZ(element->swizzle, i);
            if (swiz == last_swiz)
               break;
            last_swiz = swiz;

            c->prog_data.param[c->prog_data.nr_params++] = &vec_values[swiz];
         }
      }
   }
}

fs_reg *
fs_visitor::emit_fragcoord_interpolation(ir_variable *ir)
{
   fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
   fs_reg wpos = *reg;
   fs_reg neg_y = this->pixel_y;
   neg_y.negate = true;

   /* gl_FragCoord.x */
   if (ir->pixel_center_integer) {
      emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x));
   } else {
      emit(fs_inst(BRW_OPCODE_ADD, wpos, this->pixel_x, fs_reg(0.5f)));
   }
   wpos.reg_offset++;

   /* gl_FragCoord.y */
   if (ir->origin_upper_left && ir->pixel_center_integer) {
      emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y));
   } else {
      fs_reg pixel_y = this->pixel_y;
      float offset = (ir->pixel_center_integer ? 0.0 : 0.5);

      if (!ir->origin_upper_left) {
         pixel_y.negate = true;
         offset += c->key.drawable_height - 1.0;
      }

      emit(fs_inst(BRW_OPCODE_ADD, wpos, pixel_y, fs_reg(offset)));
   }
   wpos.reg_offset++;

   /* gl_FragCoord.z */
   emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
                interp_reg(FRAG_ATTRIB_WPOS, 2)));
   wpos.reg_offset++;

   /* gl_FragCoord.w: Already set up in emit_interpolation */
   emit(fs_inst(BRW_OPCODE_MOV, wpos, this->wpos_w));

   return reg;
}

fs_reg *
fs_visitor::emit_general_interpolation(ir_variable *ir)
{
   fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
   /* Interpolation is always in floating point regs. */
   reg->type = BRW_REGISTER_TYPE_F;
   fs_reg attr = *reg;

   unsigned int array_elements;
   const glsl_type *type;

   if (ir->type->is_array()) {
      array_elements = ir->type->length;
      if (array_elements == 0) {
         this->fail = true;
      }
      type = ir->type->fields.array;
   } else {
      array_elements = 1;
      type = ir->type;
   }

   int location = ir->location;
   for (unsigned int i = 0; i < array_elements; i++) {
      for (unsigned int j = 0; j < type->matrix_columns; j++) {
         if (urb_setup[location] == -1) {
            /* If there's no incoming setup data for this slot, don't
             * emit interpolation for it.
             */
            attr.reg_offset += type->vector_elements;
            location++;
            continue;
         }

         for (unsigned int c = 0; c < type->vector_elements; c++) {
            struct brw_reg interp = interp_reg(location, c);
            emit(fs_inst(FS_OPCODE_LINTERP,
                         attr,
                         this->delta_x,
                         this->delta_y,
                         fs_reg(interp)));
            attr.reg_offset++;
         }

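         /* Pre-gen6, the interpolated values still have the perspective
          * W term folded in, so multiply by the per-pixel W computed in
          * the interpolation setup to finish perspective correction.
          */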
         if (intel->gen < 6) {
            attr.reg_offset -= type->vector_elements;
            for (unsigned int c = 0; c < type->vector_elements; c++) {
               emit(fs_inst(BRW_OPCODE_MUL,
                            attr,
                            attr,
                            this->pixel_w));
               attr.reg_offset++;
            }
         }
         location++;
      }
   }

   return reg;
}

fs_reg *
fs_visitor::emit_frontfacing_interpolation(ir_variable *ir)
{
   fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);

   /* The frontfacing comes in as a bit in the thread payload. */
   if (intel->gen >= 6) {
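      /* On gen6 the back-facing flag is bit 15 of g0.0: arithmetic-shift
       * it down, invert it, and mask to produce a 0/1 front-facing value.
       */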
      emit(fs_inst(BRW_OPCODE_ASR,
                   *reg,
                   fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)),
                   fs_reg(15)));
      emit(fs_inst(BRW_OPCODE_NOT,
                   *reg,
                   *reg));
      emit(fs_inst(BRW_OPCODE_AND,
                   *reg,
                   *reg,
                   fs_reg(1)));
   } else {
      struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
      /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
       * us front face
       */
      fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP,
                                   *reg,
                                   fs_reg(r1_6ud),
                                   fs_reg(1u << 31)));
      inst->conditional_mod = BRW_CONDITIONAL_L;
      emit(fs_inst(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u)));
   }

   return reg;
}

fs_inst *
fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src)
{
   switch (opcode) {
   case FS_OPCODE_RCP:
   case FS_OPCODE_RSQ:
   case FS_OPCODE_SQRT:
   case FS_OPCODE_EXP2:
   case FS_OPCODE_LOG2:
   case FS_OPCODE_SIN:
   case FS_OPCODE_COS:
      break;
   default:
      assert(!"not reached: bad math opcode");
      return NULL;
   }

   /* Can't do hstride == 0 args to gen6 math, so expand it out.  We
    * might be able to do better by doing execsize = 1 math and then
    * expanding that result out, but we would need to be careful with
    * masking.
    */
   if (intel->gen >= 6 && src.file == UNIFORM) {
      fs_reg expanded = fs_reg(this, glsl_type::float_type);
      emit(fs_inst(BRW_OPCODE_MOV, expanded, src));
      src = expanded;
   }

   fs_inst *inst = emit(fs_inst(opcode, dst, src));

   if (intel->gen < 6) {
      inst->base_mrf = 2;
      inst->mlen = 1;
   }

   return inst;
}

fs_inst *
fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1)
{
   int base_mrf = 2;
   fs_inst *inst;

   assert(opcode == FS_OPCODE_POW);

   if (intel->gen >= 6) {
      /* Can't do hstride == 0 args to gen6 math, so expand it out. */
      if (src0.file == UNIFORM) {
         fs_reg expanded = fs_reg(this, glsl_type::float_type);
         emit(fs_inst(BRW_OPCODE_MOV, expanded, src0));
         src0 = expanded;
      }

      if (src1.file == UNIFORM) {
         fs_reg expanded = fs_reg(this, glsl_type::float_type);
         emit(fs_inst(BRW_OPCODE_MOV, expanded, src1));
         src1 = expanded;
      }

      inst = emit(fs_inst(opcode, dst, src0, src1));
   } else {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + 1), src1));
      inst = emit(fs_inst(opcode, dst, src0, reg_null));

      inst->base_mrf = base_mrf;
      inst->mlen = 2;
   }
   return inst;
}

void
fs_visitor::visit(ir_variable *ir)
{
   fs_reg *reg = NULL;

   if (variable_storage(ir))
      return;

   if (strcmp(ir->name, "gl_FragColor") == 0) {
      this->frag_color = ir;
   } else if (strcmp(ir->name, "gl_FragData") == 0) {
      this->frag_data = ir;
   } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
      this->frag_depth = ir;
   }

   if (ir->mode == ir_var_in) {
      if (!strcmp(ir->name, "gl_FragCoord")) {
         reg = emit_fragcoord_interpolation(ir);
      } else if (!strcmp(ir->name, "gl_FrontFacing")) {
         reg = emit_frontfacing_interpolation(ir);
      } else {
         reg = emit_general_interpolation(ir);
      }
      assert(reg);
      hash_table_insert(this->variable_ht, reg, ir);
      return;
   }

   if (ir->mode == ir_var_uniform) {
      int param_index = c->prog_data.nr_params;

      if (!strncmp(ir->name, "gl_", 3)) {
         setup_builtin_uniform_values(ir);
      } else {
         setup_uniform_values(ir->location, ir->type);
      }

      reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index);
   }

   if (!reg)
      reg = new(this->mem_ctx) fs_reg(this, ir->type);

   hash_table_insert(this->variable_ht, reg, ir);
}

void
fs_visitor::visit(ir_dereference_variable *ir)
{
   fs_reg *reg = variable_storage(ir->var);
   this->result = *reg;
}

void
fs_visitor::visit(ir_dereference_record *ir)
{
   const glsl_type *struct_type = ir->record->type;

   ir->record->accept(this);

   unsigned int offset = 0;
   for (unsigned int i = 0; i < struct_type->length; i++) {
      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
         break;
      offset += type_size(struct_type->fields.structure[i].type);
   }
   this->result.reg_offset += offset;
   this->result.type = brw_type_for_base_type(ir->type);
}

void
fs_visitor::visit(ir_dereference_array *ir)
{
   ir_constant *index;
   int element_size;

   ir->array->accept(this);
   index = ir->array_index->as_constant();

   element_size = type_size(ir->type);
   this->result.type = brw_type_for_base_type(ir->type);

   if (index) {
      assert(this->result.file == UNIFORM ||
             (this->result.file == GRF &&
              this->result.reg != 0));
      this->result.reg_offset += index->value.i[0] * element_size;
   } else {
      assert(!"FINISHME: non-constant array element");
   }
}

void
fs_visitor::visit(ir_expression *ir)
{
   unsigned int operand;
   fs_reg op[2], temp;
   fs_reg result;
   fs_inst *inst;

   for (operand = 0; operand < ir->get_num_operands(); operand++) {
      ir->operands[operand]->accept(this);
      if (this->result.file == BAD_FILE) {
         ir_print_visitor v;
         printf("Failed to get tree for expression operand:\n");
         ir->operands[operand]->accept(&v);
         this->fail = true;
      }
      op[operand] = this->result;

      /* Matrix expression operands should have been broken down to vector
       * operations already.
       */
      assert(!ir->operands[operand]->type->is_matrix());
      /* And then those vector operands should have been broken down to scalar.
       */
      assert(!ir->operands[operand]->type->is_vector());
   }

   /* Storage for our result.  If our result goes into an assignment, it will
    * just get copy-propagated out, so no worries.
    */
   this->result = fs_reg(this, ir->type);

   switch (ir->operation) {
   case ir_unop_logic_not:
      /* Note that BRW_OPCODE_NOT is not appropriate here, since it takes
       * the one's complement of the whole register, not just bit 0.
       */
      emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], fs_reg(-1)));
      break;
   case ir_unop_neg:
      op[0].negate = !op[0].negate;
      this->result = op[0];
      break;
   case ir_unop_abs:
      op[0].abs = true;
      this->result = op[0];
      break;
   case ir_unop_sign:
      temp = fs_reg(this, ir->type);

      emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(0.0f)));

      inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f)));
      inst->conditional_mod = BRW_CONDITIONAL_G;
      inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(1.0f)));
      inst->predicated = true;

      inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f)));
      inst->conditional_mod = BRW_CONDITIONAL_L;
      inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f)));
      inst->predicated = true;

      break;
   case ir_unop_rcp:
      emit_math(FS_OPCODE_RCP, this->result, op[0]);
      break;

   case ir_unop_exp2:
      emit_math(FS_OPCODE_EXP2, this->result, op[0]);
      break;
   case ir_unop_log2:
      emit_math(FS_OPCODE_LOG2, this->result, op[0]);
      break;
   case ir_unop_exp:
   case ir_unop_log:
      assert(!"not reached: should be handled by ir_explog_to_explog2");
      break;
   case ir_unop_sin:
      emit_math(FS_OPCODE_SIN, this->result, op[0]);
      break;
   case ir_unop_cos:
      emit_math(FS_OPCODE_COS, this->result, op[0]);
      break;

   case ir_unop_dFdx:
      emit(fs_inst(FS_OPCODE_DDX, this->result, op[0]));
      break;
   case ir_unop_dFdy:
      emit(fs_inst(FS_OPCODE_DDY, this->result, op[0]));
      break;

   case ir_binop_add:
      emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1]));
      break;
   case ir_binop_sub:
      assert(!"not reached: should be handled by ir_sub_to_add_neg");
      break;

   case ir_binop_mul:
      emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1]));
      break;
   case ir_binop_div:
      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
      break;
   case ir_binop_mod:
      assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
      break;

   case ir_binop_less:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_L;
      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
      break;
   case ir_binop_greater:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_G;
      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
      break;
   case ir_binop_lequal:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_LE;
      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
      break;
   case ir_binop_gequal:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_GE;
      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
      break;
   case ir_binop_equal:
   case ir_binop_all_equal: /* same as equal for scalars */
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_Z;
      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
      break;
   case ir_binop_nequal:
   case ir_binop_any_nequal: /* same as nequal for scalars */
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
      break;

   case ir_binop_logic_xor:
      emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1]));
      break;

   case ir_binop_logic_or:
      emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1]));
      break;

   case ir_binop_logic_and:
      emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1]));
      break;

   case ir_binop_dot:
   case ir_binop_cross:
   case ir_unop_any:
      assert(!"not reached: should be handled by brw_fs_channel_expressions");
      break;

   case ir_unop_noise:
      assert(!"not reached: should be handled by lower_noise");
      break;

   case ir_unop_sqrt:
      emit_math(FS_OPCODE_SQRT, this->result, op[0]);
      break;

   case ir_unop_rsq:
      emit_math(FS_OPCODE_RSQ, this->result, op[0]);
      break;

   case ir_unop_i2f:
   case ir_unop_b2f:
   case ir_unop_b2i:
   case ir_unop_f2i:
      emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
      break;
   case ir_unop_f2b:
   case ir_unop_i2b:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f)));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
      inst = emit(fs_inst(BRW_OPCODE_AND, this->result,
                          this->result, fs_reg(1)));
      break;

   case ir_unop_trunc:
      emit(fs_inst(BRW_OPCODE_RNDZ, this->result, op[0]));
      break;
   case ir_unop_ceil:
      op[0].negate = !op[0].negate;
      inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
      this->result.negate = true;
      break;
   case ir_unop_floor:
      inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
      break;
   case ir_unop_fract:
      inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0]));
      break;
   case ir_unop_round_even:
      emit(fs_inst(BRW_OPCODE_RNDE, this->result, op[0]));
      break;

   case ir_binop_min:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_L;

      inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
      inst->predicated = true;
      break;
   case ir_binop_max:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_G;

      inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
      inst->predicated = true;
      break;

   case ir_binop_pow:
      emit_math(FS_OPCODE_POW, this->result, op[0], op[1]);
      break;

   case ir_unop_bit_not:
   case ir_unop_u2f:
   case ir_binop_lshift:
   case ir_binop_rshift:
   case ir_binop_bit_and:
   case ir_binop_bit_xor:
   case ir_binop_bit_or:
      assert(!"GLSL 1.30 features unsupported");
      break;
   }
}

void
fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r,
                                   const glsl_type *type, bool predicated)
{
   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      for (unsigned int i = 0; i < type->components(); i++) {
         l.type = brw_type_for_base_type(type);
         r.type = brw_type_for_base_type(type);

         fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, l, r));
         inst->predicated = predicated;

         l.reg_offset++;
         r.reg_offset++;
      }
      break;
   case GLSL_TYPE_ARRAY:
      for (unsigned int i = 0; i < type->length; i++) {
         emit_assignment_writes(l, r, type->fields.array, predicated);
      }
      break;

   case GLSL_TYPE_STRUCT:
      for (unsigned int i = 0; i < type->length; i++) {
         emit_assignment_writes(l, r, type->fields.structure[i].type,
                                predicated);
      }
      break;

   case GLSL_TYPE_SAMPLER:
      break;

   default:
      assert(!"not reached");
      break;
   }
}

void
fs_visitor::visit(ir_assignment *ir)
{
   struct fs_reg l, r;
   fs_inst *inst;

   /* FINISHME: arrays on the lhs */
   ir->lhs->accept(this);
   l = this->result;

   ir->rhs->accept(this);
   r = this->result;

   assert(l.file != BAD_FILE);
   assert(r.file != BAD_FILE);

   if (ir->condition) {
      emit_bool_to_cond_code(ir->condition);
   }

   if (ir->lhs->type->is_scalar() ||
       ir->lhs->type->is_vector()) {
      for (int i = 0; i < ir->lhs->type->vector_elements; i++) {
         if (ir->write_mask & (1 << i)) {
            inst = emit(fs_inst(BRW_OPCODE_MOV, l, r));
            if (ir->condition)
               inst->predicated = true;
            r.reg_offset++;
         }
         l.reg_offset++;
      }
   } else {
      emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL);
   }
}

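/** Emits a gen4 SIMD8 sampler message, falling back to a SIMD16 message
 * for the bias/LOD cases the SIMD8 encoding doesn't support.
 */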
fs_inst *
fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
{
   int mlen;
   int base_mrf = 1;
   bool simd16 = false;
   fs_reg orig_dst;

   /* g0 header. */
   mlen = 1;

   if (ir->shadow_comparitor) {
      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i),
                      coordinate));
         coordinate.reg_offset++;
      }
      /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
      mlen += 3;

      if (ir->op == ir_tex) {
         /* There's no plain shadow compare message, so we use shadow
          * compare with a bias of 0.0.
          */
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
                      fs_reg(0.0f)));
         mlen++;
      } else if (ir->op == ir_txb) {
         ir->lod_info.bias->accept(this);
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
                      this->result));
         mlen++;
      } else {
         assert(ir->op == ir_txl);
         ir->lod_info.lod->accept(this);
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
                      this->result));
         mlen++;
      }

      ir->shadow_comparitor->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;
   } else if (ir->op == ir_tex) {
      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i),
                      coordinate));
         coordinate.reg_offset++;
      }
      /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
      mlen += 3;
   } else {
      /* Oh joy.  gen4 doesn't have SIMD8 non-shadow-compare bias/lod
       * instructions.  We'll need to do SIMD16 here.
       */
      assert(ir->op == ir_txb || ir->op == ir_txl);

      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * 2),
                      coordinate));
         coordinate.reg_offset++;
      }

      /* lod/bias appears after u/v/r. */
      mlen += 6;

      if (ir->op == ir_txb) {
         ir->lod_info.bias->accept(this);
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
                      this->result));
         mlen++;
      } else {
         ir->lod_info.lod->accept(this);
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
                      this->result));
         mlen++;
      }

      /* The unused upper half. */
      mlen++;

      /* Now, since we're doing simd16, the return is 2 interleaved
       * vec4s where the odd-indexed ones are junk. We'll need to move
       * this weirdness around to the expected layout.
       */
      simd16 = true;
      orig_dst = dst;
      dst = fs_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type,
                                                       2));
      dst.type = BRW_REGISTER_TYPE_F;
   }

   fs_inst *inst = NULL;
   switch (ir->op) {
   case ir_tex:
      inst = emit(fs_inst(FS_OPCODE_TEX, dst));
      break;
   case ir_txb:
      inst = emit(fs_inst(FS_OPCODE_TXB, dst));
      break;
   case ir_txl:
      inst = emit(fs_inst(FS_OPCODE_TXL, dst));
      break;
   case ir_txd:
   case ir_txf:
      assert(!"GLSL 1.30 features unsupported");
      break;
   }
   inst->base_mrf = base_mrf;
   inst->mlen = mlen;

   if (simd16) {
      for (int i = 0; i < 4; i++) {
         emit(fs_inst(BRW_OPCODE_MOV, orig_dst, dst));
         orig_dst.reg_offset++;
         dst.reg_offset += 2;
      }
   }

   return inst;
}

fs_inst *
fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate)
{
   /* gen5's SIMD8 sampler has slots for u, v, r, array index, then
    * optional parameters like shadow comparitor or LOD bias.  If
    * optional parameters aren't present, those base slots are
    * optional and don't need to be included in the message.
    *
    * We don't fill in the unnecessary slots regardless, which may
    * look surprising in the disassembly.
    */
   int mlen = 1; /* g0 header always present. */
   int base_mrf = 1;

   for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i),
                   coordinate));
      coordinate.reg_offset++;
   }
   mlen += ir->coordinate->type->vector_elements;

   if (ir->shadow_comparitor) {
      mlen = MAX2(mlen, 5);

      ir->shadow_comparitor->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;
   }

   fs_inst *inst = NULL;
   switch (ir->op) {
   case ir_tex:
      inst = emit(fs_inst(FS_OPCODE_TEX, dst));
      break;
   case ir_txb:
      ir->lod_info.bias->accept(this);
      mlen = MAX2(mlen, 5);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;

      inst = emit(fs_inst(FS_OPCODE_TXB, dst));
      break;
   case ir_txl:
      ir->lod_info.lod->accept(this);
      mlen = MAX2(mlen, 5);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;

      inst = emit(fs_inst(FS_OPCODE_TXL, dst));
      break;
   case ir_txd:
   case ir_txf:
      assert(!"GLSL 1.30 features unsupported");
      break;
   }
   inst->base_mrf = base_mrf;
   inst->mlen = mlen;

   return inst;
}

void
fs_visitor::visit(ir_texture *ir)
{
   int sampler;
   fs_inst *inst = NULL;

   ir->coordinate->accept(this);
   fs_reg coordinate = this->result;

   /* Should be lowered by do_lower_texture_projection */
   assert(!ir->projector);

   sampler = _mesa_get_sampler_uniform_value(ir->sampler,
                                             ctx->Shader.CurrentProgram,
                                             &brw->fragment_program->Base);
   sampler = c->fp->program.Base.SamplerUnits[sampler];

   /* The 965 requires the EU to do the normalization of GL rectangle
    * texture coordinates.  We use the program parameter state
    * tracking to get the scaling factor.
    */
   if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) {
      struct gl_program_parameter_list *params = c->fp->program.Base.Parameters;
      int tokens[STATE_LENGTH] = {
         STATE_INTERNAL,
         STATE_TEXRECT_SCALE,
         sampler,
         0,
         0
      };

      fs_reg scale_x = fs_reg(UNIFORM, c->prog_data.nr_params);
      fs_reg scale_y = fs_reg(UNIFORM, c->prog_data.nr_params + 1);
      GLuint index = _mesa_add_state_reference(params,
                                               (gl_state_index *)tokens);
      float *vec_values = this->fp->Base.Parameters->ParameterValues[index];

      c->prog_data.param[c->prog_data.nr_params++] = &vec_values[0];
      c->prog_data.param[c->prog_data.nr_params++] = &vec_values[1];

      fs_reg dst = fs_reg(this, ir->coordinate->type);
      fs_reg src = coordinate;
      coordinate = dst;

      emit(fs_inst(BRW_OPCODE_MUL, dst, src, scale_x));
      dst.reg_offset++;
      src.reg_offset++;
      emit(fs_inst(BRW_OPCODE_MUL, dst, src, scale_y));
   }

   /* Writemasking doesn't eliminate channels on SIMD8 texture
    * samples, so don't worry about them.
    */
   fs_reg dst = fs_reg(this, glsl_type::vec4_type);

   if (intel->gen < 5) {
      inst = emit_texture_gen4(ir, dst, coordinate);
   } else {
      inst = emit_texture_gen5(ir, dst, coordinate);
   }

   inst->sampler = sampler;

   this->result = dst;

   if (ir->shadow_comparitor)
      inst->shadow_compare = true;

   if (c->key.tex_swizzles[inst->sampler] != SWIZZLE_NOOP) {
      fs_reg swizzle_dst = fs_reg(this, glsl_type::vec4_type);

      for (int i = 0; i < 4; i++) {
         int swiz = GET_SWZ(c->key.tex_swizzles[inst->sampler], i);
         fs_reg l = swizzle_dst;
         l.reg_offset += i;

         if (swiz == SWIZZLE_ZERO) {
            emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(0.0f)));
         } else if (swiz == SWIZZLE_ONE) {
            emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(1.0f)));
         } else {
            fs_reg r = dst;
            r.reg_offset += GET_SWZ(c->key.tex_swizzles[inst->sampler], i);
            emit(fs_inst(BRW_OPCODE_MOV, l, r));
         }
      }
      this->result = swizzle_dst;
   }
}

void
fs_visitor::visit(ir_swizzle *ir)
{
   ir->val->accept(this);
   fs_reg val = this->result;

   if (ir->type->vector_elements == 1) {
      this->result.reg_offset += ir->mask.x;
      return;
   }

   fs_reg result = fs_reg(this, ir->type);
   this->result = result;

   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
      fs_reg channel = val;
      int swiz = 0;

      switch (i) {
      case 0:
         swiz = ir->mask.x;
         break;
      case 1:
         swiz = ir->mask.y;
         break;
      case 2:
         swiz = ir->mask.z;
         break;
      case 3:
         swiz = ir->mask.w;
         break;
      }

      channel.reg_offset += swiz;
      emit(fs_inst(BRW_OPCODE_MOV, result, channel));
      result.reg_offset++;
   }
}

void
fs_visitor::visit(ir_discard *ir)
{
   fs_reg temp = fs_reg(this, glsl_type::uint_type);

   assert(ir->condition == NULL); /* FINISHME */

   emit(fs_inst(FS_OPCODE_DISCARD_NOT, temp, reg_null));
   emit(fs_inst(FS_OPCODE_DISCARD_AND, reg_null, temp));
   kill_emitted = true;
}

void
fs_visitor::visit(ir_constant *ir)
{
   fs_reg reg(this, ir->type);
   this->result = reg;

   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
      switch (ir->type->base_type) {
      case GLSL_TYPE_FLOAT:
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i])));
         break;
      case GLSL_TYPE_UINT:
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i])));
         break;
      case GLSL_TYPE_INT:
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i])));
         break;
      case GLSL_TYPE_BOOL:
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i])));
         break;
      default:
         assert(!"Non-float/uint/int/bool constant");
      }
      reg.reg_offset++;
   }
}

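/* Emits instructions to evaluate a boolean rvalue into the flag register,
 * so that a following instruction can be predicated on the result.
 */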
void
fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
{
   ir_expression *expr = ir->as_expression();

   if (expr) {
      fs_reg op[2];
      fs_inst *inst;

      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
         assert(expr->operands[i]->type->is_scalar());

         expr->operands[i]->accept(this);
         op[i] = this->result;
      }

      switch (expr->operation) {
      case ir_unop_logic_not:
         inst = emit(fs_inst(BRW_OPCODE_ADD, reg_null, op[0], fs_reg(-1)));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_binop_logic_xor:
         inst = emit(fs_inst(BRW_OPCODE_XOR, reg_null, op[0], op[1]));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_binop_logic_or:
         inst = emit(fs_inst(BRW_OPCODE_OR, reg_null, op[0], op[1]));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_binop_logic_and:
         inst = emit(fs_inst(BRW_OPCODE_AND, reg_null, op[0], op[1]));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_unop_f2b:
         if (intel->gen >= 6) {
            inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f)));
         } else {
            inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null, op[0]));
         }
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_unop_i2b:
         if (intel->gen >= 6) {
            inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0)));
         } else {
            inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null, op[0]));
         }
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_binop_greater:
         inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], op[1]));
         inst->conditional_mod = BRW_CONDITIONAL_G;
         break;
      case ir_binop_gequal:
         inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], op[1]));
         inst->conditional_mod = BRW_CONDITIONAL_GE;
         break;
      case ir_binop_less:
         inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], op[1]));
         inst->conditional_mod = BRW_CONDITIONAL_L;
         break;
      case ir_binop_lequal:
         inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], op[1]));
         inst->conditional_mod = BRW_CONDITIONAL_LE;
         break;
      case ir_binop_equal:
      case ir_binop_all_equal:
         inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], op[1]));
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         break;
      case ir_binop_nequal:
      case ir_binop_any_nequal:
         inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], op[1]));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;
      default:
         assert(!"not reached");
         this->fail = true;
         break;
      }
      return;
   }

   ir->accept(this);

   if (intel->gen >= 6) {
      fs_inst *inst = emit(fs_inst(BRW_OPCODE_AND, reg_null,
                                   this->result, fs_reg(1)));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   } else {
      fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null, this->result));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   }
}

void
fs_visitor::visit(ir_if *ir)
{
   fs_inst *inst;

   /* Don't point the annotation at the if statement, because then it plus
    * the then and else blocks get printed.
    */
   this->base_ir = ir->condition;

   emit_bool_to_cond_code(ir->condition);

   inst = emit(fs_inst(BRW_OPCODE_IF));
   inst->predicated = true;

   foreach_iter(exec_list_iterator, iter, ir->then_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();
      this->base_ir = ir;

      ir->accept(this);
   }

   if (!ir->else_instructions.is_empty()) {
      emit(fs_inst(BRW_OPCODE_ELSE));

      foreach_iter(exec_list_iterator, iter, ir->else_instructions) {
         ir_instruction *ir = (ir_instruction *)iter.get();
         this->base_ir = ir;

         ir->accept(this);
      }
   }

   emit(fs_inst(BRW_OPCODE_ENDIF));
}

void
fs_visitor::visit(ir_loop *ir)
{
   fs_reg counter = reg_undef;

   if (ir->counter) {
      this->base_ir = ir->counter;
      ir->counter->accept(this);
      counter = *(variable_storage(ir->counter));

      if (ir->from) {
         this->base_ir = ir->from;
         ir->from->accept(this);

         emit(fs_inst(BRW_OPCODE_MOV, counter, this->result));
      }
   }

   emit(fs_inst(BRW_OPCODE_DO));

   if (ir->to) {
      this->base_ir = ir->to;
      ir->to->accept(this);

      fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null,
                                   counter, this->result));
      switch (ir->cmp) {
      case ir_binop_equal:
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         break;
      case ir_binop_nequal:
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;
      case ir_binop_gequal:
         inst->conditional_mod = BRW_CONDITIONAL_GE;
         break;
      case ir_binop_lequal:
         inst->conditional_mod = BRW_CONDITIONAL_LE;
         break;
      case ir_binop_greater:
         inst->conditional_mod = BRW_CONDITIONAL_G;
         break;
      case ir_binop_less:
         inst->conditional_mod = BRW_CONDITIONAL_L;
         break;
      default:
         assert(!"not reached: unknown loop condition");
         this->fail = true;
         break;
      }

      inst = emit(fs_inst(BRW_OPCODE_BREAK));
      inst->predicated = true;
   }

   foreach_iter(exec_list_iterator, iter, ir->body_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();

      this->base_ir = ir;
      ir->accept(this);
   }

   if (ir->increment) {
      this->base_ir = ir->increment;
      ir->increment->accept(this);
      emit(fs_inst(BRW_OPCODE_ADD, counter, counter, this->result));
   }

   emit(fs_inst(BRW_OPCODE_WHILE));
}

void
fs_visitor::visit(ir_loop_jump *ir)
{
   switch (ir->mode) {
   case ir_loop_jump::jump_break:
      emit(fs_inst(BRW_OPCODE_BREAK));
      break;
   case ir_loop_jump::jump_continue:
      emit(fs_inst(BRW_OPCODE_CONTINUE));
      break;
   }
}

void
fs_visitor::visit(ir_call *ir)
{
   assert(!"FINISHME");
}

void
fs_visitor::visit(ir_return *ir)
{
   assert(!"FINISHME");
}

void
fs_visitor::visit(ir_function *ir)
{
   /* Ignore function bodies other than main() -- we shouldn't see calls to
    * them since they should all be inlined before we get to ir_to_mesa.
    */
   if (strcmp(ir->name, "main") == 0) {
      const ir_function_signature *sig;
      exec_list empty;

      sig = ir->matching_signature(&empty);

      assert(sig);

      foreach_iter(exec_list_iterator, iter, sig->body) {
         ir_instruction *ir = (ir_instruction *)iter.get();
         this->base_ir = ir;

         ir->accept(this);
      }
   }
}

void
fs_visitor::visit(ir_function_signature *ir)
{
   assert(!"not reached");
   (void)ir;
}

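/** Copies the given instruction into talloc'd storage, tags it with the
 * current annotation and IR, and appends it to the instruction list.
 */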
fs_inst *
fs_visitor::emit(fs_inst inst)
{
   fs_inst *list_inst = new(mem_ctx) fs_inst;
   *list_inst = inst;

   list_inst->annotation = this->current_annotation;
   list_inst->ir = this->base_ir;

   this->instructions.push_tail(list_inst);

   return list_inst;
}

/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
void
fs_visitor::emit_dummy_fs()
{
   /* Everyone's favorite color. */
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 2),
                fs_reg(1.0f)));
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 3),
                fs_reg(0.0f)));
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 4),
                fs_reg(1.0f)));
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 5),
                fs_reg(0.0f)));

   fs_inst *write;
   write = emit(fs_inst(FS_OPCODE_FB_WRITE,
                        fs_reg(0),
                        fs_reg(0)));
   write->base_mrf = 0;
}

/* The register location here is relative to the start of the URB
 * data.  It will get adjusted to be a real location before
 * generate_code() time.
 */
struct brw_reg
fs_visitor::interp_reg(int location, int channel)
{
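   /* Each attribute's plane-equation data occupies two registers, with
    * two channels packed per register, four floats of setup data apart.
    */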
   int regnr = urb_setup[location] * 2 + channel / 2;
   int stride = (channel & 1) * 4;

   assert(urb_setup[location] != -1);

   return brw_vec1_grf(regnr, stride);
}

/** Emits the interpolation for the varying inputs. */
void
fs_visitor::emit_interpolation_setup_gen4()
{
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);

   this->current_annotation = "compute pixel centers";
   this->pixel_x = fs_reg(this, glsl_type::uint_type);
   this->pixel_y = fs_reg(this, glsl_type::uint_type);
   this->pixel_x.type = BRW_REGISTER_TYPE_UW;
   this->pixel_y.type = BRW_REGISTER_TYPE_UW;
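   /* g1 holds the X/Y origin of each 2x2 subspan; adding the 4-bit
    * immediate vectors {0,1,0,1,...} and {0,0,1,1,...} fans it out
    * into per-pixel centers.
    */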
   emit(fs_inst(BRW_OPCODE_ADD,
                this->pixel_x,
                fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
                fs_reg(brw_imm_v(0x10101010))));
   emit(fs_inst(BRW_OPCODE_ADD,
                this->pixel_y,
                fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
                fs_reg(brw_imm_v(0x11001100))));

   this->current_annotation = "compute pixel deltas from v0";
   if (brw->has_pln) {
      this->delta_x = fs_reg(this, glsl_type::vec2_type);
      this->delta_y = this->delta_x;
      this->delta_y.reg_offset++;
   } else {
      this->delta_x = fs_reg(this, glsl_type::float_type);
      this->delta_y = fs_reg(this, glsl_type::float_type);
   }
   emit(fs_inst(BRW_OPCODE_ADD,
                this->delta_x,
                this->pixel_x,
                fs_reg(negate(brw_vec1_grf(1, 0)))));
   emit(fs_inst(BRW_OPCODE_ADD,
                this->delta_y,
                this->pixel_y,
                fs_reg(negate(brw_vec1_grf(1, 1)))));

   this->current_annotation = "compute pos.w and 1/pos.w";
   /* Compute wpos.w.  It's always in our setup, since it's needed to
    * interpolate the other attributes.
    */
   this->wpos_w = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y,
                interp_reg(FRAG_ATTRIB_WPOS, 3)));
   /* Compute the pixel 1/W value from wpos.w. */
   this->pixel_w = fs_reg(this, glsl_type::float_type);
   emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w);
   this->current_annotation = NULL;
}

/** Emits the interpolation for the varying inputs. */
void
fs_visitor::emit_interpolation_setup_gen6()
{
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);

   /* If the pixel centers end up used, the setup is the same as for gen4. */
   this->current_annotation = "compute pixel centers";
   fs_reg int_pixel_x = fs_reg(this, glsl_type::uint_type);
   fs_reg int_pixel_y = fs_reg(this, glsl_type::uint_type);
   int_pixel_x.type = BRW_REGISTER_TYPE_UW;
   int_pixel_y.type = BRW_REGISTER_TYPE_UW;
   emit(fs_inst(BRW_OPCODE_ADD,
                int_pixel_x,
                fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
                fs_reg(brw_imm_v(0x10101010))));
   emit(fs_inst(BRW_OPCODE_ADD,
                int_pixel_y,
                fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
                fs_reg(brw_imm_v(0x11001100))));

   /* As of gen6, we can no longer mix float and int sources.  We have
    * to turn the integer pixel centers into floats for their actual
    * use.
    */
   this->pixel_x = fs_reg(this, glsl_type::float_type);
   this->pixel_y = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(BRW_OPCODE_MOV, this->pixel_x, int_pixel_x));
   emit(fs_inst(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y));

   this->current_annotation = "compute 1/pos.w";
   this->wpos_w = fs_reg(brw_vec8_grf(c->key.source_w_reg, 0));
   this->pixel_w = fs_reg(this, glsl_type::float_type);
   emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w);

   this->delta_x = fs_reg(brw_vec8_grf(2, 0));
   this->delta_y = fs_reg(brw_vec8_grf(3, 0));

   this->current_annotation = NULL;
}

void
fs_visitor::emit_fb_writes()
{
   this->current_annotation = "FB write header";
   GLboolean header_present = GL_TRUE;
   int nr = 0;

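   /* On gen6+ the header can be omitted when no discard was emitted and
    * only a single color region is written.
    */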
   if (intel->gen >= 6 &&
       !this->kill_emitted &&
       c->key.nr_color_regions == 1) {
      header_present = false;
   }

   if (header_present) {
      /* m0, m1 header */
      nr += 2;
   }

   if (c->key.aa_dest_stencil_reg) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
                   fs_reg(brw_vec8_grf(c->key.aa_dest_stencil_reg, 0))));
   }

   /* Reserve space for color. It'll be filled in per MRT below. */
   int color_mrf = nr;
   nr += 4;

   if (c->key.source_depth_to_render_target) {
      if (c->key.computes_depth) {
         /* Hand over gl_FragDepth. */
         assert(this->frag_depth);
         fs_reg depth = *(variable_storage(this->frag_depth));

         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), depth));
      } else {
         /* Pass through the payload depth. */
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
                      fs_reg(brw_vec8_grf(c->key.source_depth_reg, 0))));
      }
   }

   if (c->key.dest_depth_reg) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
                   fs_reg(brw_vec8_grf(c->key.dest_depth_reg, 0))));
   }

   fs_reg color = reg_undef;
   if (this->frag_color)
      color = *(variable_storage(this->frag_color));
   else if (this->frag_data)
      color = *(variable_storage(this->frag_data));

   for (int target = 0; target < c->key.nr_color_regions; target++) {
      this->current_annotation = talloc_asprintf(this->mem_ctx,
                                                 "FB write target %d",
                                                 target);
      if (this->frag_color || this->frag_data) {
         for (int i = 0; i < 4; i++) {
            emit(fs_inst(BRW_OPCODE_MOV,
                         fs_reg(MRF, color_mrf + i),
                         color));
            color.reg_offset++;
         }
      }

      if (this->frag_color)
         color.reg_offset -= 4;

      fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
                                   reg_undef, reg_undef));
      inst->target = target;
      inst->base_mrf = 0;
      inst->mlen = nr;
      if (target == c->key.nr_color_regions - 1)
         inst->eot = true;
      inst->header_present = header_present;
   }

   if (c->key.nr_color_regions == 0) {
      fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
                                   reg_undef, reg_undef));
      inst->base_mrf = 0;
      inst->mlen = nr;
      inst->eot = true;
      inst->header_present = header_present;
   }

   this->current_annotation = NULL;
}

void
fs_visitor::generate_fb_write(fs_inst *inst)
{
   GLboolean eot = inst->eot;
   struct brw_reg implied_header;

   /* The header is two registers: g0 and g1.  g0 is handled by the
    * SEND's implied move, so we only need to set up g1 here.
    */
1848   brw_push_insn_state(p);
1849   brw_set_mask_control(p, BRW_MASK_DISABLE);
1850   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1851
1852   if (inst->header_present) {
1853      if (intel->gen >= 6) {
1854	 brw_MOV(p,
1855		 brw_message_reg(inst->base_mrf),
1856		 brw_vec8_grf(0, 0));
1857	 implied_header = brw_null_reg();
1858      } else {
1859	 implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
1860      }
1861
1862      brw_MOV(p,
1863	      brw_message_reg(inst->base_mrf + 1),
1864	      brw_vec8_grf(1, 0));
1865   } else {
1866      implied_header = brw_null_reg();
1867   }
1868
1869   brw_pop_insn_state(p);
1870
1871   brw_fb_WRITE(p,
1872		8, /* dispatch_width */
1873		retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
1874		inst->base_mrf,
1875		implied_header,
1876		inst->target,
1877		inst->mlen,
1878		0,
1879		eot);
1880}
1881
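/**
 * Emits code for linear interpolation of a varying: essentially the
 * plane equation p0 * delta_x + p1 * delta_y + p3.  PLN does it in one
 * instruction, but requires delta_x/delta_y to live in consecutive
 * registers (even-aligned before gen6), so otherwise we fall back to
 * LINE+MAC.
 */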
1882void
1883fs_visitor::generate_linterp(fs_inst *inst,
1884			     struct brw_reg dst, struct brw_reg *src)
1885{
1886   struct brw_reg delta_x = src[0];
1887   struct brw_reg delta_y = src[1];
1888   struct brw_reg interp = src[2];
1889
1890   if (brw->has_pln &&
1891       delta_y.nr == delta_x.nr + 1 &&
1892       (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
1893      brw_PLN(p, dst, interp, delta_x);
1894   } else {
1895      brw_LINE(p, brw_null_reg(), interp, delta_x);
1896      brw_MAC(p, dst, suboffset(interp, 1), delta_y);
1897   }
1898}
1899
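/**
 * Translates our math opcodes to the hardware math function encoding.
 * On gen6+ math is an ordinary instruction reading register sources
 * (mlen == 0); on earlier gens it's a message to the shared math unit
 * sent from base_mrf.
 */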
1900void
1901fs_visitor::generate_math(fs_inst *inst,
1902			  struct brw_reg dst, struct brw_reg *src)
1903{
1904   int op;
1905
1906   switch (inst->opcode) {
1907   case FS_OPCODE_RCP:
1908      op = BRW_MATH_FUNCTION_INV;
1909      break;
1910   case FS_OPCODE_RSQ:
1911      op = BRW_MATH_FUNCTION_RSQ;
1912      break;
1913   case FS_OPCODE_SQRT:
1914      op = BRW_MATH_FUNCTION_SQRT;
1915      break;
1916   case FS_OPCODE_EXP2:
1917      op = BRW_MATH_FUNCTION_EXP;
1918      break;
1919   case FS_OPCODE_LOG2:
1920      op = BRW_MATH_FUNCTION_LOG;
1921      break;
1922   case FS_OPCODE_POW:
1923      op = BRW_MATH_FUNCTION_POW;
1924      break;
1925   case FS_OPCODE_SIN:
1926      op = BRW_MATH_FUNCTION_SIN;
1927      break;
1928   case FS_OPCODE_COS:
1929      op = BRW_MATH_FUNCTION_COS;
1930      break;
1931   default:
1932      assert(!"not reached: unknown math function");
1933      op = 0;
1934      break;
1935   }
1936
1937   if (intel->gen >= 6) {
1938      assert(inst->mlen == 0);
1939
1940      if (inst->opcode == FS_OPCODE_POW) {
1941	 brw_math2(p, dst, op, src[0], src[1]);
1942      } else {
1943	 brw_math(p, dst,
1944		  op,
1945		  inst->saturate ? BRW_MATH_SATURATE_SATURATE :
1946		  BRW_MATH_SATURATE_NONE,
1947		  0, src[0],
1948		  BRW_MATH_DATA_VECTOR,
1949		  BRW_MATH_PRECISION_FULL);
1950      }
1951   } else {
1952      assert(inst->mlen >= 1);
1953
1954      brw_math(p, dst,
1955	       op,
1956	       inst->saturate ? BRW_MATH_SATURATE_SATURATE :
1957	       BRW_MATH_SATURATE_NONE,
1958	       inst->base_mrf, src[0],
1959	       BRW_MATH_DATA_VECTOR,
1960	       BRW_MATH_PRECISION_FULL);
1961   }
1962}
1963
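/**
 * Picks the sampler message type and SIMD mode for a texture
 * instruction and emits the SEND.  Pre-gen5 hardware encodes the
 * shadow-compare and bias variants in the message length/type, and the
 * SIMD16 message (the pre-gen5 bias case) returns 8 regs instead of 4.
 */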
1964void
1965fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst)
1966{
1967   int msg_type = -1;
1968   int rlen = 4;
1969   uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
1970
1971   if (intel->gen >= 5) {
1972      switch (inst->opcode) {
1973      case FS_OPCODE_TEX:
1974	 if (inst->shadow_compare) {
1975	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5;
1976	 } else {
1977	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5;
1978	 }
1979	 break;
1980      case FS_OPCODE_TXB:
1981	 if (inst->shadow_compare) {
1982	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5;
1983	 } else {
1984	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5;
1985	 }
1986	 break;
1987      }
1988   } else {
1989      switch (inst->opcode) {
1990      case FS_OPCODE_TEX:
1991	 /* Note that G45 and older determine shadow compare and dispatch width
1992	  * from message length for most messages.
1993	  */
1994	 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
1995	 if (inst->shadow_compare) {
1996	    assert(inst->mlen == 5);
1997	 } else {
1998	    assert(inst->mlen <= 6);
1999	 }
2000	 break;
2001      case FS_OPCODE_TXB:
2002	 if (inst->shadow_compare) {
2003	    assert(inst->mlen == 5);
2004	    msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
2005	 } else {
2006	    assert(inst->mlen == 8);
2007	    msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
2008	    simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
2009	 }
2010	 break;
2011      }
2012   }
2013   assert(msg_type != -1);
2014
2015   if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
2016      rlen = 8;
2017      dst = vec16(dst);
2018   }
2019
2020   brw_SAMPLE(p,
2021	      retype(dst, BRW_REGISTER_TYPE_UW),
2022	      inst->base_mrf,
2023	      retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
2024              SURF_INDEX_TEXTURE(inst->sampler),
2025	      inst->sampler,
2026	      WRITEMASK_XYZW,
2027	      msg_type,
2028	      rlen,
2029	      inst->mlen,
2030	      0,
2031	      1,
2032	      simd_mode);
2033}
2034
2035
2036/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
2037 * looking like:
2038 *
2039 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
2040 *
2041 * and we're trying to produce:
2042 *
2043 *           DDX                     DDY
2044 * dst: (ss0.tr - ss0.tl)     (ss0.tl - ss0.bl)
2045 *      (ss0.tr - ss0.tl)     (ss0.tr - ss0.br)
2046 *      (ss0.br - ss0.bl)     (ss0.tl - ss0.bl)
2047 *      (ss0.br - ss0.bl)     (ss0.tr - ss0.br)
2048 *      (ss1.tr - ss1.tl)     (ss1.tl - ss1.bl)
2049 *      (ss1.tr - ss1.tl)     (ss1.tr - ss1.br)
2050 *      (ss1.br - ss1.bl)     (ss1.tl - ss1.bl)
2051 *      (ss1.br - ss1.bl)     (ss1.tr - ss1.br)
2052 *
2053 * and add another set of two more subspans if in 16-pixel dispatch mode.
2054 *
2055 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
2056 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
2057 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
2058 * between each other.  We could probably do it like ddx and swizzle the right
2059 * order later, but bail for now and just produce
2060 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
2061 */
2062void
2063fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
2064{
2065   struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
2066				 BRW_REGISTER_TYPE_F,
2067				 BRW_VERTICAL_STRIDE_2,
2068				 BRW_WIDTH_2,
2069				 BRW_HORIZONTAL_STRIDE_0,
2070				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
2071   struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
2072				 BRW_REGISTER_TYPE_F,
2073				 BRW_VERTICAL_STRIDE_2,
2074				 BRW_WIDTH_2,
2075				 BRW_HORIZONTAL_STRIDE_0,
2076				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
2077   brw_ADD(p, dst, src0, negate(src1));
2078}
2079
2080void
2081fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
2082{
2083   struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
2084				 BRW_REGISTER_TYPE_F,
2085				 BRW_VERTICAL_STRIDE_4,
2086				 BRW_WIDTH_4,
2087				 BRW_HORIZONTAL_STRIDE_0,
2088				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
2089   struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
2090				 BRW_REGISTER_TYPE_F,
2091				 BRW_VERTICAL_STRIDE_4,
2092				 BRW_WIDTH_4,
2093				 BRW_HORIZONTAL_STRIDE_0,
2094				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
2095   brw_ADD(p, dst, src0, negate(src1));
2096}
2097
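/**
 * Loads the complement of the IMASK architecture register into the
 * given mask register, with execution masking disabled so every
 * channel is written.
 */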
2098void
2099fs_visitor::generate_discard_not(fs_inst *inst, struct brw_reg mask)
2100{
2101   brw_push_insn_state(p);
2102   brw_set_mask_control(p, BRW_MASK_DISABLE);
2103   brw_NOT(p, mask, brw_mask_reg(1)); /* IMASK */
2104   brw_pop_insn_state(p);
2105}
2106
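/**
 * ANDs the accumulated kill mask into the pixel mask word in g0, so
 * the render-target write header built from g0 no longer enables the
 * discarded channels.
 */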
2107void
2108fs_visitor::generate_discard_and(fs_inst *inst, struct brw_reg mask)
2109{
2110   struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
2111   mask = brw_uw1_reg(mask.file, mask.nr, 0);
2112
2113   brw_push_insn_state(p);
2114   brw_set_mask_control(p, BRW_MASK_DISABLE);
2115   brw_AND(p, g0, mask, g0);
2116   brw_pop_insn_state(p);
2117}
2118
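/**
 * Maps UNIFORM-file values to the constant registers pushed ahead of
 * the thread payload (the CURBE).  Constants are packed 8 floats to a
 * GRF, so e.g. with first_curbe_grf == 2, uniform slot 10 becomes the
 * scalar region g3.2<0,1,0>.
 */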
2119void
2120fs_visitor::assign_curb_setup()
2121{
2122   c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
2123   c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8;
2124
2125   /* Map the offsets in the UNIFORM file to fixed HW regs. */
2126   foreach_iter(exec_list_iterator, iter, this->instructions) {
2127      fs_inst *inst = (fs_inst *)iter.get();
2128
2129      for (unsigned int i = 0; i < 3; i++) {
2130	 if (inst->src[i].file == UNIFORM) {
2131	    int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
2132	    struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf +
2133						  constant_nr / 8,
2134						  constant_nr % 8);
2135
2136	    inst->src[i].file = FIXED_HW_REG;
2137	    inst->src[i].fixed_hw_reg = brw_reg;
2138	 }
2139      }
2140   }
2141}
2142
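/**
 * Decides which URB slot each used FS input attribute is read from.
 * On gen6+ the attributes land in InputsRead order; before that we
 * have to map each written VS output back to the FS input it feeds.
 */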
2143void
2144fs_visitor::calculate_urb_setup()
2145{
2146   for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
2147      urb_setup[i] = -1;
2148   }
2149
2150   int urb_next = 0;
2151   /* Figure out where each of the incoming setup attributes lands. */
2152   if (intel->gen >= 6) {
2153      for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
2154	 if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i)) {
2155	    urb_setup[i] = urb_next++;
2156	 }
2157      }
2158   } else {
2159      /* FINISHME: The sf doesn't map VS->FS inputs for us very well. */
2160      for (unsigned int i = 0; i < VERT_RESULT_MAX; i++) {
2161	 if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) {
2162	    int fp_index;
2163
2164	    if (i >= VERT_RESULT_VAR0)
2165	       fp_index = i - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0);
2166	    else if (i <= VERT_RESULT_TEX7)
2167	       fp_index = i;
2168	    else
2169	       fp_index = -1;
2170
2171	    if (fp_index >= 0)
2172	       urb_setup[fp_index] = urb_next++;
2173	 }
2174      }
2175   }
2176
2177   /* Each attribute is 4 setup channels, each of which is half a reg. */
2178   c->prog_data.urb_read_length = urb_next * 2;
2179}
2180
2181void
2182fs_visitor::assign_urb_setup()
2183{
2184   int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length;
2185
2186   /* Offset all the urb_setup[] index by the actual position of the
2187    * setup regs, now that the location of the constants has been chosen.
2188    */
2189   foreach_iter(exec_list_iterator, iter, this->instructions) {
2190      fs_inst *inst = (fs_inst *)iter.get();
2191
2192      if (inst->opcode != FS_OPCODE_LINTERP)
2193	 continue;
2194
2195      assert(inst->src[2].file == FIXED_HW_REG);
2196
2197      inst->src[2].fixed_hw_reg.nr += urb_start;
2198   }
2199
2200   this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length;
2201}
2202
2203static void
2204assign_reg(int *reg_hw_locations, fs_reg *reg)
2205{
2206   if (reg->file == GRF && reg->reg != 0) {
2207      assert(reg->reg_offset >= 0);
2208      reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset;
2209      reg->reg = 0;
2210   }
2211}
2212
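/**
 * Trivial allocator used for debugging: assigns each virtual GRF to
 * consecutive hardware registers after the payload, with no reuse.
 */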
2213void
2214fs_visitor::assign_regs_trivial()
2215{
2216   int last_grf = 0;
2217   int hw_reg_mapping[this->virtual_grf_next];
2218   int i;
2219
2220   hw_reg_mapping[0] = 0;
2221   hw_reg_mapping[1] = this->first_non_payload_grf;
2222   for (i = 2; i < this->virtual_grf_next; i++) {
2223      hw_reg_mapping[i] = (hw_reg_mapping[i - 1] +
2224			   this->virtual_grf_sizes[i - 1]);
2225   }
2226   last_grf = hw_reg_mapping[i - 1] + this->virtual_grf_sizes[i - 1];
2227
2228   foreach_iter(exec_list_iterator, iter, this->instructions) {
2229      fs_inst *inst = (fs_inst *)iter.get();
2230
2231      assign_reg(hw_reg_mapping, &inst->dst);
2232      assign_reg(hw_reg_mapping, &inst->src[0]);
2233      assign_reg(hw_reg_mapping, &inst->src[1]);
2234   }
2235
2236   this->grf_used = last_grf + 1;
2237}
2238
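/**
 * Graph-coloring register allocation.  We build one register class per
 * virtual-GRF size in use; a class register stands for a run of that
 * many hardware GRFs, and any two class registers whose runs overlap
 * are marked as conflicting.  On gen5 an extra class of even-aligned
 * pairs holds delta_x/delta_y so PLN can still be used.
 */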
2239void
2240fs_visitor::assign_regs()
2241{
2242   int last_grf = 0;
2243   int hw_reg_mapping[this->virtual_grf_next + 1];
2244   int base_reg_count = BRW_MAX_GRF - this->first_non_payload_grf;
2245   int class_sizes[base_reg_count];
2246   int class_count = 0;
2247   int aligned_pair_class = -1;
2248
2249   /* Set up the register classes.
2250    *
2251    * The base registers store a scalar value.  For texture samples,
2252    * we get virtual GRFs composed of 4 contiguous hw registers.  For
2253    * structures and arrays, we store them as contiguous larger things
2254    * than that, though we should be able to do better most of the
2255    * time.
2256    */
2257   class_sizes[class_count++] = 1;
2258   if (brw->has_pln && intel->gen < 6) {
2259      /* Always set up the (unaligned) pairs for gen5, so we can find
2260       * them for making the aligned pair class.
2261       */
2262      class_sizes[class_count++] = 2;
2263   }
2264   for (int r = 1; r < this->virtual_grf_next; r++) {
2265      int i;
2266
2267      for (i = 0; i < class_count; i++) {
2268	 if (class_sizes[i] == this->virtual_grf_sizes[r])
2269	    break;
2270      }
2271      if (i == class_count) {
2272	 if (this->virtual_grf_sizes[r] >= base_reg_count) {
2273	    fprintf(stderr, "Object too large to register allocate.\n");
2274	    this->fail = true;
2275	 }
2276
2277	 class_sizes[class_count++] = this->virtual_grf_sizes[r];
2278      }
2279   }
2280
2281   int ra_reg_count = 0;
2282   int class_base_reg[class_count];
2283   int class_reg_count[class_count];
2284   int classes[class_count + 1];
2285
2286   for (int i = 0; i < class_count; i++) {
2287      class_base_reg[i] = ra_reg_count;
2288      class_reg_count[i] = base_reg_count - (class_sizes[i] - 1);
2289      ra_reg_count += class_reg_count[i];
2290   }
2291
2292   struct ra_regs *regs = ra_alloc_reg_set(ra_reg_count);
2293   for (int i = 0; i < class_count; i++) {
2294      classes[i] = ra_alloc_reg_class(regs);
2295
2296      for (int i_r = 0; i_r < class_reg_count[i]; i_r++) {
2297	 ra_class_add_reg(regs, classes[i], class_base_reg[i] + i_r);
2298      }
2299
2300      /* Add conflicts between our contiguous registers aliasing
2301       * base regs and other register classes' contiguous registers
2302       * that alias base regs, or the base regs themselves for classes[0].
2303       */
2304      for (int c = 0; c <= i; c++) {
2305	 for (int i_r = 0; i_r < class_reg_count[i]; i_r++) {
2306	    for (int c_r = MAX2(0, i_r - (class_sizes[c] - 1));
2307		 c_r < MIN2(class_reg_count[c], i_r + class_sizes[i]);
2308		 c_r++) {
2309
2310	       if (0) {
2311		  printf("%d/%d conflicts %d/%d\n",
2312			 class_sizes[i], this->first_non_payload_grf + i_r,
2313			 class_sizes[c], this->first_non_payload_grf + c_r);
2314	       }
2315
2316	       ra_add_reg_conflict(regs,
2317				   class_base_reg[i] + i_r,
2318				   class_base_reg[c] + c_r);
2319	    }
2320	 }
2321      }
2322   }
2323
2324   /* Add a special class for aligned pairs, which we'll put delta_x/y
2325    * in on gen5 so that we can do PLN.
2326    */
2327   if (brw->has_pln && intel->gen < 6) {
2328      int reg_count = (base_reg_count - 1) / 2;
2329      int unaligned_pair_class = 1;
2330      assert(class_sizes[unaligned_pair_class] == 2);
2331
2332      aligned_pair_class = class_count;
2333      classes[aligned_pair_class] = ra_alloc_reg_class(regs);
2334      class_sizes[aligned_pair_class] = 2;
2335      class_base_reg[aligned_pair_class] = 0;
2336      class_reg_count[aligned_pair_class] = 0;
2337      int start = (this->first_non_payload_grf & 1) ? 1 : 0;
2338
2339      for (int i = 0; i < reg_count; i++) {
2340	 ra_class_add_reg(regs, classes[aligned_pair_class],
2341			  class_base_reg[unaligned_pair_class] + i * 2 + start);
2342      }
2343      class_count++;
2344   }
2345
2346   ra_set_finalize(regs);
2347
2348   struct ra_graph *g = ra_alloc_interference_graph(regs,
2349						    this->virtual_grf_next);
2350   /* Node 0 is just a placeholder to keep virtual_grf[] mapping 1:1
2351    * with nodes.
2352    */
2353   ra_set_node_class(g, 0, classes[0]);
2354
2355   for (int i = 1; i < this->virtual_grf_next; i++) {
2356      for (int c = 0; c < class_count; c++) {
2357	 if (class_sizes[c] == this->virtual_grf_sizes[i]) {
2358	    if (aligned_pair_class >= 0 &&
2359		this->delta_x.reg == i) {
2360	       ra_set_node_class(g, i, classes[aligned_pair_class]);
2361	    } else {
2362	       ra_set_node_class(g, i, classes[c]);
2363	    }
2364	    break;
2365	 }
2366      }
2367
2368      for (int j = 1; j < i; j++) {
2369	 if (virtual_grf_interferes(i, j)) {
2370	    ra_add_node_interference(g, i, j);
2371	 }
2372      }
2373   }
2374
2375   /* FINISHME: Handle spilling */
2376   if (!ra_allocate_no_spills(g)) {
2377      fprintf(stderr, "Failed to allocate registers.\n");
2378      this->fail = true;
2379      return;
2380   }
2381
2382   /* Get the chosen virtual registers for each node, and map virtual
2383    * regs in the register classes back down to real hardware reg
2384    * numbers.
2385    */
2386   hw_reg_mapping[0] = 0; /* unused */
2387   for (int i = 1; i < this->virtual_grf_next; i++) {
2388      int reg = ra_get_node_reg(g, i);
2389      int hw_reg = -1;
2390
2391      for (int c = 0; c < class_count; c++) {
2392	 if (reg >= class_base_reg[c] &&
2393	     reg < class_base_reg[c] + class_reg_count[c]) {
2394	    hw_reg = reg - class_base_reg[c];
2395	    break;
2396	 }
2397      }
2398
2399      assert(hw_reg >= 0);
2400      hw_reg_mapping[i] = this->first_non_payload_grf + hw_reg;
2401      last_grf = MAX2(last_grf,
2402		      hw_reg_mapping[i] + this->virtual_grf_sizes[i] - 1);
2403   }
2404
2405   foreach_iter(exec_list_iterator, iter, this->instructions) {
2406      fs_inst *inst = (fs_inst *)iter.get();
2407
2408      assign_reg(hw_reg_mapping, &inst->dst);
2409      assign_reg(hw_reg_mapping, &inst->src[0]);
2410      assign_reg(hw_reg_mapping, &inst->src[1]);
2411   }
2412
2413   this->grf_used = last_grf + 1;
2414
2415   talloc_free(g);
2416   talloc_free(regs);
2417}
2418
2419/**
2420 * Split large virtual GRFs into separate components if we can.
2421 *
2422 * This is mostly duplicated with what brw_fs_vector_splitting does,
2423 * but that's really conservative because it's afraid of doing
2424 * splitting that doesn't result in real progress after the rest of
2425 * the optimization phases, which would cause infinite looping in
2426 * optimization.  We can do it once here, safely.  This also has the
2427 * opportunity to split interpolated values, or maybe even uniforms,
2428 * which we don't have at the IR level.
2429 *
2430 * We want to split, because virtual GRFs are what we register
2431 * allocate and spill (due to contiguousness requirements for some
2432 * instructions), and they're what we naturally generate in the
2433 * codegen process, but most virtual GRFs don't actually need to be
2434 * contiguous sets of GRFs.  If we split, we'll end up with reduced
2435 * live intervals and better dead code elimination and coalescing.
2436 */
2437void
2438fs_visitor::split_virtual_grfs()
2439{
2440   int num_vars = this->virtual_grf_next;
2441   bool split_grf[num_vars];
2442   int new_virtual_grf[num_vars];
2443
2444   /* Try to split anything > 0 sized. */
2445   for (int i = 0; i < num_vars; i++) {
2446      if (this->virtual_grf_sizes[i] != 1)
2447	 split_grf[i] = true;
2448      else
2449	 split_grf[i] = false;
2450   }
2451
2452   if (brw->has_pln) {
2453      /* PLN opcodes rely on the delta_xy being contiguous. */
2454      split_grf[this->delta_x.reg] = false;
2455   }
2456
2457   foreach_iter(exec_list_iterator, iter, this->instructions) {
2458      fs_inst *inst = (fs_inst *)iter.get();
2459
2460      /* Texturing produces 4 contiguous registers, so no splitting. */
2461      if ((inst->opcode == FS_OPCODE_TEX ||
2462	   inst->opcode == FS_OPCODE_TXB ||
2463	   inst->opcode == FS_OPCODE_TXL) &&
2464	  inst->dst.file == GRF) {
2465	 split_grf[inst->dst.reg] = false;
2466      }
2467   }
2468
2469   /* Allocate new space for split regs.  Note that the virtual
2470    * numbers will be contiguous.
2471    */
2472   for (int i = 0; i < num_vars; i++) {
2473      if (split_grf[i]) {
2474	 new_virtual_grf[i] = virtual_grf_alloc(1);
2475	 for (int j = 2; j < this->virtual_grf_sizes[i]; j++) {
2476	    int reg = virtual_grf_alloc(1);
2477	    assert(reg == new_virtual_grf[i] + j - 1);
2478	 }
2479	 this->virtual_grf_sizes[i] = 1;
2480      }
2481   }
2482
2483   foreach_iter(exec_list_iterator, iter, this->instructions) {
2484      fs_inst *inst = (fs_inst *)iter.get();
2485
2486      if (inst->dst.file == GRF &&
2487	  split_grf[inst->dst.reg] &&
2488	  inst->dst.reg_offset != 0) {
2489	 inst->dst.reg = (new_virtual_grf[inst->dst.reg] +
2490			  inst->dst.reg_offset - 1);
2491	 inst->dst.reg_offset = 0;
2492      }
2493      for (int i = 0; i < 3; i++) {
2494	 if (inst->src[i].file == GRF &&
2495	     split_grf[inst->src[i].reg] &&
2496	     inst->src[i].reg_offset != 0) {
2497	    inst->src[i].reg = (new_virtual_grf[inst->src[i].reg] +
2498				inst->src[i].reg_offset - 1);
2499	    inst->src[i].reg_offset = 0;
2500	 }
2501      }
2502   }
2503}
2504
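/**
 * Computes a single conservative [def, use] interval for each virtual
 * GRF.  Anything defined or used inside a loop is treated as live for
 * the whole loop, since we don't do real dataflow across the backedge.
 */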
2505void
2506fs_visitor::calculate_live_intervals()
2507{
2508   int num_vars = this->virtual_grf_next;
2509   int *def = talloc_array(mem_ctx, int, num_vars);
2510   int *use = talloc_array(mem_ctx, int, num_vars);
2511   int loop_depth = 0;
2512   int loop_start = 0;
2513
2514   for (int i = 0; i < num_vars; i++) {
2515      def[i] = 1 << 30;
2516      use[i] = -1;
2517   }
2518
2519   int ip = 0;
2520   foreach_iter(exec_list_iterator, iter, this->instructions) {
2521      fs_inst *inst = (fs_inst *)iter.get();
2522
2523      if (inst->opcode == BRW_OPCODE_DO) {
2524	 if (loop_depth++ == 0)
2525	    loop_start = ip;
2526      } else if (inst->opcode == BRW_OPCODE_WHILE) {
2527	 loop_depth--;
2528
2529	 if (loop_depth == 0) {
2530	    /* FINISHME:
2531	     *
2532	     * Patches up any vars marked for use within the loop as
2533	     * live until the end.  This is conservative, as there
2534	     * will often be variables defined and used inside the
2535	     * loop but dead at the end of the loop body.
2536	     */
2537	    for (int i = 0; i < num_vars; i++) {
2538	       if (use[i] == loop_start) {
2539		  use[i] = ip;
2540	       }
2541	    }
2542	 }
2543      } else {
2544	 int eip = ip;
2545
2546	 if (loop_depth)
2547	    eip = loop_start;
2548
2549	 for (unsigned int i = 0; i < 3; i++) {
2550	    if (inst->src[i].file == GRF && inst->src[i].reg != 0) {
2551	       use[inst->src[i].reg] = MAX2(use[inst->src[i].reg], eip);
2552	    }
2553	 }
2554	 if (inst->dst.file == GRF && inst->dst.reg != 0) {
2555	    def[inst->dst.reg] = MIN2(def[inst->dst.reg], eip);
2556	 }
2557      }
2558
2559      ip++;
2560   }
2561
2562   talloc_free(this->virtual_grf_def);
2563   talloc_free(this->virtual_grf_use);
2564   this->virtual_grf_def = def;
2565   this->virtual_grf_use = use;
2566}
2567
2568/**
2569 * Attempts to move immediate constants into the immediate
2570 * constant slot of following instructions.
2571 *
2572 * Immediate constants are a bit tricky -- they have to be in the last
2573 * operand slot, and you can't do abs/negate on them.
2574 */
2575
2576bool
2577fs_visitor::propagate_constants()
2578{
2579   bool progress = false;
2580
2581   foreach_iter(exec_list_iterator, iter, this->instructions) {
2582      fs_inst *inst = (fs_inst *)iter.get();
2583
2584      if (inst->opcode != BRW_OPCODE_MOV ||
2585	  inst->predicated ||
2586	  inst->dst.file != GRF || inst->src[0].file != IMM ||
2587	  inst->dst.type != inst->src[0].type)
2588	 continue;
2589
2590      /* Don't bother with cases where we should have had the
2591       * operation on the constant folded in GLSL already.
2592       */
2593      if (inst->saturate)
2594	 continue;
2595
2596      /* Found a move of a constant to a GRF.  Find anything else using the GRF
2597       * before it's written, and replace it with the constant if we can.
2598       */
2599      exec_list_iterator scan_iter = iter;
2600      scan_iter.next();
2601      for (; scan_iter.has_next(); scan_iter.next()) {
2602	 fs_inst *scan_inst = (fs_inst *)scan_iter.get();
2603
2604	 if (scan_inst->opcode == BRW_OPCODE_DO ||
2605	     scan_inst->opcode == BRW_OPCODE_WHILE ||
2606	     scan_inst->opcode == BRW_OPCODE_ELSE ||
2607	     scan_inst->opcode == BRW_OPCODE_ENDIF) {
2608	    break;
2609	 }
2610
2611	 for (int i = 2; i >= 0; i--) {
2612	    if (scan_inst->src[i].file != GRF ||
2613		scan_inst->src[i].reg != inst->dst.reg ||
2614		scan_inst->src[i].reg_offset != inst->dst.reg_offset)
2615	       continue;
2616
2617	    /* Don't bother with cases where we should have had the
2618	     * operation on the constant folded in GLSL already.
2619	     */
2620	    if (scan_inst->src[i].negate || scan_inst->src[i].abs)
2621	       continue;
2622
2623	    switch (scan_inst->opcode) {
2624	    case BRW_OPCODE_MOV:
2625	       scan_inst->src[i] = inst->src[0];
2626	       progress = true;
2627	       break;
2628
2629	    case BRW_OPCODE_MUL:
2630	    case BRW_OPCODE_ADD:
2631	       if (i == 1) {
2632		  scan_inst->src[i] = inst->src[0];
2633		  progress = true;
2634	       } else if (i == 0 && scan_inst->src[1].file != IMM) {
2635		  /* Fit this constant in by commuting the operands */
2636		  scan_inst->src[0] = scan_inst->src[1];
2637		  scan_inst->src[1] = inst->src[0];
		  progress = true;
2638	       }
2639	       break;
2640	    case BRW_OPCODE_CMP:
2641	       if (i == 1) {
2642		  scan_inst->src[i] = inst->src[0];
2643		  progress = true;
2644	       }
	       break;
2645	    }
2646	 }
2647
2648	 if (scan_inst->dst.file == GRF &&
2649	     scan_inst->dst.reg == inst->dst.reg &&
2650	     (scan_inst->dst.reg_offset == inst->dst.reg_offset ||
2651	      scan_inst->opcode == FS_OPCODE_TEX)) {
2652	    break;
2653	 }
2654      }
2655   }
2656
2657   return progress;
2658}
2659/**
2660 * Must be called after calculate_live_intervals() to remove unused
2661 * writes to registers -- register allocation will fail otherwise
2662 * because something def'd but not used won't be considered to
2663 * interfere with other regs.
2664 */
2665bool
2666fs_visitor::dead_code_eliminate()
2667{
2668   bool progress = false;
2669   int num_vars = this->virtual_grf_next;
2670   bool dead[num_vars];
2671
2672   for (int i = 0; i < num_vars; i++) {
2673      dead[i] = this->virtual_grf_def[i] >= this->virtual_grf_use[i];
2674
2675      if (dead[i]) {
2676	 /* Mark off its interval so it won't interfere with anything. */
2677	 this->virtual_grf_def[i] = -1;
2678	 this->virtual_grf_use[i] = -1;
2679      }
2680   }
2681
2682   foreach_iter(exec_list_iterator, iter, this->instructions) {
2683      fs_inst *inst = (fs_inst *)iter.get();
2684
2685      if (inst->dst.file == GRF && dead[inst->dst.reg]) {
2686	 inst->remove();
2687	 progress = true;
2688      }
2689   }
2690
2691   return progress;
2692}
2693
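/**
 * Coalesces raw GRF-to-GRF moves: later readers of the move's
 * destination are rewritten to read its source, provided neither
 * register is written again afterwards.  We give up at flow control
 * rather than reasoning across it.
 */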
2694bool
2695fs_visitor::register_coalesce()
2696{
2697   bool progress = false;
2698
2699   foreach_iter(exec_list_iterator, iter, this->instructions) {
2700      fs_inst *inst = (fs_inst *)iter.get();
2701
2702      if (inst->opcode != BRW_OPCODE_MOV ||
2703	  inst->predicated ||
2704	  inst->saturate ||
2705	  inst->dst.file != GRF || inst->src[0].file != GRF ||
2706	  inst->dst.type != inst->src[0].type)
2707	 continue;
2708
2709      /* Found a move of a GRF to a GRF.  Let's see if we can coalesce
2710       * them: check for no writes to either one until the exit of the
2711       * program.
2712       */
2713      bool interfered = false;
2714      exec_list_iterator scan_iter = iter;
2715      scan_iter.next();
2716      for (; scan_iter.has_next(); scan_iter.next()) {
2717	 fs_inst *scan_inst = (fs_inst *)scan_iter.get();
2718
2719	 if (scan_inst->opcode == BRW_OPCODE_DO ||
2720	     scan_inst->opcode == BRW_OPCODE_WHILE ||
2721	     scan_inst->opcode == BRW_OPCODE_ENDIF) {
2722	    interfered = true;
2723	    iter = scan_iter;
2724	    break;
2725	 }
2726
2727	 if (scan_inst->dst.file == GRF) {
2728	    if (scan_inst->dst.reg == inst->dst.reg &&
2729		(scan_inst->dst.reg_offset == inst->dst.reg_offset ||
2730		 scan_inst->opcode == FS_OPCODE_TEX)) {
2731	       interfered = true;
2732	       break;
2733	    }
2734	    if (scan_inst->dst.reg == inst->src[0].reg &&
2735		(scan_inst->dst.reg_offset == inst->src[0].reg_offset ||
2736		 scan_inst->opcode == FS_OPCODE_TEX)) {
2737	       interfered = true;
2738	       break;
2739	    }
2740	 }
2741      }
2742      if (interfered) {
2743	 continue;
2744      }
2745
2746      /* Update live interval so we don't have to recalculate. */
2747      this->virtual_grf_use[inst->src[0].reg] = MAX2(virtual_grf_use[inst->src[0].reg],
2748						     virtual_grf_use[inst->dst.reg]);
2749
2750      /* Rewrite the later usage to point at the source of the move to
2751       * be removed.
2752       */
2753      for (exec_list_iterator scan_iter = iter; scan_iter.has_next();
2754	   scan_iter.next()) {
2755	 fs_inst *scan_inst = (fs_inst *)scan_iter.get();
2756
2757	 for (int i = 0; i < 3; i++) {
2758	    if (scan_inst->src[i].file == GRF &&
2759		scan_inst->src[i].reg == inst->dst.reg &&
2760		scan_inst->src[i].reg_offset == inst->dst.reg_offset) {
2761	       scan_inst->src[i].reg = inst->src[0].reg;
2762	       scan_inst->src[i].reg_offset = inst->src[0].reg_offset;
2763	       scan_inst->src[i].abs |= inst->src[0].abs;
2764	       scan_inst->src[i].negate ^= inst->src[0].negate;
2765	    }
2766	 }
2767      }
2768
2769      inst->remove();
2770      progress = true;
2771   }
2772
2773   return progress;
2774}
2775
2776
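/**
 * Looks for MOVs from a GRF to an MRF where the GRF is not read again,
 * and retargets the instruction that computed the GRF value to write
 * directly into the MRF instead, removing the MOV.
 */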
2777bool
2778fs_visitor::compute_to_mrf()
2779{
2780   bool progress = false;
2781   int next_ip = 0;
2782
2783   foreach_iter(exec_list_iterator, iter, this->instructions) {
2784      fs_inst *inst = (fs_inst *)iter.get();
2785
2786      int ip = next_ip;
2787      next_ip++;
2788
2789      if (inst->opcode != BRW_OPCODE_MOV ||
2790	  inst->predicated ||
2791	  inst->dst.file != MRF || inst->src[0].file != GRF ||
2792	  inst->dst.type != inst->src[0].type ||
2793	  inst->src[0].abs || inst->src[0].negate)
2794	 continue;
2795
2796      /* Can't compute-to-MRF this GRF if someone else was going to
2797       * read it later.
2798       */
2799      if (this->virtual_grf_use[inst->src[0].reg] > ip)
2800	 continue;
2801
2802      /* Found a move of a GRF to a MRF.  Let's see if we can go
2803       * rewrite the thing that made this GRF to write into the MRF.
2804       */
2805      bool found = false;
2806      fs_inst *scan_inst;
2807      for (scan_inst = (fs_inst *)inst->prev;
2808	   scan_inst->prev != NULL;
2809	   scan_inst = (fs_inst *)scan_inst->prev) {
2810	 /* We don't handle flow control here.  Most computation of
2811	  * values that end up in MRFs are shortly before the MRF
2812	  * write anyway.
2813	  */
2814	 if (scan_inst->opcode == BRW_OPCODE_DO ||
2815	     scan_inst->opcode == BRW_OPCODE_WHILE ||
2816	     scan_inst->opcode == BRW_OPCODE_ENDIF) {
2817	    break;
2818	 }
2819
2820	 /* You can't read from an MRF, so if someone else reads our
2821	  * MRF's source GRF that we wanted to rewrite, that stops us.
2822	  */
2823	 bool interfered = false;
2824	 for (int i = 0; i < 3; i++) {
2825	    if (scan_inst->src[i].file == GRF &&
2826		scan_inst->src[i].reg == inst->src[0].reg &&
2827		scan_inst->src[i].reg_offset == inst->src[0].reg_offset) {
2828	       interfered = true;
2829	    }
2830	 }
2831	 if (interfered)
2832	    break;
2833
2834	 if (scan_inst->dst.file == MRF &&
2835	     scan_inst->dst.hw_reg == inst->dst.hw_reg) {
2836	    /* Somebody else wrote our MRF here, so we can't
2837	     * compute-to-MRF before that.
2838	     */
2839	    break;
2840	 }
2841
2842	 if (scan_inst->mlen > 0) {
2843	    /* Found a SEND instruction, which will do some amount of
2844	     * implied write that may overwrite our MRF that we were
2845	     * hoping to compute-to-MRF somewhere above it.  No message
2846	     * we emit implied-writes more than 2 MRFs past base_mrf,
2847	     * though.
2848	     */
2849	    int implied_write_len = MIN2(scan_inst->mlen, 2);
2850	    if (inst->dst.hw_reg >= scan_inst->base_mrf &&
2851		inst->dst.hw_reg < scan_inst->base_mrf + implied_write_len) {
2852	       break;
2853	    }
2854	 }
2855
2856	 if (scan_inst->dst.file == GRF &&
2857	     scan_inst->dst.reg == inst->src[0].reg) {
2858	    /* Found the last thing to write our reg we want to turn
2859	     * into a compute-to-MRF.
2860	     */
2861
2862	    if (scan_inst->opcode == FS_OPCODE_TEX) {
2863	       /* Texturing writes several contiguous regs, so we can't
2864		* compute-to-mrf that.
2865		*/
2866	       break;
2867	    }
2868
2869	    /* If it's predicated, it (probably) didn't populate all
2870	     * the channels.
2871	     */
2872	    if (scan_inst->predicated)
2873	       break;
2874
2875	    /* SEND instructions can't have MRF as a destination. */
2876	    if (scan_inst->mlen)
2877	       break;
2878
2879	    if (intel->gen >= 6) {
2880	       /* gen6 math instructions must have the destination be
2881		* GRF, so no compute-to-MRF for them.
2882		*/
2883	       if (scan_inst->opcode == FS_OPCODE_RCP ||
2884		   scan_inst->opcode == FS_OPCODE_RSQ ||
2885		   scan_inst->opcode == FS_OPCODE_SQRT ||
2886		   scan_inst->opcode == FS_OPCODE_EXP2 ||
2887		   scan_inst->opcode == FS_OPCODE_LOG2 ||
2888		   scan_inst->opcode == FS_OPCODE_SIN ||
2889		   scan_inst->opcode == FS_OPCODE_COS ||
2890		   scan_inst->opcode == FS_OPCODE_POW) {
2891		  break;
2892	       }
2893	    }
2894
2895	    if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) {
2896	       /* Found the creator of our MRF's source value. */
2897	       found = true;
2898	       break;
2899	    }
2900	 }
2901      }
2902      if (found) {
2903	 scan_inst->dst.file = MRF;
2904	 scan_inst->dst.hw_reg = inst->dst.hw_reg;
2905	 scan_inst->saturate |= inst->saturate;
2906	 inst->remove();
2907	 progress = true;
2908      }
2909   }
2910
2911   return progress;
2912}
2913
2914bool
2915fs_visitor::virtual_grf_interferes(int a, int b)
2916{
2917   int start = MAX2(this->virtual_grf_def[a], this->virtual_grf_def[b]);
2918   int end = MIN2(this->virtual_grf_use[a], this->virtual_grf_use[b]);
2919
2920   /* For dead code, just check if the def interferes with the other range. */
2921   if (this->virtual_grf_use[a] == -1) {
2922      return (this->virtual_grf_def[a] >= this->virtual_grf_def[b] &&
2923	      this->virtual_grf_def[a] < this->virtual_grf_use[b]);
2924   }
2925   if (this->virtual_grf_use[b] == -1) {
2926      return (this->virtual_grf_def[b] >= this->virtual_grf_def[a] &&
2927	      this->virtual_grf_def[b] < this->virtual_grf_use[a]);
2928   }
2929
2930   return start < end;
2931}
2932
2933static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
2934{
2935   struct brw_reg brw_reg;
2936
2937   switch (reg->file) {
2938   case GRF:
2939   case ARF:
2940   case MRF:
2941      brw_reg = brw_vec8_reg(reg->file, reg->hw_reg, 0);
2943      brw_reg = retype(brw_reg, reg->type);
2944      break;
2945   case IMM:
2946      switch (reg->type) {
2947      case BRW_REGISTER_TYPE_F:
2948	 brw_reg = brw_imm_f(reg->imm.f);
2949	 break;
2950      case BRW_REGISTER_TYPE_D:
2951	 brw_reg = brw_imm_d(reg->imm.i);
2952	 break;
2953      case BRW_REGISTER_TYPE_UD:
2954	 brw_reg = brw_imm_ud(reg->imm.u);
2955	 break;
2956      default:
2957	 assert(!"not reached");
2958	 break;
2959      }
2960      break;
2961   case FIXED_HW_REG:
2962      brw_reg = reg->fixed_hw_reg;
2963      break;
2964   case BAD_FILE:
2965      /* Probably unused. */
2966      brw_reg = brw_null_reg();
2967      break;
2968   case UNIFORM:
2969      assert(!"not reached");
2970      brw_reg = brw_null_reg();
2971      break;
2972   }
2973   if (reg->abs)
2974      brw_reg = brw_abs(brw_reg);
2975   if (reg->negate)
2976      brw_reg = negate(brw_reg);
2977
2978   return brw_reg;
2979}
2980
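/**
 * Walks the FS IR and emits native gen instructions into the
 * brw_compile stream.  IF/ELSE/ENDIF and DO/WHILE are tracked on small
 * stacks so jump targets (and BREAK/CONT counts) can be patched, and
 * each native instruction is tagged with the annotation that produced
 * it for the DEBUG_WM disassembly path.
 */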
2981void
2982fs_visitor::generate_code()
2983{
2984   unsigned int annotation_len = 0;
2985   int last_native_inst = 0;
2986   struct brw_instruction *if_stack[16], *loop_stack[16];
2987   int if_stack_depth = 0, loop_stack_depth = 0;
2988   int if_depth_in_loop[16];
2989
2990   if_depth_in_loop[loop_stack_depth] = 0;
2991
2992   memset(&if_stack, 0, sizeof(if_stack));
2993   foreach_iter(exec_list_iterator, iter, this->instructions) {
2994      fs_inst *inst = (fs_inst *)iter.get();
2995      struct brw_reg src[3], dst;
2996
2997      for (unsigned int i = 0; i < 3; i++) {
2998	 src[i] = brw_reg_from_fs_reg(&inst->src[i]);
2999      }
3000      dst = brw_reg_from_fs_reg(&inst->dst);
3001
3002      brw_set_conditionalmod(p, inst->conditional_mod);
3003      brw_set_predicate_control(p, inst->predicated);
3004
3005      switch (inst->opcode) {
3006      case BRW_OPCODE_MOV:
3007	 brw_MOV(p, dst, src[0]);
3008	 break;
3009      case BRW_OPCODE_ADD:
3010	 brw_ADD(p, dst, src[0], src[1]);
3011	 break;
3012      case BRW_OPCODE_MUL:
3013	 brw_MUL(p, dst, src[0], src[1]);
3014	 break;
3015
3016      case BRW_OPCODE_FRC:
3017	 brw_FRC(p, dst, src[0]);
3018	 break;
3019      case BRW_OPCODE_RNDD:
3020	 brw_RNDD(p, dst, src[0]);
3021	 break;
3022      case BRW_OPCODE_RNDE:
3023	 brw_RNDE(p, dst, src[0]);
3024	 break;
3025      case BRW_OPCODE_RNDZ:
3026	 brw_RNDZ(p, dst, src[0]);
3027	 break;
3028
3029      case BRW_OPCODE_AND:
3030	 brw_AND(p, dst, src[0], src[1]);
3031	 break;
3032      case BRW_OPCODE_OR:
3033	 brw_OR(p, dst, src[0], src[1]);
3034	 break;
3035      case BRW_OPCODE_XOR:
3036	 brw_XOR(p, dst, src[0], src[1]);
3037	 break;
3038      case BRW_OPCODE_NOT:
3039	 brw_NOT(p, dst, src[0]);
3040	 break;
3041      case BRW_OPCODE_ASR:
3042	 brw_ASR(p, dst, src[0], src[1]);
3043	 break;
3044      case BRW_OPCODE_SHR:
3045	 brw_SHR(p, dst, src[0], src[1]);
3046	 break;
3047      case BRW_OPCODE_SHL:
3048	 brw_SHL(p, dst, src[0], src[1]);
3049	 break;
3050
3051      case BRW_OPCODE_CMP:
3052	 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
3053	 break;
3054      case BRW_OPCODE_SEL:
3055	 brw_SEL(p, dst, src[0], src[1]);
3056	 break;
3057
3058      case BRW_OPCODE_IF:
3059	 assert(if_stack_depth < 16);
3060	 if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8);
3061	 if_depth_in_loop[loop_stack_depth]++;
3062	 if_stack_depth++;
3063	 break;
3064      case BRW_OPCODE_ELSE:
3065	 if_stack[if_stack_depth - 1] =
3066	    brw_ELSE(p, if_stack[if_stack_depth - 1]);
3067	 break;
3068      case BRW_OPCODE_ENDIF:
3069	 if_stack_depth--;
3070	 brw_ENDIF(p, if_stack[if_stack_depth]);
3071	 if_depth_in_loop[loop_stack_depth]--;
3072	 break;
3073
3074      case BRW_OPCODE_DO:
3075	 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
3076	 if_depth_in_loop[loop_stack_depth] = 0;
3077	 break;
3078
3079      case BRW_OPCODE_BREAK:
3080	 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
3081	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
3082	 break;
3083      case BRW_OPCODE_CONTINUE:
3084	 brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
3085	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
3086	 break;
3087
3088      case BRW_OPCODE_WHILE: {
3089	 struct brw_instruction *inst0, *inst1;
3090	 GLuint br = 1;
3091
3092	 if (intel->gen >= 5)
3093	    br = 2;
3094
3095	 assert(loop_stack_depth > 0);
3096	 loop_stack_depth--;
3097	 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
3098	 /* patch all the BREAK/CONT instructions from last BGNLOOP */
3099	 while (inst0 > loop_stack[loop_stack_depth]) {
3100	    inst0--;
3101	    if (inst0->header.opcode == BRW_OPCODE_BREAK &&
3102		inst0->bits3.if_else.jump_count == 0) {
3103	       inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
3104	    }
3105	    else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
3106		     inst0->bits3.if_else.jump_count == 0) {
3107	       inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
3108	    }
3109	 }
3110      }
3111	 break;
3112
3113      case FS_OPCODE_RCP:
3114      case FS_OPCODE_RSQ:
3115      case FS_OPCODE_SQRT:
3116      case FS_OPCODE_EXP2:
3117      case FS_OPCODE_LOG2:
3118      case FS_OPCODE_POW:
3119      case FS_OPCODE_SIN:
3120      case FS_OPCODE_COS:
3121	 generate_math(inst, dst, src);
3122	 break;
3123      case FS_OPCODE_LINTERP:
3124	 generate_linterp(inst, dst, src);
3125	 break;
3126      case FS_OPCODE_TEX:
3127      case FS_OPCODE_TXB:
3128      case FS_OPCODE_TXL:
3129	 generate_tex(inst, dst);
3130	 break;
3131      case FS_OPCODE_DISCARD_NOT:
3132	 generate_discard_not(inst, dst);
3133	 break;
3134      case FS_OPCODE_DISCARD_AND:
3135	 generate_discard_and(inst, src[0]);
3136	 break;
3137      case FS_OPCODE_DDX:
3138	 generate_ddx(inst, dst, src[0]);
3139	 break;
3140      case FS_OPCODE_DDY:
3141	 generate_ddy(inst, dst, src[0]);
3142	 break;
3143      case FS_OPCODE_FB_WRITE:
3144	 generate_fb_write(inst);
3145	 break;
3146      default:
3147	 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
3148	    _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
3149			  brw_opcodes[inst->opcode].name);
3150	 } else {
3151	    _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
3152	 }
3153	 this->fail = true;
3154      }
3155
3156      while (annotation_len < p->nr_insn) {
3157	 annotation_len *= 2;
3158	 if (annotation_len < 16)
3159	    annotation_len = 16;
3160
3161	 this->annotation_string = talloc_realloc(this->mem_ctx,
3162						  annotation_string,
3163						  const char *,
3164						  annotation_len);
3165	 this->annotation_ir = talloc_realloc(this->mem_ctx,
3166					      annotation_ir,
3167					      ir_instruction *,
3168					      annotation_len);
3169      }
3170
3171      for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
3172	 this->annotation_string[i] = inst->annotation;
3173	 this->annotation_ir[i] = inst->ir;
3174      }
3175      last_native_inst = p->nr_insn;
3176   }
3177}
3178
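/**
 * Driver entry point for FS compilation: finds the linked fragment
 * shader, builds our IR with fs_visitor, iterates the optimization
 * passes to a fixed point, register allocates, and generates native
 * code.
 */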
3179GLboolean
3180brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
3181{
3182   struct brw_compile *p = &c->func;
3183   struct intel_context *intel = &brw->intel;
3184   struct gl_context *ctx = &intel->ctx;
3185   struct brw_shader *shader = NULL;
3186   struct gl_shader_program *prog = ctx->Shader.CurrentProgram;
3187
3188   if (!prog)
3189      return GL_FALSE;
3190
3191   for (unsigned int i = 0; i < prog->_NumLinkedShaders; i++) {
3192      if (prog->_LinkedShaders[i]->Type == GL_FRAGMENT_SHADER) {
3193	 shader = (struct brw_shader *)prog->_LinkedShaders[i];
3194	 break;
3195      }
3196   }
3197   if (!shader)
3198      return GL_FALSE;
3199
3200   /* We always use 8-wide mode, at least for now.  For one, flow
3201    * control only works in 8-wide.  Also, when we're fragment-shader
3202    * bound, we're almost always under register pressure as well, so
3203    * 8-wide saves us from the performance cliff of spilling
3204    * regs.
3205    */
3206   c->dispatch_width = 8;
3207
3208   if (INTEL_DEBUG & DEBUG_WM) {
3209      printf("GLSL IR for native fragment shader %d:\n", prog->Name);
3210      _mesa_print_ir(shader->ir, NULL);
3211      printf("\n");
3212   }
3213
3214   /* Now the main event: Visit the shader IR and generate our FS IR for it.
3215    */
3216   fs_visitor v(c, shader);
3217
3218   if (0) {
3219      v.emit_dummy_fs();
3220   } else {
3221      v.calculate_urb_setup();
3222      if (intel->gen < 6)
3223	 v.emit_interpolation_setup_gen4();
3224      else
3225	 v.emit_interpolation_setup_gen6();
3226
3227      /* Generate FS IR for main().  (The visitor only descends into
3228       * functions called "main".)
3229       */
3230      foreach_iter(exec_list_iterator, iter, *shader->ir) {
3231	 ir_instruction *ir = (ir_instruction *)iter.get();
3232	 v.base_ir = ir;
3233	 ir->accept(&v);
3234      }
3235
3236      v.emit_fb_writes();
3237
3238      v.split_virtual_grfs();
3239
3240      v.assign_curb_setup();
3241      v.assign_urb_setup();
3242
3243      bool progress;
3244      do {
3245	 progress = false;
3246	 v.calculate_live_intervals();
3247	 progress = v.propagate_constants() || progress;
3248	 progress = v.register_coalesce() || progress;
3249	 progress = v.compute_to_mrf() || progress;
3250	 progress = v.dead_code_eliminate() || progress;
3251      } while (progress);
3252
3253      if (0)
3254	 v.assign_regs_trivial();
3255      else
3256	 v.assign_regs();
3257   }
3258
3259   if (!v.fail)
3260      v.generate_code();
3261
3262   assert(!v.fail); /* FINISHME: Cleanly fail, tested at link time, etc. */
3263
3264   if (v.fail)
3265      return GL_FALSE;
3266
3267   if (INTEL_DEBUG & DEBUG_WM) {
3268      const char *last_annotation_string = NULL;
3269      ir_instruction *last_annotation_ir = NULL;
3270
3271      printf("Native code for fragment shader %d:\n", prog->Name);
3272      for (unsigned int i = 0; i < p->nr_insn; i++) {
3273	 if (last_annotation_ir != v.annotation_ir[i]) {
3274	    last_annotation_ir = v.annotation_ir[i];
3275	    if (last_annotation_ir) {
3276	       printf("   ");
3277	       last_annotation_ir->print();
3278	       printf("\n");
3279	    }
3280	 }
3281	 if (last_annotation_string != v.annotation_string[i]) {
3282	    last_annotation_string = v.annotation_string[i];
3283	    if (last_annotation_string)
3284	       printf("   %s\n", last_annotation_string);
3285	 }
3286	 brw_disasm(stdout, &p->store[i], intel->gen);
3287      }
3288      printf("\n");
3289   }
3290
3291   c->prog_data.total_grf = v.grf_used;
3292   c->prog_data.total_scratch = 0;
3293
3294   return GL_TRUE;
3295}
3296