brw_fs.cpp revision 3dff682b6595c8771655307ed00bd8844f22238c
1/*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 *    Eric Anholt <eric@anholt.net>
25 *
26 */
27
28extern "C" {
29
30#include <sys/types.h>
31/* Evil hack for using libdrm in a c++ compiler. */
32#define virtual virt
33#include "i915_drm.h"
34#include "intel_bufmgr.h"
35#undef virtual
36
37#include "main/macros.h"
38#include "main/shaderobj.h"
39#include "program/prog_parameter.h"
40#include "program/prog_print.h"
41#include "program/prog_optimize.h"
42#include "program/hash_table.h"
43#include "brw_context.h"
44#include "brw_eu.h"
45#include "brw_wm.h"
46#include "talloc.h"
47}
48#include "../glsl/glsl_types.h"
49#include "../glsl/ir_optimization.h"
50#include "../glsl/ir_print_visitor.h"
51
/**
 * Register files an fs_reg can refer to.
 *
 * The first four values are defined from the hardware BRW_* register-file
 * constants; the last two have no BRW_* counterpart in this file.
 */
enum register_file {
   ARF = BRW_ARCHITECTURE_REGISTER_FILE,
   GRF = BRW_GENERAL_REGISTER_FILE,
   MRF = BRW_MESSAGE_REGISTER_FILE,
   IMM = BRW_IMMEDIATE_VALUE,
   FIXED_HW_REG, /* the fs_reg wraps a fully specified struct brw_reg */
   BAD_FILE      /* unset register, as built by fs_reg's default constructor */
};
60
/**
 * Opcodes specific to this fragment shader backend.  They are stored in
 * fs_inst::opcode alongside the BRW_OPCODE_* values.
 *
 * NOTE(review): numbering starts at 256, presumably to stay clear of the
 * BRW_OPCODE_* range -- confirm.
 */
enum fs_opcodes {
   FS_OPCODE_FB_WRITE = 256,
   FS_OPCODE_RCP,
   FS_OPCODE_RSQ,
   FS_OPCODE_SQRT,
   FS_OPCODE_EXP2,
   FS_OPCODE_LOG2,
   FS_OPCODE_POW,
   FS_OPCODE_SIN,
   FS_OPCODE_COS,
   FS_OPCODE_DDX,
   FS_OPCODE_DDY,
   FS_OPCODE_LINTERP,
};
75
/* Whether the new fragment shader backend is enabled: -1 until
 * brw_link_shader() first checks the INTEL_NEW_FS environment variable,
 * then 0 or 1.
 */
static int using_new_fs = -1;
77
78struct gl_shader *
79brw_new_shader(GLcontext *ctx, GLuint name, GLuint type)
80{
81   struct brw_shader *shader;
82
83   shader = talloc_zero(NULL, struct brw_shader);
84   if (shader) {
85      shader->base.Type = type;
86      shader->base.Name = name;
87      _mesa_init_shader(ctx, &shader->base);
88   }
89
90   return &shader->base;
91}
92
93struct gl_shader_program *
94brw_new_shader_program(GLcontext *ctx, GLuint name)
95{
96   struct brw_shader_program *prog;
97   prog = talloc_zero(NULL, struct brw_shader_program);
98   if (prog) {
99      prog->base.Name = name;
100      _mesa_init_shader_program(ctx, &prog->base);
101   }
102   return &prog->base;
103}
104
105GLboolean
106brw_compile_shader(GLcontext *ctx, struct gl_shader *shader)
107{
108   if (!_mesa_ir_compile_shader(ctx, shader))
109      return GL_FALSE;
110
111   return GL_TRUE;
112}
113
114GLboolean
115brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
116{
117   if (using_new_fs == -1)
118      using_new_fs = getenv("INTEL_NEW_FS") != NULL;
119
120   for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) {
121      struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[i];
122
123      if (using_new_fs && shader->base.Type == GL_FRAGMENT_SHADER) {
124	 void *mem_ctx = talloc_new(NULL);
125	 bool progress;
126
127	 if (shader->ir)
128	    talloc_free(shader->ir);
129	 shader->ir = new(shader) exec_list;
130	 clone_ir_list(mem_ctx, shader->ir, shader->base.ir);
131
132	 do_mat_op_to_vec(shader->ir);
133	 do_div_to_mul_rcp(shader->ir);
134	 do_sub_to_add_neg(shader->ir);
135	 do_explog_to_explog2(shader->ir);
136
137	 brw_do_channel_expressions(shader->ir);
138	 brw_do_vector_splitting(shader->ir);
139
140	 do {
141	    progress = false;
142
143	    progress = do_common_optimization(shader->ir, true) || progress;
144	 } while (progress);
145
146	 reparent_ir(shader->ir, shader->ir);
147	 talloc_free(mem_ctx);
148      }
149   }
150
151   if (!_mesa_ir_link_shader(ctx, prog))
152      return GL_FALSE;
153
154   return GL_TRUE;
155}
156
157static int
158type_size(const struct glsl_type *type)
159{
160   unsigned int size, i;
161
162   switch (type->base_type) {
163   case GLSL_TYPE_UINT:
164   case GLSL_TYPE_INT:
165   case GLSL_TYPE_FLOAT:
166   case GLSL_TYPE_BOOL:
167      if (type->is_matrix()) {
168	 /* In case of incoming uniform/varying matrices, match their
169	  * allocation behavior.  FINISHME: We could just use
170	  * glsl_type->components() for variables and temps within the
171	  * shader.
172	  */
173	 return type->matrix_columns * 4;
174      } else {
175	 return type->vector_elements;
176      }
177   case GLSL_TYPE_ARRAY:
178      /* FINISHME: uniform/varying arrays. */
179      return type_size(type->fields.array) * type->length;
180   case GLSL_TYPE_STRUCT:
181      size = 0;
182      for (i = 0; i < type->length; i++) {
183	 size += type_size(type->fields.structure[i].type);
184      }
185      return size;
186   case GLSL_TYPE_SAMPLER:
187      /* Samplers take up no register space, since they're baked in at
188       * link time.
189       */
190      return 0;
191   default:
192      assert(!"not reached");
193      return 0;
194   }
195}
196
197class fs_reg {
198public:
199   /* Callers of this talloc-based new need not call delete. It's
200    * easier to just talloc_free 'ctx' (or any of its ancestors). */
201   static void* operator new(size_t size, void *ctx)
202   {
203      void *node;
204
205      node = talloc_size(ctx, size);
206      assert(node != NULL);
207
208      return node;
209   }
210
211   /** Generic unset register constructor. */
212   fs_reg()
213   {
214      this->file = BAD_FILE;
215      this->reg = 0;
216      this->reg_offset = 0;
217      this->hw_reg = -1;
218      this->negate = 0;
219      this->abs = 0;
220   }
221
222   /** Immediate value constructor. */
223   fs_reg(float f)
224   {
225      this->file = IMM;
226      this->reg = 0;
227      this->hw_reg = 0;
228      this->type = BRW_REGISTER_TYPE_F;
229      this->imm.f = f;
230      this->negate = 0;
231      this->abs = 0;
232   }
233
234   /** Immediate value constructor. */
235   fs_reg(int32_t i)
236   {
237      this->file = IMM;
238      this->reg = 0;
239      this->hw_reg = 0;
240      this->type = BRW_REGISTER_TYPE_D;
241      this->imm.i = i;
242      this->negate = 0;
243      this->abs = 0;
244   }
245
246   /** Immediate value constructor. */
247   fs_reg(uint32_t u)
248   {
249      this->file = IMM;
250      this->reg = 0;
251      this->hw_reg = 0;
252      this->type = BRW_REGISTER_TYPE_UD;
253      this->imm.u = u;
254      this->negate = 0;
255      this->abs = 0;
256   }
257
258   /** Fixed brw_reg Immediate value constructor. */
259   fs_reg(struct brw_reg fixed_hw_reg)
260   {
261      this->file = FIXED_HW_REG;
262      this->fixed_hw_reg = fixed_hw_reg;
263      this->reg = 0;
264      this->hw_reg = 0;
265      this->type = fixed_hw_reg.type;
266      this->negate = 0;
267      this->abs = 0;
268   }
269
270   fs_reg(enum register_file file, int hw_reg);
271   fs_reg(class fs_visitor *v, const struct glsl_type *type);
272
273   /** Register file: ARF, GRF, MRF, IMM. */
274   enum register_file file;
275   /** Abstract register number.  0 = fixed hw reg */
276   int reg;
277   /** Offset within the abstract register. */
278   int reg_offset;
279   /** HW register number.  Generally unset until register allocation. */
280   int hw_reg;
281   /** Register type.  BRW_REGISTER_TYPE_* */
282   int type;
283   bool negate;
284   bool abs;
285   struct brw_reg fixed_hw_reg;
286
287   /** Value for file == BRW_IMMMEDIATE_FILE */
288   union {
289      int32_t i;
290      uint32_t u;
291      float f;
292   } imm;
293};
294
/* Unset register (file == BAD_FILE), built by the default constructor. */
static const fs_reg reg_undef;
/* Register BRW_ARF_NULL in the architecture register file. */
static const fs_reg reg_null(ARF, BRW_ARF_NULL);
297
298class fs_inst : public exec_node {
299public:
300   /* Callers of this talloc-based new need not call delete. It's
301    * easier to just talloc_free 'ctx' (or any of its ancestors). */
302   static void* operator new(size_t size, void *ctx)
303   {
304      void *node;
305
306      node = talloc_zero_size(ctx, size);
307      assert(node != NULL);
308
309      return node;
310   }
311
312   fs_inst()
313   {
314      this->opcode = BRW_OPCODE_NOP;
315      this->saturate = false;
316      this->conditional_mod = BRW_CONDITIONAL_NONE;
317      this->predicated = false;
318   }
319
320   fs_inst(int opcode, fs_reg dst, fs_reg src0)
321   {
322      this->opcode = opcode;
323      this->dst = dst;
324      this->src[0] = src0;
325      this->saturate = false;
326      this->conditional_mod = BRW_CONDITIONAL_NONE;
327      this->predicated = false;
328   }
329
330   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1)
331   {
332      this->opcode = opcode;
333      this->dst = dst;
334      this->src[0] = src0;
335      this->src[1] = src1;
336      this->saturate = false;
337      this->conditional_mod = BRW_CONDITIONAL_NONE;
338      this->predicated = false;
339   }
340
341   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)
342   {
343      this->opcode = opcode;
344      this->dst = dst;
345      this->src[0] = src0;
346      this->src[1] = src1;
347      this->src[2] = src2;
348      this->saturate = false;
349      this->conditional_mod = BRW_CONDITIONAL_NONE;
350      this->predicated = false;
351   }
352
353   int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
354   fs_reg dst;
355   fs_reg src[3];
356   bool saturate;
357   bool predicated;
358   int conditional_mod; /**< BRW_CONDITIONAL_* */
359
360   /** @{
361    * Annotation for the generated IR.  One of the two can be set.
362    */
363   ir_instruction *ir;
364   const char *annotation;
365   /** @} */
366};
367
368class fs_visitor : public ir_visitor
369{
370public:
371
372   fs_visitor(struct brw_wm_compile *c, struct brw_shader *shader)
373   {
374      this->c = c;
375      this->p = &c->func;
376      this->brw = p->brw;
377      this->intel = &brw->intel;
378      this->mem_ctx = talloc_new(NULL);
379      this->shader = shader;
380      this->fail = false;
381      this->next_abstract_grf = 1;
382      this->variable_ht = hash_table_ctor(0,
383					  hash_table_pointer_hash,
384					  hash_table_pointer_compare);
385
386      this->frag_color = NULL;
387      this->frag_data = NULL;
388      this->frag_depth = NULL;
389      this->first_non_payload_grf = 0;
390
391      this->current_annotation = NULL;
392      this->annotation_string = NULL;
393      this->annotation_ir = NULL;
394   }
395   ~fs_visitor()
396   {
397      talloc_free(this->mem_ctx);
398      hash_table_dtor(this->variable_ht);
399   }
400
401   fs_reg *variable_storage(ir_variable *var);
402
403   void visit(ir_variable *ir);
404   void visit(ir_assignment *ir);
405   void visit(ir_dereference_variable *ir);
406   void visit(ir_dereference_record *ir);
407   void visit(ir_dereference_array *ir);
408   void visit(ir_expression *ir);
409   void visit(ir_texture *ir);
410   void visit(ir_if *ir);
411   void visit(ir_constant *ir);
412   void visit(ir_swizzle *ir);
413   void visit(ir_return *ir);
414   void visit(ir_loop *ir);
415   void visit(ir_loop_jump *ir);
416   void visit(ir_discard *ir);
417   void visit(ir_call *ir);
418   void visit(ir_function *ir);
419   void visit(ir_function_signature *ir);
420
421   fs_inst *emit(fs_inst inst);
422   void assign_urb_setup();
423   void assign_regs();
424   void generate_code();
425   void generate_fb_write(fs_inst *inst);
426   void generate_linterp(fs_inst *inst, struct brw_reg dst,
427			 struct brw_reg *src);
428   void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src);
429
430   void emit_dummy_fs();
431   void emit_interpolation();
432   void emit_pinterp(int location);
433   void emit_fb_writes();
434
435   struct brw_reg interp_reg(int location, int channel);
436
437   struct brw_context *brw;
438   struct intel_context *intel;
439   struct brw_wm_compile *c;
440   struct brw_compile *p;
441   struct brw_shader *shader;
442   void *mem_ctx;
443   exec_list instructions;
444   int next_abstract_grf;
445   struct hash_table *variable_ht;
446   ir_variable *frag_color, *frag_data, *frag_depth;
447   int first_non_payload_grf;
448
449   /** @{ debug annotation info */
450   const char *current_annotation;
451   ir_instruction *base_ir;
452   const char **annotation_string;
453   ir_instruction **annotation_ir;
454   /** @} */
455
456   bool fail;
457
458   /* Result of last visit() method. */
459   fs_reg result;
460
461   fs_reg pixel_x;
462   fs_reg pixel_y;
463   fs_reg pixel_w;
464   fs_reg delta_x;
465   fs_reg delta_y;
466   fs_reg interp_attrs[64];
467
468   int grf_used;
469
470};
471
472/** Fixed HW reg constructor. */
473fs_reg::fs_reg(enum register_file file, int hw_reg)
474{
475   this->file = file;
476   this->reg = 0;
477   this->reg_offset = 0;
478   this->hw_reg = hw_reg;
479   this->type = BRW_REGISTER_TYPE_F;
480   this->negate = 0;
481   this->abs = 0;
482}
483
484/** Automatic reg constructor. */
485fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type)
486{
487   this->file = GRF;
488   this->reg = v->next_abstract_grf;
489   this->reg_offset = 0;
490   v->next_abstract_grf += type_size(type);
491   this->hw_reg = -1;
492   this->negate = 0;
493   this->abs = 0;
494
495   switch (type->base_type) {
496   case GLSL_TYPE_FLOAT:
497      this->type = BRW_REGISTER_TYPE_F;
498      break;
499   case GLSL_TYPE_INT:
500   case GLSL_TYPE_BOOL:
501      this->type = BRW_REGISTER_TYPE_D;
502      break;
503   case GLSL_TYPE_UINT:
504      this->type = BRW_REGISTER_TYPE_UD;
505      break;
506   default:
507      assert(!"not reached");
508      this->type =  BRW_REGISTER_TYPE_F;
509      break;
510   }
511}
512
513fs_reg *
514fs_visitor::variable_storage(ir_variable *var)
515{
516   return (fs_reg *)hash_table_find(this->variable_ht, var);
517}
518
519void
520fs_visitor::visit(ir_variable *ir)
521{
522   fs_reg *reg = NULL;
523
524   /* FINISHME */
525   assert(ir->mode != ir_var_uniform);
526
527   if (strcmp(ir->name, "gl_FragColor") == 0) {
528      this->frag_color = ir;
529   } else if (strcmp(ir->name, "gl_FragData") == 0) {
530      this->frag_data = ir;
531   } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
532      this->frag_depth = ir;
533      assert(!"FINISHME: this hangs currently.");
534   }
535
536   if (ir->mode == ir_var_in) {
537      reg = &this->interp_attrs[ir->location];
538   }
539
540   if (!reg)
541      reg = new(this->mem_ctx) fs_reg(this, ir->type);
542
543   hash_table_insert(this->variable_ht, reg, ir);
544}
545
546void
547fs_visitor::visit(ir_dereference_variable *ir)
548{
549   fs_reg *reg = variable_storage(ir->var);
550   this->result = *reg;
551}
552
553void
554fs_visitor::visit(ir_dereference_record *ir)
555{
556   assert(!"FINISHME");
557}
558
559void
560fs_visitor::visit(ir_dereference_array *ir)
561{
562   assert(!"FINISHME");
563}
564
565void
566fs_visitor::visit(ir_expression *ir)
567{
568   unsigned int operand;
569   fs_reg op[2], temp;
570   fs_reg result;
571   fs_inst *inst;
572
573   for (operand = 0; operand < ir->get_num_operands(); operand++) {
574      ir->operands[operand]->accept(this);
575      if (this->result.file == BAD_FILE) {
576	 ir_print_visitor v;
577	 printf("Failed to get tree for expression operand:\n");
578	 ir->operands[operand]->accept(&v);
579	 this->fail = true;
580      }
581      op[operand] = this->result;
582
583      /* Matrix expression operands should have been broken down to vector
584       * operations already.
585       */
586      assert(!ir->operands[operand]->type->is_matrix());
587      /* And then those vector operands should have been broken down to scalar.
588       */
589      assert(!ir->operands[operand]->type->is_vector());
590   }
591
592   /* Storage for our result.  If our result goes into an assignment, it will
593    * just get copy-propagated out, so no worries.
594    */
595   this->result = fs_reg(this, ir->type);
596
597   switch (ir->operation) {
598   case ir_unop_logic_not:
599      emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], fs_reg(-1)));
600      break;
601   case ir_unop_neg:
602      this->result = op[0];
603      op[0].negate = ~op[0].negate;
604      break;
605   case ir_unop_abs:
606      this->result = op[0];
607      op[0].abs = true;
608      break;
609   case ir_unop_sign:
610      temp = fs_reg(this, ir->type);
611
612      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f)));
613      inst->conditional_mod = BRW_CONDITIONAL_G;
614
615      inst = emit(fs_inst(BRW_OPCODE_CMP, temp, op[0], fs_reg(0.0f)));
616      inst->conditional_mod = BRW_CONDITIONAL_L;
617
618      temp.negate = true;
619      emit(fs_inst(BRW_OPCODE_ADD, this->result, this->result, temp));
620
621      break;
622   case ir_unop_rcp:
623      emit(fs_inst(FS_OPCODE_RCP, this->result, op[0]));
624      break;
625
626   case ir_unop_exp2:
627      emit(fs_inst(FS_OPCODE_EXP2, this->result, op[0]));
628      break;
629   case ir_unop_log2:
630      emit(fs_inst(FS_OPCODE_LOG2, this->result, op[0]));
631      break;
632   case ir_unop_exp:
633   case ir_unop_log:
634      assert(!"not reached: should be handled by ir_explog_to_explog2");
635      break;
636   case ir_unop_sin:
637      emit(fs_inst(FS_OPCODE_SIN, this->result, op[0]));
638      break;
639   case ir_unop_cos:
640      emit(fs_inst(FS_OPCODE_COS, this->result, op[0]));
641      break;
642
643   case ir_unop_dFdx:
644      emit(fs_inst(FS_OPCODE_DDX, this->result, op[0]));
645      break;
646   case ir_unop_dFdy:
647      emit(fs_inst(FS_OPCODE_DDY, this->result, op[0]));
648      break;
649
650   case ir_binop_add:
651      emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1]));
652      break;
653   case ir_binop_sub:
654      assert(!"not reached: should be handled by ir_sub_to_add_neg");
655      break;
656
657   case ir_binop_mul:
658      emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1]));
659      break;
660   case ir_binop_div:
661      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
662      break;
663   case ir_binop_mod:
664      assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
665      break;
666
667   case ir_binop_less:
668      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
669      inst->conditional_mod = BRW_CONDITIONAL_L;
670      break;
671   case ir_binop_greater:
672      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
673      inst->conditional_mod = BRW_CONDITIONAL_G;
674      break;
675   case ir_binop_lequal:
676      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
677      inst->conditional_mod = BRW_CONDITIONAL_LE;
678      break;
679   case ir_binop_gequal:
680      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
681      inst->conditional_mod = BRW_CONDITIONAL_GE;
682      break;
683   case ir_binop_equal:
684      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
685      inst->conditional_mod = BRW_CONDITIONAL_Z;
686      break;
687   case ir_binop_nequal:
688      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
689      inst->conditional_mod = BRW_CONDITIONAL_NZ;
690      break;
691
692   case ir_binop_logic_xor:
693      emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1]));
694      break;
695
696   case ir_binop_logic_or:
697      emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1]));
698      break;
699
700   case ir_binop_logic_and:
701      emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1]));
702      break;
703
704   case ir_binop_dot:
705   case ir_binop_cross:
706   case ir_unop_any:
707      assert(!"not reached: should be handled by brw_channel_expressions");
708      break;
709
710   case ir_unop_sqrt:
711      emit(fs_inst(FS_OPCODE_SQRT, this->result, op[0]));
712      break;
713
714   case ir_unop_rsq:
715      emit(fs_inst(FS_OPCODE_RSQ, this->result, op[0]));
716      break;
717
718   case ir_unop_i2f:
719   case ir_unop_b2f:
720   case ir_unop_b2i:
721      emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
722      break;
723   case ir_unop_f2i:
724      emit(fs_inst(BRW_OPCODE_RNDZ, this->result, op[0]));
725      break;
726   case ir_unop_f2b:
727   case ir_unop_i2b:
728      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f)));
729      inst->conditional_mod = BRW_CONDITIONAL_NZ;
730
731   case ir_unop_trunc:
732      emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
733      break;
734   case ir_unop_ceil:
735      op[0].negate = ~op[0].negate;
736      inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
737      this->result.negate = true;
738      break;
739   case ir_unop_floor:
740      inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
741      break;
742   case ir_unop_fract:
743      inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0]));
744      break;
745
746   case ir_binop_min:
747      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
748      inst->conditional_mod = BRW_CONDITIONAL_L;
749
750      inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
751      inst->predicated = true;
752      break;
753   case ir_binop_max:
754      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
755      inst->conditional_mod = BRW_CONDITIONAL_G;
756
757      inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
758      inst->predicated = true;
759      break;
760
761   case ir_binop_pow:
762      inst = emit(fs_inst(FS_OPCODE_POW, this->result, op[0], op[1]));
763      break;
764
765   case ir_unop_bit_not:
766   case ir_unop_u2f:
767   case ir_binop_lshift:
768   case ir_binop_rshift:
769   case ir_binop_bit_and:
770   case ir_binop_bit_xor:
771   case ir_binop_bit_or:
772      assert(!"GLSL 1.30 features unsupported");
773      break;
774   }
775}
776
777void
778fs_visitor::visit(ir_assignment *ir)
779{
780   struct fs_reg l, r;
781   int i;
782   int write_mask;
783   fs_inst *inst;
784
785   /* FINISHME: arrays on the lhs */
786   ir->lhs->accept(this);
787   l = this->result;
788
789   ir->rhs->accept(this);
790   r = this->result;
791
792   /* FINISHME: This should really set to the correct maximal writemask for each
793    * FINISHME: component written (in the loops below).  This case can only
794    * FINISHME: occur for matrices, arrays, and structures.
795    */
796   if (ir->write_mask == 0) {
797      assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
798      write_mask = WRITEMASK_XYZW;
799   } else {
800      assert(ir->lhs->type->is_vector() || ir->lhs->type->is_scalar());
801      write_mask = ir->write_mask;
802   }
803
804   assert(l.file != BAD_FILE);
805   assert(r.file != BAD_FILE);
806
807   if (ir->condition) {
808      /* Get the condition bool into the predicate. */
809      ir->condition->accept(this);
810      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, fs_reg(0)));
811      inst->conditional_mod = BRW_CONDITIONAL_NZ;
812   }
813
814   for (i = 0; i < type_size(ir->lhs->type); i++) {
815      if (i < 4 && !(write_mask & (1 << i)))
816	 continue;
817
818      inst = emit(fs_inst(BRW_OPCODE_MOV, l, r));
819      if (ir->condition)
820	 inst->predicated = true;
821      l.reg_offset++;
822      r.reg_offset++;
823   }
824}
825
826void
827fs_visitor::visit(ir_texture *ir)
828{
829   assert(!"FINISHME");
830}
831
832void
833fs_visitor::visit(ir_swizzle *ir)
834{
835   ir->val->accept(this);
836   fs_reg val = this->result;
837
838   fs_reg result = fs_reg(this, ir->type);
839   this->result = result;
840
841   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
842      fs_reg channel = val;
843      int swiz = 0;
844
845      switch (i) {
846      case 0:
847	 swiz = ir->mask.x;
848	 break;
849      case 1:
850	 swiz = ir->mask.y;
851	 break;
852      case 2:
853	 swiz = ir->mask.z;
854	 break;
855      case 3:
856	 swiz = ir->mask.w;
857	 break;
858      }
859
860      channel.reg_offset += swiz;
861      emit(fs_inst(BRW_OPCODE_MOV, result, channel));
862      result.reg_offset++;
863   }
864}
865
866void
867fs_visitor::visit(ir_discard *ir)
868{
869   assert(!"FINISHME");
870}
871
872void
873fs_visitor::visit(ir_constant *ir)
874{
875   fs_reg reg(this, ir->type);
876   this->result = reg;
877
878   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
879      switch (ir->type->base_type) {
880      case GLSL_TYPE_FLOAT:
881	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i])));
882	 break;
883      case GLSL_TYPE_UINT:
884	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i])));
885	 break;
886      case GLSL_TYPE_INT:
887	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i])));
888	 break;
889      case GLSL_TYPE_BOOL:
890	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i])));
891	 break;
892      default:
893	 assert(!"Non-float/uint/int/bool constant");
894      }
895      reg.reg_offset++;
896   }
897}
898
899void
900fs_visitor::visit(ir_if *ir)
901{
902   assert(!"FINISHME");
903}
904
905void
906fs_visitor::visit(ir_loop *ir)
907{
908   assert(!"FINISHME");
909}
910
911void
912fs_visitor::visit(ir_loop_jump *ir)
913{
914   assert(!"FINISHME");
915}
916
917void
918fs_visitor::visit(ir_call *ir)
919{
920   assert(!"FINISHME");
921}
922
923void
924fs_visitor::visit(ir_return *ir)
925{
926   assert(!"FINISHME");
927}
928
929void
930fs_visitor::visit(ir_function *ir)
931{
932   /* Ignore function bodies other than main() -- we shouldn't see calls to
933    * them since they should all be inlined before we get to ir_to_mesa.
934    */
935   if (strcmp(ir->name, "main") == 0) {
936      const ir_function_signature *sig;
937      exec_list empty;
938
939      sig = ir->matching_signature(&empty);
940
941      assert(sig);
942
943      foreach_iter(exec_list_iterator, iter, sig->body) {
944	 ir_instruction *ir = (ir_instruction *)iter.get();
945	 this->base_ir = ir;
946
947	 ir->accept(this);
948      }
949   }
950}
951
952void
953fs_visitor::visit(ir_function_signature *ir)
954{
955   assert(!"not reached");
956   (void)ir;
957}
958
959fs_inst *
960fs_visitor::emit(fs_inst inst)
961{
962   fs_inst *list_inst = new(mem_ctx) fs_inst;
963   *list_inst = inst;
964
965   list_inst->annotation = this->current_annotation;
966   list_inst->ir = this->base_ir;
967
968   this->instructions.push_tail(list_inst);
969
970   return list_inst;
971}
972
973/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
974void
975fs_visitor::emit_dummy_fs()
976{
977   /* Everyone's favorite color. */
978   emit(fs_inst(BRW_OPCODE_MOV,
979		fs_reg(MRF, 2),
980		fs_reg(1.0f)));
981   emit(fs_inst(BRW_OPCODE_MOV,
982		fs_reg(MRF, 3),
983		fs_reg(0.0f)));
984   emit(fs_inst(BRW_OPCODE_MOV,
985		fs_reg(MRF, 4),
986		fs_reg(1.0f)));
987   emit(fs_inst(BRW_OPCODE_MOV,
988		fs_reg(MRF, 5),
989		fs_reg(0.0f)));
990
991   fs_inst *write;
992   write = emit(fs_inst(FS_OPCODE_FB_WRITE,
993			fs_reg(0),
994			fs_reg(0)));
995}
996
997/* The register location here is relative to the start of the URB
998 * data.  It will get adjusted to be a real location before
999 * generate_code() time.
1000 */
1001struct brw_reg
1002fs_visitor::interp_reg(int location, int channel)
1003{
1004   int regnr = location * 2 + channel / 2;
1005   int stride = (channel & 1) * 4;
1006
1007   return brw_vec1_grf(regnr, stride);
1008}
1009
1010/** Emits the interpolation for the varying inputs. */
1011void
1012fs_visitor::emit_interpolation()
1013{
1014   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
1015   /* For now, the source regs for the setup URB data will be unset,
1016    * since we don't know until codegen how many push constants we'll
1017    * use, and therefore what the setup URB offset is.
1018    */
1019   fs_reg src_reg = reg_undef;
1020
1021   this->current_annotation = "compute pixel centers";
1022   this->pixel_x = fs_reg(this, glsl_type::uint_type);
1023   this->pixel_y = fs_reg(this, glsl_type::uint_type);
1024   emit(fs_inst(BRW_OPCODE_ADD,
1025		this->pixel_x,
1026		fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
1027		fs_reg(brw_imm_v(0x10101010))));
1028   emit(fs_inst(BRW_OPCODE_ADD,
1029		this->pixel_y,
1030		fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
1031		fs_reg(brw_imm_v(0x11001100))));
1032
1033   this->current_annotation = "compute pixel deltas from v0";
1034   this->delta_x = fs_reg(this, glsl_type::float_type);
1035   this->delta_y = fs_reg(this, glsl_type::float_type);
1036   emit(fs_inst(BRW_OPCODE_ADD,
1037		this->delta_x,
1038		this->pixel_x,
1039		fs_reg(negate(brw_vec1_grf(1, 0)))));
1040   emit(fs_inst(BRW_OPCODE_ADD,
1041		this->delta_y,
1042		this->pixel_y,
1043		fs_reg(brw_vec1_grf(1, 1))));
1044
1045   this->current_annotation = "compute pos.w and 1/pos.w";
1046   /* Compute wpos.  Unlike many other varying inputs, we usually need it
1047    * to produce 1/w, and the varying variable wouldn't show up.
1048    */
1049   fs_reg wpos = fs_reg(this, glsl_type::vec4_type);
1050   this->interp_attrs[FRAG_ATTRIB_WPOS] = wpos;
1051   emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x)); /* FINISHME: ARB_fcc */
1052   wpos.reg_offset++;
1053   emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y)); /* FINISHME: ARB_fcc */
1054   wpos.reg_offset++;
1055   emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
1056		interp_reg(FRAG_ATTRIB_WPOS, 2)));
1057   wpos.reg_offset++;
1058   emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
1059		interp_reg(FRAG_ATTRIB_WPOS, 3)));
1060   /* Compute the pixel W value from wpos.w. */
1061   this->pixel_w = fs_reg(this, glsl_type::float_type);
1062   emit(fs_inst(FS_OPCODE_RCP, this->pixel_w, wpos));
1063
1064   /* FINISHME: gl_FrontFacing */
1065
1066   foreach_iter(exec_list_iterator, iter, *this->shader->ir) {
1067      ir_instruction *ir = (ir_instruction *)iter.get();
1068      ir_variable *var = ir->as_variable();
1069
1070      if (!var)
1071	 continue;
1072
1073      if (var->mode != ir_var_in)
1074	 continue;
1075
1076      /* If it's already set up (WPOS), skip. */
1077      if (var->location == 0)
1078	 continue;
1079
1080      this->current_annotation = talloc_asprintf(this->mem_ctx,
1081						 "interpolate %s "
1082						 "(FRAG_ATTRIB[%d])",
1083						 var->name,
1084						 var->location);
1085      emit_pinterp(var->location);
1086   }
1087   this->current_annotation = NULL;
1088}
1089
1090void
1091fs_visitor::emit_pinterp(int location)
1092{
1093   fs_reg interp_attr = fs_reg(this, glsl_type::vec4_type);
1094   this->interp_attrs[location] = interp_attr;
1095
1096   for (unsigned int i = 0; i < 4; i++) {
1097      struct brw_reg interp = interp_reg(location, i);
1098      emit(fs_inst(FS_OPCODE_LINTERP,
1099		   interp_attr,
1100		   this->delta_x,
1101		   this->delta_y,
1102		   fs_reg(interp)));
1103      interp_attr.reg_offset++;
1104   }
1105   interp_attr.reg_offset -= 4;
1106
1107   for (unsigned int i = 0; i < 4; i++) {
1108      emit(fs_inst(BRW_OPCODE_MUL,
1109		   interp_attr,
1110		   interp_attr,
1111		   this->pixel_w));
1112      interp_attr.reg_offset++;
1113   }
1114}
1115
1116void
1117fs_visitor::emit_fb_writes()
1118{
1119   this->current_annotation = "FB write";
1120
1121   assert(this->frag_color || !"FINISHME: MRT");
1122   fs_reg color = *(variable_storage(this->frag_color));
1123
1124   for (int i = 0; i < 4; i++) {
1125      emit(fs_inst(BRW_OPCODE_MOV,
1126		   fs_reg(MRF, 2 + i),
1127		   color));
1128      color.reg_offset++;
1129   }
1130
1131   emit(fs_inst(FS_OPCODE_FB_WRITE,
1132		fs_reg(0),
1133		fs_reg(0)));
1134
1135   this->current_annotation = NULL;
1136}
1137
1138void
1139fs_visitor::generate_fb_write(fs_inst *inst)
1140{
1141   GLboolean eot = 1; /* FINISHME: MRT */
1142   /* FINISHME: AADS */
1143
1144   /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
1145    * move, here's g1.
1146    */
1147   brw_push_insn_state(p);
1148   brw_set_mask_control(p, BRW_MASK_DISABLE);
1149   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1150   brw_MOV(p,
1151	   brw_message_reg(1),
1152	   brw_vec8_grf(1, 0));
1153   brw_pop_insn_state(p);
1154
1155   int nr = 2 + 4;
1156
1157   brw_fb_WRITE(p,
1158		8, /* dispatch_width */
1159		retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
1160		0, /* base MRF */
1161		retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
1162		0, /* FINISHME: MRT target */
1163		nr,
1164		0,
1165		eot);
1166}
1167
1168void
1169fs_visitor::generate_linterp(fs_inst *inst,
1170			     struct brw_reg dst, struct brw_reg *src)
1171{
1172   struct brw_reg delta_x = src[0];
1173   struct brw_reg delta_y = src[1];
1174   struct brw_reg interp = src[2];
1175
1176   if (brw->has_pln &&
1177       delta_y.nr == delta_x.nr + 1 &&
1178       (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
1179      brw_PLN(p, dst, interp, delta_x);
1180   } else {
1181      brw_LINE(p, brw_null_reg(), interp, delta_x);
1182      brw_MAC(p, dst, suboffset(interp, 1), delta_y);
1183   }
1184}
1185
1186void
1187fs_visitor::generate_math(fs_inst *inst,
1188			  struct brw_reg dst, struct brw_reg *src)
1189{
1190   int op;
1191
1192   switch (inst->opcode) {
1193   case FS_OPCODE_RCP:
1194      op = BRW_MATH_FUNCTION_INV;
1195      break;
1196   case FS_OPCODE_RSQ:
1197      op = BRW_MATH_FUNCTION_RSQ;
1198      break;
1199   case FS_OPCODE_SQRT:
1200      op = BRW_MATH_FUNCTION_SQRT;
1201      break;
1202   case FS_OPCODE_EXP2:
1203      op = BRW_MATH_FUNCTION_EXP;
1204      break;
1205   case FS_OPCODE_LOG2:
1206      op = BRW_MATH_FUNCTION_LOG;
1207      break;
1208   case FS_OPCODE_POW:
1209      op = BRW_MATH_FUNCTION_POW;
1210      break;
1211   case FS_OPCODE_SIN:
1212      op = BRW_MATH_FUNCTION_SIN;
1213      break;
1214   case FS_OPCODE_COS:
1215      op = BRW_MATH_FUNCTION_COS;
1216      break;
1217   default:
1218      assert(!"not reached: unknown math function");
1219      op = 0;
1220      break;
1221   }
1222
1223   if (inst->opcode == FS_OPCODE_POW) {
1224      brw_MOV(p, brw_message_reg(3), src[1]);
1225   }
1226
1227   brw_math(p, dst,
1228	    op,
1229	    inst->saturate ? BRW_MATH_SATURATE_SATURATE :
1230	    BRW_MATH_SATURATE_NONE,
1231	    2, src[0],
1232	    BRW_MATH_DATA_VECTOR,
1233	    BRW_MATH_PRECISION_FULL);
1234}
1235
1236static void
1237trivial_assign_reg(int header_size, fs_reg *reg)
1238{
1239   if (reg->file == GRF && reg->reg != 0) {
1240      reg->hw_reg = header_size + reg->reg - 1 + reg->reg_offset;
1241      reg->reg = 0;
1242   }
1243}
1244
1245void
1246fs_visitor::assign_urb_setup()
1247{
1248   int urb_start = c->key.nr_payload_regs; /* FINISHME: push constants */
1249   int interp_reg_nr[FRAG_ATTRIB_MAX];
1250
1251   c->prog_data.urb_read_length = 0;
1252
1253   /* Figure out where each of the incoming setup attributes lands. */
1254   for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
1255      interp_reg_nr[i] = -1;
1256
1257      if (i != FRAG_ATTRIB_WPOS &&
1258	  !(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i)))
1259	 continue;
1260
1261      /* Each attribute is 4 setup channels, each of which is half a reg. */
1262      interp_reg_nr[i] = urb_start + c->prog_data.urb_read_length;
1263      c->prog_data.urb_read_length += 2;
1264   }
1265
1266   /* Map the register numbers for FS_OPCODE_LINTERP so that it uses
1267    * the correct setup input.
1268    */
1269   foreach_iter(exec_list_iterator, iter, this->instructions) {
1270      fs_inst *inst = (fs_inst *)iter.get();
1271
1272      if (inst->opcode != FS_OPCODE_LINTERP)
1273	 continue;
1274
1275      assert(inst->src[2].file == FIXED_HW_REG);
1276
1277      int location = inst->src[2].fixed_hw_reg.nr / 2;
1278      assert(interp_reg_nr[location] != -1);
1279      inst->src[2].fixed_hw_reg.nr = (interp_reg_nr[location] +
1280				      (inst->src[2].fixed_hw_reg.nr & 1));
1281   }
1282
1283   this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length;
1284}
1285
1286void
1287fs_visitor::assign_regs()
1288{
1289   int header_size = this->first_non_payload_grf;
1290   int last_grf = 0;
1291
1292   /* FINISHME: trivial assignment of register numbers */
1293   foreach_iter(exec_list_iterator, iter, this->instructions) {
1294      fs_inst *inst = (fs_inst *)iter.get();
1295
1296      trivial_assign_reg(header_size, &inst->dst);
1297      trivial_assign_reg(header_size, &inst->src[0]);
1298      trivial_assign_reg(header_size, &inst->src[1]);
1299
1300      last_grf = MAX2(last_grf, inst->dst.hw_reg);
1301      last_grf = MAX2(last_grf, inst->src[0].hw_reg);
1302      last_grf = MAX2(last_grf, inst->src[1].hw_reg);
1303   }
1304
1305   this->grf_used = last_grf;
1306}
1307
1308static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
1309{
1310   struct brw_reg brw_reg;
1311
1312   switch (reg->file) {
1313   case GRF:
1314   case ARF:
1315   case MRF:
1316      brw_reg = brw_vec8_reg(reg->file,
1317			    reg->hw_reg, 0);
1318      brw_reg = retype(brw_reg, reg->type);
1319      break;
1320   case IMM:
1321      switch (reg->type) {
1322      case BRW_REGISTER_TYPE_F:
1323	 brw_reg = brw_imm_f(reg->imm.f);
1324	 break;
1325      case BRW_REGISTER_TYPE_D:
1326	 brw_reg = brw_imm_f(reg->imm.i);
1327	 break;
1328      case BRW_REGISTER_TYPE_UD:
1329	 brw_reg = brw_imm_f(reg->imm.u);
1330	 break;
1331      default:
1332	 assert(!"not reached");
1333	 break;
1334      }
1335      break;
1336   case FIXED_HW_REG:
1337      brw_reg = reg->fixed_hw_reg;
1338      break;
1339   case BAD_FILE:
1340      /* Probably unused. */
1341      brw_reg = brw_null_reg();
1342   }
1343   if (reg->abs)
1344      brw_reg = brw_abs(brw_reg);
1345   if (reg->negate)
1346      brw_reg = negate(brw_reg);
1347
1348   return brw_reg;
1349}
1350
1351void
1352fs_visitor::generate_code()
1353{
1354   unsigned int annotation_len = 0;
1355   int last_native_inst = 0;
1356
1357   foreach_iter(exec_list_iterator, iter, this->instructions) {
1358      fs_inst *inst = (fs_inst *)iter.get();
1359      struct brw_reg src[3], dst;
1360
1361      for (unsigned int i = 0; i < 3; i++) {
1362	 src[i] = brw_reg_from_fs_reg(&inst->src[i]);
1363      }
1364      dst = brw_reg_from_fs_reg(&inst->dst);
1365
1366      brw_set_conditionalmod(p, inst->conditional_mod);
1367      brw_set_predicate_control(p, inst->predicated);
1368
1369      switch (inst->opcode) {
1370      case BRW_OPCODE_MOV:
1371	 brw_MOV(p, dst, src[0]);
1372	 break;
1373      case BRW_OPCODE_ADD:
1374	 brw_ADD(p, dst, src[0], src[1]);
1375	 break;
1376      case BRW_OPCODE_MUL:
1377	 brw_MUL(p, dst, src[0], src[1]);
1378	 break;
1379      case FS_OPCODE_RCP:
1380      case FS_OPCODE_RSQ:
1381      case FS_OPCODE_SQRT:
1382      case FS_OPCODE_EXP2:
1383      case FS_OPCODE_LOG2:
1384      case FS_OPCODE_POW:
1385      case FS_OPCODE_SIN:
1386      case FS_OPCODE_COS:
1387	 generate_math(inst, dst, src);
1388	 break;
1389      case FS_OPCODE_LINTERP:
1390	 generate_linterp(inst, dst, src);
1391	 break;
1392      case FS_OPCODE_FB_WRITE:
1393	 generate_fb_write(inst);
1394	 break;
1395      default:
1396	 assert(!"not reached");
1397      }
1398
1399      if (annotation_len < p->nr_insn) {
1400	 annotation_len *= 2;
1401	 if (annotation_len < 16)
1402	    annotation_len = 16;
1403
1404	 this->annotation_string = talloc_realloc(this->mem_ctx,
1405						  annotation_string,
1406						  const char *,
1407						  annotation_len);
1408	 this->annotation_ir = talloc_realloc(this->mem_ctx,
1409					      annotation_ir,
1410					      ir_instruction *,
1411					      annotation_len);
1412      }
1413
1414      for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
1415	 this->annotation_string[i] = inst->annotation;
1416	 this->annotation_ir[i] = inst->ir;
1417      }
1418      last_native_inst = p->nr_insn;
1419   }
1420}
1421
1422GLboolean
1423brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
1424{
1425   struct brw_compile *p = &c->func;
1426   struct intel_context *intel = &brw->intel;
1427   GLcontext *ctx = &intel->ctx;
1428   struct brw_shader *shader = NULL;
1429   struct gl_shader_program *prog = ctx->Shader.CurrentProgram;
1430
1431   if (!prog)
1432      return GL_FALSE;
1433
1434   if (!using_new_fs)
1435      return GL_FALSE;
1436
1437   for (unsigned int i = 0; i < prog->_NumLinkedShaders; i++) {
1438      if (prog->_LinkedShaders[i]->Type == GL_FRAGMENT_SHADER) {
1439	 shader = (struct brw_shader *)prog->_LinkedShaders[i];
1440	 break;
1441      }
1442   }
1443   if (!shader)
1444      return GL_FALSE;
1445
1446   /* We always use 8-wide mode, at least for now.  For one, flow
1447    * control only works in 8-wide.  Also, when we're fragment shader
1448    * bound, we're almost always under register pressure as well, so
1449    * 8-wide would save us from the performance cliff of spilling
1450    * regs.
1451    */
1452   c->dispatch_width = 8;
1453
1454   if (INTEL_DEBUG & DEBUG_WM) {
1455      printf("GLSL IR for native fragment shader %d:\n", prog->Name);
1456      _mesa_print_ir(shader->ir, NULL);
1457      printf("\n");
1458   }
1459
1460   /* Now the main event: Visit the shader IR and generate our FS IR for it.
1461    */
1462   fs_visitor v(c, shader);
1463
1464   if (0) {
1465      v.emit_dummy_fs();
1466   } else {
1467      v.emit_interpolation();
1468
1469      /* Generate FS IR for main().  (the visitor only descends into
1470       * functions called "main").
1471       */
1472      foreach_iter(exec_list_iterator, iter, *shader->ir) {
1473	 ir_instruction *ir = (ir_instruction *)iter.get();
1474	 v.base_ir = ir;
1475	 ir->accept(&v);
1476      }
1477
1478      if (v.fail)
1479	 return GL_FALSE;
1480
1481      v.emit_fb_writes();
1482      v.assign_urb_setup();
1483      v.assign_regs();
1484   }
1485
1486   v.generate_code();
1487
1488   if (INTEL_DEBUG & DEBUG_WM) {
1489      const char *last_annotation_string = NULL;
1490      ir_instruction *last_annotation_ir = NULL;
1491
1492      printf("Native code for fragment shader %d:\n", prog->Name);
1493      for (unsigned int i = 0; i < p->nr_insn; i++) {
1494	 if (last_annotation_ir != v.annotation_ir[i]) {
1495	    last_annotation_ir = v.annotation_ir[i];
1496	    if (last_annotation_ir) {
1497	       printf("   ");
1498	       last_annotation_ir->print();
1499	       printf("\n");
1500	    }
1501	 }
1502	 if (last_annotation_string != v.annotation_string[i]) {
1503	    last_annotation_string = v.annotation_string[i];
1504	    if (last_annotation_string)
1505	       printf("   %s\n", last_annotation_string);
1506	 }
1507	 brw_disasm(stdout, &p->store[i], intel->gen);
1508      }
1509      printf("\n");
1510   }
1511
1512   c->prog_data.nr_params = 0; /* FINISHME */
1513   c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
1514   c->prog_data.curb_read_length = 0; /* FINISHME */
1515   c->prog_data.total_grf = v.grf_used;
1516   c->prog_data.total_scratch = 0;
1517
1518   return GL_TRUE;
1519}
1520