brw_fs.cpp revision 2999a44968a045b5516ff23d70b711b01bd696a5
1/*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 *    Eric Anholt <eric@anholt.net>
25 *
26 */
27
28extern "C" {
29
30#include <sys/types.h>
31
32#include "main/macros.h"
33#include "main/shaderobj.h"
34#include "program/prog_parameter.h"
35#include "program/prog_print.h"
36#include "program/prog_optimize.h"
37#include "program/sampler.h"
38#include "program/hash_table.h"
39#include "brw_context.h"
40#include "brw_eu.h"
41#include "brw_wm.h"
42#include "talloc.h"
43}
44#include "../glsl/glsl_types.h"
45#include "../glsl/ir_optimization.h"
46#include "../glsl/ir_print_visitor.h"
47
/* Register files an fs_reg can live in.  The first four alias the hardware
 * BRW_*_REGISTER_FILE encodings; the remaining values are virtual files
 * that exist only inside this backend until register allocation.
 */
enum register_file {
   ARF = BRW_ARCHITECTURE_REGISTER_FILE,
   GRF = BRW_GENERAL_REGISTER_FILE,
   MRF = BRW_MESSAGE_REGISTER_FILE,
   IMM = BRW_IMMEDIATE_VALUE,
   FIXED_HW_REG, /* a struct brw_reg */
   UNIFORM, /* prog_data->params[hw_reg] */
   BAD_FILE /* sentinel: register not yet assigned */
};
57
/* Backend-specific opcodes, sharing fs_inst::opcode with BRW_OPCODE_*.
 * Numbering starts at 256, presumably to stay clear of the hardware
 * opcode encodings -- confirm against brw_eu.h / BRW_OPCODE_* values.
 */
enum fs_opcodes {
   FS_OPCODE_FB_WRITE = 256,
   FS_OPCODE_RCP,
   FS_OPCODE_RSQ,
   FS_OPCODE_SQRT,
   FS_OPCODE_EXP2,
   FS_OPCODE_LOG2,
   FS_OPCODE_POW,
   FS_OPCODE_SIN,
   FS_OPCODE_COS,
   FS_OPCODE_DDX,
   FS_OPCODE_DDY,
   FS_OPCODE_LINTERP,
   FS_OPCODE_TEX,
   FS_OPCODE_TXB,
   FS_OPCODE_TXL,
   FS_OPCODE_DISCARD,
};
76
77static int using_new_fs = -1;
78
79struct gl_shader *
80brw_new_shader(GLcontext *ctx, GLuint name, GLuint type)
81{
82   struct brw_shader *shader;
83
84   shader = talloc_zero(NULL, struct brw_shader);
85   if (shader) {
86      shader->base.Type = type;
87      shader->base.Name = name;
88      _mesa_init_shader(ctx, &shader->base);
89   }
90
91   return &shader->base;
92}
93
94struct gl_shader_program *
95brw_new_shader_program(GLcontext *ctx, GLuint name)
96{
97   struct brw_shader_program *prog;
98   prog = talloc_zero(NULL, struct brw_shader_program);
99   if (prog) {
100      prog->base.Name = name;
101      _mesa_init_shader_program(ctx, &prog->base);
102   }
103   return &prog->base;
104}
105
106GLboolean
107brw_compile_shader(GLcontext *ctx, struct gl_shader *shader)
108{
109   if (!_mesa_ir_compile_shader(ctx, shader))
110      return GL_FALSE;
111
112   return GL_TRUE;
113}
114
GLboolean
brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
{
   /* Lazily check the INTEL_NEW_FS opt-in environment variable once. */
   if (using_new_fs == -1)
      using_new_fs = getenv("INTEL_NEW_FS") != NULL;

   for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) {
      struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[i];

      if (using_new_fs && shader->base.Type == GL_FRAGMENT_SHADER) {
	 void *mem_ctx = talloc_new(NULL);
	 bool progress;

	 /* Clone the linked IR into our own list so the lowering below
	  * doesn't disturb the copy that _mesa_ir_link_shader consumes.
	  */
	 if (shader->ir)
	    talloc_free(shader->ir);
	 shader->ir = new(shader) exec_list;
	 clone_ir_list(mem_ctx, shader->ir, shader->base.ir);

	 /* One-shot lowering passes: reduce matrix ops, mod, division,
	  * subtraction, and exp/log to forms the backend emits directly.
	  */
	 do_mat_op_to_vec(shader->ir);
	 do_mod_to_fract(shader->ir);
	 do_div_to_mul_rcp(shader->ir);
	 do_sub_to_add_neg(shader->ir);
	 do_explog_to_explog2(shader->ir);

	 /* Iterate the scalarizing/optimizing passes to a fixed point. */
	 do {
	    progress = false;

	    brw_do_channel_expressions(shader->ir);
	    brw_do_vector_splitting(shader->ir);

	    progress = do_lower_jumps(shader->ir, true, true,
				      true, /* main return */
				      false, /* continue */
				      false /* loops */
				      ) || progress;

	    progress = do_common_optimization(shader->ir, true, 32) || progress;

	    progress = lower_noise(shader->ir) || progress;
	    progress =
	       lower_variable_index_to_cond_assign(shader->ir,
						   GL_TRUE, /* input */
						   GL_TRUE, /* output */
						   GL_TRUE, /* temp */
						   GL_TRUE /* uniform */
						   ) || progress;
	 } while (progress);

	 validate_ir_tree(shader->ir);

	 /* Reparent the surviving IR nodes onto shader->ir so that freeing
	  * mem_ctx only discards nodes dropped during optimization.
	  */
	 reparent_ir(shader->ir, shader->ir);
	 talloc_free(mem_ctx);
      }
   }

   if (!_mesa_ir_link_shader(ctx, prog))
      return GL_FALSE;

   return GL_TRUE;
}
175
176static int
177type_size(const struct glsl_type *type)
178{
179   unsigned int size, i;
180
181   switch (type->base_type) {
182   case GLSL_TYPE_UINT:
183   case GLSL_TYPE_INT:
184   case GLSL_TYPE_FLOAT:
185   case GLSL_TYPE_BOOL:
186      return type->components();
187   case GLSL_TYPE_ARRAY:
188      /* FINISHME: uniform/varying arrays. */
189      return type_size(type->fields.array) * type->length;
190   case GLSL_TYPE_STRUCT:
191      size = 0;
192      for (i = 0; i < type->length; i++) {
193	 size += type_size(type->fields.structure[i].type);
194      }
195      return size;
196   case GLSL_TYPE_SAMPLER:
197      /* Samplers take up no register space, since they're baked in at
198       * link time.
199       */
200      return 0;
201   default:
202      assert(!"not reached");
203      return 0;
204   }
205}
206
/**
 * An fs_inst operand: an abstract or fixed hardware register, an
 * immediate, or a uniform slot.  Copied by value throughout, so it
 * carries no ownership.
 */
class fs_reg {
public:
   /* Callers of this talloc-based new need not call delete. It's
    * easier to just talloc_free 'ctx' (or any of its ancestors). */
   static void* operator new(size_t size, void *ctx)
   {
      void *node;

      node = talloc_size(ctx, size);
      assert(node != NULL);

      return node;
   }

   /* Field setup shared by all constructors.  NOTE(review): `type` is
    * not initialized here, so the default constructor leaves it
    * unspecified -- consumers appear to check file == BAD_FILE first.
    */
   void init()
   {
      this->reg = 0;
      this->reg_offset = 0;
      this->negate = 0;
      this->abs = 0;
      this->hw_reg = -1;
   }

   /** Generic unset register constructor. */
   fs_reg()
   {
      init();
      this->file = BAD_FILE;
   }

   /** Immediate value constructor. */
   fs_reg(float f)
   {
      init();
      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_F;
      this->imm.f = f;
   }

   /** Immediate value constructor. */
   fs_reg(int32_t i)
   {
      init();
      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_D;
      this->imm.i = i;
   }

   /** Immediate value constructor. */
   fs_reg(uint32_t u)
   {
      init();
      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_UD;
      this->imm.u = u;
   }

   /** Fixed brw_reg Immediate value constructor. */
   fs_reg(struct brw_reg fixed_hw_reg)
   {
      init();
      this->file = FIXED_HW_REG;
      this->fixed_hw_reg = fixed_hw_reg;
      this->type = fixed_hw_reg.type;
   }

   /* Out-of-line constructors: fixed HW reg, and fresh automatic GRF
    * storage allocated from a visitor (defined below).
    */
   fs_reg(enum register_file file, int hw_reg);
   fs_reg(class fs_visitor *v, const struct glsl_type *type);

   /** Register file: ARF, GRF, MRF, IMM. */
   enum register_file file;
   /** Abstract register number.  0 = fixed hw reg */
   int reg;
   /** Offset within the abstract register. */
   int reg_offset;
   /** HW register number.  Generally unset until register allocation. */
   int hw_reg;
   /** Register type.  BRW_REGISTER_TYPE_* */
   int type;
   bool negate;
   bool abs;
   /** Backing register when file == FIXED_HW_REG. */
   struct brw_reg fixed_hw_reg;

   /** Value for file == BRW_IMMMEDIATE_FILE */
   union {
      int32_t i;
      uint32_t u;
      float f;
   } imm;
};
297
/* Shared sentinels: reg_undef is a BAD_FILE placeholder for unused operands;
 * reg_null names the hardware null register, used as a CMP destination when
 * only the condition flags are wanted (see the CMP emissions below).
 */
static const fs_reg reg_undef;
static const fs_reg reg_null(ARF, BRW_ARF_NULL);
300
/**
 * One backend instruction: a BRW_OPCODE_* or FS_OPCODE_* with a
 * destination and up to three sources.  Chains into an exec_list via
 * exec_node.
 */
class fs_inst : public exec_node {
public:
   /* Callers of this talloc-based new need not call delete. It's
    * easier to just talloc_free 'ctx' (or any of its ancestors). */
   static void* operator new(size_t size, void *ctx)
   {
      void *node;

      /* talloc_zero_size: heap instances start fully zeroed, covering the
       * fields init() doesn't set (mlen, ir, annotation).  NOTE(review):
       * value-constructed temporaries (as passed to emit()) get no such
       * zeroing -- presumably emit() handles that; confirm.
       */
      node = talloc_zero_size(ctx, size);
      assert(node != NULL);

      return node;
   }

   /* Field setup shared by all constructors. */
   void init()
   {
      this->opcode = BRW_OPCODE_NOP;
      this->saturate = false;
      this->conditional_mod = BRW_CONDITIONAL_NONE;
      this->predicated = false;
      this->sampler = 0;
      this->shadow_compare = false;
   }

   fs_inst()
   {
      init();
   }

   fs_inst(int opcode)
   {
      init();
      this->opcode = opcode;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
      this->src[1] = src1;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
      this->src[1] = src1;
      this->src[2] = src2;
   }

   int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
   fs_reg dst;
   fs_reg src[3];
   bool saturate;
   bool predicated;
   int conditional_mod; /**< BRW_CONDITIONAL_* */

   int mlen; /** SEND message length */
   int sampler; /**< texture unit, resolved in visit(ir_texture *) */
   bool shadow_compare;

   /** @{
    * Annotation for the generated IR.  One of the two can be set.
    */
   ir_instruction *ir;
   const char *annotation;
   /** @} */
};
381
/**
 * The IR-to-fs_inst translator for the scalar fragment shader backend.
 *
 * The visit() methods walk the GLSL IR and append fs_inst records to
 * `instructions`; the assign_*/generate_*/emit_* methods then lower that
 * list toward actual EU code through the brw_compile state in `p`.
 */
class fs_visitor : public ir_visitor
{
public:

   fs_visitor(struct brw_wm_compile *c, struct brw_shader *shader)
   {
      this->c = c;
      this->p = &c->func;
      this->brw = p->brw;
      this->fp = brw->fragment_program;
      this->intel = &brw->intel;
      this->ctx = &intel->ctx;
      this->mem_ctx = talloc_new(NULL);
      this->shader = shader;
      this->fail = false;
      /* Abstract GRF numbering starts at 1: reg 0 means "fixed hw reg"
       * (see fs_reg::reg).
       */
      this->next_abstract_grf = 1;
      this->variable_ht = hash_table_ctor(0,
					  hash_table_pointer_hash,
					  hash_table_pointer_compare);

      this->frag_color = NULL;
      this->frag_data = NULL;
      this->frag_depth = NULL;
      this->first_non_payload_grf = 0;

      this->current_annotation = NULL;
      this->annotation_string = NULL;
      this->annotation_ir = NULL;
      this->base_ir = NULL;
   }
   ~fs_visitor()
   {
      talloc_free(this->mem_ctx);
      hash_table_dtor(this->variable_ht);
   }

   /** Returns the fs_reg bound to @var by visit(ir_variable *), or NULL. */
   fs_reg *variable_storage(ir_variable *var);

   void visit(ir_variable *ir);
   void visit(ir_assignment *ir);
   void visit(ir_dereference_variable *ir);
   void visit(ir_dereference_record *ir);
   void visit(ir_dereference_array *ir);
   void visit(ir_expression *ir);
   void visit(ir_texture *ir);
   void visit(ir_if *ir);
   void visit(ir_constant *ir);
   void visit(ir_swizzle *ir);
   void visit(ir_return *ir);
   void visit(ir_loop *ir);
   void visit(ir_loop_jump *ir);
   void visit(ir_discard *ir);
   void visit(ir_call *ir);
   void visit(ir_function *ir);
   void visit(ir_function_signature *ir);

   /* Instruction emission and code-generation stages. */
   fs_inst *emit(fs_inst inst);
   void assign_curb_setup();
   void assign_urb_setup();
   void assign_regs();
   void generate_code();
   void generate_fb_write(fs_inst *inst);
   void generate_linterp(fs_inst *inst, struct brw_reg dst,
			 struct brw_reg *src);
   void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
   void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src);
   void generate_discard(fs_inst *inst);
   void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
   void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src);

   void emit_dummy_fs();
   void emit_interpolation();
   void emit_pinterp(int location);
   void emit_fb_writes();

   struct brw_reg interp_reg(int location, int channel);
   int setup_uniform_values(int loc, const glsl_type *type);

   /* Compile-wide context, borrowed from the caller (not owned). */
   struct brw_context *brw;
   const struct gl_fragment_program *fp;
   struct intel_context *intel;
   GLcontext *ctx;
   struct brw_wm_compile *c;
   struct brw_compile *p;
   struct brw_shader *shader;
   void *mem_ctx;
   exec_list instructions;
   int next_abstract_grf;
   /* ir_variable * -> fs_reg *, filled in by visit(ir_variable *). */
   struct hash_table *variable_ht;
   ir_variable *frag_color, *frag_data, *frag_depth;
   int first_non_payload_grf;

   /** @{ debug annotation info */
   const char *current_annotation;
   ir_instruction *base_ir;
   const char **annotation_string;
   ir_instruction **annotation_ir;
   /** @} */

   /* Set to true when translation hits something unsupported. */
   bool fail;

   /* Result of last visit() method. */
   fs_reg result;

   /* Payload/interpolation registers set up by emit_interpolation(). */
   fs_reg pixel_x;
   fs_reg pixel_y;
   fs_reg pixel_w;
   fs_reg delta_x;
   fs_reg delta_y;
   fs_reg interp_attrs[64];

   int grf_used;

};
496
497/** Fixed HW reg constructor. */
498fs_reg::fs_reg(enum register_file file, int hw_reg)
499{
500   init();
501   this->file = file;
502   this->hw_reg = hw_reg;
503   this->type = BRW_REGISTER_TYPE_F;
504}
505
506int
507brw_type_for_base_type(const struct glsl_type *type)
508{
509   switch (type->base_type) {
510   case GLSL_TYPE_FLOAT:
511      return BRW_REGISTER_TYPE_F;
512   case GLSL_TYPE_INT:
513   case GLSL_TYPE_BOOL:
514      return BRW_REGISTER_TYPE_D;
515   case GLSL_TYPE_UINT:
516      return BRW_REGISTER_TYPE_UD;
517   case GLSL_TYPE_ARRAY:
518   case GLSL_TYPE_STRUCT:
519      /* These should be overridden with the type of the member when
520       * dereferenced into.  BRW_REGISTER_TYPE_UD seems like a likely
521       * way to trip up if we don't.
522       */
523      return BRW_REGISTER_TYPE_UD;
524   default:
525      assert(!"not reached");
526      return BRW_REGISTER_TYPE_F;
527   }
528}
529
530/** Automatic reg constructor. */
531fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type)
532{
533   init();
534
535   this->file = GRF;
536   this->reg = v->next_abstract_grf;
537   this->reg_offset = 0;
538   v->next_abstract_grf += type_size(type);
539   this->type = brw_type_for_base_type(type);
540}
541
542fs_reg *
543fs_visitor::variable_storage(ir_variable *var)
544{
545   return (fs_reg *)hash_table_find(this->variable_ht, var);
546}
547
/* Our support for uniforms is piggy-backed on the struct
 * gl_fragment_program, because that's where the values actually
 * get stored, rather than in some global gl_shader_program uniform
 * store.
 */
/**
 * Appends pointers to the live parameter values for @type (starting at
 * parameter slot @loc) onto c->prog_data.param, recursing through
 * matrices, structures, and arrays.
 *
 * Returns the number of parameter slots consumed.
 */
int
fs_visitor::setup_uniform_values(int loc, const glsl_type *type)
{
   unsigned int offset = 0;
   float *vec_values;

   /* A matrix is laid out as matrix_columns consecutive column vectors. */
   if (type->is_matrix()) {
      const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
							type->vector_elements,
							1);

      for (unsigned int i = 0; i < type->matrix_columns; i++) {
	 offset += setup_uniform_values(loc + offset, column);
      }

      return offset;
   }

   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      /* Point prog_data.param at the parameter storage itself, so later
       * constant upload reads the current uniform values.
       */
      vec_values = fp->Base.Parameters->ParameterValues[loc];
      for (unsigned int i = 0; i < type->vector_elements; i++) {
	 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i];
      }
      return 1;

   case GLSL_TYPE_STRUCT:
      for (unsigned int i = 0; i < type->length; i++) {
	 offset += setup_uniform_values(loc + offset,
					type->fields.structure[i].type);
      }
      return offset;

   case GLSL_TYPE_ARRAY:
      for (unsigned int i = 0; i < type->length; i++) {
	 offset += setup_uniform_values(loc + offset, type->fields.array);
      }
      return offset;

   case GLSL_TYPE_SAMPLER:
      /* The sampler takes up a slot, but we don't use any values from it. */
      return 1;

   default:
      assert(!"not reached");
      return 0;
   }
}
604
/**
 * Assigns backing storage for a variable declaration and records it in
 * variable_ht so later dereferences can find it.
 */
void
fs_visitor::visit(ir_variable *ir)
{
   fs_reg *reg = NULL;

   /* Remember the special fragment outputs -- presumably consumed by
    * emit_fb_writes(); that code is not visible in this chunk.
    */
   if (strcmp(ir->name, "gl_FragColor") == 0) {
      this->frag_color = ir;
   } else if (strcmp(ir->name, "gl_FragData") == 0) {
      this->frag_data = ir;
   } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
      this->frag_depth = ir;
      assert(!"FINISHME: this hangs currently.");
   }

   if (ir->mode == ir_var_in) {
      if (strcmp(ir->name, "gl_FrontFacing") == 0) {
	 /* gl_FrontFacing is computed from the payload rather than
	  * interpolated.
	  */
	 reg = new(this->mem_ctx) fs_reg(this, ir->type);
	 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
	 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
	  * us front face
	  */
	 fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP,
				      *reg,
				      fs_reg(r1_6ud),
				      fs_reg(1u << 31)));
	 inst->conditional_mod = BRW_CONDITIONAL_L;
	 /* Resolve the CMP result into the 0/1 boolean convention. */
	 emit(fs_inst(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u)));
      } else {
	 /* Other inputs use the per-location interpolation results. */
	 reg = &this->interp_attrs[ir->location];
      }
   }

   if (ir->mode == ir_var_uniform) {
      /* Record where this uniform's values start in prog_data.param
       * before setup_uniform_values() advances nr_params.
       */
      int param_index = c->prog_data.nr_params;

      setup_uniform_values(ir->location, ir->type);

      reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index);
   }

   /* Plain temporaries get fresh GRF storage. */
   if (!reg)
      reg = new(this->mem_ctx) fs_reg(this, ir->type);

   /* Keyed on the ir_variable; data is the fs_reg (see variable_storage). */
   hash_table_insert(this->variable_ht, reg, ir);
}
650
651void
652fs_visitor::visit(ir_dereference_variable *ir)
653{
654   fs_reg *reg = variable_storage(ir->var);
655   this->result = *reg;
656}
657
658void
659fs_visitor::visit(ir_dereference_record *ir)
660{
661   const glsl_type *struct_type = ir->record->type;
662
663   ir->record->accept(this);
664
665   unsigned int offset = 0;
666   for (unsigned int i = 0; i < struct_type->length; i++) {
667      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
668	 break;
669      offset += type_size(struct_type->fields.structure[i].type);
670   }
671   this->result.reg_offset += offset;
672   this->result.type = brw_type_for_base_type(ir->type);
673}
674
675void
676fs_visitor::visit(ir_dereference_array *ir)
677{
678   ir_constant *index;
679   int element_size;
680
681   ir->array->accept(this);
682   index = ir->array_index->as_constant();
683
684   if (ir->type->is_matrix()) {
685      element_size = ir->type->vector_elements;
686   } else {
687      element_size = type_size(ir->type);
688      this->result.type = brw_type_for_base_type(ir->type);
689   }
690
691   if (index) {
692      assert(this->result.file == UNIFORM ||
693	     (this->result.file == GRF &&
694	      this->result.reg != 0));
695      this->result.reg_offset += index->value.i[0] * element_size;
696   } else {
697      assert(!"FINISHME: non-constant matrix column");
698   }
699}
700
701void
702fs_visitor::visit(ir_expression *ir)
703{
704   unsigned int operand;
705   fs_reg op[2], temp;
706   fs_reg result;
707   fs_inst *inst;
708
709   for (operand = 0; operand < ir->get_num_operands(); operand++) {
710      ir->operands[operand]->accept(this);
711      if (this->result.file == BAD_FILE) {
712	 ir_print_visitor v;
713	 printf("Failed to get tree for expression operand:\n");
714	 ir->operands[operand]->accept(&v);
715	 this->fail = true;
716      }
717      op[operand] = this->result;
718
719      /* Matrix expression operands should have been broken down to vector
720       * operations already.
721       */
722      assert(!ir->operands[operand]->type->is_matrix());
723      /* And then those vector operands should have been broken down to scalar.
724       */
725      assert(!ir->operands[operand]->type->is_vector());
726   }
727
728   /* Storage for our result.  If our result goes into an assignment, it will
729    * just get copy-propagated out, so no worries.
730    */
731   this->result = fs_reg(this, ir->type);
732
733   switch (ir->operation) {
734   case ir_unop_logic_not:
735      emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], fs_reg(-1)));
736      break;
737   case ir_unop_neg:
738      op[0].negate = !op[0].negate;
739      this->result = op[0];
740      break;
741   case ir_unop_abs:
742      op[0].abs = true;
743      this->result = op[0];
744      break;
745   case ir_unop_sign:
746      temp = fs_reg(this, ir->type);
747
748      emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(0.0f)));
749
750      inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f)));
751      inst->conditional_mod = BRW_CONDITIONAL_G;
752      inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(1.0f)));
753      inst->predicated = true;
754
755      inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f)));
756      inst->conditional_mod = BRW_CONDITIONAL_L;
757      inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f)));
758      inst->predicated = true;
759
760      break;
761   case ir_unop_rcp:
762      emit(fs_inst(FS_OPCODE_RCP, this->result, op[0]));
763      break;
764
765   case ir_unop_exp2:
766      emit(fs_inst(FS_OPCODE_EXP2, this->result, op[0]));
767      break;
768   case ir_unop_log2:
769      emit(fs_inst(FS_OPCODE_LOG2, this->result, op[0]));
770      break;
771   case ir_unop_exp:
772   case ir_unop_log:
773      assert(!"not reached: should be handled by ir_explog_to_explog2");
774      break;
775   case ir_unop_sin:
776      emit(fs_inst(FS_OPCODE_SIN, this->result, op[0]));
777      break;
778   case ir_unop_cos:
779      emit(fs_inst(FS_OPCODE_COS, this->result, op[0]));
780      break;
781
782   case ir_unop_dFdx:
783      emit(fs_inst(FS_OPCODE_DDX, this->result, op[0]));
784      break;
785   case ir_unop_dFdy:
786      emit(fs_inst(FS_OPCODE_DDY, this->result, op[0]));
787      break;
788
789   case ir_binop_add:
790      emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1]));
791      break;
792   case ir_binop_sub:
793      assert(!"not reached: should be handled by ir_sub_to_add_neg");
794      break;
795
796   case ir_binop_mul:
797      emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1]));
798      break;
799   case ir_binop_div:
800      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
801      break;
802   case ir_binop_mod:
803      assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
804      break;
805
806   case ir_binop_less:
807      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
808      inst->conditional_mod = BRW_CONDITIONAL_L;
809      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
810      break;
811   case ir_binop_greater:
812      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
813      inst->conditional_mod = BRW_CONDITIONAL_G;
814      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
815      break;
816   case ir_binop_lequal:
817      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
818      inst->conditional_mod = BRW_CONDITIONAL_LE;
819      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
820      break;
821   case ir_binop_gequal:
822      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
823      inst->conditional_mod = BRW_CONDITIONAL_GE;
824      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
825      break;
826   case ir_binop_equal:
827   case ir_binop_all_equal: /* same as nequal for scalars */
828      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
829      inst->conditional_mod = BRW_CONDITIONAL_Z;
830      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
831      break;
832   case ir_binop_nequal:
833   case ir_binop_any_nequal: /* same as nequal for scalars */
834      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
835      inst->conditional_mod = BRW_CONDITIONAL_NZ;
836      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
837      break;
838
839   case ir_binop_logic_xor:
840      emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1]));
841      break;
842
843   case ir_binop_logic_or:
844      emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1]));
845      break;
846
847   case ir_binop_logic_and:
848      emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1]));
849      break;
850
851   case ir_binop_dot:
852   case ir_binop_cross:
853   case ir_unop_any:
854      assert(!"not reached: should be handled by brw_fs_channel_expressions");
855      break;
856
857   case ir_unop_noise:
858      assert(!"not reached: should be handled by lower_noise");
859      break;
860
861   case ir_unop_sqrt:
862      emit(fs_inst(FS_OPCODE_SQRT, this->result, op[0]));
863      break;
864
865   case ir_unop_rsq:
866      emit(fs_inst(FS_OPCODE_RSQ, this->result, op[0]));
867      break;
868
869   case ir_unop_i2f:
870   case ir_unop_b2f:
871   case ir_unop_b2i:
872      emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
873      break;
874   case ir_unop_f2i:
875      emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
876      break;
877   case ir_unop_f2b:
878   case ir_unop_i2b:
879      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f)));
880      inst->conditional_mod = BRW_CONDITIONAL_NZ;
881
882   case ir_unop_trunc:
883      emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
884      break;
885   case ir_unop_ceil:
886      op[0].negate = ~op[0].negate;
887      inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
888      this->result.negate = true;
889      break;
890   case ir_unop_floor:
891      inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
892      break;
893   case ir_unop_fract:
894      inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0]));
895      break;
896
897   case ir_binop_min:
898      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
899      inst->conditional_mod = BRW_CONDITIONAL_L;
900
901      inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
902      inst->predicated = true;
903      break;
904   case ir_binop_max:
905      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
906      inst->conditional_mod = BRW_CONDITIONAL_G;
907
908      inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
909      inst->predicated = true;
910      break;
911
912   case ir_binop_pow:
913      inst = emit(fs_inst(FS_OPCODE_POW, this->result, op[0], op[1]));
914      break;
915
916   case ir_unop_bit_not:
917   case ir_unop_u2f:
918   case ir_binop_lshift:
919   case ir_binop_rshift:
920   case ir_binop_bit_and:
921   case ir_binop_bit_xor:
922   case ir_binop_bit_or:
923      assert(!"GLSL 1.30 features unsupported");
924      break;
925   }
926}
927
/**
 * Emits the scalar MOVs (optionally predicated on ir->condition) that
 * copy the RHS value into the LHS storage.
 */
void
fs_visitor::visit(ir_assignment *ir)
{
   struct fs_reg l, r;
   int i;
   int write_mask;
   fs_inst *inst;

   /* FINISHME: arrays on the lhs */
   ir->lhs->accept(this);
   l = this->result;

   ir->rhs->accept(this);
   r = this->result;

   /* FINISHME: This should really set to the correct maximal writemask for each
    * FINISHME: component written (in the loops below).  This case can only
    * FINISHME: occur for matrices, arrays, and structures.
    */
   if (ir->write_mask == 0) {
      assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
      write_mask = WRITEMASK_XYZW;
   } else {
      assert(ir->lhs->type->is_vector() || ir->lhs->type->is_scalar());
      write_mask = ir->write_mask;
   }

   assert(l.file != BAD_FILE);
   assert(r.file != BAD_FILE);

   if (ir->condition) {
      /* Get the condition bool into the predicate. */
      ir->condition->accept(this);
      inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, this->result, fs_reg(0)));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   }

   /* One MOV per written scalar component.  The writemask only covers the
    * first four components (i >= 4 only happens for aggregate LHS, which
    * writes everything).  The RHS offset advances only on written
    * components -- it supplies values only for those -- while the LHS
    * offset always advances.
    */
   for (i = 0; i < type_size(ir->lhs->type); i++) {
      if (i >= 4 || (write_mask & (1 << i))) {
	 inst = emit(fs_inst(BRW_OPCODE_MOV, l, r));
	 if (ir->condition)
	    inst->predicated = true;
	 r.reg_offset++;
      }
      l.reg_offset++;
   }
}
975
/**
 * Builds the sampler SEND message for a texture operation: coordinate
 * (projected if needed), optional shadow reference, optional bias/lod,
 * loaded into consecutive MRFs starting at base_mrf.
 */
void
fs_visitor::visit(ir_texture *ir)
{
   int base_mrf = 2;
   fs_inst *inst = NULL;
   unsigned int mlen = 0;

   ir->coordinate->accept(this);
   fs_reg coordinate = this->result;

   /* Projection: multiply every coordinate component by 1/q rather than
    * dividing each one.
    */
   if (ir->projector) {
      fs_reg inv_proj = fs_reg(this, glsl_type::float_type);

      ir->projector->accept(this);
      emit(fs_inst(FS_OPCODE_RCP, inv_proj, this->result));

      fs_reg proj_coordinate = fs_reg(this, ir->coordinate->type);
      for (unsigned int i = 0; i < ir->coordinate->type->vector_elements; i++) {
	 emit(fs_inst(BRW_OPCODE_MUL, proj_coordinate, coordinate, inv_proj));
	 coordinate.reg_offset++;
	 proj_coordinate.reg_offset++;
      }
      /* Rewind to component 0 before using the projected coordinate. */
      proj_coordinate.reg_offset = 0;

      coordinate = proj_coordinate;
   }

   /* Load the coordinate components into consecutive message registers. */
   for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate));
      coordinate.reg_offset++;
   }

   /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
   if (intel->gen < 5)
      mlen = 3;

   if (ir->shadow_comparitor) {
      /* For shadow comparisons, we have to supply u,v,r. */
      mlen = 3;

      /* The comparison reference value goes after the coordinate. */
      ir->shadow_comparitor->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;
   }

   /* Do we ever want to handle writemasking on texture samples?  Is it
    * performance relevant?
    */
   fs_reg dst = fs_reg(this, glsl_type::vec4_type);

   switch (ir->op) {
   case ir_tex:
      inst = emit(fs_inst(FS_OPCODE_TEX, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txb:
      /* The bias occupies the message register after coordinate (and
       * shadow reference, if any).
       */
      ir->lod_info.bias->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;

      inst = emit(fs_inst(FS_OPCODE_TXB, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txl:
      ir->lod_info.lod->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;

      inst = emit(fs_inst(FS_OPCODE_TXL, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txd:
   case ir_txf:
      assert(!"GLSL 1.30 features unsupported");
      break;
   }

   /* Resolve the GLSL sampler uniform through to the bound texture unit. */
   inst->sampler =
      _mesa_get_sampler_uniform_value(ir->sampler,
				      ctx->Shader.CurrentProgram,
				      &brw->fragment_program->Base);
   inst->sampler = c->fp->program.Base.SamplerUnits[inst->sampler];

   this->result = dst;

   if (ir->shadow_comparitor)
      inst->shadow_compare = true;
   inst->mlen = mlen;
}
1062
1063void
1064fs_visitor::visit(ir_swizzle *ir)
1065{
1066   ir->val->accept(this);
1067   fs_reg val = this->result;
1068
1069   fs_reg result = fs_reg(this, ir->type);
1070   this->result = result;
1071
1072   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
1073      fs_reg channel = val;
1074      int swiz = 0;
1075
1076      switch (i) {
1077      case 0:
1078	 swiz = ir->mask.x;
1079	 break;
1080      case 1:
1081	 swiz = ir->mask.y;
1082	 break;
1083      case 2:
1084	 swiz = ir->mask.z;
1085	 break;
1086      case 3:
1087	 swiz = ir->mask.w;
1088	 break;
1089      }
1090
1091      channel.reg_offset += swiz;
1092      emit(fs_inst(BRW_OPCODE_MOV, result, channel));
1093      result.reg_offset++;
1094   }
1095}
1096
1097void
1098fs_visitor::visit(ir_discard *ir)
1099{
1100   assert(ir->condition == NULL); /* FINISHME */
1101
1102   emit(fs_inst(FS_OPCODE_DISCARD));
1103}
1104
1105void
1106fs_visitor::visit(ir_constant *ir)
1107{
1108   fs_reg reg(this, ir->type);
1109   this->result = reg;
1110
1111   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
1112      switch (ir->type->base_type) {
1113      case GLSL_TYPE_FLOAT:
1114	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i])));
1115	 break;
1116      case GLSL_TYPE_UINT:
1117	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i])));
1118	 break;
1119      case GLSL_TYPE_INT:
1120	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i])));
1121	 break;
1122      case GLSL_TYPE_BOOL:
1123	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i])));
1124	 break;
1125      default:
1126	 assert(!"Non-float/uint/int/bool constant");
1127      }
1128      reg.reg_offset++;
1129   }
1130}
1131
1132void
1133fs_visitor::visit(ir_if *ir)
1134{
1135   fs_inst *inst;
1136
1137   /* Don't point the annotation at the if statement, because then it plus
1138    * the then and else blocks get printed.
1139    */
1140   this->base_ir = ir->condition;
1141
1142   /* Generate the condition into the condition code. */
1143   ir->condition->accept(this);
1144   inst = emit(fs_inst(BRW_OPCODE_MOV, fs_reg(brw_null_reg()), this->result));
1145   inst->conditional_mod = BRW_CONDITIONAL_NZ;
1146
1147   inst = emit(fs_inst(BRW_OPCODE_IF));
1148   inst->predicated = true;
1149
1150   foreach_iter(exec_list_iterator, iter, ir->then_instructions) {
1151      ir_instruction *ir = (ir_instruction *)iter.get();
1152      this->base_ir = ir;
1153
1154      ir->accept(this);
1155   }
1156
1157   if (!ir->else_instructions.is_empty()) {
1158      emit(fs_inst(BRW_OPCODE_ELSE));
1159
1160      foreach_iter(exec_list_iterator, iter, ir->else_instructions) {
1161	 ir_instruction *ir = (ir_instruction *)iter.get();
1162	 this->base_ir = ir;
1163
1164	 ir->accept(this);
1165      }
1166   }
1167
1168   emit(fs_inst(BRW_OPCODE_ENDIF));
1169}
1170
1171void
1172fs_visitor::visit(ir_loop *ir)
1173{
1174   assert(!ir->from);
1175   assert(!ir->to);
1176   assert(!ir->increment);
1177   assert(!ir->counter);
1178
1179   emit(fs_inst(BRW_OPCODE_DO));
1180
1181   /* Start a safety counter.  If the user messed up their loop
1182    * counting, we don't want to hang the GPU.
1183    */
1184   fs_reg max_iter = fs_reg(this, glsl_type::int_type);
1185   emit(fs_inst(BRW_OPCODE_MOV, max_iter, fs_reg(10000)));
1186
1187   foreach_iter(exec_list_iterator, iter, ir->body_instructions) {
1188      ir_instruction *ir = (ir_instruction *)iter.get();
1189      fs_inst *inst;
1190
1191      this->base_ir = ir;
1192      ir->accept(this);
1193
1194      /* Check the maximum loop iters counter. */
1195      inst = emit(fs_inst(BRW_OPCODE_ADD, max_iter, max_iter, fs_reg(-1)));
1196      inst->conditional_mod = BRW_CONDITIONAL_Z;
1197
1198      inst = emit(fs_inst(BRW_OPCODE_BREAK));
1199      inst->predicated = true;
1200   }
1201
1202   emit(fs_inst(BRW_OPCODE_WHILE));
1203}
1204
1205void
1206fs_visitor::visit(ir_loop_jump *ir)
1207{
1208   switch (ir->mode) {
1209   case ir_loop_jump::jump_break:
1210      emit(fs_inst(BRW_OPCODE_BREAK));
1211      break;
1212   case ir_loop_jump::jump_continue:
1213      emit(fs_inst(BRW_OPCODE_CONTINUE));
1214      break;
1215   }
1216}
1217
1218void
1219fs_visitor::visit(ir_call *ir)
1220{
1221   assert(!"FINISHME");
1222}
1223
1224void
1225fs_visitor::visit(ir_return *ir)
1226{
1227   assert(!"FINISHME");
1228}
1229
1230void
1231fs_visitor::visit(ir_function *ir)
1232{
1233   /* Ignore function bodies other than main() -- we shouldn't see calls to
1234    * them since they should all be inlined before we get to ir_to_mesa.
1235    */
1236   if (strcmp(ir->name, "main") == 0) {
1237      const ir_function_signature *sig;
1238      exec_list empty;
1239
1240      sig = ir->matching_signature(&empty);
1241
1242      assert(sig);
1243
1244      foreach_iter(exec_list_iterator, iter, sig->body) {
1245	 ir_instruction *ir = (ir_instruction *)iter.get();
1246	 this->base_ir = ir;
1247
1248	 ir->accept(this);
1249      }
1250   }
1251}
1252
1253void
1254fs_visitor::visit(ir_function_signature *ir)
1255{
1256   assert(!"not reached");
1257   (void)ir;
1258}
1259
1260fs_inst *
1261fs_visitor::emit(fs_inst inst)
1262{
1263   fs_inst *list_inst = new(mem_ctx) fs_inst;
1264   *list_inst = inst;
1265
1266   list_inst->annotation = this->current_annotation;
1267   list_inst->ir = this->base_ir;
1268
1269   this->instructions.push_tail(list_inst);
1270
1271   return list_inst;
1272}
1273
1274/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
1275void
1276fs_visitor::emit_dummy_fs()
1277{
1278   /* Everyone's favorite color. */
1279   emit(fs_inst(BRW_OPCODE_MOV,
1280		fs_reg(MRF, 2),
1281		fs_reg(1.0f)));
1282   emit(fs_inst(BRW_OPCODE_MOV,
1283		fs_reg(MRF, 3),
1284		fs_reg(0.0f)));
1285   emit(fs_inst(BRW_OPCODE_MOV,
1286		fs_reg(MRF, 4),
1287		fs_reg(1.0f)));
1288   emit(fs_inst(BRW_OPCODE_MOV,
1289		fs_reg(MRF, 5),
1290		fs_reg(0.0f)));
1291
1292   fs_inst *write;
1293   write = emit(fs_inst(FS_OPCODE_FB_WRITE,
1294			fs_reg(0),
1295			fs_reg(0)));
1296}
1297
1298/* The register location here is relative to the start of the URB
1299 * data.  It will get adjusted to be a real location before
1300 * generate_code() time.
1301 */
1302struct brw_reg
1303fs_visitor::interp_reg(int location, int channel)
1304{
1305   int regnr = location * 2 + channel / 2;
1306   int stride = (channel & 1) * 4;
1307
1308   return brw_vec1_grf(regnr, stride);
1309}
1310
/** Emits the interpolation for the varying inputs.
 *
 * Computes pixel centers and per-pixel deltas from the subspan origins in
 * the thread payload, sets up WPOS and 1/W, and emits FS_OPCODE_LINTERP
 * sequences for every other input variable read by the shader.
 */
void
fs_visitor::emit_interpolation()
{
   /* g1 of the thread payload carries the subspan position data; read it
    * as unsigned words.
    */
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
   /* For now, the source regs for the setup URB data will be unset,
    * since we don't know until codegen how many push constants we'll
    * use, and therefore what the setup URB offset is.
    */
   fs_reg src_reg = reg_undef;

   this->current_annotation = "compute pixel centers";
   this->pixel_x = fs_reg(this, glsl_type::uint_type);
   this->pixel_y = fs_reg(this, glsl_type::uint_type);
   this->pixel_x.type = BRW_REGISTER_TYPE_UW;
   this->pixel_y.type = BRW_REGISTER_TYPE_UW;
   /* Add the per-pixel offsets within a subspan (4-bit vector immediates:
    * 0,1,0,1,... for X and 0,0,1,1,... for Y) to the subspan origins
    * packed in g1.
    */
   emit(fs_inst(BRW_OPCODE_ADD,
		this->pixel_x,
		fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
		fs_reg(brw_imm_v(0x10101010))));
   emit(fs_inst(BRW_OPCODE_ADD,
		this->pixel_y,
		fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
		fs_reg(brw_imm_v(0x11001100))));

   this->current_annotation = "compute pixel deltas from v0";
   this->delta_x = fs_reg(this, glsl_type::float_type);
   this->delta_y = fs_reg(this, glsl_type::float_type);
   /* NOTE(review): g1.0/g1.1 are presumed to hold the X/Y of the start
    * vertex -- confirm against the payload layout in the PRM.
    */
   emit(fs_inst(BRW_OPCODE_ADD,
		this->delta_x,
		this->pixel_x,
		fs_reg(negate(brw_vec1_grf(1, 0)))));
   emit(fs_inst(BRW_OPCODE_ADD,
		this->delta_y,
		this->pixel_y,
		fs_reg(negate(brw_vec1_grf(1, 1)))));

   this->current_annotation = "compute pos.w and 1/pos.w";
   /* Compute wpos.  Unlike many other varying inputs, we usually need it
    * to produce 1/w, and the varying variable wouldn't show up.
    */
   fs_reg wpos = fs_reg(this, glsl_type::vec4_type);
   this->interp_attrs[FRAG_ATTRIB_WPOS] = wpos;
   /* wpos.xy are the pixel centers; .zw are interpolated from setup. */
   emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x)); /* FINISHME: ARB_fcc */
   wpos.reg_offset++;
   emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y)); /* FINISHME: ARB_fcc */
   wpos.reg_offset++;
   emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
		interp_reg(FRAG_ATTRIB_WPOS, 2)));
   wpos.reg_offset++;
   emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
		interp_reg(FRAG_ATTRIB_WPOS, 3)));
   /* Compute the pixel W value from wpos.w. */
   this->pixel_w = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(FS_OPCODE_RCP, this->pixel_w, wpos));

   /* Emit interpolation for every input variable the shader reads. */
   foreach_iter(exec_list_iterator, iter, *this->shader->ir) {
      ir_instruction *ir = (ir_instruction *)iter.get();
      ir_variable *var = ir->as_variable();

      if (!var)
	 continue;

      if (var->mode != ir_var_in)
	 continue;

      /* If it's already set up (WPOS), skip. */
      if (var->location == 0)
	 continue;

      this->current_annotation = talloc_asprintf(this->mem_ctx,
						 "interpolate %s "
						 "(FRAG_ATTRIB[%d])",
						 var->name,
						 var->location);
      emit_pinterp(var->location);
   }
   this->current_annotation = NULL;
}
1390
1391void
1392fs_visitor::emit_pinterp(int location)
1393{
1394   fs_reg interp_attr = fs_reg(this, glsl_type::vec4_type);
1395   this->interp_attrs[location] = interp_attr;
1396
1397   for (unsigned int i = 0; i < 4; i++) {
1398      struct brw_reg interp = interp_reg(location, i);
1399      emit(fs_inst(FS_OPCODE_LINTERP,
1400		   interp_attr,
1401		   this->delta_x,
1402		   this->delta_y,
1403		   fs_reg(interp)));
1404      interp_attr.reg_offset++;
1405   }
1406   interp_attr.reg_offset -= 4;
1407
1408   for (unsigned int i = 0; i < 4; i++) {
1409      emit(fs_inst(BRW_OPCODE_MUL,
1410		   interp_attr,
1411		   interp_attr,
1412		   this->pixel_w));
1413      interp_attr.reg_offset++;
1414   }
1415}
1416
1417void
1418fs_visitor::emit_fb_writes()
1419{
1420   this->current_annotation = "FB write";
1421
1422   assert(this->frag_color || !"FINISHME: MRT");
1423   fs_reg color = *(variable_storage(this->frag_color));
1424
1425   for (int i = 0; i < 4; i++) {
1426      emit(fs_inst(BRW_OPCODE_MOV,
1427		   fs_reg(MRF, 2 + i),
1428		   color));
1429      color.reg_offset++;
1430   }
1431
1432   emit(fs_inst(FS_OPCODE_FB_WRITE,
1433		fs_reg(0),
1434		fs_reg(0)));
1435
1436   this->current_annotation = NULL;
1437}
1438
/* Emit the render-target write SEND for FS_OPCODE_FB_WRITE: copy the g1
 * payload header into m1 (g0 is implied), then issue the message with the
 * 4 color registers that emit_fb_writes() placed in m2..m5.
 */
void
fs_visitor::generate_fb_write(fs_inst *inst)
{
   GLboolean eot = 1; /* FINISHME: MRT */
   /* FINISHME: AADS */

   /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
    * move, here's g1.
    */
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_MOV(p,
	   brw_message_reg(1),
	   brw_vec8_grf(1, 0));
   brw_pop_insn_state(p);

   /* 2 header regs plus the 4 color regs set up by emit_fb_writes(). */
   int nr = 2 + 4;

   brw_fb_WRITE(p,
		8, /* dispatch_width */
		retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
		0, /* base MRF */
		retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
		0, /* FINISHME: MRT target */
		nr,
		0,
		eot);
}
1468
1469void
1470fs_visitor::generate_linterp(fs_inst *inst,
1471			     struct brw_reg dst, struct brw_reg *src)
1472{
1473   struct brw_reg delta_x = src[0];
1474   struct brw_reg delta_y = src[1];
1475   struct brw_reg interp = src[2];
1476
1477   if (brw->has_pln &&
1478       delta_y.nr == delta_x.nr + 1 &&
1479       (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
1480      brw_PLN(p, dst, interp, delta_x);
1481   } else {
1482      brw_LINE(p, brw_null_reg(), interp, delta_x);
1483      brw_MAC(p, dst, suboffset(interp, 1), delta_y);
1484   }
1485}
1486
1487void
1488fs_visitor::generate_math(fs_inst *inst,
1489			  struct brw_reg dst, struct brw_reg *src)
1490{
1491   int op;
1492
1493   switch (inst->opcode) {
1494   case FS_OPCODE_RCP:
1495      op = BRW_MATH_FUNCTION_INV;
1496      break;
1497   case FS_OPCODE_RSQ:
1498      op = BRW_MATH_FUNCTION_RSQ;
1499      break;
1500   case FS_OPCODE_SQRT:
1501      op = BRW_MATH_FUNCTION_SQRT;
1502      break;
1503   case FS_OPCODE_EXP2:
1504      op = BRW_MATH_FUNCTION_EXP;
1505      break;
1506   case FS_OPCODE_LOG2:
1507      op = BRW_MATH_FUNCTION_LOG;
1508      break;
1509   case FS_OPCODE_POW:
1510      op = BRW_MATH_FUNCTION_POW;
1511      break;
1512   case FS_OPCODE_SIN:
1513      op = BRW_MATH_FUNCTION_SIN;
1514      break;
1515   case FS_OPCODE_COS:
1516      op = BRW_MATH_FUNCTION_COS;
1517      break;
1518   default:
1519      assert(!"not reached: unknown math function");
1520      op = 0;
1521      break;
1522   }
1523
1524   if (inst->opcode == FS_OPCODE_POW) {
1525      brw_MOV(p, brw_message_reg(3), src[1]);
1526   }
1527
1528   brw_math(p, dst,
1529	    op,
1530	    inst->saturate ? BRW_MATH_SATURATE_SATURATE :
1531	    BRW_MATH_SATURATE_NONE,
1532	    2, src[0],
1533	    BRW_MATH_DATA_VECTOR,
1534	    BRW_MATH_PRECISION_FULL);
1535}
1536
1537void
1538fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
1539{
1540   int msg_type = -1;
1541   int rlen = 4;
1542
1543   if (intel->gen == 5) {
1544      switch (inst->opcode) {
1545      case FS_OPCODE_TEX:
1546	 if (inst->shadow_compare) {
1547	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5;
1548	 } else {
1549	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5;
1550	 }
1551	 break;
1552      case FS_OPCODE_TXB:
1553	 if (inst->shadow_compare) {
1554	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5;
1555	 } else {
1556	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5;
1557	 }
1558	 break;
1559      }
1560   } else {
1561      switch (inst->opcode) {
1562      case FS_OPCODE_TEX:
1563	 /* Note that G45 and older determines shadow compare and dispatch width
1564	  * from message length for most messages.
1565	  */
1566	 if (inst->shadow_compare) {
1567	    msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
1568	 } else {
1569	    msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
1570	 }
1571      case FS_OPCODE_TXB:
1572	 if (inst->shadow_compare) {
1573	    assert(!"FINISHME: shadow compare with bias.");
1574	    msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
1575	 } else {
1576	    msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
1577	    rlen = 8;
1578	 }
1579	 break;
1580      }
1581   }
1582   assert(msg_type != -1);
1583
1584   /* g0 header. */
1585   src.nr--;
1586
1587   brw_SAMPLE(p,
1588	      retype(dst, BRW_REGISTER_TYPE_UW),
1589	      src.nr,
1590	      retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
1591              SURF_INDEX_TEXTURE(inst->sampler),
1592	      inst->sampler,
1593	      WRITEMASK_XYZW,
1594	      msg_type,
1595	      rlen,
1596	      inst->mlen + 1,
1597	      0,
1598	      1,
1599	      BRW_SAMPLER_SIMD_MODE_SIMD8);
1600}
1601
1602
1603/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
1604 * looking like:
1605 *
1606 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
1607 *
1608 * and we're trying to produce:
1609 *
1610 *           DDX                     DDY
1611 * dst: (ss0.tr - ss0.tl)     (ss0.tl - ss0.bl)
1612 *      (ss0.tr - ss0.tl)     (ss0.tr - ss0.br)
1613 *      (ss0.br - ss0.bl)     (ss0.tl - ss0.bl)
1614 *      (ss0.br - ss0.bl)     (ss0.tr - ss0.br)
1615 *      (ss1.tr - ss1.tl)     (ss1.tl - ss1.bl)
1616 *      (ss1.tr - ss1.tl)     (ss1.tr - ss1.br)
1617 *      (ss1.br - ss1.bl)     (ss1.tl - ss1.bl)
1618 *      (ss1.br - ss1.bl)     (ss1.tr - ss1.br)
1619 *
1620 * and add another set of two more subspans if in 16-pixel dispatch mode.
1621 *
1622 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
1623 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
1624 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
1625 * between each other.  We could probably do it like ddx and swizzle the right
1626 * order later, but bail for now and just produce
1627 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
1628 */
1629void
1630fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
1631{
1632   struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
1633				 BRW_REGISTER_TYPE_F,
1634				 BRW_VERTICAL_STRIDE_2,
1635				 BRW_WIDTH_2,
1636				 BRW_HORIZONTAL_STRIDE_0,
1637				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
1638   struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
1639				 BRW_REGISTER_TYPE_F,
1640				 BRW_VERTICAL_STRIDE_2,
1641				 BRW_WIDTH_2,
1642				 BRW_HORIZONTAL_STRIDE_0,
1643				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
1644   brw_ADD(p, dst, src0, negate(src1));
1645}
1646
1647void
1648fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
1649{
1650   struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
1651				 BRW_REGISTER_TYPE_F,
1652				 BRW_VERTICAL_STRIDE_4,
1653				 BRW_WIDTH_4,
1654				 BRW_HORIZONTAL_STRIDE_0,
1655				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
1656   struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
1657				 BRW_REGISTER_TYPE_F,
1658				 BRW_VERTICAL_STRIDE_4,
1659				 BRW_WIDTH_4,
1660				 BRW_HORIZONTAL_STRIDE_0,
1661				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
1662   brw_ADD(p, dst, src0, negate(src1));
1663}
1664
/* Emit code for FS_OPCODE_DISCARD: turn off the discarded channels in the
 * g0 pixel mask by ANDing in the inverse of the IMASK execution mask.
 * NOTE(review): the exact mask-register semantics (IMASK bits set for
 * channels taking the discard path) are inferred -- confirm against the
 * EU mask register description in the PRM.
 */
void
fs_visitor::generate_discard(fs_inst *inst)
{
   struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
   brw_push_insn_state(p);
   /* Operate on all channels regardless of the current execution mask. */
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */
   brw_AND(p, g0, c->emit_mask_reg, g0);
   brw_pop_insn_state(p);
}
1675
1676static void
1677trivial_assign_reg(int header_size, fs_reg *reg)
1678{
1679   if (reg->file == GRF && reg->reg != 0) {
1680      reg->hw_reg = header_size + reg->reg - 1 + reg->reg_offset;
1681      reg->reg = 0;
1682   }
1683}
1684
1685void
1686fs_visitor::assign_curb_setup()
1687{
1688   c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
1689   c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8;
1690
1691   if (intel->gen == 5 && (c->prog_data.first_curbe_grf +
1692			   c->prog_data.curb_read_length) & 1) {
1693      /* Align the start of the interpolation coefficients so that we can use
1694       * the PLN instruction.
1695       */
1696      c->prog_data.first_curbe_grf++;
1697   }
1698
1699   /* Map the offsets in the UNIFORM file to fixed HW regs. */
1700   foreach_iter(exec_list_iterator, iter, this->instructions) {
1701      fs_inst *inst = (fs_inst *)iter.get();
1702
1703      for (unsigned int i = 0; i < 3; i++) {
1704	 if (inst->src[i].file == UNIFORM) {
1705	    int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
1706	    struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf +
1707						  constant_nr / 8,
1708						  constant_nr % 8);
1709
1710	    inst->src[i].file = FIXED_HW_REG;
1711	    inst->src[i].fixed_hw_reg = brw_reg;
1712	 }
1713      }
1714   }
1715}
1716
/* Lay out the incoming attribute setup data in the URB (after the CURBE
 * constants) and patch FS_OPCODE_LINTERP sources to the final registers.
 */
void
fs_visitor::assign_urb_setup()
{
   int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length;
   int interp_reg_nr[FRAG_ATTRIB_MAX];

   c->prog_data.urb_read_length = 0;

   /* Figure out where each of the incoming setup attributes lands. */
   for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
      interp_reg_nr[i] = -1;

      /* WPOS setup is always read (emit_interpolation uses it for 1/W);
       * other attributes only when the shader reads them.
       */
      if (i != FRAG_ATTRIB_WPOS &&
	  !(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i)))
	 continue;

      /* Each attribute is 4 setup channels, each of which is half a reg. */
      interp_reg_nr[i] = urb_start + c->prog_data.urb_read_length;
      c->prog_data.urb_read_length += 2;
   }

   /* Map the register numbers for FS_OPCODE_LINTERP so that it uses
    * the correct setup input.
    */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      if (inst->opcode != FS_OPCODE_LINTERP)
	 continue;

      assert(inst->src[2].file == FIXED_HW_REG);

      /* interp_reg() encoded (location * 2 + channel / 2) in .nr; recover
       * the location and rebase it onto the allocated URB register.
       */
      int location = inst->src[2].fixed_hw_reg.nr / 2;
      assert(interp_reg_nr[location] != -1);
      inst->src[2].fixed_hw_reg.nr = (interp_reg_nr[location] +
				      (inst->src[2].fixed_hw_reg.nr & 1));
   }

   this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length;
}
1757
1758void
1759fs_visitor::assign_regs()
1760{
1761   int header_size = this->first_non_payload_grf;
1762   int last_grf = 0;
1763
1764   /* FINISHME: trivial assignment of register numbers */
1765   foreach_iter(exec_list_iterator, iter, this->instructions) {
1766      fs_inst *inst = (fs_inst *)iter.get();
1767
1768      trivial_assign_reg(header_size, &inst->dst);
1769      trivial_assign_reg(header_size, &inst->src[0]);
1770      trivial_assign_reg(header_size, &inst->src[1]);
1771
1772      last_grf = MAX2(last_grf, inst->dst.hw_reg);
1773      last_grf = MAX2(last_grf, inst->src[0].hw_reg);
1774      last_grf = MAX2(last_grf, inst->src[1].hw_reg);
1775   }
1776
1777   this->grf_used = last_grf + 1;
1778}
1779
1780static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
1781{
1782   struct brw_reg brw_reg;
1783
1784   switch (reg->file) {
1785   case GRF:
1786   case ARF:
1787   case MRF:
1788      brw_reg = brw_vec8_reg(reg->file,
1789			    reg->hw_reg, 0);
1790      brw_reg = retype(brw_reg, reg->type);
1791      break;
1792   case IMM:
1793      switch (reg->type) {
1794      case BRW_REGISTER_TYPE_F:
1795	 brw_reg = brw_imm_f(reg->imm.f);
1796	 break;
1797      case BRW_REGISTER_TYPE_D:
1798	 brw_reg = brw_imm_d(reg->imm.i);
1799	 break;
1800      case BRW_REGISTER_TYPE_UD:
1801	 brw_reg = brw_imm_ud(reg->imm.u);
1802	 break;
1803      default:
1804	 assert(!"not reached");
1805	 break;
1806      }
1807      break;
1808   case FIXED_HW_REG:
1809      brw_reg = reg->fixed_hw_reg;
1810      break;
1811   case BAD_FILE:
1812      /* Probably unused. */
1813      brw_reg = brw_null_reg();
1814      break;
1815   case UNIFORM:
1816      assert(!"not reached");
1817      brw_reg = brw_null_reg();
1818      break;
1819   }
1820   if (reg->abs)
1821      brw_reg = brw_abs(brw_reg);
1822   if (reg->negate)
1823      brw_reg = negate(brw_reg);
1824
1825   return brw_reg;
1826}
1827
1828void
1829fs_visitor::generate_code()
1830{
1831   unsigned int annotation_len = 0;
1832   int last_native_inst = 0;
1833   struct brw_instruction *if_stack[16], *loop_stack[16];
1834   int if_stack_depth = 0, loop_stack_depth = 0;
1835   int if_depth_in_loop[16];
1836
1837   if_depth_in_loop[loop_stack_depth] = 0;
1838
1839   memset(&if_stack, 0, sizeof(if_stack));
1840   foreach_iter(exec_list_iterator, iter, this->instructions) {
1841      fs_inst *inst = (fs_inst *)iter.get();
1842      struct brw_reg src[3], dst;
1843
1844      for (unsigned int i = 0; i < 3; i++) {
1845	 src[i] = brw_reg_from_fs_reg(&inst->src[i]);
1846      }
1847      dst = brw_reg_from_fs_reg(&inst->dst);
1848
1849      brw_set_conditionalmod(p, inst->conditional_mod);
1850      brw_set_predicate_control(p, inst->predicated);
1851
1852      switch (inst->opcode) {
1853      case BRW_OPCODE_MOV:
1854	 brw_MOV(p, dst, src[0]);
1855	 break;
1856      case BRW_OPCODE_ADD:
1857	 brw_ADD(p, dst, src[0], src[1]);
1858	 break;
1859      case BRW_OPCODE_MUL:
1860	 brw_MUL(p, dst, src[0], src[1]);
1861	 break;
1862
1863      case BRW_OPCODE_FRC:
1864	 brw_FRC(p, dst, src[0]);
1865	 break;
1866      case BRW_OPCODE_RNDD:
1867	 brw_RNDD(p, dst, src[0]);
1868	 break;
1869      case BRW_OPCODE_RNDZ:
1870	 brw_RNDZ(p, dst, src[0]);
1871	 break;
1872
1873      case BRW_OPCODE_AND:
1874	 brw_AND(p, dst, src[0], src[1]);
1875	 break;
1876      case BRW_OPCODE_OR:
1877	 brw_OR(p, dst, src[0], src[1]);
1878	 break;
1879      case BRW_OPCODE_XOR:
1880	 brw_XOR(p, dst, src[0], src[1]);
1881	 break;
1882
1883      case BRW_OPCODE_CMP:
1884	 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
1885	 break;
1886      case BRW_OPCODE_SEL:
1887	 brw_SEL(p, dst, src[0], src[1]);
1888	 break;
1889
1890      case BRW_OPCODE_IF:
1891	 assert(if_stack_depth < 16);
1892	 if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8);
1893	 if_depth_in_loop[loop_stack_depth]++;
1894	 if_stack_depth++;
1895	 break;
1896      case BRW_OPCODE_ELSE:
1897	 if_stack[if_stack_depth - 1] =
1898	    brw_ELSE(p, if_stack[if_stack_depth - 1]);
1899	 break;
1900      case BRW_OPCODE_ENDIF:
1901	 if_stack_depth--;
1902	 brw_ENDIF(p , if_stack[if_stack_depth]);
1903	 if_depth_in_loop[loop_stack_depth]--;
1904	 break;
1905
1906      case BRW_OPCODE_DO:
1907	 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
1908	 if_depth_in_loop[loop_stack_depth] = 0;
1909	 break;
1910
1911      case BRW_OPCODE_BREAK:
1912	 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
1913	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1914	 break;
1915      case BRW_OPCODE_CONTINUE:
1916	 brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
1917	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1918	 break;
1919
1920      case BRW_OPCODE_WHILE: {
1921	 struct brw_instruction *inst0, *inst1;
1922	 GLuint br = 1;
1923
1924	 if (intel->gen == 5)
1925	    br = 2;
1926
1927	 assert(loop_stack_depth > 0);
1928	 loop_stack_depth--;
1929	 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
1930	 /* patch all the BREAK/CONT instructions from last BGNLOOP */
1931	 while (inst0 > loop_stack[loop_stack_depth]) {
1932	    inst0--;
1933	    if (inst0->header.opcode == BRW_OPCODE_BREAK &&
1934		inst0->bits3.if_else.jump_count == 0) {
1935	       inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
1936	    }
1937	    else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
1938		     inst0->bits3.if_else.jump_count == 0) {
1939	       inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
1940	    }
1941	 }
1942      }
1943	 break;
1944
1945      case FS_OPCODE_RCP:
1946      case FS_OPCODE_RSQ:
1947      case FS_OPCODE_SQRT:
1948      case FS_OPCODE_EXP2:
1949      case FS_OPCODE_LOG2:
1950      case FS_OPCODE_POW:
1951      case FS_OPCODE_SIN:
1952      case FS_OPCODE_COS:
1953	 generate_math(inst, dst, src);
1954	 break;
1955      case FS_OPCODE_LINTERP:
1956	 generate_linterp(inst, dst, src);
1957	 break;
1958      case FS_OPCODE_TEX:
1959      case FS_OPCODE_TXB:
1960      case FS_OPCODE_TXL:
1961	 generate_tex(inst, dst, src[0]);
1962	 break;
1963      case FS_OPCODE_DISCARD:
1964	 generate_discard(inst);
1965	 break;
1966      case FS_OPCODE_DDX:
1967	 generate_ddx(inst, dst, src[0]);
1968	 break;
1969      case FS_OPCODE_DDY:
1970	 generate_ddy(inst, dst, src[0]);
1971	 break;
1972      case FS_OPCODE_FB_WRITE:
1973	 generate_fb_write(inst);
1974	 break;
1975      default:
1976	 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
1977	    _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
1978			  brw_opcodes[inst->opcode].name);
1979	 } else {
1980	    _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
1981	 }
1982	 this->fail = true;
1983      }
1984
1985      if (annotation_len < p->nr_insn) {
1986	 annotation_len *= 2;
1987	 if (annotation_len < 16)
1988	    annotation_len = 16;
1989
1990	 this->annotation_string = talloc_realloc(this->mem_ctx,
1991						  annotation_string,
1992						  const char *,
1993						  annotation_len);
1994	 this->annotation_ir = talloc_realloc(this->mem_ctx,
1995					      annotation_ir,
1996					      ir_instruction *,
1997					      annotation_len);
1998      }
1999
2000      for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
2001	 this->annotation_string[i] = inst->annotation;
2002	 this->annotation_ir[i] = inst->ir;
2003      }
2004      last_native_inst = p->nr_insn;
2005   }
2006}
2007
/* Top-level entry point for the GLSL-IR-based fragment shader backend.
 *
 * Finds the linked fragment shader, builds FS IR with fs_visitor, assigns
 * registers, and generates native code into c->func.  Returns GL_FALSE to
 * fall back to the old brw_wm path when no GLSL program is bound or the
 * new backend is disabled.
 */
GLboolean
brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
{
   struct brw_compile *p = &c->func;
   struct intel_context *intel = &brw->intel;
   GLcontext *ctx = &intel->ctx;
   struct brw_shader *shader = NULL;
   struct gl_shader_program *prog = ctx->Shader.CurrentProgram;

   if (!prog)
      return GL_FALSE;

   /* Opt-in switch while this backend is under development. */
   if (!using_new_fs)
      return GL_FALSE;

   /* Find the fragment shader among the linked shaders. */
   for (unsigned int i = 0; i < prog->_NumLinkedShaders; i++) {
      if (prog->_LinkedShaders[i]->Type == GL_FRAGMENT_SHADER) {
	 shader = (struct brw_shader *)prog->_LinkedShaders[i];
	 break;
      }
   }
   if (!shader)
      return GL_FALSE;

   /* We always use 8-wide mode, at least for now.  For one, flow
    * control only works in 8-wide.  Also, when we're fragment shader
    * bound, we're almost always under register pressure as well, so
    * 8-wide would save us from the performance cliff of spilling
    * regs.
    */
   c->dispatch_width = 8;

   if (INTEL_DEBUG & DEBUG_WM) {
      printf("GLSL IR for native fragment shader %d:\n", prog->Name);
      _mesa_print_ir(shader->ir, NULL);
      printf("\n");
   }

   /* Now the main event: Visit the shader IR and generate our FS IR for it.
    */
   fs_visitor v(c, shader);

   if (0) {
      /* Debug path: magenta output instead of the real shader. */
      v.emit_dummy_fs();
   } else {
      v.emit_interpolation();

      /* Generate FS IR for main().  (the visitor only descends into
       * functions called "main").
       */
      foreach_iter(exec_list_iterator, iter, *shader->ir) {
	 ir_instruction *ir = (ir_instruction *)iter.get();
	 v.base_ir = ir;
	 ir->accept(&v);
      }

      v.emit_fb_writes();
      v.assign_curb_setup();
      v.assign_urb_setup();
      v.assign_regs();
   }

   v.generate_code();

   assert(!v.fail); /* FINISHME: Cleanly fail, tested at link time, etc. */

   if (v.fail)
      return GL_FALSE;

   /* Dump the native disassembly, labeled with the annotation strings and
    * IR recorded by generate_code().
    */
   if (INTEL_DEBUG & DEBUG_WM) {
      const char *last_annotation_string = NULL;
      ir_instruction *last_annotation_ir = NULL;

      printf("Native code for fragment shader %d:\n", prog->Name);
      for (unsigned int i = 0; i < p->nr_insn; i++) {
	 if (last_annotation_ir != v.annotation_ir[i]) {
	    last_annotation_ir = v.annotation_ir[i];
	    if (last_annotation_ir) {
	       printf("   ");
	       last_annotation_ir->print();
	       printf("\n");
	    }
	 }
	 if (last_annotation_string != v.annotation_string[i]) {
	    last_annotation_string = v.annotation_string[i];
	    if (last_annotation_string)
	       printf("   %s\n", last_annotation_string);
	 }
	 brw_disasm(stdout, &p->store[i], intel->gen);
      }
      printf("\n");
   }

   c->prog_data.total_grf = v.grf_used;
   c->prog_data.total_scratch = 0;

   return GL_TRUE;
}
2106