brw_fs.cpp revision 40aadafa91ef5b931436d400fedafd720d59deff
1/*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 *    Eric Anholt <eric@anholt.net>
25 *
26 */
27
28extern "C" {
29
30#include <sys/types.h>
31
32#include "main/macros.h"
33#include "main/shaderobj.h"
34#include "program/prog_parameter.h"
35#include "program/prog_print.h"
36#include "program/prog_optimize.h"
37#include "program/hash_table.h"
38#include "brw_context.h"
39#include "brw_eu.h"
40#include "brw_wm.h"
41#include "talloc.h"
42}
43#include "../glsl/glsl_types.h"
44#include "../glsl/ir_optimization.h"
45#include "../glsl/ir_print_visitor.h"
46
/**
 * Register files an fs_reg can refer to.
 *
 * The first four values alias the hardware BRW_* register-file encodings;
 * the remaining ones are compiler-internal and simply continue numbering
 * after IMM.
 */
enum register_file {
   ARF = BRW_ARCHITECTURE_REGISTER_FILE,
   GRF = BRW_GENERAL_REGISTER_FILE,
   MRF = BRW_MESSAGE_REGISTER_FILE,
   IMM = BRW_IMMEDIATE_VALUE,
   FIXED_HW_REG, /* a struct brw_reg */
   UNIFORM, /* prog_data->params[hw_reg] */
   BAD_FILE /* unset register, as produced by the default fs_reg constructor */
};
56
/**
 * Virtual opcodes used by the fragment shader backend.
 *
 * fs_inst::opcode holds either a BRW_OPCODE_* value or one of these.
 * Numbering starts at 256, presumably to stay clear of the BRW_OPCODE_*
 * range (NOTE(review): confirm against brw_defines.h).
 */
enum fs_opcodes {
   FS_OPCODE_FB_WRITE = 256,
   FS_OPCODE_RCP,
   FS_OPCODE_RSQ,
   FS_OPCODE_SQRT,
   FS_OPCODE_EXP2,
   FS_OPCODE_LOG2,
   FS_OPCODE_POW,
   FS_OPCODE_SIN,
   FS_OPCODE_COS,
   FS_OPCODE_DDX,
   FS_OPCODE_DDY,
   FS_OPCODE_LINTERP,
   FS_OPCODE_TEX,
   FS_OPCODE_TXB,
   FS_OPCODE_TXL,
};
74
/* Cached result of the INTEL_NEW_FS environment check performed in
 * brw_link_shader(): -1 until first queried, then 0 or 1.
 */
static int using_new_fs = -1;
76
77struct gl_shader *
78brw_new_shader(GLcontext *ctx, GLuint name, GLuint type)
79{
80   struct brw_shader *shader;
81
82   shader = talloc_zero(NULL, struct brw_shader);
83   if (shader) {
84      shader->base.Type = type;
85      shader->base.Name = name;
86      _mesa_init_shader(ctx, &shader->base);
87   }
88
89   return &shader->base;
90}
91
92struct gl_shader_program *
93brw_new_shader_program(GLcontext *ctx, GLuint name)
94{
95   struct brw_shader_program *prog;
96   prog = talloc_zero(NULL, struct brw_shader_program);
97   if (prog) {
98      prog->base.Name = name;
99      _mesa_init_shader_program(ctx, &prog->base);
100   }
101   return &prog->base;
102}
103
104GLboolean
105brw_compile_shader(GLcontext *ctx, struct gl_shader *shader)
106{
107   if (!_mesa_ir_compile_shader(ctx, shader))
108      return GL_FALSE;
109
110   return GL_TRUE;
111}
112
113GLboolean
114brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
115{
116   if (using_new_fs == -1)
117      using_new_fs = getenv("INTEL_NEW_FS") != NULL;
118
119   for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) {
120      struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[i];
121
122      if (using_new_fs && shader->base.Type == GL_FRAGMENT_SHADER) {
123	 void *mem_ctx = talloc_new(NULL);
124	 bool progress;
125
126	 if (shader->ir)
127	    talloc_free(shader->ir);
128	 shader->ir = new(shader) exec_list;
129	 clone_ir_list(mem_ctx, shader->ir, shader->base.ir);
130
131	 do_mat_op_to_vec(shader->ir);
132	 do_mod_to_fract(shader->ir);
133	 do_div_to_mul_rcp(shader->ir);
134	 do_sub_to_add_neg(shader->ir);
135	 do_explog_to_explog2(shader->ir);
136
137	 brw_do_channel_expressions(shader->ir);
138	 brw_do_vector_splitting(shader->ir);
139
140	 do {
141	    progress = false;
142
143	    progress = do_common_optimization(shader->ir, true) || progress;
144	 } while (progress);
145
146	 validate_ir_tree(shader->ir);
147
148	 reparent_ir(shader->ir, shader->ir);
149	 talloc_free(mem_ctx);
150      }
151   }
152
153   if (!_mesa_ir_link_shader(ctx, prog))
154      return GL_FALSE;
155
156   return GL_TRUE;
157}
158
159static int
160type_size(const struct glsl_type *type)
161{
162   unsigned int size, i;
163
164   switch (type->base_type) {
165   case GLSL_TYPE_UINT:
166   case GLSL_TYPE_INT:
167   case GLSL_TYPE_FLOAT:
168   case GLSL_TYPE_BOOL:
169      return type->components();
170   case GLSL_TYPE_ARRAY:
171      /* FINISHME: uniform/varying arrays. */
172      return type_size(type->fields.array) * type->length;
173   case GLSL_TYPE_STRUCT:
174      size = 0;
175      for (i = 0; i < type->length; i++) {
176	 size += type_size(type->fields.structure[i].type);
177      }
178      return size;
179   case GLSL_TYPE_SAMPLER:
180      /* Samplers take up no register space, since they're baked in at
181       * link time.
182       */
183      return 0;
184   default:
185      assert(!"not reached");
186      return 0;
187   }
188}
189
190class fs_reg {
191public:
192   /* Callers of this talloc-based new need not call delete. It's
193    * easier to just talloc_free 'ctx' (or any of its ancestors). */
194   static void* operator new(size_t size, void *ctx)
195   {
196      void *node;
197
198      node = talloc_size(ctx, size);
199      assert(node != NULL);
200
201      return node;
202   }
203
204   /** Generic unset register constructor. */
205   fs_reg()
206   {
207      this->file = BAD_FILE;
208      this->reg = 0;
209      this->reg_offset = 0;
210      this->hw_reg = -1;
211      this->negate = 0;
212      this->abs = 0;
213   }
214
215   /** Immediate value constructor. */
216   fs_reg(float f)
217   {
218      this->file = IMM;
219      this->reg = 0;
220      this->hw_reg = 0;
221      this->type = BRW_REGISTER_TYPE_F;
222      this->imm.f = f;
223      this->negate = 0;
224      this->abs = 0;
225   }
226
227   /** Immediate value constructor. */
228   fs_reg(int32_t i)
229   {
230      this->file = IMM;
231      this->reg = 0;
232      this->hw_reg = 0;
233      this->type = BRW_REGISTER_TYPE_D;
234      this->imm.i = i;
235      this->negate = 0;
236      this->abs = 0;
237   }
238
239   /** Immediate value constructor. */
240   fs_reg(uint32_t u)
241   {
242      this->file = IMM;
243      this->reg = 0;
244      this->hw_reg = 0;
245      this->type = BRW_REGISTER_TYPE_UD;
246      this->imm.u = u;
247      this->negate = 0;
248      this->abs = 0;
249   }
250
251   /** Fixed brw_reg Immediate value constructor. */
252   fs_reg(struct brw_reg fixed_hw_reg)
253   {
254      this->file = FIXED_HW_REG;
255      this->fixed_hw_reg = fixed_hw_reg;
256      this->reg = 0;
257      this->hw_reg = 0;
258      this->type = fixed_hw_reg.type;
259      this->negate = 0;
260      this->abs = 0;
261   }
262
263   fs_reg(enum register_file file, int hw_reg);
264   fs_reg(class fs_visitor *v, const struct glsl_type *type);
265
266   /** Register file: ARF, GRF, MRF, IMM. */
267   enum register_file file;
268   /** Abstract register number.  0 = fixed hw reg */
269   int reg;
270   /** Offset within the abstract register. */
271   int reg_offset;
272   /** HW register number.  Generally unset until register allocation. */
273   int hw_reg;
274   /** Register type.  BRW_REGISTER_TYPE_* */
275   int type;
276   bool negate;
277   bool abs;
278   struct brw_reg fixed_hw_reg;
279
280   /** Value for file == BRW_IMMMEDIATE_FILE */
281   union {
282      int32_t i;
283      uint32_t u;
284      float f;
285   } imm;
286};
287
/* Shared constant operands: an unset register (default-constructed, so its
 * file is BAD_FILE) and the architecture-file register BRW_ARF_NULL.
 */
static const fs_reg reg_undef;
static const fs_reg reg_null(ARF, BRW_ARF_NULL);
290
291class fs_inst : public exec_node {
292public:
293   /* Callers of this talloc-based new need not call delete. It's
294    * easier to just talloc_free 'ctx' (or any of its ancestors). */
295   static void* operator new(size_t size, void *ctx)
296   {
297      void *node;
298
299      node = talloc_zero_size(ctx, size);
300      assert(node != NULL);
301
302      return node;
303   }
304
305   void init()
306   {
307      this->opcode = BRW_OPCODE_NOP;
308      this->saturate = false;
309      this->conditional_mod = BRW_CONDITIONAL_NONE;
310      this->predicated = false;
311      this->sampler = 0;
312      this->shadow_compare = false;
313   }
314
315   fs_inst()
316   {
317      init();
318   }
319
320   fs_inst(int opcode)
321   {
322      init();
323      this->opcode = opcode;
324   }
325
326   fs_inst(int opcode, fs_reg dst, fs_reg src0)
327   {
328      init();
329      this->opcode = opcode;
330      this->dst = dst;
331      this->src[0] = src0;
332   }
333
334   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1)
335   {
336      init();
337      this->opcode = opcode;
338      this->dst = dst;
339      this->src[0] = src0;
340      this->src[1] = src1;
341   }
342
343   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)
344   {
345      init();
346      this->opcode = opcode;
347      this->dst = dst;
348      this->src[0] = src0;
349      this->src[1] = src1;
350      this->src[2] = src2;
351   }
352
353   int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
354   fs_reg dst;
355   fs_reg src[3];
356   bool saturate;
357   bool predicated;
358   int conditional_mod; /**< BRW_CONDITIONAL_* */
359
360   int mlen; /** SEND message length */
361   int sampler;
362   bool shadow_compare;
363
364   /** @{
365    * Annotation for the generated IR.  One of the two can be set.
366    */
367   ir_instruction *ir;
368   const char *annotation;
369   /** @} */
370};
371
372class fs_visitor : public ir_visitor
373{
374public:
375
376   fs_visitor(struct brw_wm_compile *c, struct brw_shader *shader)
377   {
378      this->c = c;
379      this->p = &c->func;
380      this->brw = p->brw;
381      this->intel = &brw->intel;
382      this->ctx = &intel->ctx;
383      this->mem_ctx = talloc_new(NULL);
384      this->shader = shader;
385      this->fail = false;
386      this->next_abstract_grf = 1;
387      this->variable_ht = hash_table_ctor(0,
388					  hash_table_pointer_hash,
389					  hash_table_pointer_compare);
390
391      this->frag_color = NULL;
392      this->frag_data = NULL;
393      this->frag_depth = NULL;
394      this->first_non_payload_grf = 0;
395
396      this->current_annotation = NULL;
397      this->annotation_string = NULL;
398      this->annotation_ir = NULL;
399   }
400   ~fs_visitor()
401   {
402      talloc_free(this->mem_ctx);
403      hash_table_dtor(this->variable_ht);
404   }
405
406   fs_reg *variable_storage(ir_variable *var);
407
408   void visit(ir_variable *ir);
409   void visit(ir_assignment *ir);
410   void visit(ir_dereference_variable *ir);
411   void visit(ir_dereference_record *ir);
412   void visit(ir_dereference_array *ir);
413   void visit(ir_expression *ir);
414   void visit(ir_texture *ir);
415   void visit(ir_if *ir);
416   void visit(ir_constant *ir);
417   void visit(ir_swizzle *ir);
418   void visit(ir_return *ir);
419   void visit(ir_loop *ir);
420   void visit(ir_loop_jump *ir);
421   void visit(ir_discard *ir);
422   void visit(ir_call *ir);
423   void visit(ir_function *ir);
424   void visit(ir_function_signature *ir);
425
426   fs_inst *emit(fs_inst inst);
427   void assign_curb_setup();
428   void assign_urb_setup();
429   void assign_regs();
430   void generate_code();
431   void generate_fb_write(fs_inst *inst);
432   void generate_linterp(fs_inst *inst, struct brw_reg dst,
433			 struct brw_reg *src);
434   void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
435   void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src);
436
437   void emit_dummy_fs();
438   void emit_interpolation();
439   void emit_pinterp(int location);
440   void emit_fb_writes();
441
442   struct brw_reg interp_reg(int location, int channel);
443
444   struct brw_context *brw;
445   struct intel_context *intel;
446   GLcontext *ctx;
447   struct brw_wm_compile *c;
448   struct brw_compile *p;
449   struct brw_shader *shader;
450   void *mem_ctx;
451   exec_list instructions;
452   int next_abstract_grf;
453   struct hash_table *variable_ht;
454   ir_variable *frag_color, *frag_data, *frag_depth;
455   int first_non_payload_grf;
456
457   /** @{ debug annotation info */
458   const char *current_annotation;
459   ir_instruction *base_ir;
460   const char **annotation_string;
461   ir_instruction **annotation_ir;
462   /** @} */
463
464   bool fail;
465
466   /* Result of last visit() method. */
467   fs_reg result;
468
469   fs_reg pixel_x;
470   fs_reg pixel_y;
471   fs_reg pixel_w;
472   fs_reg delta_x;
473   fs_reg delta_y;
474   fs_reg interp_attrs[64];
475
476   int grf_used;
477
478};
479
480/** Fixed HW reg constructor. */
481fs_reg::fs_reg(enum register_file file, int hw_reg)
482{
483   this->file = file;
484   this->reg = 0;
485   this->reg_offset = 0;
486   this->hw_reg = hw_reg;
487   this->type = BRW_REGISTER_TYPE_F;
488   this->negate = 0;
489   this->abs = 0;
490}
491
492/** Automatic reg constructor. */
493fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type)
494{
495   this->file = GRF;
496   this->reg = v->next_abstract_grf;
497   this->reg_offset = 0;
498   v->next_abstract_grf += type_size(type);
499   this->hw_reg = -1;
500   this->negate = 0;
501   this->abs = 0;
502
503   switch (type->base_type) {
504   case GLSL_TYPE_FLOAT:
505      this->type = BRW_REGISTER_TYPE_F;
506      break;
507   case GLSL_TYPE_INT:
508   case GLSL_TYPE_BOOL:
509      this->type = BRW_REGISTER_TYPE_D;
510      break;
511   case GLSL_TYPE_UINT:
512      this->type = BRW_REGISTER_TYPE_UD;
513      break;
514   default:
515      assert(!"not reached");
516      this->type =  BRW_REGISTER_TYPE_F;
517      break;
518   }
519}
520
521fs_reg *
522fs_visitor::variable_storage(ir_variable *var)
523{
524   return (fs_reg *)hash_table_find(this->variable_ht, var);
525}
526
527void
528fs_visitor::visit(ir_variable *ir)
529{
530   fs_reg *reg = NULL;
531
532   if (strcmp(ir->name, "gl_FragColor") == 0) {
533      this->frag_color = ir;
534   } else if (strcmp(ir->name, "gl_FragData") == 0) {
535      this->frag_data = ir;
536   } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
537      this->frag_depth = ir;
538      assert(!"FINISHME: this hangs currently.");
539   }
540
541   if (ir->mode == ir_var_in) {
542      reg = &this->interp_attrs[ir->location];
543   }
544
545   if (ir->mode == ir_var_uniform) {
546      const float *vec_values;
547      int param_index = c->prog_data.nr_params;
548
549      /* FINISHME: This is wildly incomplete. */
550      assert(ir->type->is_scalar() || ir->type->is_vector() ||
551	     ir->type->is_sampler());
552
553      const struct gl_program *fp = &this->brw->fragment_program->Base;
554      /* Our support for uniforms is piggy-backed on the struct
555       * gl_fragment_program, because that's where the values actually
556       * get stored, rather than in some global gl_shader_program uniform
557       * store.
558       */
559      vec_values = fp->Parameters->ParameterValues[ir->location];
560      for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
561	 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i];
562      }
563
564      reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index);
565   }
566
567   if (!reg)
568      reg = new(this->mem_ctx) fs_reg(this, ir->type);
569
570   hash_table_insert(this->variable_ht, reg, ir);
571}
572
573void
574fs_visitor::visit(ir_dereference_variable *ir)
575{
576   fs_reg *reg = variable_storage(ir->var);
577   this->result = *reg;
578}
579
/** Structure field dereferences are not implemented yet; asserts. */
void
fs_visitor::visit(ir_dereference_record *ir)
{
   assert(!"FINISHME");
}
585
586void
587fs_visitor::visit(ir_dereference_array *ir)
588{
589   ir_constant *index;
590   int element_size;
591
592   ir->array->accept(this);
593   index = ir->array_index->as_constant();
594
595   if (ir->type->is_matrix()) {
596      element_size = ir->type->vector_elements;
597   } else {
598      element_size = type_size(ir->type);
599   }
600
601   if (index) {
602      assert(this->result.file == UNIFORM ||
603	     (this->result.file == GRF &&
604	      this->result.reg != 0));
605      this->result.reg_offset += index->value.i[0] * element_size;
606   } else {
607      assert(!"FINISHME: non-constant matrix column");
608   }
609}
610
611void
612fs_visitor::visit(ir_expression *ir)
613{
614   unsigned int operand;
615   fs_reg op[2], temp;
616   fs_reg result;
617   fs_inst *inst;
618
619   for (operand = 0; operand < ir->get_num_operands(); operand++) {
620      ir->operands[operand]->accept(this);
621      if (this->result.file == BAD_FILE) {
622	 ir_print_visitor v;
623	 printf("Failed to get tree for expression operand:\n");
624	 ir->operands[operand]->accept(&v);
625	 this->fail = true;
626      }
627      op[operand] = this->result;
628
629      /* Matrix expression operands should have been broken down to vector
630       * operations already.
631       */
632      assert(!ir->operands[operand]->type->is_matrix());
633      /* And then those vector operands should have been broken down to scalar.
634       */
635      assert(!ir->operands[operand]->type->is_vector());
636   }
637
638   /* Storage for our result.  If our result goes into an assignment, it will
639    * just get copy-propagated out, so no worries.
640    */
641   this->result = fs_reg(this, ir->type);
642
643   switch (ir->operation) {
644   case ir_unop_logic_not:
645      emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], fs_reg(-1)));
646      break;
647   case ir_unop_neg:
648      op[0].negate = ~op[0].negate;
649      this->result = op[0];
650      break;
651   case ir_unop_abs:
652      op[0].abs = true;
653      this->result = op[0];
654      break;
655   case ir_unop_sign:
656      temp = fs_reg(this, ir->type);
657
658      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f)));
659      inst->conditional_mod = BRW_CONDITIONAL_G;
660
661      inst = emit(fs_inst(BRW_OPCODE_CMP, temp, op[0], fs_reg(0.0f)));
662      inst->conditional_mod = BRW_CONDITIONAL_L;
663
664      temp.negate = true;
665      emit(fs_inst(BRW_OPCODE_ADD, this->result, this->result, temp));
666
667      break;
668   case ir_unop_rcp:
669      emit(fs_inst(FS_OPCODE_RCP, this->result, op[0]));
670      break;
671
672   case ir_unop_exp2:
673      emit(fs_inst(FS_OPCODE_EXP2, this->result, op[0]));
674      break;
675   case ir_unop_log2:
676      emit(fs_inst(FS_OPCODE_LOG2, this->result, op[0]));
677      break;
678   case ir_unop_exp:
679   case ir_unop_log:
680      assert(!"not reached: should be handled by ir_explog_to_explog2");
681      break;
682   case ir_unop_sin:
683      emit(fs_inst(FS_OPCODE_SIN, this->result, op[0]));
684      break;
685   case ir_unop_cos:
686      emit(fs_inst(FS_OPCODE_COS, this->result, op[0]));
687      break;
688
689   case ir_unop_dFdx:
690      emit(fs_inst(FS_OPCODE_DDX, this->result, op[0]));
691      break;
692   case ir_unop_dFdy:
693      emit(fs_inst(FS_OPCODE_DDY, this->result, op[0]));
694      break;
695
696   case ir_binop_add:
697      emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1]));
698      break;
699   case ir_binop_sub:
700      assert(!"not reached: should be handled by ir_sub_to_add_neg");
701      break;
702
703   case ir_binop_mul:
704      emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1]));
705      break;
706   case ir_binop_div:
707      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
708      break;
709   case ir_binop_mod:
710      assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
711      break;
712
713   case ir_binop_less:
714      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
715      inst->conditional_mod = BRW_CONDITIONAL_L;
716      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
717      break;
718   case ir_binop_greater:
719      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
720      inst->conditional_mod = BRW_CONDITIONAL_G;
721      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
722      break;
723   case ir_binop_lequal:
724      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
725      inst->conditional_mod = BRW_CONDITIONAL_LE;
726      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
727      break;
728   case ir_binop_gequal:
729      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
730      inst->conditional_mod = BRW_CONDITIONAL_GE;
731      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
732      break;
733   case ir_binop_equal:
734      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
735      inst->conditional_mod = BRW_CONDITIONAL_Z;
736      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
737      break;
738   case ir_binop_nequal:
739      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
740      inst->conditional_mod = BRW_CONDITIONAL_NZ;
741      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
742      break;
743
744   case ir_binop_logic_xor:
745      emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1]));
746      break;
747
748   case ir_binop_logic_or:
749      emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1]));
750      break;
751
752   case ir_binop_logic_and:
753      emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1]));
754      break;
755
756   case ir_binop_dot:
757   case ir_binop_cross:
758   case ir_unop_any:
759      assert(!"not reached: should be handled by brw_channel_expressions");
760      break;
761
762   case ir_unop_sqrt:
763      emit(fs_inst(FS_OPCODE_SQRT, this->result, op[0]));
764      break;
765
766   case ir_unop_rsq:
767      emit(fs_inst(FS_OPCODE_RSQ, this->result, op[0]));
768      break;
769
770   case ir_unop_i2f:
771   case ir_unop_b2f:
772   case ir_unop_b2i:
773      emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
774      break;
775   case ir_unop_f2i:
776      emit(fs_inst(BRW_OPCODE_RNDZ, this->result, op[0]));
777      break;
778   case ir_unop_f2b:
779   case ir_unop_i2b:
780      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f)));
781      inst->conditional_mod = BRW_CONDITIONAL_NZ;
782
783   case ir_unop_trunc:
784      emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
785      break;
786   case ir_unop_ceil:
787      op[0].negate = ~op[0].negate;
788      inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
789      this->result.negate = true;
790      break;
791   case ir_unop_floor:
792      inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
793      break;
794   case ir_unop_fract:
795      inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0]));
796      break;
797
798   case ir_binop_min:
799      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
800      inst->conditional_mod = BRW_CONDITIONAL_L;
801
802      inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
803      inst->predicated = true;
804      break;
805   case ir_binop_max:
806      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
807      inst->conditional_mod = BRW_CONDITIONAL_G;
808
809      inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
810      inst->predicated = true;
811      break;
812
813   case ir_binop_pow:
814      inst = emit(fs_inst(FS_OPCODE_POW, this->result, op[0], op[1]));
815      break;
816
817   case ir_unop_bit_not:
818   case ir_unop_u2f:
819   case ir_binop_lshift:
820   case ir_binop_rshift:
821   case ir_binop_bit_and:
822   case ir_binop_bit_xor:
823   case ir_binop_bit_or:
824      assert(!"GLSL 1.30 features unsupported");
825      break;
826   }
827}
828
829void
830fs_visitor::visit(ir_assignment *ir)
831{
832   struct fs_reg l, r;
833   int i;
834   int write_mask;
835   fs_inst *inst;
836
837   /* FINISHME: arrays on the lhs */
838   ir->lhs->accept(this);
839   l = this->result;
840
841   ir->rhs->accept(this);
842   r = this->result;
843
844   /* FINISHME: This should really set to the correct maximal writemask for each
845    * FINISHME: component written (in the loops below).  This case can only
846    * FINISHME: occur for matrices, arrays, and structures.
847    */
848   if (ir->write_mask == 0) {
849      assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
850      write_mask = WRITEMASK_XYZW;
851   } else {
852      assert(ir->lhs->type->is_vector() || ir->lhs->type->is_scalar());
853      write_mask = ir->write_mask;
854   }
855
856   assert(l.file != BAD_FILE);
857   assert(r.file != BAD_FILE);
858
859   if (ir->condition) {
860      /* Get the condition bool into the predicate. */
861      ir->condition->accept(this);
862      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, fs_reg(0)));
863      inst->conditional_mod = BRW_CONDITIONAL_NZ;
864   }
865
866   for (i = 0; i < type_size(ir->lhs->type); i++) {
867      if (i >= 4 || (write_mask & (1 << i))) {
868	 inst = emit(fs_inst(BRW_OPCODE_MOV, l, r));
869	 if (ir->condition)
870	    inst->predicated = true;
871      }
872      l.reg_offset++;
873      r.reg_offset++;
874   }
875}
876
877void
878fs_visitor::visit(ir_texture *ir)
879{
880   int base_mrf = 2;
881   fs_inst *inst = NULL;
882   unsigned int mlen = 0;
883
884   ir->coordinate->accept(this);
885   fs_reg coordinate = this->result;
886
887   if (ir->projector) {
888      fs_reg inv_proj = fs_reg(this, glsl_type::float_type);
889
890      ir->projector->accept(this);
891      emit(fs_inst(FS_OPCODE_RCP, inv_proj, this->result));
892
893      fs_reg proj_coordinate = fs_reg(this, ir->coordinate->type);
894      for (unsigned int i = 0; i < ir->coordinate->type->vector_elements; i++) {
895	 emit(fs_inst(BRW_OPCODE_MUL, proj_coordinate, coordinate, inv_proj));
896	 coordinate.reg_offset++;
897	 proj_coordinate.reg_offset++;
898      }
899      proj_coordinate.reg_offset = 0;
900
901      coordinate = proj_coordinate;
902   }
903
904   for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) {
905      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate));
906      coordinate.reg_offset++;
907   }
908
909   /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
910   if (intel->gen < 5)
911      mlen = 3;
912
913   if (ir->shadow_comparitor) {
914      /* For shadow comparisons, we have to supply u,v,r. */
915      mlen = 3;
916
917      ir->shadow_comparitor->accept(this);
918      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
919      mlen++;
920   }
921
922   /* Do we ever want to handle writemasking on texture samples?  Is it
923    * performance relevant?
924    */
925   fs_reg dst = fs_reg(this, glsl_type::vec4_type);
926
927   switch (ir->op) {
928   case ir_tex:
929      inst = emit(fs_inst(FS_OPCODE_TEX, dst, fs_reg(MRF, base_mrf)));
930      break;
931   case ir_txb:
932      ir->lod_info.bias->accept(this);
933      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
934      mlen++;
935
936      inst = emit(fs_inst(FS_OPCODE_TXB, dst, fs_reg(MRF, base_mrf)));
937      break;
938   case ir_txl:
939      ir->lod_info.lod->accept(this);
940      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
941      mlen++;
942
943      inst = emit(fs_inst(FS_OPCODE_TXL, dst, fs_reg(MRF, base_mrf)));
944      break;
945   case ir_txd:
946   case ir_txf:
947      assert(!"GLSL 1.30 features unsupported");
948      break;
949   }
950
951   this->result = dst;
952
953   if (ir->shadow_comparitor)
954      inst->shadow_compare = true;
955   inst->mlen = mlen;
956}
957
958void
959fs_visitor::visit(ir_swizzle *ir)
960{
961   ir->val->accept(this);
962   fs_reg val = this->result;
963
964   fs_reg result = fs_reg(this, ir->type);
965   this->result = result;
966
967   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
968      fs_reg channel = val;
969      int swiz = 0;
970
971      switch (i) {
972      case 0:
973	 swiz = ir->mask.x;
974	 break;
975      case 1:
976	 swiz = ir->mask.y;
977	 break;
978      case 2:
979	 swiz = ir->mask.z;
980	 break;
981      case 3:
982	 swiz = ir->mask.w;
983	 break;
984      }
985
986      channel.reg_offset += swiz;
987      emit(fs_inst(BRW_OPCODE_MOV, result, channel));
988      result.reg_offset++;
989   }
990}
991
/** discard is not implemented yet; asserts. */
void
fs_visitor::visit(ir_discard *ir)
{
   assert(!"FINISHME");
}
997
998void
999fs_visitor::visit(ir_constant *ir)
1000{
1001   fs_reg reg(this, ir->type);
1002   this->result = reg;
1003
1004   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
1005      switch (ir->type->base_type) {
1006      case GLSL_TYPE_FLOAT:
1007	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i])));
1008	 break;
1009      case GLSL_TYPE_UINT:
1010	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i])));
1011	 break;
1012      case GLSL_TYPE_INT:
1013	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i])));
1014	 break;
1015      case GLSL_TYPE_BOOL:
1016	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i])));
1017	 break;
1018      default:
1019	 assert(!"Non-float/uint/int/bool constant");
1020      }
1021      reg.reg_offset++;
1022   }
1023}
1024
1025void
1026fs_visitor::visit(ir_if *ir)
1027{
1028   fs_inst *inst;
1029
1030   /* Don't point the annotation at the if statement, because then it plus
1031    * the then and else blocks get printed.
1032    */
1033   this->base_ir = ir->condition;
1034
1035   /* Generate the condition into the condition code. */
1036   ir->condition->accept(this);
1037   inst = emit(fs_inst(BRW_OPCODE_MOV, fs_reg(brw_null_reg()), this->result));
1038   inst->conditional_mod = BRW_CONDITIONAL_NZ;
1039
1040   inst = emit(fs_inst(BRW_OPCODE_IF));
1041   inst->predicated = true;
1042
1043   foreach_iter(exec_list_iterator, iter, ir->then_instructions) {
1044      ir_instruction *ir = (ir_instruction *)iter.get();
1045      this->base_ir = ir;
1046
1047      ir->accept(this);
1048   }
1049
1050   if (!ir->else_instructions.is_empty()) {
1051      emit(fs_inst(BRW_OPCODE_ELSE));
1052
1053      foreach_iter(exec_list_iterator, iter, ir->else_instructions) {
1054	 ir_instruction *ir = (ir_instruction *)iter.get();
1055	 this->base_ir = ir;
1056
1057	 ir->accept(this);
1058      }
1059   }
1060
1061   emit(fs_inst(BRW_OPCODE_ENDIF));
1062}
1063
/** Loops are not implemented yet; asserts. */
void
fs_visitor::visit(ir_loop *ir)
{
   assert(!"FINISHME");
}
1069
/** Loop jumps are not implemented yet; asserts. */
void
fs_visitor::visit(ir_loop_jump *ir)
{
   assert(!"FINISHME");
}
1075
/** Function calls are not implemented yet; asserts. */
void
fs_visitor::visit(ir_call *ir)
{
   assert(!"FINISHME");
}
1081
/** Return statements are not implemented yet; asserts. */
void
fs_visitor::visit(ir_return *ir)
{
   assert(!"FINISHME");
}
1087
1088void
1089fs_visitor::visit(ir_function *ir)
1090{
1091   /* Ignore function bodies other than main() -- we shouldn't see calls to
1092    * them since they should all be inlined before we get to ir_to_mesa.
1093    */
1094   if (strcmp(ir->name, "main") == 0) {
1095      const ir_function_signature *sig;
1096      exec_list empty;
1097
1098      sig = ir->matching_signature(&empty);
1099
1100      assert(sig);
1101
1102      foreach_iter(exec_list_iterator, iter, sig->body) {
1103	 ir_instruction *ir = (ir_instruction *)iter.get();
1104	 this->base_ir = ir;
1105
1106	 ir->accept(this);
1107      }
1108   }
1109}
1110
/**
 * Never expected to be called: visit(ir_function *) walks the signature
 * body of main() itself.  Asserts if reached.
 */
void
fs_visitor::visit(ir_function_signature *ir)
{
   assert(!"not reached");
   (void)ir;
}
1117
1118fs_inst *
1119fs_visitor::emit(fs_inst inst)
1120{
1121   fs_inst *list_inst = new(mem_ctx) fs_inst;
1122   *list_inst = inst;
1123
1124   list_inst->annotation = this->current_annotation;
1125   list_inst->ir = this->base_ir;
1126
1127   this->instructions.push_tail(list_inst);
1128
1129   return list_inst;
1130}
1131
1132/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
1133void
1134fs_visitor::emit_dummy_fs()
1135{
1136   /* Everyone's favorite color. */
1137   emit(fs_inst(BRW_OPCODE_MOV,
1138		fs_reg(MRF, 2),
1139		fs_reg(1.0f)));
1140   emit(fs_inst(BRW_OPCODE_MOV,
1141		fs_reg(MRF, 3),
1142		fs_reg(0.0f)));
1143   emit(fs_inst(BRW_OPCODE_MOV,
1144		fs_reg(MRF, 4),
1145		fs_reg(1.0f)));
1146   emit(fs_inst(BRW_OPCODE_MOV,
1147		fs_reg(MRF, 5),
1148		fs_reg(0.0f)));
1149
1150   fs_inst *write;
1151   write = emit(fs_inst(FS_OPCODE_FB_WRITE,
1152			fs_reg(0),
1153			fs_reg(0)));
1154}
1155
1156/* The register location here is relative to the start of the URB
1157 * data.  It will get adjusted to be a real location before
1158 * generate_code() time.
1159 */
1160struct brw_reg
1161fs_visitor::interp_reg(int location, int channel)
1162{
1163   int regnr = location * 2 + channel / 2;
1164   int stride = (channel & 1) * 4;
1165
1166   return brw_vec1_grf(regnr, stride);
1167}
1168
1169/** Emits the interpolation for the varying inputs. */
1170void
1171fs_visitor::emit_interpolation()
1172{
1173   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
1174   /* For now, the source regs for the setup URB data will be unset,
1175    * since we don't know until codegen how many push constants we'll
1176    * use, and therefore what the setup URB offset is.
1177    */
1178   fs_reg src_reg = reg_undef;
1179
1180   this->current_annotation = "compute pixel centers";
1181   this->pixel_x = fs_reg(this, glsl_type::uint_type);
1182   this->pixel_y = fs_reg(this, glsl_type::uint_type);
1183   emit(fs_inst(BRW_OPCODE_ADD,
1184		this->pixel_x,
1185		fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
1186		fs_reg(brw_imm_v(0x10101010))));
1187   emit(fs_inst(BRW_OPCODE_ADD,
1188		this->pixel_y,
1189		fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
1190		fs_reg(brw_imm_v(0x11001100))));
1191
1192   this->current_annotation = "compute pixel deltas from v0";
1193   this->delta_x = fs_reg(this, glsl_type::float_type);
1194   this->delta_y = fs_reg(this, glsl_type::float_type);
1195   emit(fs_inst(BRW_OPCODE_ADD,
1196		this->delta_x,
1197		this->pixel_x,
1198		fs_reg(negate(brw_vec1_grf(1, 0)))));
1199   emit(fs_inst(BRW_OPCODE_ADD,
1200		this->delta_y,
1201		this->pixel_y,
1202		fs_reg(brw_vec1_grf(1, 1))));
1203
1204   this->current_annotation = "compute pos.w and 1/pos.w";
1205   /* Compute wpos.  Unlike many other varying inputs, we usually need it
1206    * to produce 1/w, and the varying variable wouldn't show up.
1207    */
1208   fs_reg wpos = fs_reg(this, glsl_type::vec4_type);
1209   this->interp_attrs[FRAG_ATTRIB_WPOS] = wpos;
1210   emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x)); /* FINISHME: ARB_fcc */
1211   wpos.reg_offset++;
1212   emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y)); /* FINISHME: ARB_fcc */
1213   wpos.reg_offset++;
1214   emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
1215		interp_reg(FRAG_ATTRIB_WPOS, 2)));
1216   wpos.reg_offset++;
1217   emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
1218		interp_reg(FRAG_ATTRIB_WPOS, 3)));
1219   /* Compute the pixel W value from wpos.w. */
1220   this->pixel_w = fs_reg(this, glsl_type::float_type);
1221   emit(fs_inst(FS_OPCODE_RCP, this->pixel_w, wpos));
1222
1223   /* FINISHME: gl_FrontFacing */
1224
1225   foreach_iter(exec_list_iterator, iter, *this->shader->ir) {
1226      ir_instruction *ir = (ir_instruction *)iter.get();
1227      ir_variable *var = ir->as_variable();
1228
1229      if (!var)
1230	 continue;
1231
1232      if (var->mode != ir_var_in)
1233	 continue;
1234
1235      /* If it's already set up (WPOS), skip. */
1236      if (var->location == 0)
1237	 continue;
1238
1239      this->current_annotation = talloc_asprintf(this->mem_ctx,
1240						 "interpolate %s "
1241						 "(FRAG_ATTRIB[%d])",
1242						 var->name,
1243						 var->location);
1244      emit_pinterp(var->location);
1245   }
1246   this->current_annotation = NULL;
1247}
1248
1249void
1250fs_visitor::emit_pinterp(int location)
1251{
1252   fs_reg interp_attr = fs_reg(this, glsl_type::vec4_type);
1253   this->interp_attrs[location] = interp_attr;
1254
1255   for (unsigned int i = 0; i < 4; i++) {
1256      struct brw_reg interp = interp_reg(location, i);
1257      emit(fs_inst(FS_OPCODE_LINTERP,
1258		   interp_attr,
1259		   this->delta_x,
1260		   this->delta_y,
1261		   fs_reg(interp)));
1262      interp_attr.reg_offset++;
1263   }
1264   interp_attr.reg_offset -= 4;
1265
1266   for (unsigned int i = 0; i < 4; i++) {
1267      emit(fs_inst(BRW_OPCODE_MUL,
1268		   interp_attr,
1269		   interp_attr,
1270		   this->pixel_w));
1271      interp_attr.reg_offset++;
1272   }
1273}
1274
1275void
1276fs_visitor::emit_fb_writes()
1277{
1278   this->current_annotation = "FB write";
1279
1280   assert(this->frag_color || !"FINISHME: MRT");
1281   fs_reg color = *(variable_storage(this->frag_color));
1282
1283   for (int i = 0; i < 4; i++) {
1284      emit(fs_inst(BRW_OPCODE_MOV,
1285		   fs_reg(MRF, 2 + i),
1286		   color));
1287      color.reg_offset++;
1288   }
1289
1290   emit(fs_inst(FS_OPCODE_FB_WRITE,
1291		fs_reg(0),
1292		fs_reg(0)));
1293
1294   this->current_annotation = NULL;
1295}
1296
1297void
1298fs_visitor::generate_fb_write(fs_inst *inst)
1299{
1300   GLboolean eot = 1; /* FINISHME: MRT */
1301   /* FINISHME: AADS */
1302
1303   /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
1304    * move, here's g1.
1305    */
1306   brw_push_insn_state(p);
1307   brw_set_mask_control(p, BRW_MASK_DISABLE);
1308   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1309   brw_MOV(p,
1310	   brw_message_reg(1),
1311	   brw_vec8_grf(1, 0));
1312   brw_pop_insn_state(p);
1313
1314   int nr = 2 + 4;
1315
1316   brw_fb_WRITE(p,
1317		8, /* dispatch_width */
1318		retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
1319		0, /* base MRF */
1320		retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
1321		0, /* FINISHME: MRT target */
1322		nr,
1323		0,
1324		eot);
1325}
1326
1327void
1328fs_visitor::generate_linterp(fs_inst *inst,
1329			     struct brw_reg dst, struct brw_reg *src)
1330{
1331   struct brw_reg delta_x = src[0];
1332   struct brw_reg delta_y = src[1];
1333   struct brw_reg interp = src[2];
1334
1335   if (brw->has_pln &&
1336       delta_y.nr == delta_x.nr + 1 &&
1337       (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
1338      brw_PLN(p, dst, interp, delta_x);
1339   } else {
1340      brw_LINE(p, brw_null_reg(), interp, delta_x);
1341      brw_MAC(p, dst, suboffset(interp, 1), delta_y);
1342   }
1343}
1344
1345void
1346fs_visitor::generate_math(fs_inst *inst,
1347			  struct brw_reg dst, struct brw_reg *src)
1348{
1349   int op;
1350
1351   switch (inst->opcode) {
1352   case FS_OPCODE_RCP:
1353      op = BRW_MATH_FUNCTION_INV;
1354      break;
1355   case FS_OPCODE_RSQ:
1356      op = BRW_MATH_FUNCTION_RSQ;
1357      break;
1358   case FS_OPCODE_SQRT:
1359      op = BRW_MATH_FUNCTION_SQRT;
1360      break;
1361   case FS_OPCODE_EXP2:
1362      op = BRW_MATH_FUNCTION_EXP;
1363      break;
1364   case FS_OPCODE_LOG2:
1365      op = BRW_MATH_FUNCTION_LOG;
1366      break;
1367   case FS_OPCODE_POW:
1368      op = BRW_MATH_FUNCTION_POW;
1369      break;
1370   case FS_OPCODE_SIN:
1371      op = BRW_MATH_FUNCTION_SIN;
1372      break;
1373   case FS_OPCODE_COS:
1374      op = BRW_MATH_FUNCTION_COS;
1375      break;
1376   default:
1377      assert(!"not reached: unknown math function");
1378      op = 0;
1379      break;
1380   }
1381
1382   if (inst->opcode == FS_OPCODE_POW) {
1383      brw_MOV(p, brw_message_reg(3), src[1]);
1384   }
1385
1386   brw_math(p, dst,
1387	    op,
1388	    inst->saturate ? BRW_MATH_SATURATE_SATURATE :
1389	    BRW_MATH_SATURATE_NONE,
1390	    2, src[0],
1391	    BRW_MATH_DATA_VECTOR,
1392	    BRW_MATH_PRECISION_FULL);
1393}
1394
1395void
1396fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
1397{
1398   int msg_type = -1;
1399   int rlen = 4;
1400
1401   if (intel->gen == 5) {
1402      switch (inst->opcode) {
1403      case FS_OPCODE_TEX:
1404	 if (inst->shadow_compare) {
1405	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5;
1406	 } else {
1407	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5;
1408	 }
1409	 break;
1410      case FS_OPCODE_TXB:
1411	 if (inst->shadow_compare) {
1412	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5;
1413	 } else {
1414	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5;
1415	 }
1416	 break;
1417      }
1418   } else {
1419      switch (inst->opcode) {
1420      case FS_OPCODE_TEX:
1421	 /* Note that G45 and older determines shadow compare and dispatch width
1422	  * from message length for most messages.
1423	  */
1424	 if (inst->shadow_compare) {
1425	    msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
1426	 } else {
1427	    msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
1428	 }
1429      case FS_OPCODE_TXB:
1430	 if (inst->shadow_compare) {
1431	    assert(!"FINISHME: shadow compare with bias.");
1432	    msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
1433	 } else {
1434	    msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
1435	    rlen = 8;
1436	 }
1437	 break;
1438      }
1439   }
1440   assert(msg_type != -1);
1441
1442   /* g0 header. */
1443   src.nr--;
1444
1445   brw_SAMPLE(p,
1446	      retype(dst, BRW_REGISTER_TYPE_UW),
1447	      src.nr,
1448	      retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
1449              SURF_INDEX_TEXTURE(inst->sampler),
1450	      inst->sampler,
1451	      WRITEMASK_XYZW,
1452	      msg_type,
1453	      rlen,
1454	      inst->mlen + 1,
1455	      0,
1456	      1,
1457	      BRW_SAMPLER_SIMD_MODE_SIMD8);
1458}
1459
1460static void
1461trivial_assign_reg(int header_size, fs_reg *reg)
1462{
1463   if (reg->file == GRF && reg->reg != 0) {
1464      reg->hw_reg = header_size + reg->reg - 1 + reg->reg_offset;
1465      reg->reg = 0;
1466   }
1467}
1468
1469void
1470fs_visitor::assign_curb_setup()
1471{
1472   c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
1473   c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8;
1474
1475   /* Map the offsets in the UNIFORM file to fixed HW regs. */
1476   foreach_iter(exec_list_iterator, iter, this->instructions) {
1477      fs_inst *inst = (fs_inst *)iter.get();
1478
1479      for (unsigned int i = 0; i < 3; i++) {
1480	 if (inst->src[i].file == UNIFORM) {
1481	    int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
1482	    struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf +
1483						  constant_nr / 8,
1484						  constant_nr % 8);
1485
1486	    inst->src[i].file = FIXED_HW_REG;
1487	    inst->src[i].fixed_hw_reg = brw_reg;
1488	 }
1489      }
1490   }
1491}
1492
1493void
1494fs_visitor::assign_urb_setup()
1495{
1496   int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length;
1497   int interp_reg_nr[FRAG_ATTRIB_MAX];
1498
1499   c->prog_data.urb_read_length = 0;
1500
1501   /* Figure out where each of the incoming setup attributes lands. */
1502   for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
1503      interp_reg_nr[i] = -1;
1504
1505      if (i != FRAG_ATTRIB_WPOS &&
1506	  !(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i)))
1507	 continue;
1508
1509      /* Each attribute is 4 setup channels, each of which is half a reg. */
1510      interp_reg_nr[i] = urb_start + c->prog_data.urb_read_length;
1511      c->prog_data.urb_read_length += 2;
1512   }
1513
1514   /* Map the register numbers for FS_OPCODE_LINTERP so that it uses
1515    * the correct setup input.
1516    */
1517   foreach_iter(exec_list_iterator, iter, this->instructions) {
1518      fs_inst *inst = (fs_inst *)iter.get();
1519
1520      if (inst->opcode != FS_OPCODE_LINTERP)
1521	 continue;
1522
1523      assert(inst->src[2].file == FIXED_HW_REG);
1524
1525      int location = inst->src[2].fixed_hw_reg.nr / 2;
1526      assert(interp_reg_nr[location] != -1);
1527      inst->src[2].fixed_hw_reg.nr = (interp_reg_nr[location] +
1528				      (inst->src[2].fixed_hw_reg.nr & 1));
1529   }
1530
1531   this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length;
1532}
1533
1534void
1535fs_visitor::assign_regs()
1536{
1537   int header_size = this->first_non_payload_grf;
1538   int last_grf = 0;
1539
1540   /* FINISHME: trivial assignment of register numbers */
1541   foreach_iter(exec_list_iterator, iter, this->instructions) {
1542      fs_inst *inst = (fs_inst *)iter.get();
1543
1544      trivial_assign_reg(header_size, &inst->dst);
1545      trivial_assign_reg(header_size, &inst->src[0]);
1546      trivial_assign_reg(header_size, &inst->src[1]);
1547
1548      last_grf = MAX2(last_grf, inst->dst.hw_reg);
1549      last_grf = MAX2(last_grf, inst->src[0].hw_reg);
1550      last_grf = MAX2(last_grf, inst->src[1].hw_reg);
1551   }
1552
1553   this->grf_used = last_grf + 1;
1554}
1555
1556static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
1557{
1558   struct brw_reg brw_reg;
1559
1560   switch (reg->file) {
1561   case GRF:
1562   case ARF:
1563   case MRF:
1564      brw_reg = brw_vec8_reg(reg->file,
1565			    reg->hw_reg, 0);
1566      brw_reg = retype(brw_reg, reg->type);
1567      break;
1568   case IMM:
1569      switch (reg->type) {
1570      case BRW_REGISTER_TYPE_F:
1571	 brw_reg = brw_imm_f(reg->imm.f);
1572	 break;
1573      case BRW_REGISTER_TYPE_D:
1574	 brw_reg = brw_imm_d(reg->imm.i);
1575	 break;
1576      case BRW_REGISTER_TYPE_UD:
1577	 brw_reg = brw_imm_ud(reg->imm.u);
1578	 break;
1579      default:
1580	 assert(!"not reached");
1581	 break;
1582      }
1583      break;
1584   case FIXED_HW_REG:
1585      brw_reg = reg->fixed_hw_reg;
1586      break;
1587   case BAD_FILE:
1588      /* Probably unused. */
1589      brw_reg = brw_null_reg();
1590      break;
1591   case UNIFORM:
1592      assert(!"not reached");
1593      brw_reg = brw_null_reg();
1594      break;
1595   }
1596   if (reg->abs)
1597      brw_reg = brw_abs(brw_reg);
1598   if (reg->negate)
1599      brw_reg = negate(brw_reg);
1600
1601   return brw_reg;
1602}
1603
1604void
1605fs_visitor::generate_code()
1606{
1607   unsigned int annotation_len = 0;
1608   int last_native_inst = 0;
1609   struct brw_instruction *if_stack[16];
1610   int if_stack_depth = 0;
1611
1612   memset(&if_stack, 0, sizeof(if_stack));
1613   foreach_iter(exec_list_iterator, iter, this->instructions) {
1614      fs_inst *inst = (fs_inst *)iter.get();
1615      struct brw_reg src[3], dst;
1616
1617      for (unsigned int i = 0; i < 3; i++) {
1618	 src[i] = brw_reg_from_fs_reg(&inst->src[i]);
1619      }
1620      dst = brw_reg_from_fs_reg(&inst->dst);
1621
1622      brw_set_conditionalmod(p, inst->conditional_mod);
1623      brw_set_predicate_control(p, inst->predicated);
1624
1625      switch (inst->opcode) {
1626      case BRW_OPCODE_MOV:
1627	 brw_MOV(p, dst, src[0]);
1628	 break;
1629      case BRW_OPCODE_ADD:
1630	 brw_ADD(p, dst, src[0], src[1]);
1631	 break;
1632      case BRW_OPCODE_MUL:
1633	 brw_MUL(p, dst, src[0], src[1]);
1634	 break;
1635
1636      case BRW_OPCODE_FRC:
1637	 brw_FRC(p, dst, src[0]);
1638	 break;
1639      case BRW_OPCODE_RNDD:
1640	 brw_RNDD(p, dst, src[0]);
1641	 break;
1642      case BRW_OPCODE_RNDZ:
1643	 brw_RNDZ(p, dst, src[0]);
1644	 break;
1645
1646      case BRW_OPCODE_AND:
1647	 brw_AND(p, dst, src[0], src[1]);
1648	 break;
1649      case BRW_OPCODE_OR:
1650	 brw_OR(p, dst, src[0], src[1]);
1651	 break;
1652      case BRW_OPCODE_XOR:
1653	 brw_XOR(p, dst, src[0], src[1]);
1654	 break;
1655
1656      case BRW_OPCODE_CMP:
1657	 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
1658	 break;
1659      case BRW_OPCODE_SEL:
1660	 brw_SEL(p, dst, src[0], src[1]);
1661	 break;
1662
1663      case BRW_OPCODE_IF:
1664	 assert(if_stack_depth < 16);
1665	 if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8);
1666	 if_stack_depth++;
1667	 break;
1668      case BRW_OPCODE_ELSE:
1669	 if_stack[if_stack_depth - 1] =
1670	    brw_ELSE(p, if_stack[if_stack_depth - 1]);
1671	 break;
1672      case BRW_OPCODE_ENDIF:
1673	 if_stack_depth--;
1674	 brw_ENDIF(p , if_stack[if_stack_depth]);
1675	 break;
1676      case FS_OPCODE_RCP:
1677      case FS_OPCODE_RSQ:
1678      case FS_OPCODE_SQRT:
1679      case FS_OPCODE_EXP2:
1680      case FS_OPCODE_LOG2:
1681      case FS_OPCODE_POW:
1682      case FS_OPCODE_SIN:
1683      case FS_OPCODE_COS:
1684	 generate_math(inst, dst, src);
1685	 break;
1686      case FS_OPCODE_LINTERP:
1687	 generate_linterp(inst, dst, src);
1688	 break;
1689      case FS_OPCODE_TEX:
1690      case FS_OPCODE_TXB:
1691      case FS_OPCODE_TXL:
1692	 generate_tex(inst, dst, src[0]);
1693	 break;
1694      case FS_OPCODE_FB_WRITE:
1695	 generate_fb_write(inst);
1696	 break;
1697      default:
1698	 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
1699	    _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
1700			  brw_opcodes[inst->opcode].name);
1701	 } else {
1702	    _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
1703	 }
1704	 this->fail = true;
1705      }
1706
1707      if (annotation_len < p->nr_insn) {
1708	 annotation_len *= 2;
1709	 if (annotation_len < 16)
1710	    annotation_len = 16;
1711
1712	 this->annotation_string = talloc_realloc(this->mem_ctx,
1713						  annotation_string,
1714						  const char *,
1715						  annotation_len);
1716	 this->annotation_ir = talloc_realloc(this->mem_ctx,
1717					      annotation_ir,
1718					      ir_instruction *,
1719					      annotation_len);
1720      }
1721
1722      for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
1723	 this->annotation_string[i] = inst->annotation;
1724	 this->annotation_ir[i] = inst->ir;
1725      }
1726      last_native_inst = p->nr_insn;
1727   }
1728}
1729
1730GLboolean
1731brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
1732{
1733   struct brw_compile *p = &c->func;
1734   struct intel_context *intel = &brw->intel;
1735   GLcontext *ctx = &intel->ctx;
1736   struct brw_shader *shader = NULL;
1737   struct gl_shader_program *prog = ctx->Shader.CurrentProgram;
1738
1739   if (!prog)
1740      return GL_FALSE;
1741
1742   if (!using_new_fs)
1743      return GL_FALSE;
1744
1745   for (unsigned int i = 0; i < prog->_NumLinkedShaders; i++) {
1746      if (prog->_LinkedShaders[i]->Type == GL_FRAGMENT_SHADER) {
1747	 shader = (struct brw_shader *)prog->_LinkedShaders[i];
1748	 break;
1749      }
1750   }
1751   if (!shader)
1752      return GL_FALSE;
1753
1754   /* We always use 8-wide mode, at least for now.  For one, flow
1755    * control only works in 8-wide.  Also, when we're fragment shader
1756    * bound, we're almost always under register pressure as well, so
1757    * 8-wide would save us from the performance cliff of spilling
1758    * regs.
1759    */
1760   c->dispatch_width = 8;
1761
1762   if (INTEL_DEBUG & DEBUG_WM) {
1763      printf("GLSL IR for native fragment shader %d:\n", prog->Name);
1764      _mesa_print_ir(shader->ir, NULL);
1765      printf("\n");
1766   }
1767
1768   /* Now the main event: Visit the shader IR and generate our FS IR for it.
1769    */
1770   fs_visitor v(c, shader);
1771
1772   if (0) {
1773      v.emit_dummy_fs();
1774   } else {
1775      v.emit_interpolation();
1776
1777      /* Generate FS IR for main().  (the visitor only descends into
1778       * functions called "main").
1779       */
1780      foreach_iter(exec_list_iterator, iter, *shader->ir) {
1781	 ir_instruction *ir = (ir_instruction *)iter.get();
1782	 v.base_ir = ir;
1783	 ir->accept(&v);
1784      }
1785
1786      if (v.fail)
1787	 return GL_FALSE;
1788
1789      v.emit_fb_writes();
1790      v.assign_curb_setup();
1791      v.assign_urb_setup();
1792      v.assign_regs();
1793   }
1794
1795   v.generate_code();
1796
1797   if (INTEL_DEBUG & DEBUG_WM) {
1798      const char *last_annotation_string = NULL;
1799      ir_instruction *last_annotation_ir = NULL;
1800
1801      printf("Native code for fragment shader %d:\n", prog->Name);
1802      for (unsigned int i = 0; i < p->nr_insn; i++) {
1803	 if (last_annotation_ir != v.annotation_ir[i]) {
1804	    last_annotation_ir = v.annotation_ir[i];
1805	    if (last_annotation_ir) {
1806	       printf("   ");
1807	       last_annotation_ir->print();
1808	       printf("\n");
1809	    }
1810	 }
1811	 if (last_annotation_string != v.annotation_string[i]) {
1812	    last_annotation_string = v.annotation_string[i];
1813	    if (last_annotation_string)
1814	       printf("   %s\n", last_annotation_string);
1815	 }
1816	 brw_disasm(stdout, &p->store[i], intel->gen);
1817      }
1818      printf("\n");
1819   }
1820
1821   c->prog_data.total_grf = v.grf_used;
1822   c->prog_data.total_scratch = 0;
1823
1824   return GL_TRUE;
1825}
1826