/* brw_fs.cpp — revision b90c7d1713c5a52fd85cb9dacad5828ae2fdbf6c */
1/*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 *    Eric Anholt <eric@anholt.net>
25 *
26 */
27
28extern "C" {
29
30#include <sys/types.h>
31
32#include "main/macros.h"
33#include "main/shaderobj.h"
34#include "main/uniforms.h"
35#include "program/prog_parameter.h"
36#include "program/prog_print.h"
37#include "program/prog_optimize.h"
38#include "program/register_allocate.h"
39#include "program/sampler.h"
40#include "program/hash_table.h"
41#include "brw_context.h"
42#include "brw_eu.h"
43#include "brw_wm.h"
44#include "talloc.h"
45}
46#include "../glsl/glsl_types.h"
47#include "../glsl/ir_optimization.h"
48#include "../glsl/ir_print_visitor.h"
49
/* Register files an fs_reg can live in.  The first four alias the hardware
 * encodings directly; the remaining entries are software-only files that
 * are resolved (to params or hardware regs) before code generation.
 */
enum register_file {
   ARF = BRW_ARCHITECTURE_REGISTER_FILE,
   GRF = BRW_GENERAL_REGISTER_FILE,
   MRF = BRW_MESSAGE_REGISTER_FILE,
   IMM = BRW_IMMEDIATE_VALUE,
   FIXED_HW_REG, /* a struct brw_reg */
   UNIFORM, /* prog_data->params[hw_reg] */
   BAD_FILE
};
59
/* Virtual opcodes used by this backend in addition to the hardware
 * BRW_OPCODE_* set.  Numbering starts at 256 so these can never collide
 * with real hardware opcodes; generate_code() lowers them to real
 * instructions (math function calls, sampler messages, etc.).
 */
enum fs_opcodes {
   FS_OPCODE_FB_WRITE = 256,
   FS_OPCODE_RCP,
   FS_OPCODE_RSQ,
   FS_OPCODE_SQRT,
   FS_OPCODE_EXP2,
   FS_OPCODE_LOG2,
   FS_OPCODE_POW,
   FS_OPCODE_SIN,
   FS_OPCODE_COS,
   FS_OPCODE_DDX,
   FS_OPCODE_DDY,
   FS_OPCODE_LINTERP,
   FS_OPCODE_TEX,
   FS_OPCODE_TXB,
   FS_OPCODE_TXL,
   FS_OPCODE_DISCARD,
};
78
/* -1 = not yet checked; resolved from the INTEL_NEW_FS environment
 * variable on the first call to brw_link_shader().
 */
static int using_new_fs = -1;

static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg);
81
82struct gl_shader *
83brw_new_shader(GLcontext *ctx, GLuint name, GLuint type)
84{
85   struct brw_shader *shader;
86
87   shader = talloc_zero(NULL, struct brw_shader);
88   if (shader) {
89      shader->base.Type = type;
90      shader->base.Name = name;
91      _mesa_init_shader(ctx, &shader->base);
92   }
93
94   return &shader->base;
95}
96
97struct gl_shader_program *
98brw_new_shader_program(GLcontext *ctx, GLuint name)
99{
100   struct brw_shader_program *prog;
101   prog = talloc_zero(NULL, struct brw_shader_program);
102   if (prog) {
103      prog->base.Name = name;
104      _mesa_init_shader_program(ctx, &prog->base);
105   }
106   return &prog->base;
107}
108
109GLboolean
110brw_compile_shader(GLcontext *ctx, struct gl_shader *shader)
111{
112   if (!_mesa_ir_compile_shader(ctx, shader))
113      return GL_FALSE;
114
115   return GL_TRUE;
116}
117
/**
 * Driver hook: links the shader program and, when the new FS backend is
 * enabled via INTEL_NEW_FS, lowers a private clone of each fragment
 * shader's IR into the scalar form this backend consumes.
 */
GLboolean
brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
{
   /* Latch the INTEL_NEW_FS environment override once per process. */
   if (using_new_fs == -1)
      using_new_fs = getenv("INTEL_NEW_FS") != NULL;

   for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) {
      struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[i];

      if (using_new_fs && shader->base.Type == GL_FRAGMENT_SHADER) {
	 void *mem_ctx = talloc_new(NULL);
	 bool progress;

	 /* Lower on a private clone so shader->base.ir stays intact for
	  * the shared Mesa IR paths.
	  */
	 if (shader->ir)
	    talloc_free(shader->ir);
	 shader->ir = new(shader) exec_list;
	 clone_ir_list(mem_ctx, shader->ir, shader->base.ir);

	 /* One-shot lowering of constructs this backend doesn't handle
	  * directly (matrix ops, mod, div, sub, exp/log bases).
	  */
	 do_mat_op_to_vec(shader->ir);
	 do_mod_to_fract(shader->ir);
	 do_div_to_mul_rcp(shader->ir);
	 do_sub_to_add_neg(shader->ir);
	 do_explog_to_explog2(shader->ir);

	 /* Iterate scalarization and optimization to a fixed point, since
	  * each pass can expose new opportunities for the others.
	  */
	 do {
	    progress = false;

	    brw_do_channel_expressions(shader->ir);
	    brw_do_vector_splitting(shader->ir);

	    progress = do_lower_jumps(shader->ir, true, true,
				      true, /* main return */
				      false, /* continue */
				      false /* loops */
				      ) || progress;

	    progress = do_common_optimization(shader->ir, true, 32) || progress;

	    progress = lower_noise(shader->ir) || progress;
	    progress =
	       lower_variable_index_to_cond_assign(shader->ir,
						   GL_TRUE, /* input */
						   GL_TRUE, /* output */
						   GL_TRUE, /* temp */
						   GL_TRUE /* uniform */
						   ) || progress;
	 } while (progress);

	 validate_ir_tree(shader->ir);

	 /* Re-root the surviving IR under itself so freeing mem_ctx only
	  * discards the intermediate allocations made during lowering.
	  */
	 reparent_ir(shader->ir, shader->ir);
	 talloc_free(mem_ctx);
      }
   }

   if (!_mesa_ir_link_shader(ctx, prog))
      return GL_FALSE;

   return GL_TRUE;
}
178
179static int
180type_size(const struct glsl_type *type)
181{
182   unsigned int size, i;
183
184   switch (type->base_type) {
185   case GLSL_TYPE_UINT:
186   case GLSL_TYPE_INT:
187   case GLSL_TYPE_FLOAT:
188   case GLSL_TYPE_BOOL:
189      return type->components();
190   case GLSL_TYPE_ARRAY:
191      return type_size(type->fields.array) * type->length;
192   case GLSL_TYPE_STRUCT:
193      size = 0;
194      for (i = 0; i < type->length; i++) {
195	 size += type_size(type->fields.structure[i].type);
196      }
197      return size;
198   case GLSL_TYPE_SAMPLER:
199      /* Samplers take up no register space, since they're baked in at
200       * link time.
201       */
202      return 0;
203   default:
204      assert(!"not reached");
205      return 0;
206   }
207}
208
/**
 * Register abstraction for the scalar FS backend: a value living in one
 * of the register_file files, optionally carrying negate/abs source
 * modifiers, an immediate payload, or a wrapped fixed brw_reg.
 */
class fs_reg {
public:
   /* Callers of this talloc-based new need not call delete. It's
    * easier to just talloc_free 'ctx' (or any of its ancestors). */
   static void* operator new(size_t size, void *ctx)
   {
      void *node;

      node = talloc_size(ctx, size);
      assert(node != NULL);

      return node;
   }

   /* Field setup shared by every constructor. */
   void init()
   {
      this->reg = 0;
      this->reg_offset = 0;
      this->negate = 0;
      this->abs = 0;
      this->hw_reg = -1;
   }

   /** Generic unset register constructor. */
   fs_reg()
   {
      init();
      this->file = BAD_FILE;
   }

   /** Immediate value constructor. */
   fs_reg(float f)
   {
      init();
      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_F;
      this->imm.f = f;
   }

   /** Immediate value constructor. */
   fs_reg(int32_t i)
   {
      init();
      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_D;
      this->imm.i = i;
   }

   /** Immediate value constructor. */
   fs_reg(uint32_t u)
   {
      init();
      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_UD;
      this->imm.u = u;
   }

   /** Fixed brw_reg Immediate value constructor. */
   fs_reg(struct brw_reg fixed_hw_reg)
   {
      init();
      this->file = FIXED_HW_REG;
      this->fixed_hw_reg = fixed_hw_reg;
      this->type = fixed_hw_reg.type;
   }

   fs_reg(enum register_file file, int hw_reg);
   fs_reg(class fs_visitor *v, const struct glsl_type *type);

   /** Register file: ARF, GRF, MRF, IMM. */
   enum register_file file;
   /** virtual register number.  0 = fixed hw reg */
   int reg;
   /** Offset within the virtual register. */
   int reg_offset;
   /** HW register number.  Generally unset until register allocation. */
   int hw_reg;
   /** Register type.  BRW_REGISTER_TYPE_* */
   int type;
   /* Source modifiers applied when this register is read. */
   bool negate;
   bool abs;
   /* Underlying hardware register when file == FIXED_HW_REG. */
   struct brw_reg fixed_hw_reg;

   /** Value for file == BRW_IMMMEDIATE_FILE */
   union {
      int32_t i;
      uint32_t u;
      float f;
   } imm;
};
299
/* Shared constants: an unset register and the hardware null register. */
static const fs_reg reg_undef;
static const fs_reg reg_null(ARF, BRW_ARF_NULL);
302
/**
 * A single instruction in the backend IR: an opcode (hardware or
 * FS_OPCODE_*), a destination, up to three sources, and message/state
 * fields used by SEND-style opcodes.  Lives on fs_visitor::instructions.
 */
class fs_inst : public exec_node {
public:
   /* Callers of this talloc-based new need not call delete. It's
    * easier to just talloc_free 'ctx' (or any of its ancestors).
    *
    * talloc_zero_size also zero-fills the fields init() doesn't touch
    * (mlen, ir, annotation).
    */
   static void* operator new(size_t size, void *ctx)
   {
      void *node;

      node = talloc_zero_size(ctx, size);
      assert(node != NULL);

      return node;
   }

   /* Default field setup shared by every constructor. */
   void init()
   {
      this->opcode = BRW_OPCODE_NOP;
      this->saturate = false;
      this->conditional_mod = BRW_CONDITIONAL_NONE;
      this->predicated = false;
      this->sampler = 0;
      this->target = 0;
      this->eot = false;
      this->shadow_compare = false;
   }

   fs_inst()
   {
      init();
   }

   fs_inst(int opcode)
   {
      init();
      this->opcode = opcode;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
      this->src[1] = src1;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
      this->src[1] = src1;
      this->src[2] = src2;
   }

   int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
   fs_reg dst;
   fs_reg src[3];
   bool saturate;
   bool predicated;
   int conditional_mod; /**< BRW_CONDITIONAL_* */

   int mlen; /**< SEND message length */
   int sampler;
   int target; /**< MRT target. */
   bool eot;
   bool shadow_compare;

   /** @{
    * Annotation for the generated IR.  One of the two can be set.
    */
   ir_instruction *ir;
   const char *annotation;
   /** @} */
};
387
/**
 * Walks the lowered GLSL IR for a fragment shader and emits the backend
 * fs_inst IR, then handles register assignment and final code generation
 * for it.  One instance is used per compile.
 */
class fs_visitor : public ir_visitor
{
public:

   fs_visitor(struct brw_wm_compile *c, struct brw_shader *shader)
   {
      this->c = c;
      this->p = &c->func;
      this->brw = p->brw;
      this->fp = brw->fragment_program;
      this->intel = &brw->intel;
      this->ctx = &intel->ctx;
      this->mem_ctx = talloc_new(NULL);
      this->shader = shader;
      this->fail = false;
      /* Maps ir_variable -> fs_reg storage; pointer identity is the key. */
      this->variable_ht = hash_table_ctor(0,
					  hash_table_pointer_hash,
					  hash_table_pointer_compare);

      this->frag_color = NULL;
      this->frag_data = NULL;
      this->frag_depth = NULL;
      this->first_non_payload_grf = 0;

      this->current_annotation = NULL;
      this->annotation_string = NULL;
      this->annotation_ir = NULL;
      this->base_ir = NULL;

      /* Virtual GRF 0 is reserved (means "fixed hw reg"), so numbering
       * starts at 1.
       */
      this->virtual_grf_sizes = NULL;
      this->virtual_grf_next = 1;
      this->virtual_grf_array_size = 0;
      this->virtual_grf_def = NULL;
      this->virtual_grf_use = NULL;
   }
   ~fs_visitor()
   {
      talloc_free(this->mem_ctx);
      hash_table_dtor(this->variable_ht);
   }

   fs_reg *variable_storage(ir_variable *var);
   int virtual_grf_alloc(int size);

   void visit(ir_variable *ir);
   void visit(ir_assignment *ir);
   void visit(ir_dereference_variable *ir);
   void visit(ir_dereference_record *ir);
   void visit(ir_dereference_array *ir);
   void visit(ir_expression *ir);
   void visit(ir_texture *ir);
   void visit(ir_if *ir);
   void visit(ir_constant *ir);
   void visit(ir_swizzle *ir);
   void visit(ir_return *ir);
   void visit(ir_loop *ir);
   void visit(ir_loop_jump *ir);
   void visit(ir_discard *ir);
   void visit(ir_call *ir);
   void visit(ir_function *ir);
   void visit(ir_function_signature *ir);

   fs_inst *emit(fs_inst inst);
   void assign_curb_setup();
   void assign_urb_setup();
   void assign_regs();
   void assign_regs_trivial();
   void calculate_live_intervals();
   bool virtual_grf_interferes(int a, int b);
   void generate_code();
   void generate_fb_write(fs_inst *inst);
   void generate_linterp(fs_inst *inst, struct brw_reg dst,
			 struct brw_reg *src);
   void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
   void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src);
   void generate_discard(fs_inst *inst, struct brw_reg temp);
   void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
   void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src);

   void emit_dummy_fs();
   void emit_fragcoord_interpolation(ir_variable *ir);
   void emit_general_interpolation(ir_variable *ir);
   void emit_interpolation_setup();
   void emit_fb_writes();

   struct brw_reg interp_reg(int location, int channel);
   int setup_uniform_values(int loc, const glsl_type *type);
   void setup_builtin_uniform_values(ir_variable *ir);

   struct brw_context *brw;
   const struct gl_fragment_program *fp;
   struct intel_context *intel;
   GLcontext *ctx;
   struct brw_wm_compile *c;
   struct brw_compile *p;
   struct brw_shader *shader;
   void *mem_ctx;
   exec_list instructions;

   /* Virtual register bookkeeping, indexed by virtual GRF number. */
   int *virtual_grf_sizes;
   int virtual_grf_next;
   int virtual_grf_array_size;
   int *virtual_grf_def;
   int *virtual_grf_use;

   struct hash_table *variable_ht;
   ir_variable *frag_color, *frag_data, *frag_depth;
   int first_non_payload_grf;

   /** @{ debug annotation info */
   const char *current_annotation;
   ir_instruction *base_ir;
   const char **annotation_string;
   ir_instruction **annotation_ir;
   /** @} */

   /* Set to true when compilation hits something unsupported. */
   bool fail;

   /* Result of last visit() method. */
   fs_reg result;

   /* Per-fragment interpolation setup computed in emit_interpolation_setup. */
   fs_reg pixel_x;
   fs_reg pixel_y;
   fs_reg wpos_w;
   fs_reg pixel_w;
   fs_reg delta_x;
   fs_reg delta_y;

   int grf_used;

};
519
520int
521fs_visitor::virtual_grf_alloc(int size)
522{
523   if (virtual_grf_array_size <= virtual_grf_next) {
524      if (virtual_grf_array_size == 0)
525	 virtual_grf_array_size = 16;
526      else
527	 virtual_grf_array_size *= 2;
528      virtual_grf_sizes = talloc_realloc(mem_ctx, virtual_grf_sizes,
529					 int, virtual_grf_array_size);
530
531      /* This slot is always unused. */
532      virtual_grf_sizes[0] = 0;
533   }
534   virtual_grf_sizes[virtual_grf_next] = size;
535   return virtual_grf_next++;
536}
537
/** Fixed HW reg constructor: wraps a specific register in \p file. */
fs_reg::fs_reg(enum register_file file, int hw_reg)
{
   init();
   this->file = file;
   this->hw_reg = hw_reg;
   /* Default to float; callers override the type when needed. */
   this->type = BRW_REGISTER_TYPE_F;
}
546
547int
548brw_type_for_base_type(const struct glsl_type *type)
549{
550   switch (type->base_type) {
551   case GLSL_TYPE_FLOAT:
552      return BRW_REGISTER_TYPE_F;
553   case GLSL_TYPE_INT:
554   case GLSL_TYPE_BOOL:
555      return BRW_REGISTER_TYPE_D;
556   case GLSL_TYPE_UINT:
557      return BRW_REGISTER_TYPE_UD;
558   case GLSL_TYPE_ARRAY:
559   case GLSL_TYPE_STRUCT:
560      /* These should be overridden with the type of the member when
561       * dereferenced into.  BRW_REGISTER_TYPE_UD seems like a likely
562       * way to trip up if we don't.
563       */
564      return BRW_REGISTER_TYPE_UD;
565   default:
566      assert(!"not reached");
567      return BRW_REGISTER_TYPE_F;
568   }
569}
570
/** Automatic reg constructor: allocates a fresh virtual GRF sized and
 * typed for \p type.
 */
fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type)
{
   init();

   this->file = GRF;
   this->reg = v->virtual_grf_alloc(type_size(type));
   this->reg_offset = 0;
   this->type = brw_type_for_base_type(type);
}
581
582fs_reg *
583fs_visitor::variable_storage(ir_variable *var)
584{
585   return (fs_reg *)hash_table_find(this->variable_ht, var);
586}
587
588/* Our support for uniforms is piggy-backed on the struct
589 * gl_fragment_program, because that's where the values actually
590 * get stored, rather than in some global gl_shader_program uniform
591 * store.
592 */
/**
 * Registers pointers into gl_fragment_program's parameter storage for
 * each component of the uniform at parameter location \p loc, recursing
 * through matrices, structs and arrays.
 *
 * Returns the number of parameter locations consumed.
 */
int
fs_visitor::setup_uniform_values(int loc, const glsl_type *type)
{
   unsigned int offset = 0;
   float *vec_values;

   if (type->is_matrix()) {
      /* A matrix is handled as matrix_columns consecutive column vectors. */
      const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
							type->vector_elements,
							1);

      for (unsigned int i = 0; i < type->matrix_columns; i++) {
	 offset += setup_uniform_values(loc + offset, column);
      }

      return offset;
   }

   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      /* One location: point a param slot at each component's value. */
      vec_values = fp->Base.Parameters->ParameterValues[loc];
      for (unsigned int i = 0; i < type->vector_elements; i++) {
	 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i];
      }
      return 1;

   case GLSL_TYPE_STRUCT:
      for (unsigned int i = 0; i < type->length; i++) {
	 offset += setup_uniform_values(loc + offset,
					type->fields.structure[i].type);
      }
      return offset;

   case GLSL_TYPE_ARRAY:
      for (unsigned int i = 0; i < type->length; i++) {
	 offset += setup_uniform_values(loc + offset, type->fields.array);
      }
      return offset;

   case GLSL_TYPE_SAMPLER:
      /* The sampler takes up a slot, but we don't use any values from it. */
      return 1;

   default:
      assert(!"not reached");
      return 0;
   }
}
644
645
646/* Our support for builtin uniforms is even scarier than non-builtin.
647 * It sits on top of the PROG_STATE_VAR parameters that are
648 * automatically updated from GL context state.
649 */
650void
651fs_visitor::setup_builtin_uniform_values(ir_variable *ir)
652{
653   const struct gl_builtin_uniform_desc *statevar = NULL;
654
655   for (unsigned int i = 0; _mesa_builtin_uniform_desc[i].name; i++) {
656      statevar = &_mesa_builtin_uniform_desc[i];
657      if (strcmp(ir->name, _mesa_builtin_uniform_desc[i].name) == 0)
658	 break;
659   }
660
661   if (!statevar->name) {
662      this->fail = true;
663      printf("Failed to find builtin uniform `%s'\n", ir->name);
664      return;
665   }
666
667   int array_count;
668   if (ir->type->is_array()) {
669      array_count = ir->type->length;
670   } else {
671      array_count = 1;
672   }
673
674   for (int a = 0; a < array_count; a++) {
675      for (unsigned int i = 0; i < statevar->num_elements; i++) {
676	 struct gl_builtin_uniform_element *element = &statevar->elements[i];
677	 int tokens[STATE_LENGTH];
678
679	 memcpy(tokens, element->tokens, sizeof(element->tokens));
680	 if (ir->type->is_array()) {
681	    tokens[1] = a;
682	 }
683
684	 /* This state reference has already been setup by ir_to_mesa,
685	  * but we'll get the same index back here.
686	  */
687	 int index = _mesa_add_state_reference(this->fp->Base.Parameters,
688					       (gl_state_index *)tokens);
689	 float *vec_values = this->fp->Base.Parameters->ParameterValues[index];
690
691	 /* Add each of the unique swizzles of the element as a
692	  * parameter.  This'll end up matching the expected layout of
693	  * the array/matrix/structure we're trying to fill in.
694	  */
695	 int last_swiz = -1;
696	 for (unsigned int i = 0; i < 4; i++) {
697	    int this_swiz = GET_SWZ(element->swizzle, i);
698	    if (this_swiz == last_swiz)
699	       break;
700	    last_swiz = this_swiz;
701
702	    c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i];
703	 }
704      }
705   }
706}
707
708void
709fs_visitor::emit_fragcoord_interpolation(ir_variable *ir)
710{
711   fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
712   fs_reg wpos = *reg;
713   fs_reg neg_y = this->pixel_y;
714   neg_y.negate = true;
715
716   /* gl_FragCoord.x */
717   if (ir->pixel_center_integer) {
718      emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x));
719   } else {
720      emit(fs_inst(BRW_OPCODE_ADD, wpos, this->pixel_x, fs_reg(0.5f)));
721   }
722   wpos.reg_offset++;
723
724   /* gl_FragCoord.y */
725   if (ir->origin_upper_left && ir->pixel_center_integer) {
726      emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y));
727   } else {
728      fs_reg pixel_y = this->pixel_y;
729      float offset = (ir->pixel_center_integer ? 0.0 : 0.5);
730
731      if (!ir->origin_upper_left) {
732	 pixel_y.negate = true;
733	 offset += c->key.drawable_height - 1.0;
734      }
735
736      emit(fs_inst(BRW_OPCODE_ADD, wpos, pixel_y, fs_reg(offset)));
737   }
738   wpos.reg_offset++;
739
740   /* gl_FragCoord.z */
741   emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
742		interp_reg(FRAG_ATTRIB_WPOS, 2)));
743   wpos.reg_offset++;
744
745   /* gl_FragCoord.w: Already set up in emit_interpolation */
746   emit(fs_inst(BRW_OPCODE_MOV, wpos, this->wpos_w));
747
748   hash_table_insert(this->variable_ht, reg, ir);
749}
750
751
/**
 * Emits linear interpolation instructions for an ordinary varying input,
 * iterating over array elements, matrix columns, and vector components.
 * Each interpolated channel is then multiplied by 1/w for perspective
 * correction.
 */
void
fs_visitor::emit_general_interpolation(ir_variable *ir)
{
   fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
   /* Interpolation is always in floating point regs. */
   reg->type = BRW_REGISTER_TYPE_F;
   fs_reg attr = *reg;

   unsigned int array_elements;
   const glsl_type *type;

   if (ir->type->is_array()) {
      array_elements = ir->type->length;
      if (array_elements == 0) {
	 this->fail = true;
      }
      type = ir->type->fields.array;
   } else {
      array_elements = 1;
      type = ir->type;
   }

   int location = ir->location;
   for (unsigned int i = 0; i < array_elements; i++) {
      for (unsigned int j = 0; j < type->matrix_columns; j++) {
	 if (!(fp->Base.InputsRead & BITFIELD64_BIT(location))) {
	    /* If there's no incoming setup data for this slot, don't
	     * emit interpolation for it (since it's not used, and
	     * we'd fall over later trying to find the setup data.
	     */
	    attr.reg_offset += type->vector_elements;
	    continue;
	 }

	 /* Interpolate each component of this column. */
	 for (unsigned int c = 0; c < type->vector_elements; c++) {
	    struct brw_reg interp = interp_reg(location, c);
	    emit(fs_inst(FS_OPCODE_LINTERP,
			 attr,
			 this->delta_x,
			 this->delta_y,
			 fs_reg(interp)));
	    attr.reg_offset++;
	 }
	 /* Rewind and multiply each component by pixel_w (1/w correction). */
	 attr.reg_offset -= type->vector_elements;

	 for (unsigned int c = 0; c < type->vector_elements; c++) {
	    emit(fs_inst(BRW_OPCODE_MUL,
			 attr,
			 attr,
			 this->pixel_w));
	    attr.reg_offset++;
	 }
	 location++;
      }
   }

   hash_table_insert(this->variable_ht, reg, ir);
}
810
/**
 * Allocates and registers storage for a variable the first time it is
 * seen, with special handling for built-in inputs (gl_FragCoord,
 * gl_FrontFacing), outputs (gl_FragColor/Data/Depth), and uniforms.
 */
void
fs_visitor::visit(ir_variable *ir)
{
   fs_reg *reg = NULL;

   if (variable_storage(ir))
      return;

   /* Remember the output variables for emit_fb_writes(). */
   if (strcmp(ir->name, "gl_FragColor") == 0) {
      this->frag_color = ir;
   } else if (strcmp(ir->name, "gl_FragData") == 0) {
      this->frag_data = ir;
   } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
      this->frag_depth = ir;
   }

   if (ir->mode == ir_var_in) {
      if (!strcmp(ir->name, "gl_FragCoord")) {
	 emit_fragcoord_interpolation(ir);
	 return;
      } else if (!strcmp(ir->name, "gl_FrontFacing")) {
	 reg = new(this->mem_ctx) fs_reg(this, ir->type);
	 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
	 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
	  * us front face
	  */
	 fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP,
				      *reg,
				      fs_reg(r1_6ud),
				      fs_reg(1u << 31)));
	 inst->conditional_mod = BRW_CONDITIONAL_L;
	 emit(fs_inst(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u)));
      } else {
	 emit_general_interpolation(ir);
	 return;
      }
   }

   if (ir->mode == ir_var_uniform) {
      /* The uniform's storage starts at the current end of the params. */
      int param_index = c->prog_data.nr_params;

      if (!strncmp(ir->name, "gl_", 3)) {
	 setup_builtin_uniform_values(ir);
      } else {
	 setup_uniform_values(ir->location, ir->type);
      }

      reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index);
   }

   /* Plain temporaries/outputs get a fresh virtual GRF. */
   if (!reg)
      reg = new(this->mem_ctx) fs_reg(this, ir->type);

   hash_table_insert(this->variable_ht, reg, ir);
}
866
867void
868fs_visitor::visit(ir_dereference_variable *ir)
869{
870   fs_reg *reg = variable_storage(ir->var);
871   this->result = *reg;
872}
873
/**
 * Computes the register offset of a struct field: visits the base record,
 * then adds the sizes of the fields that precede the named one.
 */
void
fs_visitor::visit(ir_dereference_record *ir)
{
   const glsl_type *struct_type = ir->record->type;

   ir->record->accept(this);

   /* Sum the sizes of the fields laid out before ir->field. */
   unsigned int offset = 0;
   for (unsigned int i = 0; i < struct_type->length; i++) {
      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
	 break;
      offset += type_size(struct_type->fields.structure[i].type);
   }
   this->result.reg_offset += offset;
   this->result.type = brw_type_for_base_type(ir->type);
}
890
/**
 * Offsets this->result to the selected array element.  Only constant
 * indices are supported so far.
 */
void
fs_visitor::visit(ir_dereference_array *ir)
{
   ir_constant *index;
   int element_size;

   ir->array->accept(this);
   index = ir->array_index->as_constant();

   element_size = type_size(ir->type);
   this->result.type = brw_type_for_base_type(ir->type);

   if (index) {
      /* Arrays live either in the uniform file or in an allocated
       * virtual GRF (reg 0 would mean a fixed hw reg).
       */
      assert(this->result.file == UNIFORM ||
	     (this->result.file == GRF &&
	      this->result.reg != 0));
      this->result.reg_offset += index->value.i[0] * element_size;
   } else {
      assert(!"FINISHME: non-constant array element");
   }
}
912
913void
914fs_visitor::visit(ir_expression *ir)
915{
916   unsigned int operand;
917   fs_reg op[2], temp;
918   fs_reg result;
919   fs_inst *inst;
920
921   for (operand = 0; operand < ir->get_num_operands(); operand++) {
922      ir->operands[operand]->accept(this);
923      if (this->result.file == BAD_FILE) {
924	 ir_print_visitor v;
925	 printf("Failed to get tree for expression operand:\n");
926	 ir->operands[operand]->accept(&v);
927	 this->fail = true;
928      }
929      op[operand] = this->result;
930
931      /* Matrix expression operands should have been broken down to vector
932       * operations already.
933       */
934      assert(!ir->operands[operand]->type->is_matrix());
935      /* And then those vector operands should have been broken down to scalar.
936       */
937      assert(!ir->operands[operand]->type->is_vector());
938   }
939
940   /* Storage for our result.  If our result goes into an assignment, it will
941    * just get copy-propagated out, so no worries.
942    */
943   this->result = fs_reg(this, ir->type);
944
945   switch (ir->operation) {
946   case ir_unop_logic_not:
947      emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], fs_reg(-1)));
948      break;
949   case ir_unop_neg:
950      op[0].negate = !op[0].negate;
951      this->result = op[0];
952      break;
953   case ir_unop_abs:
954      op[0].abs = true;
955      this->result = op[0];
956      break;
957   case ir_unop_sign:
958      temp = fs_reg(this, ir->type);
959
960      emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(0.0f)));
961
962      inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f)));
963      inst->conditional_mod = BRW_CONDITIONAL_G;
964      inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(1.0f)));
965      inst->predicated = true;
966
967      inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f)));
968      inst->conditional_mod = BRW_CONDITIONAL_L;
969      inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f)));
970      inst->predicated = true;
971
972      break;
973   case ir_unop_rcp:
974      emit(fs_inst(FS_OPCODE_RCP, this->result, op[0]));
975      break;
976
977   case ir_unop_exp2:
978      emit(fs_inst(FS_OPCODE_EXP2, this->result, op[0]));
979      break;
980   case ir_unop_log2:
981      emit(fs_inst(FS_OPCODE_LOG2, this->result, op[0]));
982      break;
983   case ir_unop_exp:
984   case ir_unop_log:
985      assert(!"not reached: should be handled by ir_explog_to_explog2");
986      break;
987   case ir_unop_sin:
988      emit(fs_inst(FS_OPCODE_SIN, this->result, op[0]));
989      break;
990   case ir_unop_cos:
991      emit(fs_inst(FS_OPCODE_COS, this->result, op[0]));
992      break;
993
994   case ir_unop_dFdx:
995      emit(fs_inst(FS_OPCODE_DDX, this->result, op[0]));
996      break;
997   case ir_unop_dFdy:
998      emit(fs_inst(FS_OPCODE_DDY, this->result, op[0]));
999      break;
1000
1001   case ir_binop_add:
1002      emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1]));
1003      break;
1004   case ir_binop_sub:
1005      assert(!"not reached: should be handled by ir_sub_to_add_neg");
1006      break;
1007
1008   case ir_binop_mul:
1009      emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1]));
1010      break;
1011   case ir_binop_div:
1012      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
1013      break;
1014   case ir_binop_mod:
1015      assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
1016      break;
1017
1018   case ir_binop_less:
1019      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
1020      inst->conditional_mod = BRW_CONDITIONAL_L;
1021      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
1022      break;
1023   case ir_binop_greater:
1024      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
1025      inst->conditional_mod = BRW_CONDITIONAL_G;
1026      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
1027      break;
1028   case ir_binop_lequal:
1029      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
1030      inst->conditional_mod = BRW_CONDITIONAL_LE;
1031      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
1032      break;
1033   case ir_binop_gequal:
1034      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
1035      inst->conditional_mod = BRW_CONDITIONAL_GE;
1036      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
1037      break;
1038   case ir_binop_equal:
1039   case ir_binop_all_equal: /* same as nequal for scalars */
1040      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
1041      inst->conditional_mod = BRW_CONDITIONAL_Z;
1042      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
1043      break;
1044   case ir_binop_nequal:
1045   case ir_binop_any_nequal: /* same as nequal for scalars */
1046      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
1047      inst->conditional_mod = BRW_CONDITIONAL_NZ;
1048      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
1049      break;
1050
1051   case ir_binop_logic_xor:
1052      emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1]));
1053      break;
1054
1055   case ir_binop_logic_or:
1056      emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1]));
1057      break;
1058
1059   case ir_binop_logic_and:
1060      emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1]));
1061      break;
1062
1063   case ir_binop_dot:
1064   case ir_binop_cross:
1065   case ir_unop_any:
1066      assert(!"not reached: should be handled by brw_fs_channel_expressions");
1067      break;
1068
1069   case ir_unop_noise:
1070      assert(!"not reached: should be handled by lower_noise");
1071      break;
1072
1073   case ir_unop_sqrt:
1074      emit(fs_inst(FS_OPCODE_SQRT, this->result, op[0]));
1075      break;
1076
1077   case ir_unop_rsq:
1078      emit(fs_inst(FS_OPCODE_RSQ, this->result, op[0]));
1079      break;
1080
1081   case ir_unop_i2f:
1082   case ir_unop_b2f:
1083   case ir_unop_b2i:
1084      emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
1085      break;
1086   case ir_unop_f2i:
1087      emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
1088      break;
1089   case ir_unop_f2b:
1090   case ir_unop_i2b:
1091      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f)));
1092      inst->conditional_mod = BRW_CONDITIONAL_NZ;
1093
1094   case ir_unop_trunc:
1095      emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
1096      break;
1097   case ir_unop_ceil:
1098      op[0].negate = ~op[0].negate;
1099      inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
1100      this->result.negate = true;
1101      break;
1102   case ir_unop_floor:
1103      inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
1104      break;
1105   case ir_unop_fract:
1106      inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0]));
1107      break;
1108
1109   case ir_binop_min:
1110      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
1111      inst->conditional_mod = BRW_CONDITIONAL_L;
1112
1113      inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
1114      inst->predicated = true;
1115      break;
1116   case ir_binop_max:
1117      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
1118      inst->conditional_mod = BRW_CONDITIONAL_G;
1119
1120      inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
1121      inst->predicated = true;
1122      break;
1123
1124   case ir_binop_pow:
1125      inst = emit(fs_inst(FS_OPCODE_POW, this->result, op[0], op[1]));
1126      break;
1127
1128   case ir_unop_bit_not:
1129   case ir_unop_u2f:
1130   case ir_binop_lshift:
1131   case ir_binop_rshift:
1132   case ir_binop_bit_and:
1133   case ir_binop_bit_xor:
1134   case ir_binop_bit_or:
1135      assert(!"GLSL 1.30 features unsupported");
1136      break;
1137   }
1138}
1139
/** Emits MOV instructions copying the RHS value into the LHS storage,
 * honoring the write mask and (if present) the assignment condition.
 */
void
fs_visitor::visit(ir_assignment *ir)
{
   struct fs_reg l, r;
   int i;
   int write_mask;
   fs_inst *inst;

   /* FINISHME: arrays on the lhs */
   ir->lhs->accept(this);
   l = this->result;

   ir->rhs->accept(this);
   r = this->result;

   /* FINISHME: This should really set to the correct maximal writemask for each
    * FINISHME: component written (in the loops below).  This case can only
    * FINISHME: occur for matrices, arrays, and structures.
    */
   if (ir->write_mask == 0) {
      assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
      write_mask = WRITEMASK_XYZW;
   } else {
      assert(ir->lhs->type->is_vector() || ir->lhs->type->is_scalar());
      write_mask = ir->write_mask;
   }

   assert(l.file != BAD_FILE);
   assert(r.file != BAD_FILE);

   if (ir->condition) {
      /* Get the condition bool into the predicate. */
      ir->condition->accept(this);
      inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, this->result, fs_reg(0)));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   }

   for (i = 0; i < type_size(ir->lhs->type); i++) {
      /* Components >= 4 only occur for matrix/array/struct stores, which
       * carry no writemask (handled above) and are always written.  Note
       * the RHS offset only advances for components actually copied, while
       * the LHS offset advances every iteration.
       */
      if (i >= 4 || (write_mask & (1 << i))) {
	 inst = emit(fs_inst(BRW_OPCODE_MOV, l, r));
	 if (ir->condition)
	    inst->predicated = true;
	 r.reg_offset++;
      }
      l.reg_offset++;
   }
}
1187
/** Emits the message setup and sampler send for a texture operation.
 *
 * Coordinate components (and optionally the shadow comparison value and
 * LOD/bias) are copied into consecutive MRFs starting at base_mrf, then a
 * single sample instruction referencing that payload is emitted.  The
 * recorded mlen excludes the g0 header; generate_tex() accounts for it.
 */
void
fs_visitor::visit(ir_texture *ir)
{
   int base_mrf = 2;
   fs_inst *inst = NULL;
   unsigned int mlen = 0;

   ir->coordinate->accept(this);
   fs_reg coordinate = this->result;

   if (ir->projector) {
      /* Projective texturing: multiply each coordinate component by 1/q
       * instead of dividing.
       */
      fs_reg inv_proj = fs_reg(this, glsl_type::float_type);

      ir->projector->accept(this);
      emit(fs_inst(FS_OPCODE_RCP, inv_proj, this->result));

      fs_reg proj_coordinate = fs_reg(this, ir->coordinate->type);
      for (unsigned int i = 0; i < ir->coordinate->type->vector_elements; i++) {
	 emit(fs_inst(BRW_OPCODE_MUL, proj_coordinate, coordinate, inv_proj));
	 coordinate.reg_offset++;
	 proj_coordinate.reg_offset++;
      }
      proj_coordinate.reg_offset = 0;

      coordinate = proj_coordinate;
   }

   /* Load the coordinate into the message payload. */
   for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate));
      coordinate.reg_offset++;
   }

   /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
   if (intel->gen < 5)
      mlen = 3;

   if (ir->shadow_comparitor) {
      /* For shadow comparisons, we have to supply u,v,r. */
      mlen = 3;

      ir->shadow_comparitor->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;
   }

   /* Do we ever want to handle writemasking on texture samples?  Is it
    * performance relevant?
    */
   fs_reg dst = fs_reg(this, glsl_type::vec4_type);

   switch (ir->op) {
   case ir_tex:
      inst = emit(fs_inst(FS_OPCODE_TEX, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txb:
      /* Bias goes in the MRF right after the coordinate/comparison value. */
      ir->lod_info.bias->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;

      inst = emit(fs_inst(FS_OPCODE_TXB, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txl:
      ir->lod_info.lod->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;

      inst = emit(fs_inst(FS_OPCODE_TXL, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txd:
   case ir_txf:
      assert(!"GLSL 1.30 features unsupported");
      break;
   }

   /* Translate the uniform holding the sampler to the actual texture unit. */
   inst->sampler =
      _mesa_get_sampler_uniform_value(ir->sampler,
				      ctx->Shader.CurrentProgram,
				      &brw->fragment_program->Base);
   inst->sampler = c->fp->program.Base.SamplerUnits[inst->sampler];

   this->result = dst;

   if (ir->shadow_comparitor)
      inst->shadow_compare = true;
   inst->mlen = mlen;
}
1274
1275void
1276fs_visitor::visit(ir_swizzle *ir)
1277{
1278   ir->val->accept(this);
1279   fs_reg val = this->result;
1280
1281   fs_reg result = fs_reg(this, ir->type);
1282   this->result = result;
1283
1284   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
1285      fs_reg channel = val;
1286      int swiz = 0;
1287
1288      switch (i) {
1289      case 0:
1290	 swiz = ir->mask.x;
1291	 break;
1292      case 1:
1293	 swiz = ir->mask.y;
1294	 break;
1295      case 2:
1296	 swiz = ir->mask.z;
1297	 break;
1298      case 3:
1299	 swiz = ir->mask.w;
1300	 break;
1301      }
1302
1303      channel.reg_offset += swiz;
1304      emit(fs_inst(BRW_OPCODE_MOV, result, channel));
1305      result.reg_offset++;
1306   }
1307}
1308
1309void
1310fs_visitor::visit(ir_discard *ir)
1311{
1312   fs_reg temp = fs_reg(this, glsl_type::uint_type);
1313
1314   assert(ir->condition == NULL); /* FINISHME */
1315
1316   emit(fs_inst(FS_OPCODE_DISCARD, temp, temp));
1317}
1318
1319void
1320fs_visitor::visit(ir_constant *ir)
1321{
1322   fs_reg reg(this, ir->type);
1323   this->result = reg;
1324
1325   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
1326      switch (ir->type->base_type) {
1327      case GLSL_TYPE_FLOAT:
1328	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i])));
1329	 break;
1330      case GLSL_TYPE_UINT:
1331	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i])));
1332	 break;
1333      case GLSL_TYPE_INT:
1334	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i])));
1335	 break;
1336      case GLSL_TYPE_BOOL:
1337	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i])));
1338	 break;
1339      default:
1340	 assert(!"Non-float/uint/int/bool constant");
1341      }
1342      reg.reg_offset++;
1343   }
1344}
1345
/** Emits an IF/ELSE/ENDIF sequence, with the condition evaluated into
 * the flag register and the IF predicated on it.
 */
void
fs_visitor::visit(ir_if *ir)
{
   fs_inst *inst;

   /* Don't point the annotation at the if statement, because then it plus
    * the then and else blocks get printed.
    */
   this->base_ir = ir->condition;

   /* Generate the condition into the condition code. */
   ir->condition->accept(this);
   inst = emit(fs_inst(BRW_OPCODE_MOV, fs_reg(brw_null_reg()), this->result));
   inst->conditional_mod = BRW_CONDITIONAL_NZ;

   inst = emit(fs_inst(BRW_OPCODE_IF));
   inst->predicated = true;

   foreach_iter(exec_list_iterator, iter, ir->then_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();
      this->base_ir = ir;

      ir->accept(this);
   }

   if (!ir->else_instructions.is_empty()) {
      emit(fs_inst(BRW_OPCODE_ELSE));

      foreach_iter(exec_list_iterator, iter, ir->else_instructions) {
	 ir_instruction *ir = (ir_instruction *)iter.get();
	 this->base_ir = ir;

	 ir->accept(this);
      }
   }

   emit(fs_inst(BRW_OPCODE_ENDIF));
}
1384
/** Emits a DO/WHILE loop, including counter initialization, the loop
 * exit comparison, and a safety counter that bounds total iterations.
 */
void
fs_visitor::visit(ir_loop *ir)
{
   fs_reg counter = reg_undef;

   if (ir->counter) {
      /* Declare the loop counter and, if a "from" expression exists,
       * initialize it before entering the loop.
       */
      this->base_ir = ir->counter;
      ir->counter->accept(this);
      counter = *(variable_storage(ir->counter));

      if (ir->from) {
	 this->base_ir = ir->from;
	 ir->from->accept(this);

	 emit(fs_inst(BRW_OPCODE_MOV, counter, this->result));
      }
   }

   /* Start a safety counter.  If the user messed up their loop
    * counting, we don't want to hang the GPU.
    */
   fs_reg max_iter = fs_reg(this, glsl_type::int_type);
   emit(fs_inst(BRW_OPCODE_MOV, max_iter, fs_reg(10000)));

   emit(fs_inst(BRW_OPCODE_DO));

   if (ir->to) {
      /* Compare the counter against the loop bound and break out of the
       * loop when the IR's comparison holds.
       */
      this->base_ir = ir->to;
      ir->to->accept(this);

      fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null,
				   counter, this->result));
      switch (ir->cmp) {
      case ir_binop_equal:
	 inst->conditional_mod = BRW_CONDITIONAL_Z;
	 break;
      case ir_binop_nequal:
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 break;
      case ir_binop_gequal:
	 inst->conditional_mod = BRW_CONDITIONAL_GE;
	 break;
      case ir_binop_lequal:
	 inst->conditional_mod = BRW_CONDITIONAL_LE;
	 break;
      case ir_binop_greater:
	 inst->conditional_mod = BRW_CONDITIONAL_G;
	 break;
      case ir_binop_less:
	 inst->conditional_mod = BRW_CONDITIONAL_L;
	 break;
      default:
	 assert(!"not reached: unknown loop condition");
	 this->fail = true;
	 break;
      }

      inst = emit(fs_inst(BRW_OPCODE_BREAK));
      inst->predicated = true;
   }

   foreach_iter(exec_list_iterator, iter, ir->body_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();
      fs_inst *inst;

      this->base_ir = ir;
      ir->accept(this);

      /* Check the maximum loop iters counter.  Note this decrement and
       * predicated BREAK are emitted after every top-level body
       * instruction, breaking when the safety counter reaches zero.
       */
      inst = emit(fs_inst(BRW_OPCODE_ADD, max_iter, max_iter, fs_reg(-1)));
      inst->conditional_mod = BRW_CONDITIONAL_Z;

      inst = emit(fs_inst(BRW_OPCODE_BREAK));
      inst->predicated = true;
   }

   if (ir->increment) {
      this->base_ir = ir->increment;
      ir->increment->accept(this);
      emit(fs_inst(BRW_OPCODE_ADD, counter, counter, this->result));
   }

   emit(fs_inst(BRW_OPCODE_WHILE));
}
1469
1470void
1471fs_visitor::visit(ir_loop_jump *ir)
1472{
1473   switch (ir->mode) {
1474   case ir_loop_jump::jump_break:
1475      emit(fs_inst(BRW_OPCODE_BREAK));
1476      break;
1477   case ir_loop_jump::jump_continue:
1478      emit(fs_inst(BRW_OPCODE_CONTINUE));
1479      break;
1480   }
1481}
1482
1483void
1484fs_visitor::visit(ir_call *ir)
1485{
1486   assert(!"FINISHME");
1487}
1488
1489void
1490fs_visitor::visit(ir_return *ir)
1491{
1492   assert(!"FINISHME");
1493}
1494
1495void
1496fs_visitor::visit(ir_function *ir)
1497{
1498   /* Ignore function bodies other than main() -- we shouldn't see calls to
1499    * them since they should all be inlined before we get to ir_to_mesa.
1500    */
1501   if (strcmp(ir->name, "main") == 0) {
1502      const ir_function_signature *sig;
1503      exec_list empty;
1504
1505      sig = ir->matching_signature(&empty);
1506
1507      assert(sig);
1508
1509      foreach_iter(exec_list_iterator, iter, sig->body) {
1510	 ir_instruction *ir = (ir_instruction *)iter.get();
1511	 this->base_ir = ir;
1512
1513	 ir->accept(this);
1514      }
1515   }
1516}
1517
1518void
1519fs_visitor::visit(ir_function_signature *ir)
1520{
1521   assert(!"not reached");
1522   (void)ir;
1523}
1524
1525fs_inst *
1526fs_visitor::emit(fs_inst inst)
1527{
1528   fs_inst *list_inst = new(mem_ctx) fs_inst;
1529   *list_inst = inst;
1530
1531   list_inst->annotation = this->current_annotation;
1532   list_inst->ir = this->base_ir;
1533
1534   this->instructions.push_tail(list_inst);
1535
1536   return list_inst;
1537}
1538
1539/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
1540void
1541fs_visitor::emit_dummy_fs()
1542{
1543   /* Everyone's favorite color. */
1544   emit(fs_inst(BRW_OPCODE_MOV,
1545		fs_reg(MRF, 2),
1546		fs_reg(1.0f)));
1547   emit(fs_inst(BRW_OPCODE_MOV,
1548		fs_reg(MRF, 3),
1549		fs_reg(0.0f)));
1550   emit(fs_inst(BRW_OPCODE_MOV,
1551		fs_reg(MRF, 4),
1552		fs_reg(1.0f)));
1553   emit(fs_inst(BRW_OPCODE_MOV,
1554		fs_reg(MRF, 5),
1555		fs_reg(0.0f)));
1556
1557   fs_inst *write;
1558   write = emit(fs_inst(FS_OPCODE_FB_WRITE,
1559			fs_reg(0),
1560			fs_reg(0)));
1561}
1562
1563/* The register location here is relative to the start of the URB
1564 * data.  It will get adjusted to be a real location before
1565 * generate_code() time.
1566 */
1567struct brw_reg
1568fs_visitor::interp_reg(int location, int channel)
1569{
1570   int regnr = location * 2 + channel / 2;
1571   int stride = (channel & 1) * 4;
1572
1573   return brw_vec1_grf(regnr, stride);
1574}
1575
/** Emits the interpolation for the varying inputs. */
void
fs_visitor::emit_interpolation_setup()
{
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);

   this->current_annotation = "compute pixel centers";
   this->pixel_x = fs_reg(this, glsl_type::uint_type);
   this->pixel_y = fs_reg(this, glsl_type::uint_type);
   this->pixel_x.type = BRW_REGISTER_TYPE_UW;
   this->pixel_y.type = BRW_REGISTER_TYPE_UW;
   /* The immediate vector constants supply the per-pixel X/Y offsets
    * within the subspans; g1 presumably carries the subspan origins from
    * the thread payload -- TODO confirm against the PRM payload layout.
    */
   emit(fs_inst(BRW_OPCODE_ADD,
		this->pixel_x,
		fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
		fs_reg(brw_imm_v(0x10101010))));
   emit(fs_inst(BRW_OPCODE_ADD,
		this->pixel_y,
		fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
		fs_reg(brw_imm_v(0x11001100))));

   this->current_annotation = "compute pixel deltas from v0";
   this->delta_x = fs_reg(this, glsl_type::float_type);
   this->delta_y = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(BRW_OPCODE_ADD,
		this->delta_x,
		this->pixel_x,
		fs_reg(negate(brw_vec1_grf(1, 0)))));
   emit(fs_inst(BRW_OPCODE_ADD,
		this->delta_y,
		this->pixel_y,
		fs_reg(negate(brw_vec1_grf(1, 1)))));

   this->current_annotation = "compute pos.w and 1/pos.w";
   /* Compute wpos.w.  It's always in our setup, since it's needed to
    * interpolate the other attributes.
    */
   this->wpos_w = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y,
		interp_reg(FRAG_ATTRIB_WPOS, 3)));
   /* Compute the pixel 1/W value from wpos.w. */
   this->pixel_w = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(FS_OPCODE_RCP, this->pixel_w, wpos_w));
   this->current_annotation = NULL;
}
1620
/** Assembles the framebuffer-write message payload and emits one
 * FB_WRITE per color region (plus a header-only write when there are
 * no color regions), marking the last write with EOT.
 */
void
fs_visitor::emit_fb_writes()
{
   this->current_annotation = "FB write header";
   int nr = 0;

   /* m0, m1 header */
   nr += 2;

   if (c->key.aa_dest_stencil_reg) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
		   fs_reg(brw_vec8_grf(c->key.aa_dest_stencil_reg, 0))));
   }

   /* Reserve space for color. It'll be filled in per MRT below. */
   int color_mrf = nr;
   nr += 4;

   if (c->key.source_depth_to_render_target) {
      if (c->key.computes_depth) {
	 /* Hand over gl_FragDepth. */
	 assert(this->frag_depth);
	 fs_reg depth = *(variable_storage(this->frag_depth));

	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), depth));
      } else {
	 /* Pass through the payload depth. */
	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
		      fs_reg(brw_vec8_grf(c->key.source_depth_reg, 0))));
      }
   }

   if (c->key.dest_depth_reg) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
		   fs_reg(brw_vec8_grf(c->key.dest_depth_reg, 0))));
   }

   fs_reg color = reg_undef;
   if (this->frag_color)
      color = *(variable_storage(this->frag_color));
   else if (this->frag_data)
      color = *(variable_storage(this->frag_data));

   for (int target = 0; target < c->key.nr_color_regions; target++) {
      this->current_annotation = talloc_asprintf(this->mem_ctx,
						 "FB write target %d",
						 target);
      if (this->frag_color || this->frag_data) {
	 for (int i = 0; i < 4; i++) {
	    emit(fs_inst(BRW_OPCODE_MOV,
			 fs_reg(MRF, color_mrf + i),
			 color));
	    color.reg_offset++;
	 }
      }

      /* gl_FragColor replicates the same value to every target, so rewind
       * its offset; gl_FragData keeps advancing to the next target's color.
       */
      if (this->frag_color)
	 color.reg_offset -= 4;

      fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
				   reg_undef, reg_undef));
      inst->target = target;
      inst->mlen = nr;
      /* Only the final write may terminate the thread. */
      if (target == c->key.nr_color_regions - 1)
	 inst->eot = true;
   }

   if (c->key.nr_color_regions == 0) {
      /* Even with no color buffers bound, a write must be sent to
       * terminate the thread.
       */
      fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
				   reg_undef, reg_undef));
      inst->mlen = nr;
      inst->eot = true;
   }

   this->current_annotation = NULL;
}
1697
/** Generates the actual framebuffer-write send for an FS_OPCODE_FB_WRITE. */
void
fs_visitor::generate_fb_write(fs_inst *inst)
{
   GLboolean eot = inst->eot;

   /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
    * move, here's g1.
    */
   brw_push_insn_state(p);
   /* The header copy must cover all channels regardless of execution mask. */
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_MOV(p,
	   brw_message_reg(1),
	   brw_vec8_grf(1, 0));
   brw_pop_insn_state(p);

   brw_fb_WRITE(p,
		8, /* dispatch_width */
		retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
		0, /* base MRF */
		retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
		inst->target,
		inst->mlen,
		0,
		eot);
}
1724
1725void
1726fs_visitor::generate_linterp(fs_inst *inst,
1727			     struct brw_reg dst, struct brw_reg *src)
1728{
1729   struct brw_reg delta_x = src[0];
1730   struct brw_reg delta_y = src[1];
1731   struct brw_reg interp = src[2];
1732
1733   if (brw->has_pln &&
1734       delta_y.nr == delta_x.nr + 1 &&
1735       (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
1736      brw_PLN(p, dst, interp, delta_x);
1737   } else {
1738      brw_LINE(p, brw_null_reg(), interp, delta_x);
1739      brw_MAC(p, dst, suboffset(interp, 1), delta_y);
1740   }
1741}
1742
/** Generates a send to the math function unit for the transcendental
 * opcodes, translating the FS opcode to the hardware function number.
 */
void
fs_visitor::generate_math(fs_inst *inst,
			  struct brw_reg dst, struct brw_reg *src)
{
   int op;

   switch (inst->opcode) {
   case FS_OPCODE_RCP:
      op = BRW_MATH_FUNCTION_INV;
      break;
   case FS_OPCODE_RSQ:
      op = BRW_MATH_FUNCTION_RSQ;
      break;
   case FS_OPCODE_SQRT:
      op = BRW_MATH_FUNCTION_SQRT;
      break;
   case FS_OPCODE_EXP2:
      op = BRW_MATH_FUNCTION_EXP;
      break;
   case FS_OPCODE_LOG2:
      op = BRW_MATH_FUNCTION_LOG;
      break;
   case FS_OPCODE_POW:
      op = BRW_MATH_FUNCTION_POW;
      break;
   case FS_OPCODE_SIN:
      op = BRW_MATH_FUNCTION_SIN;
      break;
   case FS_OPCODE_COS:
      op = BRW_MATH_FUNCTION_COS;
      break;
   default:
      assert(!"not reached: unknown math function");
      op = 0;
      break;
   }

   /* POW is the only two-operand function; its second operand must be
    * staged in the message register following the first (m3, given the
    * message register number 2 passed to brw_math below).
    */
   if (inst->opcode == FS_OPCODE_POW) {
      brw_MOV(p, brw_message_reg(3), src[1]);
   }

   brw_math(p, dst,
	    op,
	    inst->saturate ? BRW_MATH_SATURATE_SATURATE :
	    BRW_MATH_SATURATE_NONE,
	    2, src[0],
	    BRW_MATH_DATA_VECTOR,
	    BRW_MATH_PRECISION_FULL);
}
1792
1793void
1794fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
1795{
1796   int msg_type = -1;
1797   int rlen = 4;
1798
1799   if (intel->gen == 5) {
1800      switch (inst->opcode) {
1801      case FS_OPCODE_TEX:
1802	 if (inst->shadow_compare) {
1803	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5;
1804	 } else {
1805	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5;
1806	 }
1807	 break;
1808      case FS_OPCODE_TXB:
1809	 if (inst->shadow_compare) {
1810	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5;
1811	 } else {
1812	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5;
1813	 }
1814	 break;
1815      }
1816   } else {
1817      switch (inst->opcode) {
1818      case FS_OPCODE_TEX:
1819	 /* Note that G45 and older determines shadow compare and dispatch width
1820	  * from message length for most messages.
1821	  */
1822	 if (inst->shadow_compare) {
1823	    msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
1824	 } else {
1825	    msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
1826	 }
1827      case FS_OPCODE_TXB:
1828	 if (inst->shadow_compare) {
1829	    assert(!"FINISHME: shadow compare with bias.");
1830	    msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
1831	 } else {
1832	    msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
1833	    rlen = 8;
1834	 }
1835	 break;
1836      }
1837   }
1838   assert(msg_type != -1);
1839
1840   /* g0 header. */
1841   src.nr--;
1842
1843   brw_SAMPLE(p,
1844	      retype(dst, BRW_REGISTER_TYPE_UW),
1845	      src.nr,
1846	      retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
1847              SURF_INDEX_TEXTURE(inst->sampler),
1848	      inst->sampler,
1849	      WRITEMASK_XYZW,
1850	      msg_type,
1851	      rlen,
1852	      inst->mlen + 1,
1853	      0,
1854	      1,
1855	      BRW_SAMPLER_SIMD_MODE_SIMD8);
1856}
1857
1858
1859/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
1860 * looking like:
1861 *
1862 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
1863 *
1864 * and we're trying to produce:
1865 *
1866 *           DDX                     DDY
1867 * dst: (ss0.tr - ss0.tl)     (ss0.tl - ss0.bl)
1868 *      (ss0.tr - ss0.tl)     (ss0.tr - ss0.br)
1869 *      (ss0.br - ss0.bl)     (ss0.tl - ss0.bl)
1870 *      (ss0.br - ss0.bl)     (ss0.tr - ss0.br)
1871 *      (ss1.tr - ss1.tl)     (ss1.tl - ss1.bl)
1872 *      (ss1.tr - ss1.tl)     (ss1.tr - ss1.br)
1873 *      (ss1.br - ss1.bl)     (ss1.tl - ss1.bl)
1874 *      (ss1.br - ss1.bl)     (ss1.tr - ss1.br)
1875 *
1876 * and add another set of two more subspans if in 16-pixel dispatch mode.
1877 *
1878 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
1879 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
1880 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
1881 * between each other.  We could probably do it like ddx and swizzle the right
1882 * order later, but bail for now and just produce
1883 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
1884 */
1885void
1886fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
1887{
1888   struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
1889				 BRW_REGISTER_TYPE_F,
1890				 BRW_VERTICAL_STRIDE_2,
1891				 BRW_WIDTH_2,
1892				 BRW_HORIZONTAL_STRIDE_0,
1893				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
1894   struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
1895				 BRW_REGISTER_TYPE_F,
1896				 BRW_VERTICAL_STRIDE_2,
1897				 BRW_WIDTH_2,
1898				 BRW_HORIZONTAL_STRIDE_0,
1899				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
1900   brw_ADD(p, dst, src0, negate(src1));
1901}
1902
1903void
1904fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
1905{
1906   struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
1907				 BRW_REGISTER_TYPE_F,
1908				 BRW_VERTICAL_STRIDE_4,
1909				 BRW_WIDTH_4,
1910				 BRW_HORIZONTAL_STRIDE_0,
1911				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
1912   struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
1913				 BRW_REGISTER_TYPE_F,
1914				 BRW_VERTICAL_STRIDE_4,
1915				 BRW_WIDTH_4,
1916				 BRW_HORIZONTAL_STRIDE_0,
1917				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
1918   brw_ADD(p, dst, src0, negate(src1));
1919}
1920
/** Generates the discard: ANDs the inverted channel mask into the g0
 * header -- presumably disabling the killed pixels for the rest of the
 * thread, including the eventual FB write.  NOTE(review): confirm the
 * IMASK semantics against the hardware documentation.
 */
void
fs_visitor::generate_discard(fs_inst *inst, struct brw_reg temp)
{
   struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
   temp = brw_uw1_reg(temp.file, temp.nr, 0);

   brw_push_insn_state(p);
   /* The mask update must happen regardless of the channel enables. */
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_NOT(p, temp, brw_mask_reg(1)); /* IMASK */
   brw_AND(p, g0, temp, g0);
   brw_pop_insn_state(p);
}
1933
/** Lays out the push-constant (CURBE) registers and rewrites UNIFORM-file
 * operands to the fixed hardware registers they land in.
 */
void
fs_visitor::assign_curb_setup()
{
   c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
   /* Eight constants pack per register. */
   c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8;

   if (intel->gen == 5 && (c->prog_data.first_curbe_grf +
			   c->prog_data.curb_read_length) & 1) {
      /* Align the start of the interpolation coefficients so that we can use
       * the PLN instruction.
       */
      c->prog_data.first_curbe_grf++;
   }

   /* Map the offsets in the UNIFORM file to fixed HW regs. */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      for (unsigned int i = 0; i < 3; i++) {
	 if (inst->src[i].file == UNIFORM) {
	    int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
	    struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf +
						  constant_nr / 8,
						  constant_nr % 8);

	    inst->src[i].file = FIXED_HW_REG;
	    inst->src[i].fixed_hw_reg = brw_reg;
	 }
      }
   }
}
1965
/** Lays out the incoming URB setup attributes after the CURBE and rewrites
 * FS_OPCODE_LINTERP's placeholder coefficient registers to the real ones.
 */
void
fs_visitor::assign_urb_setup()
{
   int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length;
   int interp_reg_nr[FRAG_ATTRIB_MAX];

   c->prog_data.urb_read_length = 0;

   /* Figure out where each of the incoming setup attributes lands. */
   for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
      interp_reg_nr[i] = -1;

      /* WPOS is always present (needed for W interpolation); other
       * attributes only get space if the program reads them.
       */
      if (i != FRAG_ATTRIB_WPOS &&
	  !(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i)))
	 continue;

      /* Each attribute is 4 setup channels, each of which is half a reg. */
      interp_reg_nr[i] = urb_start + c->prog_data.urb_read_length;
      c->prog_data.urb_read_length += 2;
   }

   /* Map the register numbers for FS_OPCODE_LINTERP so that it uses
    * the correct setup input.
    */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      if (inst->opcode != FS_OPCODE_LINTERP)
	 continue;

      assert(inst->src[2].file == FIXED_HW_REG);

      /* interp_reg() encoded location*2 (+0/1 for the channel pair) into
       * the placeholder register number; decode and rebase it here.
       */
      int location = inst->src[2].fixed_hw_reg.nr / 2;
      assert(interp_reg_nr[location] != -1);
      inst->src[2].fixed_hw_reg.nr = (interp_reg_nr[location] +
				      (inst->src[2].fixed_hw_reg.nr & 1));
   }

   this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length;
}
2006
2007static void
2008assign_reg(int *reg_hw_locations, fs_reg *reg)
2009{
2010   if (reg->file == GRF && reg->reg != 0) {
2011      reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset;
2012      reg->reg = 0;
2013   }
2014}
2015
/** Trivially assigns each virtual GRF to consecutive hardware registers
 * after the payload, with no liveness analysis or reuse.
 */
void
fs_visitor::assign_regs_trivial()
{
   int last_grf = 0;
   int hw_reg_mapping[this->virtual_grf_next];
   int i;

   /* Node 0 is an unused placeholder; real virtual GRFs start at 1.
    * NOTE(review): the [1] write assumes virtual_grf_next >= 2 -- confirm
    * that at least one virtual GRF is always allocated before this runs.
    */
   hw_reg_mapping[0] = 0;
   hw_reg_mapping[1] = this->first_non_payload_grf;
   for (i = 2; i < this->virtual_grf_next; i++) {
      hw_reg_mapping[i] = (hw_reg_mapping[i - 1] +
			   this->virtual_grf_sizes[i - 1]);
   }
   last_grf = hw_reg_mapping[i - 1] + this->virtual_grf_sizes[i - 1];

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      assign_reg(hw_reg_mapping, &inst->dst);
      assign_reg(hw_reg_mapping, &inst->src[0]);
      assign_reg(hw_reg_mapping, &inst->src[1]);
   }

   this->grf_used = last_grf + 1;
}
2041
2042void
2043fs_visitor::assign_regs()
2044{
2045   int last_grf = 0;
2046   int hw_reg_mapping[this->virtual_grf_next + 1];
2047   int base_reg_count = BRW_MAX_GRF - this->first_non_payload_grf;
2048   int class_sizes[base_reg_count];
2049   int class_count = 0;
2050
2051   calculate_live_intervals();
2052
2053   /* Set up the register classes.
2054    *
2055    * The base registers store a scalar value.  For texture samples,
2056    * we get virtual GRFs composed of 4 contiguous hw register.  For
2057    * structures and arrays, we store them as contiguous larger things
2058    * than that, though we should be able to do better most of the
2059    * time.
2060    */
2061   class_sizes[class_count++] = 1;
2062   for (int r = 1; r < this->virtual_grf_next; r++) {
2063      int i;
2064
2065      for (i = 0; i < class_count; i++) {
2066	 if (class_sizes[i] == this->virtual_grf_sizes[r])
2067	    break;
2068      }
2069      if (i == class_count) {
2070	 class_sizes[class_count++] = this->virtual_grf_sizes[r];
2071      }
2072   }
2073
2074   int ra_reg_count = 0;
2075   int class_base_reg[class_count];
2076   int class_reg_count[class_count];
2077   int classes[class_count];
2078
2079   for (int i = 0; i < class_count; i++) {
2080      class_base_reg[i] = ra_reg_count;
2081      class_reg_count[i] = base_reg_count - (class_sizes[i] - 1);
2082      ra_reg_count += class_reg_count[i];
2083   }
2084
2085   struct ra_regs *regs = ra_alloc_reg_set(ra_reg_count);
2086   for (int i = 0; i < class_count; i++) {
2087      classes[i] = ra_alloc_reg_class(regs);
2088
2089      for (int i_r = 0; i_r < class_reg_count[i]; i_r++) {
2090	 ra_class_add_reg(regs, classes[i], class_base_reg[i] + i_r);
2091      }
2092
2093      /* Add conflicts between our contiguous registers aliasing
2094       * base regs and other register classes' contiguous registers
2095       * that alias base regs, or the base regs themselves for classes[0].
2096       */
2097      for (int c = 0; c <= i; c++) {
2098	 for (int i_r = 0; i_r < class_reg_count[i] - 1; i_r++) {
2099	    for (int c_r = MAX2(0, i_r - (class_sizes[c] - 1));
2100		 c_r <= MIN2(class_reg_count[c] - 1, i_r + class_sizes[i] - 1);
2101		 c_r++) {
2102
2103	       if (0) {
2104		  printf("%d/%d conflicts %d/%d\n",
2105			 class_sizes[i], i_r,
2106			 class_sizes[c], c_r);
2107	       }
2108
2109	       ra_add_reg_conflict(regs,
2110				   class_base_reg[i] + i_r,
2111				   class_base_reg[c] + c_r);
2112	    }
2113	 }
2114      }
2115   }
2116
2117   ra_set_finalize(regs);
2118
2119   struct ra_graph *g = ra_alloc_interference_graph(regs,
2120						    this->virtual_grf_next);
2121   /* Node 0 is just a placeholder to keep virtual_grf[] mapping 1:1
2122    * with nodes.
2123    */
2124   ra_set_node_class(g, 0, classes[0]);
2125
2126   for (int i = 1; i < this->virtual_grf_next; i++) {
2127      for (int c = 0; c < class_count; c++) {
2128	 if (class_sizes[c] == this->virtual_grf_sizes[i]) {
2129	    ra_set_node_class(g, i, classes[c]);
2130	    break;
2131	 }
2132      }
2133
2134      for (int j = 1; j < i; j++) {
2135	 if (virtual_grf_interferes(i, j)) {
2136	    ra_add_node_interference(g, i, j);
2137	 }
2138      }
2139   }
2140
2141   /* FINISHME: Handle spilling */
2142   if (!ra_allocate_no_spills(g)) {
2143      fprintf(stderr, "Failed to allocate registers.\n");
2144      this->fail = true;
2145      return;
2146   }
2147
2148   /* Get the chosen virtual registers for each node, and map virtual
2149    * regs in the register classes back down to real hardware reg
2150    * numbers.
2151    */
2152   hw_reg_mapping[0] = 0; /* unused */
2153   for (int i = 1; i < this->virtual_grf_next; i++) {
2154      int reg = ra_get_node_reg(g, i);
2155      int hw_reg = -1;
2156
2157      for (int c = 0; c < class_count; c++) {
2158	 if (reg >= class_base_reg[c] &&
2159	     reg < class_base_reg[c] + class_reg_count[c] - 1) {
2160	    hw_reg = reg - class_base_reg[c];
2161	    break;
2162	 }
2163      }
2164
2165      assert(hw_reg != -1);
2166      hw_reg_mapping[i] = this->first_non_payload_grf + hw_reg;
2167      last_grf = MAX2(last_grf,
2168		      hw_reg_mapping[i] + this->virtual_grf_sizes[i] - 1);
2169   }
2170
2171   foreach_iter(exec_list_iterator, iter, this->instructions) {
2172      fs_inst *inst = (fs_inst *)iter.get();
2173
2174      assign_reg(hw_reg_mapping, &inst->dst);
2175      assign_reg(hw_reg_mapping, &inst->src[0]);
2176      assign_reg(hw_reg_mapping, &inst->src[1]);
2177   }
2178
2179   this->grf_used = last_grf + 1;
2180
2181   talloc_free(g);
2182   talloc_free(regs);
2183}
2184
/* Computes, for each virtual GRF, the interval [def, use] of instruction
 * indices over which the register is referenced, storing the results in
 * this->virtual_grf_def[] and this->virtual_grf_use[] for use by
 * virtual_grf_interferes().
 *
 * Accesses inside a loop are conservatively credited to the loop head
 * (only the outermost loop is tracked), and uses are extended to the
 * loop end, since a value written in one iteration may be read in the
 * next.
 */
void
fs_visitor::calculate_live_intervals()
{
   int num_vars = this->virtual_grf_next;
   int *def = talloc_array(mem_ctx, int, num_vars);
   int *use = talloc_array(mem_ctx, int, num_vars);
   int loop_depth = 0;
   int loop_start = 0;

   /* Start every interval empty: def after any real ip, use before any. */
   for (int i = 0; i < num_vars; i++) {
      def[i] = 1 << 30;
      use[i] = 0;
   }

   int ip = 0;
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      if (inst->opcode == BRW_OPCODE_DO) {
	 /* Only the outermost loop's start ip is recorded. */
	 if (loop_depth++ == 0)
	    loop_start = ip;
      } else if (inst->opcode == BRW_OPCODE_WHILE) {
	 loop_depth--;

	 if (loop_depth == 0) {
	    /* FINISHME:
	     *
	     * Patches up any vars marked for use within the loop as
	     * live until the end.  This is conservative, as there
	     * will often be variables defined and used inside the
	     * loop but dead at the end of the loop body.
	     */
	    for (int i = 0; i < num_vars; i++) {
	       if (use[i] == loop_start) {
		  use[i] = ip;
	       }
	    }
	 }
      } else {
	 int eip = ip;

	 /* Inside a loop, credit the access to the loop head so the
	  * interval covers the whole loop body.
	  */
	 if (loop_depth)
	    eip = loop_start;

	 /* reg 0 is the placeholder virtual GRF and is never tracked. */
	 for (unsigned int i = 0; i < 3; i++) {
	    if (inst->src[i].file == GRF && inst->src[i].reg != 0) {
	       def[inst->src[i].reg] = MIN2(def[inst->src[i].reg], eip);
	       use[inst->src[i].reg] = MAX2(use[inst->src[i].reg], eip);
	    }
	 }
	 if (inst->dst.file == GRF && inst->dst.reg != 0) {
	    def[inst->dst.reg] = MIN2(def[inst->dst.reg], eip);
	    use[inst->dst.reg] = MAX2(use[inst->dst.reg], eip);
	 }
      }

      ip++;
   }

   this->virtual_grf_def = def;
   this->virtual_grf_use = use;
}
2247
2248bool
2249fs_visitor::virtual_grf_interferes(int a, int b)
2250{
2251   int start = MAX2(this->virtual_grf_def[a], this->virtual_grf_def[b]);
2252   int end = MIN2(this->virtual_grf_use[a], this->virtual_grf_use[b]);
2253
2254   return start <= end;
2255}
2256
/* Translates an fs_reg (the FS IR register representation) into the
 * brw_reg form that the EU instruction emitter consumes.
 *
 * GRF/ARF/MRF registers become 8-wide vector registers (this backend
 * only emits 8-wide code), retyped to the fs_reg's type.  UNIFORM
 * registers must have been lowered away before code generation, so
 * hitting one here is a bug.
 */
static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
{
   struct brw_reg brw_reg;

   switch (reg->file) {
   case GRF:
   case ARF:
   case MRF:
      brw_reg = brw_vec8_reg(reg->file,
			    reg->hw_reg, 0);
      brw_reg = retype(brw_reg, reg->type);
      break;
   case IMM:
      /* The immediate's value lives in the union member matching its type. */
      switch (reg->type) {
      case BRW_REGISTER_TYPE_F:
	 brw_reg = brw_imm_f(reg->imm.f);
	 break;
      case BRW_REGISTER_TYPE_D:
	 brw_reg = brw_imm_d(reg->imm.i);
	 break;
      case BRW_REGISTER_TYPE_UD:
	 brw_reg = brw_imm_ud(reg->imm.u);
	 break;
      default:
	 assert(!"not reached");
	 break;
      }
      break;
   case FIXED_HW_REG:
      /* Already a fully-specified hardware register; pass it through. */
      brw_reg = reg->fixed_hw_reg;
      break;
   case BAD_FILE:
      /* Probably unused. */
      brw_reg = brw_null_reg();
      break;
   case UNIFORM:
      assert(!"not reached");
      brw_reg = brw_null_reg();
      break;
   }
   /* Source modifiers apply regardless of register file. */
   if (reg->abs)
      brw_reg = brw_abs(brw_reg);
   if (reg->negate)
      brw_reg = negate(brw_reg);

   return brw_reg;
}
2304
2305void
2306fs_visitor::generate_code()
2307{
2308   unsigned int annotation_len = 0;
2309   int last_native_inst = 0;
2310   struct brw_instruction *if_stack[16], *loop_stack[16];
2311   int if_stack_depth = 0, loop_stack_depth = 0;
2312   int if_depth_in_loop[16];
2313
2314   if_depth_in_loop[loop_stack_depth] = 0;
2315
2316   memset(&if_stack, 0, sizeof(if_stack));
2317   foreach_iter(exec_list_iterator, iter, this->instructions) {
2318      fs_inst *inst = (fs_inst *)iter.get();
2319      struct brw_reg src[3], dst;
2320
2321      for (unsigned int i = 0; i < 3; i++) {
2322	 src[i] = brw_reg_from_fs_reg(&inst->src[i]);
2323      }
2324      dst = brw_reg_from_fs_reg(&inst->dst);
2325
2326      brw_set_conditionalmod(p, inst->conditional_mod);
2327      brw_set_predicate_control(p, inst->predicated);
2328
2329      switch (inst->opcode) {
2330      case BRW_OPCODE_MOV:
2331	 brw_MOV(p, dst, src[0]);
2332	 break;
2333      case BRW_OPCODE_ADD:
2334	 brw_ADD(p, dst, src[0], src[1]);
2335	 break;
2336      case BRW_OPCODE_MUL:
2337	 brw_MUL(p, dst, src[0], src[1]);
2338	 break;
2339
2340      case BRW_OPCODE_FRC:
2341	 brw_FRC(p, dst, src[0]);
2342	 break;
2343      case BRW_OPCODE_RNDD:
2344	 brw_RNDD(p, dst, src[0]);
2345	 break;
2346      case BRW_OPCODE_RNDZ:
2347	 brw_RNDZ(p, dst, src[0]);
2348	 break;
2349
2350      case BRW_OPCODE_AND:
2351	 brw_AND(p, dst, src[0], src[1]);
2352	 break;
2353      case BRW_OPCODE_OR:
2354	 brw_OR(p, dst, src[0], src[1]);
2355	 break;
2356      case BRW_OPCODE_XOR:
2357	 brw_XOR(p, dst, src[0], src[1]);
2358	 break;
2359
2360      case BRW_OPCODE_CMP:
2361	 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
2362	 break;
2363      case BRW_OPCODE_SEL:
2364	 brw_SEL(p, dst, src[0], src[1]);
2365	 break;
2366
2367      case BRW_OPCODE_IF:
2368	 assert(if_stack_depth < 16);
2369	 if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8);
2370	 if_depth_in_loop[loop_stack_depth]++;
2371	 if_stack_depth++;
2372	 break;
2373      case BRW_OPCODE_ELSE:
2374	 if_stack[if_stack_depth - 1] =
2375	    brw_ELSE(p, if_stack[if_stack_depth - 1]);
2376	 break;
2377      case BRW_OPCODE_ENDIF:
2378	 if_stack_depth--;
2379	 brw_ENDIF(p , if_stack[if_stack_depth]);
2380	 if_depth_in_loop[loop_stack_depth]--;
2381	 break;
2382
2383      case BRW_OPCODE_DO:
2384	 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
2385	 if_depth_in_loop[loop_stack_depth] = 0;
2386	 break;
2387
2388      case BRW_OPCODE_BREAK:
2389	 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
2390	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2391	 break;
2392      case BRW_OPCODE_CONTINUE:
2393	 brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
2394	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2395	 break;
2396
2397      case BRW_OPCODE_WHILE: {
2398	 struct brw_instruction *inst0, *inst1;
2399	 GLuint br = 1;
2400
2401	 if (intel->gen == 5)
2402	    br = 2;
2403
2404	 assert(loop_stack_depth > 0);
2405	 loop_stack_depth--;
2406	 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
2407	 /* patch all the BREAK/CONT instructions from last BGNLOOP */
2408	 while (inst0 > loop_stack[loop_stack_depth]) {
2409	    inst0--;
2410	    if (inst0->header.opcode == BRW_OPCODE_BREAK &&
2411		inst0->bits3.if_else.jump_count == 0) {
2412	       inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
2413	    }
2414	    else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
2415		     inst0->bits3.if_else.jump_count == 0) {
2416	       inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
2417	    }
2418	 }
2419      }
2420	 break;
2421
2422      case FS_OPCODE_RCP:
2423      case FS_OPCODE_RSQ:
2424      case FS_OPCODE_SQRT:
2425      case FS_OPCODE_EXP2:
2426      case FS_OPCODE_LOG2:
2427      case FS_OPCODE_POW:
2428      case FS_OPCODE_SIN:
2429      case FS_OPCODE_COS:
2430	 generate_math(inst, dst, src);
2431	 break;
2432      case FS_OPCODE_LINTERP:
2433	 generate_linterp(inst, dst, src);
2434	 break;
2435      case FS_OPCODE_TEX:
2436      case FS_OPCODE_TXB:
2437      case FS_OPCODE_TXL:
2438	 generate_tex(inst, dst, src[0]);
2439	 break;
2440      case FS_OPCODE_DISCARD:
2441	 generate_discard(inst, dst /* src0 == dst */);
2442	 break;
2443      case FS_OPCODE_DDX:
2444	 generate_ddx(inst, dst, src[0]);
2445	 break;
2446      case FS_OPCODE_DDY:
2447	 generate_ddy(inst, dst, src[0]);
2448	 break;
2449      case FS_OPCODE_FB_WRITE:
2450	 generate_fb_write(inst);
2451	 break;
2452      default:
2453	 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
2454	    _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
2455			  brw_opcodes[inst->opcode].name);
2456	 } else {
2457	    _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
2458	 }
2459	 this->fail = true;
2460      }
2461
2462      if (annotation_len < p->nr_insn) {
2463	 annotation_len *= 2;
2464	 if (annotation_len < 16)
2465	    annotation_len = 16;
2466
2467	 this->annotation_string = talloc_realloc(this->mem_ctx,
2468						  annotation_string,
2469						  const char *,
2470						  annotation_len);
2471	 this->annotation_ir = talloc_realloc(this->mem_ctx,
2472					      annotation_ir,
2473					      ir_instruction *,
2474					      annotation_len);
2475      }
2476
2477      for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
2478	 this->annotation_string[i] = inst->annotation;
2479	 this->annotation_ir[i] = inst->ir;
2480      }
2481      last_native_inst = p->nr_insn;
2482   }
2483}
2484
/* Top-level entry point for the GLSL-IR-based fragment shader backend:
 * finds the linked fragment shader, runs fs_visitor over its IR to build
 * FS IR, assigns registers, and generates native code into c->func.
 *
 * Returns GL_FALSE to fall back to the old brw_wm codegen path: when no
 * GLSL program is bound, the new FS path is disabled (using_new_fs), or
 * the program has no fragment shader.
 */
GLboolean
brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
{
   struct brw_compile *p = &c->func;
   struct intel_context *intel = &brw->intel;
   GLcontext *ctx = &intel->ctx;
   struct brw_shader *shader = NULL;
   struct gl_shader_program *prog = ctx->Shader.CurrentProgram;

   if (!prog)
      return GL_FALSE;

   if (!using_new_fs)
      return GL_FALSE;

   /* Find the fragment shader among the linked shaders, if any. */
   for (unsigned int i = 0; i < prog->_NumLinkedShaders; i++) {
      if (prog->_LinkedShaders[i]->Type == GL_FRAGMENT_SHADER) {
	 shader = (struct brw_shader *)prog->_LinkedShaders[i];
	 break;
      }
   }
   if (!shader)
      return GL_FALSE;

   /* We always use 8-wide mode, at least for now.  For one, flow
    * control only works in 8-wide.  Also, when we're fragment shader
    * bound, we're almost always under register pressure as well, so
    * 8-wide would save us from the performance cliff of spilling
    * regs.
    */
   c->dispatch_width = 8;

   if (INTEL_DEBUG & DEBUG_WM) {
      printf("GLSL IR for native fragment shader %d:\n", prog->Name);
      _mesa_print_ir(shader->ir, NULL);
      printf("\n");
   }

   /* Now the main event: Visit the shader IR and generate our FS IR for it.
    */
   fs_visitor v(c, shader);

   if (0) {
      /* Debug path: emit a trivial shader instead of the real one. */
      v.emit_dummy_fs();
   } else {
      v.emit_interpolation_setup();

      /* Generate FS IR for main().  (the visitor only descends into
       * functions called "main").
       */
      foreach_iter(exec_list_iterator, iter, *shader->ir) {
	 ir_instruction *ir = (ir_instruction *)iter.get();
	 v.base_ir = ir;
	 ir->accept(&v);
      }

      v.emit_fb_writes();
      v.assign_curb_setup();
      v.assign_urb_setup();
      /* Debug switch between the trivial and graph-coloring allocators. */
      if (0)
	 v.assign_regs_trivial();
      else
	 v.assign_regs();
   }

   v.generate_code();

   assert(!v.fail); /* FINISHME: Cleanly fail, tested at link time, etc. */

   if (v.fail)
      return GL_FALSE;

   if (INTEL_DEBUG & DEBUG_WM) {
      const char *last_annotation_string = NULL;
      ir_instruction *last_annotation_ir = NULL;

      /* Disassemble the native code, printing each annotation (IR node
       * and source string) once, when it first changes.
       */
      printf("Native code for fragment shader %d:\n", prog->Name);
      for (unsigned int i = 0; i < p->nr_insn; i++) {
	 if (last_annotation_ir != v.annotation_ir[i]) {
	    last_annotation_ir = v.annotation_ir[i];
	    if (last_annotation_ir) {
	       printf("   ");
	       last_annotation_ir->print();
	       printf("\n");
	    }
	 }
	 if (last_annotation_string != v.annotation_string[i]) {
	    last_annotation_string = v.annotation_string[i];
	    if (last_annotation_string)
	       printf("   %s\n", last_annotation_string);
	 }
	 brw_disasm(stdout, &p->store[i], intel->gen);
      }
      printf("\n");
   }

   c->prog_data.total_grf = v.grf_used;
   c->prog_data.total_scratch = 0;

   return GL_TRUE;
}
2586