/* brw_fs.cpp, revision 4fb0c92c6986cf4e88296bab8837320210f1794f */
/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

extern "C" {

#include <sys/types.h>

#include "main/macros.h"
#include "main/shaderobj.h"
#include "main/uniforms.h"
#include "program/prog_parameter.h"
#include "program/prog_print.h"
#include "program/prog_optimize.h"
#include "program/register_allocate.h"
#include "program/sampler.h"
#include "program/hash_table.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_wm.h"
#include "talloc.h"
}
#include "../glsl/glsl_types.h"
#include "../glsl/ir_optimization.h"
#include "../glsl/ir_print_visitor.h"

enum register_file {
   ARF = BRW_ARCHITECTURE_REGISTER_FILE,
   GRF = BRW_GENERAL_REGISTER_FILE,
   MRF = BRW_MESSAGE_REGISTER_FILE,
   IMM = BRW_IMMEDIATE_VALUE,
   FIXED_HW_REG, /* a struct brw_reg */
   UNIFORM, /* prog_data->param[hw_reg] */
   BAD_FILE
};

enum fs_opcodes {
   FS_OPCODE_FB_WRITE = 256,
   FS_OPCODE_RCP,
   FS_OPCODE_RSQ,
   FS_OPCODE_SQRT,
   FS_OPCODE_EXP2,
   FS_OPCODE_LOG2,
   FS_OPCODE_POW,
   FS_OPCODE_SIN,
   FS_OPCODE_COS,
   FS_OPCODE_DDX,
   FS_OPCODE_DDY,
   FS_OPCODE_LINTERP,
   FS_OPCODE_TEX,
   FS_OPCODE_TXB,
   FS_OPCODE_TXL,
   FS_OPCODE_DISCARD,
};
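
/* Editor's note: the virtual opcodes start at 256 so they can share
 * fs_inst::opcode with the hardware BRW_OPCODE_* values (all well below
 * 256) without colliding; generate_code() can then switch over both
 * kinds in one place.
 */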

static int using_new_fs = -1;
static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg);

struct gl_shader *
brw_new_shader(GLcontext *ctx, GLuint name, GLuint type)
{
   struct brw_shader *shader;

   shader = talloc_zero(NULL, struct brw_shader);
   if (shader) {
      shader->base.Type = type;
      shader->base.Name = name;
      _mesa_init_shader(ctx, &shader->base);
   }

   return &shader->base;
}

struct gl_shader_program *
brw_new_shader_program(GLcontext *ctx, GLuint name)
{
   struct brw_shader_program *prog;
   prog = talloc_zero(NULL, struct brw_shader_program);
   if (prog) {
      prog->base.Name = name;
      _mesa_init_shader_program(ctx, &prog->base);
   }
   return &prog->base;
}

GLboolean
brw_compile_shader(GLcontext *ctx, struct gl_shader *shader)
{
   if (!_mesa_ir_compile_shader(ctx, shader))
      return GL_FALSE;

   return GL_TRUE;
}

GLboolean
brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
{
   if (using_new_fs == -1)
      using_new_fs = getenv("INTEL_NEW_FS") != NULL;

   for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) {
      struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[i];

      if (using_new_fs && shader->base.Type == GL_FRAGMENT_SHADER) {
	 void *mem_ctx = talloc_new(NULL);
	 bool progress;

	 if (shader->ir)
	    talloc_free(shader->ir);
	 shader->ir = new(shader) exec_list;
	 clone_ir_list(mem_ctx, shader->ir, shader->base.ir);

	 do_mat_op_to_vec(shader->ir);
	 do_mod_to_fract(shader->ir);
	 do_div_to_mul_rcp(shader->ir);
	 do_sub_to_add_neg(shader->ir);
	 do_explog_to_explog2(shader->ir);
	 do_lower_texture_projection(shader->ir);

	 do {
	    progress = false;

	    brw_do_channel_expressions(shader->ir);
	    brw_do_vector_splitting(shader->ir);

	    progress = do_lower_jumps(shader->ir, true, true,
				      true, /* main return */
				      false, /* continue */
				      false /* loops */
				      ) || progress;

	    progress = do_common_optimization(shader->ir, true, 32) || progress;

	    progress = lower_noise(shader->ir) || progress;
	    progress =
	       lower_variable_index_to_cond_assign(shader->ir,
						   GL_TRUE, /* input */
						   GL_TRUE, /* output */
						   GL_TRUE, /* temp */
						   GL_TRUE /* uniform */
						   ) || progress;
	 } while (progress);

	 validate_ir_tree(shader->ir);

	 reparent_ir(shader->ir, shader->ir);
	 talloc_free(mem_ctx);
      }
   }

   if (!_mesa_ir_link_shader(ctx, prog))
      return GL_FALSE;

   return GL_TRUE;
}

static int
type_size(const struct glsl_type *type)
{
   unsigned int size, i;

   switch (type->base_type) {
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_BOOL:
      return type->components();
   case GLSL_TYPE_ARRAY:
      return type_size(type->fields.array) * type->length;
   case GLSL_TYPE_STRUCT:
      size = 0;
      for (i = 0; i < type->length; i++) {
	 size += type_size(type->fields.structure[i].type);
      }
      return size;
   case GLSL_TYPE_SAMPLER:
      /* Samplers take up no register space, since they're baked in at
       * link time.
       */
      return 0;
   default:
      assert(!"not reached");
      return 0;
   }
}
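
/* Editor's note, a few worked examples of the sizing above (in scalar
 * slots): float -> 1, vec4 -> 4, mat3 -> 9 (components() covers the
 * matrix case), float[10] -> 10, struct { vec3 a; float b; } -> 4.
 */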

class fs_reg {
public:
   /* Callers of this talloc-based new need not call delete. It's
    * easier to just talloc_free 'ctx' (or any of its ancestors). */
   static void* operator new(size_t size, void *ctx)
   {
      void *node;

      node = talloc_size(ctx, size);
      assert(node != NULL);

      return node;
   }

   void init()
   {
      this->reg = 0;
      this->reg_offset = 0;
      this->negate = 0;
      this->abs = 0;
      this->hw_reg = -1;
   }

   /** Generic unset register constructor. */
   fs_reg()
   {
      init();
      this->file = BAD_FILE;
   }

   /** Immediate value constructor. */
   fs_reg(float f)
   {
      init();
      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_F;
      this->imm.f = f;
   }

   /** Immediate value constructor. */
   fs_reg(int32_t i)
   {
      init();
      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_D;
      this->imm.i = i;
   }

   /** Immediate value constructor. */
   fs_reg(uint32_t u)
   {
      init();
      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_UD;
      this->imm.u = u;
   }

   /** Fixed brw_reg Immediate value constructor. */
   fs_reg(struct brw_reg fixed_hw_reg)
   {
      init();
      this->file = FIXED_HW_REG;
      this->fixed_hw_reg = fixed_hw_reg;
      this->type = fixed_hw_reg.type;
   }

   fs_reg(enum register_file file, int hw_reg);
   fs_reg(class fs_visitor *v, const struct glsl_type *type);

   /** Register file: ARF, GRF, MRF, IMM. */
   enum register_file file;
   /** virtual register number.  0 = fixed hw reg */
   int reg;
   /** Offset within the virtual register. */
   int reg_offset;
   /** HW register number.  Generally unset until register allocation. */
   int hw_reg;
   /** Register type.  BRW_REGISTER_TYPE_* */
   int type;
   bool negate;
   bool abs;
   struct brw_reg fixed_hw_reg;
   /** Value for file == IMM */
   union {
      int32_t i;
      uint32_t u;
      float f;
   } imm;
};

static const fs_reg reg_undef;
static const fs_reg reg_null(ARF, BRW_ARF_NULL);

class fs_inst : public exec_node {
public:
   /* Callers of this talloc-based new need not call delete. It's
    * easier to just talloc_free 'ctx' (or any of its ancestors). */
   static void* operator new(size_t size, void *ctx)
   {
      void *node;

      node = talloc_zero_size(ctx, size);
      assert(node != NULL);

      return node;
   }

   void init()
   {
      this->opcode = BRW_OPCODE_NOP;
      this->saturate = false;
      this->conditional_mod = BRW_CONDITIONAL_NONE;
      this->predicated = false;
      this->sampler = 0;
      this->target = 0;
      this->eot = false;
      this->shadow_compare = false;
   }

   fs_inst()
   {
      init();
   }

   fs_inst(int opcode)
   {
      init();
      this->opcode = opcode;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
      this->src[1] = src1;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
      this->src[1] = src1;
      this->src[2] = src2;
   }

   int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
   fs_reg dst;
   fs_reg src[3];
   bool saturate;
   bool predicated;
   int conditional_mod; /**< BRW_CONDITIONAL_* */

   int mlen; /**< SEND message length */
   int sampler;
   int target; /**< MRT target. */
   bool eot;
   bool shadow_compare;

   /** @{
    * Annotation for the generated IR.  One of the two can be set.
    */
   ir_instruction *ir;
   const char *annotation;
   /** @} */
};

class fs_visitor : public ir_visitor
{
public:

   fs_visitor(struct brw_wm_compile *c, struct brw_shader *shader)
   {
      this->c = c;
      this->p = &c->func;
      this->brw = p->brw;
      this->fp = brw->fragment_program;
      this->intel = &brw->intel;
      this->ctx = &intel->ctx;
      this->mem_ctx = talloc_new(NULL);
      this->shader = shader;
      this->fail = false;
      this->variable_ht = hash_table_ctor(0,
					  hash_table_pointer_hash,
					  hash_table_pointer_compare);

      this->frag_color = NULL;
      this->frag_data = NULL;
      this->frag_depth = NULL;
      this->first_non_payload_grf = 0;

      this->current_annotation = NULL;
      this->annotation_string = NULL;
      this->annotation_ir = NULL;
      this->base_ir = NULL;

      this->virtual_grf_sizes = NULL;
      this->virtual_grf_next = 1;
      this->virtual_grf_array_size = 0;
      this->virtual_grf_def = NULL;
      this->virtual_grf_use = NULL;
   }
   ~fs_visitor()
   {
      talloc_free(this->mem_ctx);
      hash_table_dtor(this->variable_ht);
   }

   fs_reg *variable_storage(ir_variable *var);
   int virtual_grf_alloc(int size);

   void visit(ir_variable *ir);
   void visit(ir_assignment *ir);
   void visit(ir_dereference_variable *ir);
   void visit(ir_dereference_record *ir);
   void visit(ir_dereference_array *ir);
   void visit(ir_expression *ir);
   void visit(ir_texture *ir);
   void visit(ir_if *ir);
   void visit(ir_constant *ir);
   void visit(ir_swizzle *ir);
   void visit(ir_return *ir);
   void visit(ir_loop *ir);
   void visit(ir_loop_jump *ir);
   void visit(ir_discard *ir);
   void visit(ir_call *ir);
   void visit(ir_function *ir);
   void visit(ir_function_signature *ir);

   fs_inst *emit(fs_inst inst);
   void assign_curb_setup();
   void calculate_urb_setup();
   void assign_urb_setup();
   void assign_regs();
   void assign_regs_trivial();
   void calculate_live_intervals();
   bool virtual_grf_interferes(int a, int b);
   void generate_code();
   void generate_fb_write(fs_inst *inst);
   void generate_linterp(fs_inst *inst, struct brw_reg dst,
			 struct brw_reg *src);
   void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
   void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src);
   void generate_discard(fs_inst *inst, struct brw_reg temp);
   void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
   void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src);

   void emit_dummy_fs();
   void emit_fragcoord_interpolation(ir_variable *ir);
   void emit_general_interpolation(ir_variable *ir);
   void emit_interpolation_setup_gen4();
   void emit_interpolation_setup_gen6();
   fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate);
   fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate);
   void emit_fb_writes();
   void emit_assignment_writes(fs_reg &l, fs_reg &r,
			       const glsl_type *type, bool predicated);

   struct brw_reg interp_reg(int location, int channel);
   int setup_uniform_values(int loc, const glsl_type *type);
   void setup_builtin_uniform_values(ir_variable *ir);

   struct brw_context *brw;
   const struct gl_fragment_program *fp;
   struct intel_context *intel;
   GLcontext *ctx;
   struct brw_wm_compile *c;
   struct brw_compile *p;
   struct brw_shader *shader;
   void *mem_ctx;
   exec_list instructions;

   int *virtual_grf_sizes;
   int virtual_grf_next;
   int virtual_grf_array_size;
   int *virtual_grf_def;
   int *virtual_grf_use;

   struct hash_table *variable_ht;
   ir_variable *frag_color, *frag_data, *frag_depth;
   int first_non_payload_grf;
   int urb_setup[FRAG_ATTRIB_MAX];

   /** @{ debug annotation info */
   const char *current_annotation;
   ir_instruction *base_ir;
   const char **annotation_string;
   ir_instruction **annotation_ir;
   /** @} */

   bool fail;

   /* Result of last visit() method. */
   fs_reg result;

   fs_reg pixel_x;
   fs_reg pixel_y;
   fs_reg wpos_w;
   fs_reg pixel_w;
   fs_reg delta_x;
   fs_reg delta_y;

   int grf_used;

};

int
fs_visitor::virtual_grf_alloc(int size)
{
   if (virtual_grf_array_size <= virtual_grf_next) {
      if (virtual_grf_array_size == 0)
	 virtual_grf_array_size = 16;
      else
	 virtual_grf_array_size *= 2;
      virtual_grf_sizes = talloc_realloc(mem_ctx, virtual_grf_sizes,
					 int, virtual_grf_array_size);

      /* This slot is always unused. */
      virtual_grf_sizes[0] = 0;
   }
   virtual_grf_sizes[virtual_grf_next] = size;
   return virtual_grf_next++;
}
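
/* Editor's note: virtual GRF numbering starts at 1 because
 * fs_reg::reg == 0 is reserved to mean "fixed hardware register" (see
 * the field comment in fs_reg), which is why virtual_grf_next is
 * initialized to 1 and slot 0 of virtual_grf_sizes stays empty.
 */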

/** Fixed HW reg constructor. */
fs_reg::fs_reg(enum register_file file, int hw_reg)
{
   init();
   this->file = file;
   this->hw_reg = hw_reg;
   this->type = BRW_REGISTER_TYPE_F;
}

int
brw_type_for_base_type(const struct glsl_type *type)
{
   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
      return BRW_REGISTER_TYPE_F;
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      return BRW_REGISTER_TYPE_D;
   case GLSL_TYPE_UINT:
      return BRW_REGISTER_TYPE_UD;
   case GLSL_TYPE_ARRAY:
   case GLSL_TYPE_STRUCT:
      /* These should be overridden with the type of the member when
       * dereferenced into.  BRW_REGISTER_TYPE_UD seems like a likely
       * way to trip up if we don't.
       */
      return BRW_REGISTER_TYPE_UD;
   default:
      assert(!"not reached");
      return BRW_REGISTER_TYPE_F;
   }
}

/** Automatic reg constructor. */
fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type)
{
   init();

   this->file = GRF;
   this->reg = v->virtual_grf_alloc(type_size(type));
   this->reg_offset = 0;
   this->type = brw_type_for_base_type(type);
}

fs_reg *
fs_visitor::variable_storage(ir_variable *var)
{
   return (fs_reg *)hash_table_find(this->variable_ht, var);
}

/* Our support for uniforms is piggy-backed on the struct
 * gl_fragment_program, because that's where the values actually
 * get stored, rather than in some global gl_shader_program uniform
 * store.
 */
int
fs_visitor::setup_uniform_values(int loc, const glsl_type *type)
{
   unsigned int offset = 0;
   float *vec_values;

   if (type->is_matrix()) {
      const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
							type->vector_elements,
							1);

      for (unsigned int i = 0; i < type->matrix_columns; i++) {
	 offset += setup_uniform_values(loc + offset, column);
      }

      return offset;
   }

   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      vec_values = fp->Base.Parameters->ParameterValues[loc];
      for (unsigned int i = 0; i < type->vector_elements; i++) {
	 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i];
      }
      return 1;

   case GLSL_TYPE_STRUCT:
      for (unsigned int i = 0; i < type->length; i++) {
	 offset += setup_uniform_values(loc + offset,
					type->fields.structure[i].type);
      }
      return offset;

   case GLSL_TYPE_ARRAY:
      for (unsigned int i = 0; i < type->length; i++) {
	 offset += setup_uniform_values(loc + offset, type->fields.array);
      }
      return offset;

   case GLSL_TYPE_SAMPLER:
      /* The sampler takes up a slot, but we don't use any values from it. */
      return 1;

   default:
      assert(!"not reached");
      return 0;
   }
}
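
/* Editor's note, a worked example of the recursion above: a mat2
 * uniform recurses once per column, and each vec2 column appends two
 * float pointers to c->prog_data.param while advancing the parameter
 * location by 1 -- so the matrix lands as 4 contiguous param entries
 * spanning 2 parameter locations.
 */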


/* Our support for builtin uniforms is even scarier than non-builtin.
 * It sits on top of the PROG_STATE_VAR parameters that are
 * automatically updated from GL context state.
 */
void
fs_visitor::setup_builtin_uniform_values(ir_variable *ir)
{
   const struct gl_builtin_uniform_desc *statevar = NULL;

   for (unsigned int i = 0; _mesa_builtin_uniform_desc[i].name; i++) {
      statevar = &_mesa_builtin_uniform_desc[i];
      if (strcmp(ir->name, _mesa_builtin_uniform_desc[i].name) == 0)
	 break;
   }

   if (!statevar->name) {
      this->fail = true;
      printf("Failed to find builtin uniform `%s'\n", ir->name);
      return;
   }

   int array_count;
   if (ir->type->is_array()) {
      array_count = ir->type->length;
   } else {
      array_count = 1;
   }

   for (int a = 0; a < array_count; a++) {
      for (unsigned int i = 0; i < statevar->num_elements; i++) {
	 struct gl_builtin_uniform_element *element = &statevar->elements[i];
	 int tokens[STATE_LENGTH];

	 memcpy(tokens, element->tokens, sizeof(element->tokens));
	 if (ir->type->is_array()) {
	    tokens[1] = a;
	 }

	 /* This state reference has already been setup by ir_to_mesa,
	  * but we'll get the same index back here.
	  */
	 int index = _mesa_add_state_reference(this->fp->Base.Parameters,
					       (gl_state_index *)tokens);
	 float *vec_values = this->fp->Base.Parameters->ParameterValues[index];

	 /* Add each of the unique swizzles of the element as a
	  * parameter.  This'll end up matching the expected layout of
	  * the array/matrix/structure we're trying to fill in.
	  */
	 int last_swiz = -1;
	 for (unsigned int i = 0; i < 4; i++) {
	    int swiz = GET_SWZ(element->swizzle, i);
	    if (swiz == last_swiz)
	       break;
	    last_swiz = swiz;

	    c->prog_data.param[c->prog_data.nr_params++] = &vec_values[swiz];
	 }
      }
   }
}
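
/* Editor's note: the last_swiz check stops at the first repeated
 * component, so an element whose table swizzle repeats its final
 * component (e.g. an XXXX-style swizzle for a scalar packed into a
 * state vec4) contributes one parameter, while a full XYZW element
 * contributes four.
 */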

void
fs_visitor::emit_fragcoord_interpolation(ir_variable *ir)
{
   fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
   fs_reg wpos = *reg;
   fs_reg neg_y = this->pixel_y;
   neg_y.negate = true;

   /* gl_FragCoord.x */
   if (ir->pixel_center_integer) {
      emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x));
   } else {
      emit(fs_inst(BRW_OPCODE_ADD, wpos, this->pixel_x, fs_reg(0.5f)));
   }
   wpos.reg_offset++;

   /* gl_FragCoord.y */
   if (ir->origin_upper_left && ir->pixel_center_integer) {
      emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y));
   } else {
      fs_reg pixel_y = this->pixel_y;
      float offset = (ir->pixel_center_integer ? 0.0 : 0.5);

      if (!ir->origin_upper_left) {
	 pixel_y.negate = true;
	 offset += c->key.drawable_height - 1.0;
      }

      emit(fs_inst(BRW_OPCODE_ADD, wpos, pixel_y, fs_reg(offset)));
   }
   wpos.reg_offset++;

   /* gl_FragCoord.z */
   emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
		interp_reg(FRAG_ATTRIB_WPOS, 2)));
   wpos.reg_offset++;

   /* gl_FragCoord.w: Already set up in emit_interpolation */
   emit(fs_inst(BRW_OPCODE_MOV, wpos, this->wpos_w));

   hash_table_insert(this->variable_ht, reg, ir);
}
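
/* Editor's note, working through the Y math above: for the default GL
 * convention (origin lower-left, half-integer pixel centers), pixel_y
 * is negated and offset becomes 0.5 + (drawable_height - 1.0), so
 * gl_FragCoord.y = height - 0.5 - pixel_y -- a flip from the hardware's
 * upper-left origin.
 */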


void
fs_visitor::emit_general_interpolation(ir_variable *ir)
{
   fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
   /* Interpolation is always in floating point regs. */
   reg->type = BRW_REGISTER_TYPE_F;
   fs_reg attr = *reg;

   unsigned int array_elements;
   const glsl_type *type;

   if (ir->type->is_array()) {
      array_elements = ir->type->length;
      if (array_elements == 0) {
	 this->fail = true;
      }
      type = ir->type->fields.array;
   } else {
      array_elements = 1;
      type = ir->type;
   }

   int location = ir->location;
   for (unsigned int i = 0; i < array_elements; i++) {
      for (unsigned int j = 0; j < type->matrix_columns; j++) {
	 if (urb_setup[location] == -1) {
	    /* If there's no incoming setup data for this slot, don't
	     * emit interpolation for it.
	     */
	    attr.reg_offset += type->vector_elements;
	    location++;
	    continue;
	 }

	 for (unsigned int c = 0; c < type->vector_elements; c++) {
	    struct brw_reg interp = interp_reg(location, c);
	    emit(fs_inst(FS_OPCODE_LINTERP,
			 attr,
			 this->delta_x,
			 this->delta_y,
			 fs_reg(interp)));
	    attr.reg_offset++;
	 }
	 attr.reg_offset -= type->vector_elements;

	 for (unsigned int c = 0; c < type->vector_elements; c++) {
	    emit(fs_inst(BRW_OPCODE_MUL,
			 attr,
			 attr,
			 this->pixel_w));
	    attr.reg_offset++;
	 }
	 location++;
      }
   }

   hash_table_insert(this->variable_ht, reg, ir);
}
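
/* Editor's note: the trailing MUL by pixel_w is the perspective
 * correction step -- the setup data holds attribute/w plane equations,
 * so LINTERP yields attr/w at the pixel, and multiplying by pixel_w
 * (the W recovered in emit_interpolation_setup_*) restores the
 * attribute value.
 */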

void
fs_visitor::visit(ir_variable *ir)
{
   fs_reg *reg = NULL;

   if (variable_storage(ir))
      return;

   if (strcmp(ir->name, "gl_FragColor") == 0) {
      this->frag_color = ir;
   } else if (strcmp(ir->name, "gl_FragData") == 0) {
      this->frag_data = ir;
   } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
      this->frag_depth = ir;
   }

   if (ir->mode == ir_var_in) {
      if (!strcmp(ir->name, "gl_FragCoord")) {
	 emit_fragcoord_interpolation(ir);
	 return;
      } else if (!strcmp(ir->name, "gl_FrontFacing")) {
	 reg = new(this->mem_ctx) fs_reg(this, ir->type);
	 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
	 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
	  * us front face
	  */
	 fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP,
				      *reg,
				      fs_reg(r1_6ud),
				      fs_reg(1u << 31)));
	 inst->conditional_mod = BRW_CONDITIONAL_L;
	 emit(fs_inst(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u)));
      } else {
	 emit_general_interpolation(ir);
	 return;
      }
   }

   if (ir->mode == ir_var_uniform) {
      int param_index = c->prog_data.nr_params;

      if (!strncmp(ir->name, "gl_", 3)) {
	 setup_builtin_uniform_values(ir);
      } else {
	 setup_uniform_values(ir->location, ir->type);
      }

      reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index);
   }

   if (!reg)
      reg = new(this->mem_ctx) fs_reg(this, ir->type);

   hash_table_insert(this->variable_ht, reg, ir);
}

void
fs_visitor::visit(ir_dereference_variable *ir)
{
   fs_reg *reg = variable_storage(ir->var);
   this->result = *reg;
}

void
fs_visitor::visit(ir_dereference_record *ir)
{
   const glsl_type *struct_type = ir->record->type;

   ir->record->accept(this);

   unsigned int offset = 0;
   for (unsigned int i = 0; i < struct_type->length; i++) {
      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
	 break;
      offset += type_size(struct_type->fields.structure[i].type);
   }
   this->result.reg_offset += offset;
   this->result.type = brw_type_for_base_type(ir->type);
}

void
fs_visitor::visit(ir_dereference_array *ir)
{
   ir_constant *index;
   int element_size;

   ir->array->accept(this);
   index = ir->array_index->as_constant();

   element_size = type_size(ir->type);
   this->result.type = brw_type_for_base_type(ir->type);

   if (index) {
      assert(this->result.file == UNIFORM ||
	     (this->result.file == GRF &&
	      this->result.reg != 0));
      this->result.reg_offset += index->value.i[0] * element_size;
   } else {
      assert(!"FINISHME: non-constant array element");
   }
}
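
/* Editor's note: with a constant index the dereference is a pure
 * compile-time offset -- e.g. arr[2] on a vec4 array advances
 * reg_offset by 2 * 4 = 8 scalar slots.  Variable indexing should
 * already have been removed by lower_variable_index_to_cond_assign()
 * in brw_link_shader(), hence the FINISHME assert.
 */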

void
fs_visitor::visit(ir_expression *ir)
{
   unsigned int operand;
   fs_reg op[2], temp;
   fs_reg result;
   fs_inst *inst;

   for (operand = 0; operand < ir->get_num_operands(); operand++) {
      ir->operands[operand]->accept(this);
      if (this->result.file == BAD_FILE) {
	 ir_print_visitor v;
	 printf("Failed to get tree for expression operand:\n");
	 ir->operands[operand]->accept(&v);
	 this->fail = true;
      }
      op[operand] = this->result;

      /* Matrix expression operands should have been broken down to vector
       * operations already.
       */
      assert(!ir->operands[operand]->type->is_matrix());
      /* And then those vector operands should have been broken down to scalar.
       */
      assert(!ir->operands[operand]->type->is_vector());
   }

   /* Storage for our result.  If our result goes into an assignment, it will
    * just get copy-propagated out, so no worries.
    */
   this->result = fs_reg(this, ir->type);

   switch (ir->operation) {
   case ir_unop_logic_not:
      emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], fs_reg(-1)));
      break;
   case ir_unop_neg:
      op[0].negate = !op[0].negate;
      this->result = op[0];
      break;
   case ir_unop_abs:
      op[0].abs = true;
      this->result = op[0];
      break;
   case ir_unop_sign:
      temp = fs_reg(this, ir->type);

      emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(0.0f)));

      inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f)));
      inst->conditional_mod = BRW_CONDITIONAL_G;
      inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(1.0f)));
      inst->predicated = true;

      inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f)));
      inst->conditional_mod = BRW_CONDITIONAL_L;
      inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f)));
      inst->predicated = true;

      break;
   case ir_unop_rcp:
      emit(fs_inst(FS_OPCODE_RCP, this->result, op[0]));
      break;

   case ir_unop_exp2:
      emit(fs_inst(FS_OPCODE_EXP2, this->result, op[0]));
      break;
   case ir_unop_log2:
      emit(fs_inst(FS_OPCODE_LOG2, this->result, op[0]));
      break;
   case ir_unop_exp:
   case ir_unop_log:
      assert(!"not reached: should be handled by ir_explog_to_explog2");
      break;
   case ir_unop_sin:
      emit(fs_inst(FS_OPCODE_SIN, this->result, op[0]));
      break;
   case ir_unop_cos:
      emit(fs_inst(FS_OPCODE_COS, this->result, op[0]));
      break;

   case ir_unop_dFdx:
      emit(fs_inst(FS_OPCODE_DDX, this->result, op[0]));
      break;
   case ir_unop_dFdy:
      emit(fs_inst(FS_OPCODE_DDY, this->result, op[0]));
      break;

   case ir_binop_add:
      emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1]));
      break;
   case ir_binop_sub:
      assert(!"not reached: should be handled by ir_sub_to_add_neg");
      break;

   case ir_binop_mul:
      emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1]));
      break;
   case ir_binop_div:
      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
      break;
   case ir_binop_mod:
      assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
      break;

   case ir_binop_less:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_L;
      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
      break;
   case ir_binop_greater:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_G;
      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
      break;
   case ir_binop_lequal:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_LE;
      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
      break;
   case ir_binop_gequal:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_GE;
      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
      break;
   case ir_binop_equal:
   case ir_binop_all_equal: /* same as equal for scalars */
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_Z;
      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
      break;
   case ir_binop_nequal:
   case ir_binop_any_nequal: /* same as nequal for scalars */
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
      break;

   case ir_binop_logic_xor:
      emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1]));
      break;

   case ir_binop_logic_or:
      emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1]));
      break;

   case ir_binop_logic_and:
      emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1]));
      break;

   case ir_binop_dot:
   case ir_binop_cross:
   case ir_unop_any:
      assert(!"not reached: should be handled by brw_fs_channel_expressions");
      break;

   case ir_unop_noise:
      assert(!"not reached: should be handled by lower_noise");
      break;

   case ir_unop_sqrt:
      emit(fs_inst(FS_OPCODE_SQRT, this->result, op[0]));
      break;

   case ir_unop_rsq:
      emit(fs_inst(FS_OPCODE_RSQ, this->result, op[0]));
      break;

   case ir_unop_i2f:
   case ir_unop_b2f:
   case ir_unop_b2i:
      emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
      break;
   case ir_unop_f2i:
      emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
      break;
   case ir_unop_f2b:
   case ir_unop_i2b:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f)));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
      break; /* was missing, falling through and clobbering result with RNDD */

   case ir_unop_trunc:
      emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
      break;
   case ir_unop_ceil:
      op[0].negate = !op[0].negate; /* '~' only works for ints, not bools */
      inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
      this->result.negate = true;
      break;
   case ir_unop_floor:
      inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
      break;
   case ir_unop_fract:
      inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0]));
      break;

   case ir_binop_min:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_L;

      inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
      inst->predicated = true;
      break;
   case ir_binop_max:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_G;

      inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
      inst->predicated = true;
      break;

   case ir_binop_pow:
      inst = emit(fs_inst(FS_OPCODE_POW, this->result, op[0], op[1]));
      break;

   case ir_unop_bit_not:
   case ir_unop_u2f:
   case ir_binop_lshift:
   case ir_binop_rshift:
   case ir_binop_bit_and:
   case ir_binop_bit_xor:
   case ir_binop_bit_or:
      assert(!"GLSL 1.30 features unsupported");
      break;
   }
}
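
/* Editor's note: the comparison cases above share one pattern -- CMP
 * writes all-ones per channel where the condition holds and 0 where it
 * doesn't, and the following AND with 0x1 turns that into the
 * canonical 0/1 boolean the rest of the IR expects.
 */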

void
fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r,
				   const glsl_type *type, bool predicated)
{
   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      for (unsigned int i = 0; i < type->components(); i++) {
	 l.type = brw_type_for_base_type(type);
	 r.type = brw_type_for_base_type(type);

	 fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, l, r));
	 inst->predicated = predicated;

	 l.reg_offset++;
	 r.reg_offset++;
      }
      break;
   case GLSL_TYPE_ARRAY:
      for (unsigned int i = 0; i < type->length; i++) {
	 emit_assignment_writes(l, r, type->fields.array, predicated);
      }
      break; /* was missing, falling through into the struct case */

   case GLSL_TYPE_STRUCT:
      for (unsigned int i = 0; i < type->length; i++) {
	 emit_assignment_writes(l, r, type->fields.structure[i].type,
				predicated);
      }
      break;

   case GLSL_TYPE_SAMPLER:
      break;

   default:
      assert(!"not reached");
      break;
   }
}

void
fs_visitor::visit(ir_assignment *ir)
{
   struct fs_reg l, r;
   fs_inst *inst;

   /* FINISHME: arrays on the lhs */
   ir->lhs->accept(this);
   l = this->result;

   ir->rhs->accept(this);
   r = this->result;

   assert(l.file != BAD_FILE);
   assert(r.file != BAD_FILE);

   if (ir->condition) {
      /* Get the condition bool into the predicate. */
      ir->condition->accept(this);
      inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, this->result, fs_reg(0)));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   }

   if (ir->lhs->type->is_scalar() ||
       ir->lhs->type->is_vector()) {
      for (int i = 0; i < ir->lhs->type->vector_elements; i++) {
	 if (ir->write_mask & (1 << i)) {
	    inst = emit(fs_inst(BRW_OPCODE_MOV, l, r));
	    if (ir->condition)
	       inst->predicated = true;
	    r.reg_offset++;
	 }
	 l.reg_offset++;
      }
   } else {
      emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL);
   }
}

fs_inst *
fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
{
   int mlen;
   int base_mrf = 2;
   bool simd16 = false;
   fs_reg orig_dst;

   if (ir->shadow_comparitor) {
      for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) {
	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
		      coordinate));
	 coordinate.reg_offset++;
      }
      /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
      mlen = 3;

      if (ir->op == ir_tex) {
	 /* There's no plain shadow compare message, so we use shadow
	  * compare with a bias of 0.0.
	  */
	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
		      fs_reg(0.0f)));
	 mlen++;
      } else if (ir->op == ir_txb) {
	 ir->lod_info.bias->accept(this);
	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
		      this->result));
	 mlen++;
      } else {
	 assert(ir->op == ir_txl);
	 ir->lod_info.lod->accept(this);
	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
		      this->result));
	 mlen++;
      }

      ir->shadow_comparitor->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;
   } else if (ir->op == ir_tex) {
      for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) {
	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
		      coordinate));
	 coordinate.reg_offset++;
      }
      /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
      mlen = 3;
   } else {
      /* Oh joy.  gen4 doesn't have SIMD8 non-shadow-compare bias/lod
       * instructions.  We'll need to do SIMD16 here.
       */
      assert(ir->op == ir_txb || ir->op == ir_txl);

      for (mlen = 0; mlen < ir->coordinate->type->vector_elements * 2;) {
	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
		      coordinate));
	 coordinate.reg_offset++;
	 mlen++;

	 /* The unused upper half. */
	 mlen++;
      }

      /* lod/bias appears after u/v/r. */
      mlen = 6;

      if (ir->op == ir_txb) {
	 ir->lod_info.bias->accept(this);
	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
		      this->result));
	 mlen++;
      } else {
	 ir->lod_info.lod->accept(this);
	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
		      this->result));
	 mlen++;
      }

      /* The unused upper half. */
      mlen++;

      /* Now, since we're doing simd16, the return is 2 interleaved
       * vec4s where the odd-indexed ones are junk. We'll need to move
       * this weirdness around to the expected layout.
       */
      simd16 = true;
      orig_dst = dst;
      dst = fs_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type,
						       2));
      dst.type = BRW_REGISTER_TYPE_F;
   }

   fs_inst *inst = NULL;
   switch (ir->op) {
   case ir_tex:
      inst = emit(fs_inst(FS_OPCODE_TEX, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txb:
      inst = emit(fs_inst(FS_OPCODE_TXB, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txl:
      inst = emit(fs_inst(FS_OPCODE_TXL, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txd:
   case ir_txf:
      assert(!"GLSL 1.30 features unsupported");
      break;
   }
   inst->mlen = mlen;

   if (simd16) {
      for (int i = 0; i < 4; i++) {
	 emit(fs_inst(BRW_OPCODE_MOV, orig_dst, dst));
	 orig_dst.reg_offset++;
	 dst.reg_offset += 2;
      }
   }

   return inst;
}
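
/* Editor's note: in the SIMD16 fallback the sampler returns the
 * channels as register pairs -- R, R', G, G', B, B', A, A', where the
 * primed upper-half registers are the junk the comment above mentions
 * for our 8-pixel dispatch.  The final MOV loop walks dst two
 * registers at a time to repack R, G, B, A into orig_dst.
 */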

fs_inst *
fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate)
{
   /* gen5's SIMD8 sampler has slots for u, v, r, array index, then
    * optional parameters like shadow comparitor or LOD bias.  If
    * optional parameters aren't present, those base slots are
    * optional and don't need to be included in the message.
    *
    * We don't fill in the unnecessary slots regardless, which may
    * look surprising in the disassembly.
    */
   int mlen;
   int base_mrf = 2;

   for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate));
      coordinate.reg_offset++;
   }

   if (ir->shadow_comparitor) {
      mlen = MAX2(mlen, 4);

      ir->shadow_comparitor->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;
   }

   fs_inst *inst = NULL;
   switch (ir->op) {
   case ir_tex:
      inst = emit(fs_inst(FS_OPCODE_TEX, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txb:
      ir->lod_info.bias->accept(this);
      mlen = MAX2(mlen, 4);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;

      inst = emit(fs_inst(FS_OPCODE_TXB, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txl:
      ir->lod_info.lod->accept(this);
      mlen = MAX2(mlen, 4);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;

      inst = emit(fs_inst(FS_OPCODE_TXL, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txd:
   case ir_txf:
      assert(!"GLSL 1.30 features unsupported");
      break;
   }
   inst->mlen = mlen;

   return inst;
}

void
fs_visitor::visit(ir_texture *ir)
{
   fs_inst *inst = NULL;

   ir->coordinate->accept(this);
   fs_reg coordinate = this->result;

   /* Should be lowered by do_lower_texture_projection */
   assert(!ir->projector);

   /* Writemasking doesn't eliminate channels on SIMD8 texture
    * samples, so don't worry about them.
    */
   fs_reg dst = fs_reg(this, glsl_type::vec4_type);

   if (intel->gen < 5) {
      inst = emit_texture_gen4(ir, dst, coordinate);
   } else {
      inst = emit_texture_gen5(ir, dst, coordinate);
   }

   inst->sampler =
      _mesa_get_sampler_uniform_value(ir->sampler,
				      ctx->Shader.CurrentProgram,
				      &brw->fragment_program->Base);
   inst->sampler = c->fp->program.Base.SamplerUnits[inst->sampler];

   this->result = dst;

   if (ir->shadow_comparitor)
      inst->shadow_compare = true;

   if (c->key.tex_swizzles[inst->sampler] != SWIZZLE_NOOP) {
      fs_reg swizzle_dst = fs_reg(this, glsl_type::vec4_type);

      for (int i = 0; i < 4; i++) {
	 int swiz = GET_SWZ(c->key.tex_swizzles[inst->sampler], i);
	 fs_reg l = swizzle_dst;
	 l.reg_offset += i;

	 if (swiz == SWIZZLE_ZERO) {
	    emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(0.0f)));
	 } else if (swiz == SWIZZLE_ONE) {
	    emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(1.0f)));
	 } else {
	    fs_reg r = dst;
	    r.reg_offset += GET_SWZ(c->key.tex_swizzles[inst->sampler], i);
	    emit(fs_inst(BRW_OPCODE_MOV, l, r));
	 }
      }
      this->result = swizzle_dst;
   }
}

void
fs_visitor::visit(ir_swizzle *ir)
{
   ir->val->accept(this);
   fs_reg val = this->result;

   if (ir->type->vector_elements == 1) {
      this->result.reg_offset += ir->mask.x;
      return;
   }

   fs_reg result = fs_reg(this, ir->type);
   this->result = result;

   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
      fs_reg channel = val;
      int swiz = 0;

      switch (i) {
      case 0:
	 swiz = ir->mask.x;
	 break;
      case 1:
	 swiz = ir->mask.y;
	 break;
      case 2:
	 swiz = ir->mask.z;
	 break;
      case 3:
	 swiz = ir->mask.w;
	 break;
      }

      channel.reg_offset += swiz;
      emit(fs_inst(BRW_OPCODE_MOV, result, channel));
      result.reg_offset++;
   }
}

void
fs_visitor::visit(ir_discard *ir)
{
   fs_reg temp = fs_reg(this, glsl_type::uint_type);

   assert(ir->condition == NULL); /* FINISHME */

   emit(fs_inst(FS_OPCODE_DISCARD, temp, temp));
}

void
fs_visitor::visit(ir_constant *ir)
{
   fs_reg reg(this, ir->type);
   this->result = reg;

   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
      switch (ir->type->base_type) {
      case GLSL_TYPE_FLOAT:
	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i])));
	 break;
      case GLSL_TYPE_UINT:
	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i])));
	 break;
      case GLSL_TYPE_INT:
	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i])));
	 break;
      case GLSL_TYPE_BOOL:
	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i])));
	 break;
      default:
	 assert(!"Non-float/uint/int/bool constant");
      }
      reg.reg_offset++;
   }
}

void
fs_visitor::visit(ir_if *ir)
{
   fs_inst *inst;

   /* Don't point the annotation at the if statement, because then it plus
    * the then and else blocks get printed.
    */
   this->base_ir = ir->condition;

   /* Generate the condition into the condition code. */
   ir->condition->accept(this);
   inst = emit(fs_inst(BRW_OPCODE_MOV, fs_reg(brw_null_reg()), this->result));
   inst->conditional_mod = BRW_CONDITIONAL_NZ;

   inst = emit(fs_inst(BRW_OPCODE_IF));
   inst->predicated = true;

   foreach_iter(exec_list_iterator, iter, ir->then_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();
      this->base_ir = ir;

      ir->accept(this);
   }

   if (!ir->else_instructions.is_empty()) {
      emit(fs_inst(BRW_OPCODE_ELSE));

      foreach_iter(exec_list_iterator, iter, ir->else_instructions) {
	 ir_instruction *ir = (ir_instruction *)iter.get();
	 this->base_ir = ir;

	 ir->accept(this);
      }
   }

   emit(fs_inst(BRW_OPCODE_ENDIF));
}

void
fs_visitor::visit(ir_loop *ir)
{
   fs_reg counter = reg_undef;

   if (ir->counter) {
      this->base_ir = ir->counter;
      ir->counter->accept(this);
      counter = *(variable_storage(ir->counter));

      if (ir->from) {
	 this->base_ir = ir->from;
	 ir->from->accept(this);

	 emit(fs_inst(BRW_OPCODE_MOV, counter, this->result));
      }
   }

   emit(fs_inst(BRW_OPCODE_DO));

   if (ir->to) {
      this->base_ir = ir->to;
      ir->to->accept(this);

      fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null,
				   counter, this->result));
      switch (ir->cmp) {
      case ir_binop_equal:
	 inst->conditional_mod = BRW_CONDITIONAL_Z;
	 break;
      case ir_binop_nequal:
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 break;
      case ir_binop_gequal:
	 inst->conditional_mod = BRW_CONDITIONAL_GE;
	 break;
      case ir_binop_lequal:
	 inst->conditional_mod = BRW_CONDITIONAL_LE;
	 break;
      case ir_binop_greater:
	 inst->conditional_mod = BRW_CONDITIONAL_G;
	 break;
      case ir_binop_less:
	 inst->conditional_mod = BRW_CONDITIONAL_L;
	 break;
      default:
	 assert(!"not reached: unknown loop condition");
	 this->fail = true;
	 break;
      }

      inst = emit(fs_inst(BRW_OPCODE_BREAK));
      inst->predicated = true;
   }

   foreach_iter(exec_list_iterator, iter, ir->body_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();

      this->base_ir = ir;
      ir->accept(this);
   }

   if (ir->increment) {
      this->base_ir = ir->increment;
      ir->increment->accept(this);
      emit(fs_inst(BRW_OPCODE_ADD, counter, counter, this->result));
   }

   emit(fs_inst(BRW_OPCODE_WHILE));
}

void
fs_visitor::visit(ir_loop_jump *ir)
{
   switch (ir->mode) {
   case ir_loop_jump::jump_break:
      emit(fs_inst(BRW_OPCODE_BREAK));
      break;
   case ir_loop_jump::jump_continue:
      emit(fs_inst(BRW_OPCODE_CONTINUE));
      break;
   }
}

void
fs_visitor::visit(ir_call *ir)
{
   assert(!"FINISHME");
}

void
fs_visitor::visit(ir_return *ir)
{
   assert(!"FINISHME");
}

void
fs_visitor::visit(ir_function *ir)
{
   /* Ignore function bodies other than main() -- we shouldn't see calls to
    * them since they should all be inlined before we get to ir_to_mesa.
    */
   if (strcmp(ir->name, "main") == 0) {
      const ir_function_signature *sig;
      exec_list empty;

      sig = ir->matching_signature(&empty);

      assert(sig);

      foreach_iter(exec_list_iterator, iter, sig->body) {
	 ir_instruction *ir = (ir_instruction *)iter.get();
	 this->base_ir = ir;

	 ir->accept(this);
      }
   }
}

void
fs_visitor::visit(ir_function_signature *ir)
{
   assert(!"not reached");
   (void)ir;
}

fs_inst *
fs_visitor::emit(fs_inst inst)
{
   fs_inst *list_inst = new(mem_ctx) fs_inst;
   *list_inst = inst;

   list_inst->annotation = this->current_annotation;
   list_inst->ir = this->base_ir;

   this->instructions.push_tail(list_inst);

   return list_inst;
}

/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
void
fs_visitor::emit_dummy_fs()
{
   /* Everyone's favorite color. */
   emit(fs_inst(BRW_OPCODE_MOV,
		fs_reg(MRF, 2),
		fs_reg(1.0f)));
   emit(fs_inst(BRW_OPCODE_MOV,
		fs_reg(MRF, 3),
		fs_reg(0.0f)));
   emit(fs_inst(BRW_OPCODE_MOV,
		fs_reg(MRF, 4),
		fs_reg(1.0f)));
   emit(fs_inst(BRW_OPCODE_MOV,
		fs_reg(MRF, 5),
		fs_reg(0.0f)));

   fs_inst *write;
   write = emit(fs_inst(FS_OPCODE_FB_WRITE,
			fs_reg(0),
			fs_reg(0)));
}

/* The register location here is relative to the start of the URB
 * data.  It will get adjusted to be a real location before
 * generate_code() time.
 */
struct brw_reg
fs_visitor::interp_reg(int location, int channel)
{
   int regnr = urb_setup[location] * 2 + channel / 2;
   int stride = (channel & 1) * 4;

   assert(urb_setup[location] != -1);

   return brw_vec1_grf(regnr, stride);
}
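
/* Editor's note: each attribute slot occupies two GRFs of setup data,
 * two channels' interpolation coefficients per register.  A worked
 * example: with urb_setup[location] == 1 and channel == 2,
 * regnr = 1 * 2 + 2 / 2 = 3 and stride = (2 & 1) * 4 = 0, i.e. the
 * first half of the slot's second register.
 */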

/** Emits the interpolation for the varying inputs. */
void
fs_visitor::emit_interpolation_setup_gen4()
{
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);

   this->current_annotation = "compute pixel centers";
   this->pixel_x = fs_reg(this, glsl_type::uint_type);
   this->pixel_y = fs_reg(this, glsl_type::uint_type);
   this->pixel_x.type = BRW_REGISTER_TYPE_UW;
   this->pixel_y.type = BRW_REGISTER_TYPE_UW;
   emit(fs_inst(BRW_OPCODE_ADD,
		this->pixel_x,
		fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
		fs_reg(brw_imm_v(0x10101010))));
   emit(fs_inst(BRW_OPCODE_ADD,
		this->pixel_y,
		fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
		fs_reg(brw_imm_v(0x11001100))));

   this->current_annotation = "compute pixel deltas from v0";
   if (brw->has_pln) {
      this->delta_x = fs_reg(this, glsl_type::vec2_type);
      this->delta_y = this->delta_x;
      this->delta_y.reg_offset++;
   } else {
      this->delta_x = fs_reg(this, glsl_type::float_type);
      this->delta_y = fs_reg(this, glsl_type::float_type);
   }
   emit(fs_inst(BRW_OPCODE_ADD,
		this->delta_x,
		this->pixel_x,
		fs_reg(negate(brw_vec1_grf(1, 0)))));
   emit(fs_inst(BRW_OPCODE_ADD,
		this->delta_y,
		this->pixel_y,
		fs_reg(negate(brw_vec1_grf(1, 1)))));

   this->current_annotation = "compute pos.w and 1/pos.w";
   /* Compute wpos.w.  It's always in our setup, since it's needed to
    * interpolate the other attributes.
    */
   this->wpos_w = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y,
		interp_reg(FRAG_ATTRIB_WPOS, 3)));
   /* Compute the pixel 1/W value from wpos.w. */
   this->pixel_w = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(FS_OPCODE_RCP, this->pixel_w, wpos_w));
   this->current_annotation = NULL;
}
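
/* Editor's note on the brw_imm_v() trick above: a V-type immediate
 * packs eight signed 4-bit values into one 32-bit word.  Reading from
 * the low nibble up, 0x10101010 decodes to {0,1,0,1,0,1,0,1} and
 * 0x11001100 to {0,0,1,1,0,0,1,1}; added to the per-subspan X/Y
 * origins read from g1, they produce the pixel positions of the two
 * 2x2 subspans an 8-wide dispatch covers.
 */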

/** Emits the interpolation for the varying inputs. */
void
fs_visitor::emit_interpolation_setup_gen6()
{
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);

   /* If the pixel centers end up used, the setup is the same as for gen4. */
   this->current_annotation = "compute pixel centers";
   this->pixel_x = fs_reg(this, glsl_type::uint_type);
   this->pixel_y = fs_reg(this, glsl_type::uint_type);
   this->pixel_x.type = BRW_REGISTER_TYPE_UW;
   this->pixel_y.type = BRW_REGISTER_TYPE_UW;
   emit(fs_inst(BRW_OPCODE_ADD,
		this->pixel_x,
		fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
		fs_reg(brw_imm_v(0x10101010))));
   emit(fs_inst(BRW_OPCODE_ADD,
		this->pixel_y,
		fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
		fs_reg(brw_imm_v(0x11001100))));

   this->current_annotation = "compute 1/pos.w";
   this->wpos_w = fs_reg(brw_vec8_grf(c->key.source_w_reg, 0));
   this->pixel_w = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(FS_OPCODE_RCP, this->pixel_w, wpos_w));

   this->delta_x = fs_reg(brw_vec8_grf(2, 0));
   this->delta_y = fs_reg(brw_vec8_grf(3, 0));

   this->current_annotation = NULL;
}

void
fs_visitor::emit_fb_writes()
{
   this->current_annotation = "FB write header";
   int nr = 0;

   /* m0, m1 header */
   nr += 2;

   if (c->key.aa_dest_stencil_reg) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
		   fs_reg(brw_vec8_grf(c->key.aa_dest_stencil_reg, 0))));
   }

   /* Reserve space for color. It'll be filled in per MRT below. */
   int color_mrf = nr;
   nr += 4;

   if (c->key.source_depth_to_render_target) {
      if (c->key.computes_depth) {
	 /* Hand over gl_FragDepth. */
	 assert(this->frag_depth);
	 fs_reg depth = *(variable_storage(this->frag_depth));

	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), depth));
      } else {
	 /* Pass through the payload depth. */
	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
		      fs_reg(brw_vec8_grf(c->key.source_depth_reg, 0))));
      }
   }

   if (c->key.dest_depth_reg) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
		   fs_reg(brw_vec8_grf(c->key.dest_depth_reg, 0))));
   }

   fs_reg color = reg_undef;
   if (this->frag_color)
      color = *(variable_storage(this->frag_color));
   else if (this->frag_data)
      color = *(variable_storage(this->frag_data));

   for (int target = 0; target < c->key.nr_color_regions; target++) {
      this->current_annotation = talloc_asprintf(this->mem_ctx,
						 "FB write target %d",
						 target);
      if (this->frag_color || this->frag_data) {
	 for (int i = 0; i < 4; i++) {
	    emit(fs_inst(BRW_OPCODE_MOV,
			 fs_reg(MRF, color_mrf + i),
			 color));
	    color.reg_offset++;
	 }
      }

      if (this->frag_color)
	 color.reg_offset -= 4;

      fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
				   reg_undef, reg_undef));
      inst->target = target;
      inst->mlen = nr;
      if (target == c->key.nr_color_regions - 1)
	 inst->eot = true;
   }

   if (c->key.nr_color_regions == 0) {
      fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
				   reg_undef, reg_undef));
      inst->mlen = nr;
      inst->eot = true;
   }

   this->current_annotation = NULL;
}
1916
1917void
1918fs_visitor::generate_fb_write(fs_inst *inst)
1919{
1920   GLboolean eot = inst->eot;
1921
1922   /* The header is 2 regs.  g0 is copied by the send's implied move,
1923    * so all we have to set up here is g1.
1924    */
1925   brw_push_insn_state(p);
1926   brw_set_mask_control(p, BRW_MASK_DISABLE);
1927   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1928   brw_MOV(p,
1929	   brw_message_reg(1),
1930	   brw_vec8_grf(1, 0));
1931   brw_pop_insn_state(p);
1932
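   /* The null destination and response length of 0 reflect that FB writes
    * return no data; EOT on the final write is what retires the thread.
    */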
1933   brw_fb_WRITE(p,
1934		8, /* dispatch_width */
1935		retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
1936		0, /* base MRF */
1937		retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
1938		inst->target,
1939		inst->mlen,
1940		0,
1941		eot);
1942}
1943
1944void
1945fs_visitor::generate_linterp(fs_inst *inst,
1946			     struct brw_reg dst, struct brw_reg *src)
1947{
1948   struct brw_reg delta_x = src[0];
1949   struct brw_reg delta_y = src[1];
1950   struct brw_reg interp = src[2];
1951
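   /* PLN reads delta_x/delta_y as one register pair, so it's only usable
    * when they're adjacent and, before gen6, start on an even register.
    */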
1952   if (brw->has_pln &&
1953       delta_y.nr == delta_x.nr + 1 &&
1954       (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
1955      brw_PLN(p, dst, interp, delta_x);
1956   } else {
1957      brw_LINE(p, brw_null_reg(), interp, delta_x);
1958      brw_MAC(p, dst, suboffset(interp, 1), delta_y);
1959   }
1960}
1961
1962void
1963fs_visitor::generate_math(fs_inst *inst,
1964			  struct brw_reg dst, struct brw_reg *src)
1965{
1966   int op;
1967
1968   switch (inst->opcode) {
1969   case FS_OPCODE_RCP:
1970      op = BRW_MATH_FUNCTION_INV;
1971      break;
1972   case FS_OPCODE_RSQ:
1973      op = BRW_MATH_FUNCTION_RSQ;
1974      break;
1975   case FS_OPCODE_SQRT:
1976      op = BRW_MATH_FUNCTION_SQRT;
1977      break;
1978   case FS_OPCODE_EXP2:
1979      op = BRW_MATH_FUNCTION_EXP;
1980      break;
1981   case FS_OPCODE_LOG2:
1982      op = BRW_MATH_FUNCTION_LOG;
1983      break;
1984   case FS_OPCODE_POW:
1985      op = BRW_MATH_FUNCTION_POW;
1986      break;
1987   case FS_OPCODE_SIN:
1988      op = BRW_MATH_FUNCTION_SIN;
1989      break;
1990   case FS_OPCODE_COS:
1991      op = BRW_MATH_FUNCTION_COS;
1992      break;
1993   default:
1994      assert(!"not reached: unknown math function");
1995      op = 0;
1996      break;
1997   }
1998
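   /* POW is our only binary math function.  The send emitted by brw_math()
    * below places src[0] at message reg 2 via its implied move, so the
    * second operand belongs in message reg 3.
    */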
1999   if (inst->opcode == FS_OPCODE_POW) {
2000      brw_MOV(p, brw_message_reg(3), src[1]);
2001   }
2002
2003   brw_math(p, dst,
2004	    op,
2005	    inst->saturate ? BRW_MATH_SATURATE_SATURATE :
2006	    BRW_MATH_SATURATE_NONE,
2007	    2, src[0],
2008	    BRW_MATH_DATA_VECTOR,
2009	    BRW_MATH_PRECISION_FULL);
2010}
2011
2012void
2013fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
2014{
2015   int msg_type = -1;
2016   int rlen = 4;
2017   uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
2018
2019   if (intel->gen == 5) {
2020      switch (inst->opcode) {
2021      case FS_OPCODE_TEX:
2022	 if (inst->shadow_compare) {
2023	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5;
2024	 } else {
2025	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5;
2026	 }
2027	 break;
2028      case FS_OPCODE_TXB:
2029	 if (inst->shadow_compare) {
2030	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5;
2031	 } else {
2032	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5;
2033	 }
2034	 break;
2035      }
2036   } else {
2037      switch (inst->opcode) {
2038      case FS_OPCODE_TEX:
2039	 /* Note that G45 and older determine shadow compare and dispatch
2040	  * width from the message length for most messages.
2041	  */
2042	 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
2043	 if (inst->shadow_compare) {
2044	    assert(inst->mlen == 5);
2045	 } else {
2046	    assert(inst->mlen <= 6);
2047	 }
2048	 break;
2049      case FS_OPCODE_TXB:
2050	 if (inst->shadow_compare) {
2051	    assert(inst->mlen == 5);
2052	    msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
2053	 } else {
2054	    assert(inst->mlen == 8);
2055	    msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
2056	    simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
2057	 }
2058	 break;
2059      }
2060   }
2061   assert(msg_type != -1);
2062
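   /* A SIMD16 sampler response is 8 regs: two regs per RGBA channel
    * instead of one.
    */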
2063   if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
2064      rlen = 8;
2065      dst = vec16(dst);
2066   }
2067
2068   /* Make room for the g0 header (hence the mlen + 1 below). */
2069   src.nr--;
2070
2071   brw_SAMPLE(p,
2072	      retype(dst, BRW_REGISTER_TYPE_UW),
2073	      src.nr,
2074	      retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
2075              SURF_INDEX_TEXTURE(inst->sampler),
2076	      inst->sampler,
2077	      WRITEMASK_XYZW,
2078	      msg_type,
2079	      rlen,
2080	      inst->mlen + 1,
2081	      0,
2082	      1,
2083	      simd_mode);
2084}
2085
2086
2087/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
2088 * looking like:
2089 *
2090 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
2091 *
2092 * and we're trying to produce:
2093 *
2094 *           DDX                     DDY
2095 * dst: (ss0.tr - ss0.tl)     (ss0.tl - ss0.bl)
2096 *      (ss0.tr - ss0.tl)     (ss0.tr - ss0.br)
2097 *      (ss0.br - ss0.bl)     (ss0.tl - ss0.bl)
2098 *      (ss0.br - ss0.bl)     (ss0.tr - ss0.br)
2099 *      (ss1.tr - ss1.tl)     (ss1.tl - ss1.bl)
2100 *      (ss1.tr - ss1.tl)     (ss1.tr - ss1.br)
2101 *      (ss1.br - ss1.bl)     (ss1.tl - ss1.bl)
2102 *      (ss1.br - ss1.bl)     (ss1.tr - ss1.br)
2103 *
2104 * and add another set of two more subspans if in 16-pixel dispatch mode.
2105 *
2106 * For DDX it ends up being easy: width = 2 with horiz = 0 gets us the
2107 * same result for each pair, and vertstride = 2 jumps us 2 elements after
2108 * each pair is processed.  DDY is harder, since we want the pairs
2109 * swizzled between each other.  We could probably do it like DDX and
2110 * swizzle into the right order later, but for now we bail and just
2111 * produce ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
2112 */
2113void
2114fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
2115{
2116   struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
2117				 BRW_REGISTER_TYPE_F,
2118				 BRW_VERTICAL_STRIDE_2,
2119				 BRW_WIDTH_2,
2120				 BRW_HORIZONTAL_STRIDE_0,
2121				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
2122   struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
2123				 BRW_REGISTER_TYPE_F,
2124				 BRW_VERTICAL_STRIDE_2,
2125				 BRW_WIDTH_2,
2126				 BRW_HORIZONTAL_STRIDE_0,
2127				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
2128   brw_ADD(p, dst, src0, negate(src1));
2129}
2130
2131void
2132fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
2133{
2134   struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
2135				 BRW_REGISTER_TYPE_F,
2136				 BRW_VERTICAL_STRIDE_4,
2137				 BRW_WIDTH_4,
2138				 BRW_HORIZONTAL_STRIDE_0,
2139				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
2140   struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
2141				 BRW_REGISTER_TYPE_F,
2142				 BRW_VERTICAL_STRIDE_4,
2143				 BRW_WIDTH_4,
2144				 BRW_HORIZONTAL_STRIDE_0,
2145				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
2146   brw_ADD(p, dst, src0, negate(src1));
2147}
2148
2149void
2150fs_visitor::generate_discard(fs_inst *inst, struct brw_reg temp)
2151{
2152   struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
2153   temp = brw_uw1_reg(temp.file, temp.nr, 0);
2154
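   /* Our understanding of the trick below: the discard condition has
    * accumulated the killed channels in mask register 1 (IMASK), so NOTing
    * it and ANDing the result into the g0 header's pixel-enable bits makes
    * every later message, including the FB write, skip those channels.
    */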
2155   brw_push_insn_state(p);
2156   brw_set_mask_control(p, BRW_MASK_DISABLE);
2157   brw_NOT(p, temp, brw_mask_reg(1)); /* IMASK */
2158   brw_AND(p, g0, temp, g0);
2159   brw_pop_insn_state(p);
2160}
2161
2162void
2163fs_visitor::assign_curb_setup()
2164{
2165   c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
2166   c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8;
2167
2168   /* Map the offsets in the UNIFORM file to fixed HW regs. */
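   /* A worked example of the mapping, assuming first_curbe_grf is 2:
    * constants 0..7 land in g2.0..g2.7 and constant 8 in g3.0, since each
    * GRF holds 8 floats of the constant buffer.
    */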
2169   foreach_iter(exec_list_iterator, iter, this->instructions) {
2170      fs_inst *inst = (fs_inst *)iter.get();
2171
2172      for (unsigned int i = 0; i < 3; i++) {
2173	 if (inst->src[i].file == UNIFORM) {
2174	    int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
2175	    struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf +
2176						  constant_nr / 8,
2177						  constant_nr % 8);
2178
2179	    inst->src[i].file = FIXED_HW_REG;
2180	    inst->src[i].fixed_hw_reg = brw_reg;
2181	 }
2182      }
2183   }
2184}
2185
2186void
2187fs_visitor::calculate_urb_setup()
2188{
2189   for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
2190      urb_setup[i] = -1;
2191   }
2192
2193   int urb_next = 0;
2194   /* Figure out where each of the incoming setup attributes lands. */
2195   if (intel->gen >= 6) {
2196      for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
2197	 if (i == FRAG_ATTRIB_WPOS ||
2198	     (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i))) {
2199	    urb_setup[i] = urb_next++;
2200	 }
2201      }
2202   } else {
2203      /* FINISHME: The sf doesn't map VS->FS inputs for us very well. */
2204      for (unsigned int i = 0; i < VERT_RESULT_MAX; i++) {
2205	 if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) {
2206	    int fp_index;
2207
2208	    if (i >= VERT_RESULT_VAR0)
2209	       fp_index = i - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0);
2210	    else if (i <= VERT_RESULT_TEX7)
2211	       fp_index = i;
2212	    else
2213	       fp_index = -1;
2214
2215	    if (fp_index >= 0)
2216	       urb_setup[fp_index] = urb_next++;
2217	 }
2218      }
2219   }
2220
2221   /* Each attribute is 4 setup channels, each of which is half a reg. */
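   /* E.g. (a sketch): WPOS plus two other attributes gives urb_next == 3,
    * so we read 6 GRFs of setup data.
    */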
2222   c->prog_data.urb_read_length = urb_next * 2;
2223}
2224
2225void
2226fs_visitor::assign_urb_setup()
2227{
2228   int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length;
2229
2230   /* Offset all the urb_setup[] indices by the actual position of the
2231    * setup regs, now that the location of the constants has been chosen.
2232    */
2233   foreach_iter(exec_list_iterator, iter, this->instructions) {
2234      fs_inst *inst = (fs_inst *)iter.get();
2235
2236      if (inst->opcode != FS_OPCODE_LINTERP)
2237	 continue;
2238
2239      assert(inst->src[2].file == FIXED_HW_REG);
2240
2241      inst->src[2].fixed_hw_reg.nr += urb_start;
2242   }
2243
2244   this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length;
2245}
2246
2247static void
2248assign_reg(int *reg_hw_locations, fs_reg *reg)
2249{
2250   if (reg->file == GRF && reg->reg != 0) {
2251      reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset;
2252      reg->reg = 0;
2253   }
2254}
2255
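/** Fallback allocator: lays each virtual GRF out right after the previous
 * one, with no reuse.  Wasteful but trivially conflict-free, which makes
 * it handy for debugging the real allocator below.
 */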
2256void
2257fs_visitor::assign_regs_trivial()
2258{
2259   int last_grf = 0;
2260   int hw_reg_mapping[this->virtual_grf_next];
2261   int i;
2262
2263   hw_reg_mapping[0] = 0;
2264   hw_reg_mapping[1] = this->first_non_payload_grf;
2265   for (i = 2; i < this->virtual_grf_next; i++) {
2266      hw_reg_mapping[i] = (hw_reg_mapping[i - 1] +
2267			   this->virtual_grf_sizes[i - 1]);
2268   }
2269   last_grf = hw_reg_mapping[i - 1] + this->virtual_grf_sizes[i - 1];
2270
2271   foreach_iter(exec_list_iterator, iter, this->instructions) {
2272      fs_inst *inst = (fs_inst *)iter.get();
2273
2274      assign_reg(hw_reg_mapping, &inst->dst);
2275      assign_reg(hw_reg_mapping, &inst->src[0]);
2276      assign_reg(hw_reg_mapping, &inst->src[1]);
2277   }
2278
2279   this->grf_used = last_grf + 1;
2280}
2281
2282void
2283fs_visitor::assign_regs()
2284{
2285   int last_grf = 0;
2286   int hw_reg_mapping[this->virtual_grf_next + 1];
2287   int base_reg_count = BRW_MAX_GRF - this->first_non_payload_grf;
2288   int class_sizes[base_reg_count];
2289   int class_count = 0;
2290   int aligned_pair_class = -1;
2291
2292   calculate_live_intervals();
2293
2294   /* Set up the register classes.
2295    *
2296    * The base registers store a scalar value.  For texture samples,
2297    * we get virtual GRFs composed of 4 contiguous hw registers.  For
2298    * structures and arrays, we store them as contiguous larger things
2299    * than that, though we should be able to do better most of the
2300    * time.
2301    */
2302   class_sizes[class_count++] = 1;
2303   if (brw->has_pln && intel->gen < 6) {
2304      /* Always set up the (unaligned) pairs for gen5, so we can find
2305       * them for making the aligned pair class.
2306       */
2307      class_sizes[class_count++] = 2;
2308   }
2309   for (int r = 1; r < this->virtual_grf_next; r++) {
2310      int i;
2311
2312      for (i = 0; i < class_count; i++) {
2313	 if (class_sizes[i] == this->virtual_grf_sizes[r])
2314	    break;
2315      }
2316      if (i == class_count) {
2317	 if (this->virtual_grf_sizes[r] >= base_reg_count) {
2318	    fprintf(stderr, "Object too large to register allocate.\n");
2319	    this->fail = true;
2320	 }
2321
2322	 class_sizes[class_count++] = this->virtual_grf_sizes[r];
2323      }
2324   }
2325
2326   int ra_reg_count = 0;
2327   int class_base_reg[class_count];
2328   int class_reg_count[class_count];
2329   int classes[class_count + 1];
2330
2331   for (int i = 0; i < class_count; i++) {
2332      class_base_reg[i] = ra_reg_count;
2333      class_reg_count[i] = base_reg_count - (class_sizes[i] - 1);
2334      ra_reg_count += class_reg_count[i];
2335   }
2336
2337   struct ra_regs *regs = ra_alloc_reg_set(ra_reg_count);
2338   for (int i = 0; i < class_count; i++) {
2339      classes[i] = ra_alloc_reg_class(regs);
2340
2341      for (int i_r = 0; i_r < class_reg_count[i]; i_r++) {
2342	 ra_class_add_reg(regs, classes[i], class_base_reg[i] + i_r);
2343      }
2344
2345      /* Add conflicts between our contiguous registers aliasing
2346       * base regs and other register classes' contiguous registers
2347       * that alias base regs, or the base regs themselves for classes[0].
2348       */
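      /* A worked example, assuming class sizes 1 and 2: the size-2 reg
       * starting at base reg 3 covers base regs 3 and 4, so it conflicts
       * with size-1 regs 3 and 4 and with the size-2 regs starting at 2,
       * 3, and 4.
       */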
2349      for (int c = 0; c <= i; c++) {
2350	 for (int i_r = 0; i_r < class_reg_count[i]; i_r++) {
2351	    for (int c_r = MAX2(0, i_r - (class_sizes[c] - 1));
2352		 c_r < MIN2(class_reg_count[c], i_r + class_sizes[i]);
2353		 c_r++) {
2354
2355	       if (0) {
2356		  printf("%d/%d conflicts %d/%d\n",
2357			 class_sizes[i], this->first_non_payload_grf + i_r,
2358			 class_sizes[c], this->first_non_payload_grf + c_r);
2359	       }
2360
2361	       ra_add_reg_conflict(regs,
2362				   class_base_reg[i] + i_r,
2363				   class_base_reg[c] + c_r);
2364	    }
2365	 }
2366      }
2367   }
2368
2369   /* Add a special class for aligned pairs, which we'll put delta_x/y
2370    * in on gen5 so that we can do PLN.
2371    */
2372   if (brw->has_pln && intel->gen < 6) {
2373      int reg_count = (base_reg_count - 1) / 2;
2374      int unaligned_pair_class = 1;
2375      assert(class_sizes[unaligned_pair_class] == 2);
2376
2377      aligned_pair_class = class_count;
2378      classes[aligned_pair_class] = ra_alloc_reg_class(regs);
2379      class_base_reg[aligned_pair_class] = 0;
2380      class_reg_count[aligned_pair_class] = 0;
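      /* Offset into the unaligned-pair class so that every register we add
       * starts on an even hardware reg, which is what PLN requires of its
       * delta operand pair.
       */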
2381      int start = (this->first_non_payload_grf & 1) ? 1 : 0;
2382
2383      for (int i = 0; i < reg_count; i++) {
2384	 ra_class_add_reg(regs, classes[aligned_pair_class],
2385			  class_base_reg[unaligned_pair_class] + i * 2 + start);
2386      }
2387      class_count++;
2388   }
2389
2390   ra_set_finalize(regs);
2391
2392   struct ra_graph *g = ra_alloc_interference_graph(regs,
2393						    this->virtual_grf_next);
2394   /* Node 0 is just a placeholder to keep virtual_grf[] mapping 1:1
2395    * with nodes.
2396    */
2397   ra_set_node_class(g, 0, classes[0]);
2398
2399   for (int i = 1; i < this->virtual_grf_next; i++) {
2400      for (int c = 0; c < class_count; c++) {
2401	 if (class_sizes[c] == this->virtual_grf_sizes[i]) {
2402	    if (aligned_pair_class >= 0 &&
2403		this->delta_x.reg == i) {
2404	       ra_set_node_class(g, i, classes[aligned_pair_class]);
2405	    } else {
2406	       ra_set_node_class(g, i, classes[c]);
2407	    }
2408	    break;
2409	 }
2410      }
2411
2412      for (int j = 1; j < i; j++) {
2413	 if (virtual_grf_interferes(i, j)) {
2414	    ra_add_node_interference(g, i, j);
2415	 }
2416      }
2417   }
2418
2419   /* FINISHME: Handle spilling */
2420   if (!ra_allocate_no_spills(g)) {
2421      fprintf(stderr, "Failed to allocate registers.\n");
2422      this->fail = true;
2423      return;
2424   }
2425
2426   /* Get the chosen virtual registers for each node, and map virtual
2427    * regs in the register classes back down to real hardware reg
2428    * numbers.
2429    */
2430   hw_reg_mapping[0] = 0; /* unused */
2431   for (int i = 1; i < this->virtual_grf_next; i++) {
2432      int reg = ra_get_node_reg(g, i);
2433      int hw_reg = -1;
2434
2435      for (int c = 0; c < class_count; c++) {
2436	 if (reg >= class_base_reg[c] &&
2437	     reg < class_base_reg[c] + class_reg_count[c]) {
2438	    hw_reg = reg - class_base_reg[c];
2439	    break;
2440	 }
2441      }
2442
2443      assert(hw_reg != -1);
2444      hw_reg_mapping[i] = this->first_non_payload_grf + hw_reg;
2445      last_grf = MAX2(last_grf,
2446		      hw_reg_mapping[i] + this->virtual_grf_sizes[i] - 1);
2447   }
2448
2449   foreach_iter(exec_list_iterator, iter, this->instructions) {
2450      fs_inst *inst = (fs_inst *)iter.get();
2451
2452      assign_reg(hw_reg_mapping, &inst->dst);
2453      assign_reg(hw_reg_mapping, &inst->src[0]);
2454      assign_reg(hw_reg_mapping, &inst->src[1]);
2455   }
2456
2457   this->grf_used = last_grf + 1;
2458
2459   talloc_free(g);
2460   talloc_free(regs);
2461}
2462
2463void
2464fs_visitor::calculate_live_intervals()
2465{
2466   int num_vars = this->virtual_grf_next;
2467   int *def = talloc_array(mem_ctx, int, num_vars);
2468   int *use = talloc_array(mem_ctx, int, num_vars);
2469   int loop_depth = 0;
2470   int loop_start = 0;
2471
2472   for (int i = 0; i < num_vars; i++) {
2473      def[i] = 1 << 30;
2474      use[i] = 0;
2475   }
2476
2477   int ip = 0;
2478   foreach_iter(exec_list_iterator, iter, this->instructions) {
2479      fs_inst *inst = (fs_inst *)iter.get();
2480
2481      if (inst->opcode == BRW_OPCODE_DO) {
2482	 if (loop_depth++ == 0)
2483	    loop_start = ip;
2484      } else if (inst->opcode == BRW_OPCODE_WHILE) {
2485	 loop_depth--;
2486
2487	 if (loop_depth == 0) {
2488	    /* FINISHME:
2489	     *
2490	     * This patches up any vars used within the loop to be live
2491	     * until its end.  That's conservative, as there will often
2492	     * be variables defined and used inside the loop but dead at
2493	     * the end of the loop body.
2494	     */
2495	    for (int i = 0; i < num_vars; i++) {
2496	       if (use[i] == loop_start) {
2497		  use[i] = ip;
2498	       }
2499	    }
2500	 }
2501      } else {
2502	 int eip = ip;
2503
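	 /* Credit any access inside a loop to the top of the outermost
	  * loop, so the interval conservatively covers the whole loop
	  * (the WHILE case above then pushes such uses out to its bottom).
	  */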
2504	 if (loop_depth)
2505	    eip = loop_start;
2506
2507	 for (unsigned int i = 0; i < 3; i++) {
2508	    if (inst->src[i].file == GRF && inst->src[i].reg != 0) {
2509	       def[inst->src[i].reg] = MIN2(def[inst->src[i].reg], eip);
2510	       use[inst->src[i].reg] = MAX2(use[inst->src[i].reg], eip);
2511	    }
2512	 }
2513	 if (inst->dst.file == GRF && inst->dst.reg != 0) {
2514	    def[inst->dst.reg] = MIN2(def[inst->dst.reg], eip);
2515	    use[inst->dst.reg] = MAX2(use[inst->dst.reg], eip);
2516	 }
2517      }
2518
2519      ip++;
2520   }
2521
2522   this->virtual_grf_def = def;
2523   this->virtual_grf_use = use;
2524}
2525
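/** Two virtual GRFs interfere when their [def, use] intervals overlap.
 *
 * E.g. def/use pairs of 4/10 and 8/12 overlap on ip 8..10 and interfere;
 * 4/8 and 8/12 touch only at ip 8 but still count, since start <= end.
 */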
2526bool
2527fs_visitor::virtual_grf_interferes(int a, int b)
2528{
2529   int start = MAX2(this->virtual_grf_def[a], this->virtual_grf_def[b]);
2530   int end = MIN2(this->virtual_grf_use[a], this->virtual_grf_use[b]);
2531
2532   return start <= end;
2533}
2534
2535static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
2536{
2537   struct brw_reg brw_reg;
2538
2539   switch (reg->file) {
2540   case GRF:
2541   case ARF:
2542   case MRF:
2543      brw_reg = brw_vec8_reg(reg->file,
2544			    reg->hw_reg, 0);
2545      brw_reg = retype(brw_reg, reg->type);
2546      break;
2547   case IMM:
2548      switch (reg->type) {
2549      case BRW_REGISTER_TYPE_F:
2550	 brw_reg = brw_imm_f(reg->imm.f);
2551	 break;
2552      case BRW_REGISTER_TYPE_D:
2553	 brw_reg = brw_imm_d(reg->imm.i);
2554	 break;
2555      case BRW_REGISTER_TYPE_UD:
2556	 brw_reg = brw_imm_ud(reg->imm.u);
2557	 break;
2558      default:
2559	 assert(!"not reached");
2560	 break;
2561      }
2562      break;
2563   case FIXED_HW_REG:
2564      brw_reg = reg->fixed_hw_reg;
2565      break;
2566   case BAD_FILE:
2567      /* Probably unused. */
2568      brw_reg = brw_null_reg();
2569      break;
2570   case UNIFORM:
2571      assert(!"not reached");
2572      brw_reg = brw_null_reg();
2573      break;
2574   }
2575   if (reg->abs)
2576      brw_reg = brw_abs(brw_reg);
2577   if (reg->negate)
2578      brw_reg = negate(brw_reg);
2579
2580   return brw_reg;
2581}
2582
2583void
2584fs_visitor::generate_code()
2585{
2586   unsigned int annotation_len = 0;
2587   int last_native_inst = 0;
2588   struct brw_instruction *if_stack[16], *loop_stack[16];
2589   int if_stack_depth = 0, loop_stack_depth = 0;
2590   int if_depth_in_loop[16];
2591
2592   if_depth_in_loop[loop_stack_depth] = 0;
2593
2594   memset(&if_stack, 0, sizeof(if_stack));
2595   foreach_iter(exec_list_iterator, iter, this->instructions) {
2596      fs_inst *inst = (fs_inst *)iter.get();
2597      struct brw_reg src[3], dst;
2598
2599      for (unsigned int i = 0; i < 3; i++) {
2600	 src[i] = brw_reg_from_fs_reg(&inst->src[i]);
2601      }
2602      dst = brw_reg_from_fs_reg(&inst->dst);
2603
2604      brw_set_conditionalmod(p, inst->conditional_mod);
2605      brw_set_predicate_control(p, inst->predicated);
2606
2607      switch (inst->opcode) {
2608      case BRW_OPCODE_MOV:
2609	 brw_MOV(p, dst, src[0]);
2610	 break;
2611      case BRW_OPCODE_ADD:
2612	 brw_ADD(p, dst, src[0], src[1]);
2613	 break;
2614      case BRW_OPCODE_MUL:
2615	 brw_MUL(p, dst, src[0], src[1]);
2616	 break;
2617
2618      case BRW_OPCODE_FRC:
2619	 brw_FRC(p, dst, src[0]);
2620	 break;
2621      case BRW_OPCODE_RNDD:
2622	 brw_RNDD(p, dst, src[0]);
2623	 break;
2624      case BRW_OPCODE_RNDZ:
2625	 brw_RNDZ(p, dst, src[0]);
2626	 break;
2627
2628      case BRW_OPCODE_AND:
2629	 brw_AND(p, dst, src[0], src[1]);
2630	 break;
2631      case BRW_OPCODE_OR:
2632	 brw_OR(p, dst, src[0], src[1]);
2633	 break;
2634      case BRW_OPCODE_XOR:
2635	 brw_XOR(p, dst, src[0], src[1]);
2636	 break;
2637
2638      case BRW_OPCODE_CMP:
2639	 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
2640	 break;
2641      case BRW_OPCODE_SEL:
2642	 brw_SEL(p, dst, src[0], src[1]);
2643	 break;
2644
2645      case BRW_OPCODE_IF:
2646	 assert(if_stack_depth < 16);
2647	 if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8);
2648	 if_depth_in_loop[loop_stack_depth]++;
2649	 if_stack_depth++;
2650	 break;
2651      case BRW_OPCODE_ELSE:
2652	 if_stack[if_stack_depth - 1] =
2653	    brw_ELSE(p, if_stack[if_stack_depth - 1]);
2654	 break;
2655      case BRW_OPCODE_ENDIF:
2656	 if_stack_depth--;
2657	 brw_ENDIF(p , if_stack[if_stack_depth]);
2658	 if_depth_in_loop[loop_stack_depth]--;
2659	 break;
2660
2661      case BRW_OPCODE_DO:
2662	 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
2663	 if_depth_in_loop[loop_stack_depth] = 0;
2664	 break;
2665
2666      case BRW_OPCODE_BREAK:
2667	 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
2668	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2669	 break;
2670      case BRW_OPCODE_CONTINUE:
2671	 brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
2672	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2673	 break;
2674
2675      case BRW_OPCODE_WHILE: {
2676	 struct brw_instruction *inst0, *inst1;
2677	 GLuint br = 1;
2678
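	 /* Jump counts are in units of 64 bits on gen5 and newer, and our
	  * instructions are 128 bits, so each one counts twice.
	  */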
2679	 if (intel->gen >= 5)
2680	    br = 2;
2681
2682	 assert(loop_stack_depth > 0);
2683	 loop_stack_depth--;
2684	 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
2685	 /* patch all the BREAK/CONT instructions from last BGNLOOP */
2686	 /* Patch up all the BREAK/CONT instructions from the last BGNLOOP. */
2687	    inst0--;
2688	    if (inst0->header.opcode == BRW_OPCODE_BREAK &&
2689		inst0->bits3.if_else.jump_count == 0) {
2690	       inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
2691	    }
2692	    else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
2693		     inst0->bits3.if_else.jump_count == 0) {
2694	       inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
2695	    }
2696	 }
2697      }
2698	 break;
2699
2700      case FS_OPCODE_RCP:
2701      case FS_OPCODE_RSQ:
2702      case FS_OPCODE_SQRT:
2703      case FS_OPCODE_EXP2:
2704      case FS_OPCODE_LOG2:
2705      case FS_OPCODE_POW:
2706      case FS_OPCODE_SIN:
2707      case FS_OPCODE_COS:
2708	 generate_math(inst, dst, src);
2709	 break;
2710      case FS_OPCODE_LINTERP:
2711	 generate_linterp(inst, dst, src);
2712	 break;
2713      case FS_OPCODE_TEX:
2714      case FS_OPCODE_TXB:
2715      case FS_OPCODE_TXL:
2716	 generate_tex(inst, dst, src[0]);
2717	 break;
2718      case FS_OPCODE_DISCARD:
2719	 generate_discard(inst, dst /* src0 == dst */);
2720	 break;
2721      case FS_OPCODE_DDX:
2722	 generate_ddx(inst, dst, src[0]);
2723	 break;
2724      case FS_OPCODE_DDY:
2725	 generate_ddy(inst, dst, src[0]);
2726	 break;
2727      case FS_OPCODE_FB_WRITE:
2728	 generate_fb_write(inst);
2729	 break;
2730      default:
2731	 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
2732	    _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
2733			  brw_opcodes[inst->opcode].name);
2734	 } else {
2735	    _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
2736	 }
2737	 this->fail = true;
2738      }
2739
2740      if (annotation_len < p->nr_insn) {
2741	 /* Keep doubling; one fs_inst may emit several native insts. */
2742	 while (annotation_len < p->nr_insn)
2743	    annotation_len = MAX2(annotation_len * 2, 16);
2744
2745	 this->annotation_string = talloc_realloc(this->mem_ctx,
2746						  annotation_string,
2747						  const char *,
2748						  annotation_len);
2749	 this->annotation_ir = talloc_realloc(this->mem_ctx,
2750					      annotation_ir,
2751					      ir_instruction *,
2752					      annotation_len);
2753      }
2754
2755      for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
2756	 this->annotation_string[i] = inst->annotation;
2757	 this->annotation_ir[i] = inst->ir;
2758      }
2759      last_native_inst = p->nr_insn;
2760   }
2761}
2762
2763GLboolean
2764brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
2765{
2766   struct brw_compile *p = &c->func;
2767   struct intel_context *intel = &brw->intel;
2768   GLcontext *ctx = &intel->ctx;
2769   struct brw_shader *shader = NULL;
2770   struct gl_shader_program *prog = ctx->Shader.CurrentProgram;
2771
2772   if (!prog)
2773      return GL_FALSE;
2774
2775   if (!using_new_fs)
2776      return GL_FALSE;
2777
2778   for (unsigned int i = 0; i < prog->_NumLinkedShaders; i++) {
2779      if (prog->_LinkedShaders[i]->Type == GL_FRAGMENT_SHADER) {
2780	 shader = (struct brw_shader *)prog->_LinkedShaders[i];
2781	 break;
2782      }
2783   }
2784   if (!shader)
2785      return GL_FALSE;
2786
2787   /* We always use 8-wide mode, at least for now.  For one, flow
2788    * control only works in 8-wide.  Also, when we're fragment-shader
2789    * bound we're almost always under register pressure as well, so
2790    * 8-wide saves us from the performance cliff of spilling
2791    * regs.
2792    */
2793   c->dispatch_width = 8;
2794
2795   if (INTEL_DEBUG & DEBUG_WM) {
2796      printf("GLSL IR for native fragment shader %d:\n", prog->Name);
2797      _mesa_print_ir(shader->ir, NULL);
2798      printf("\n");
2799   }
2800
2801   /* Now the main event: Visit the shader IR and generate our FS IR for it.
2802    */
2803   fs_visitor v(c, shader);
2804
2805   if (0) {
2806      v.emit_dummy_fs();
2807   } else {
2808      v.calculate_urb_setup();
2809      if (intel->gen < 6)
2810	 v.emit_interpolation_setup_gen4();
2811      else
2812	 v.emit_interpolation_setup_gen6();
2813
2814      /* Generate FS IR for main().  (The visitor only descends into
2815       * functions called "main".)
2816       */
2817      foreach_iter(exec_list_iterator, iter, *shader->ir) {
2818	 ir_instruction *ir = (ir_instruction *)iter.get();
2819	 v.base_ir = ir;
2820	 ir->accept(&v);
2821      }
2822
2823      v.emit_fb_writes();
2824      v.assign_curb_setup();
2825      v.assign_urb_setup();
2826      if (0)
2827	 v.assign_regs_trivial();
2828      else
2829	 v.assign_regs();
2830   }
2831
2832   if (!v.fail)
2833      v.generate_code();
2834
2835   assert(!v.fail); /* FINISHME: Cleanly fail, tested at link time, etc. */
2836
2837   if (v.fail)
2838      return GL_FALSE;
2839
2840   if (INTEL_DEBUG & DEBUG_WM) {
2841      const char *last_annotation_string = NULL;
2842      ir_instruction *last_annotation_ir = NULL;
2843
2844      printf("Native code for fragment shader %d:\n", prog->Name);
2845      for (unsigned int i = 0; i < p->nr_insn; i++) {
2846	 if (last_annotation_ir != v.annotation_ir[i]) {
2847	    last_annotation_ir = v.annotation_ir[i];
2848	    if (last_annotation_ir) {
2849	       printf("   ");
2850	       last_annotation_ir->print();
2851	       printf("\n");
2852	    }
2853	 }
2854	 if (last_annotation_string != v.annotation_string[i]) {
2855	    last_annotation_string = v.annotation_string[i];
2856	    if (last_annotation_string)
2857	       printf("   %s\n", last_annotation_string);
2858	 }
2859	 brw_disasm(stdout, &p->store[i], intel->gen);
2860      }
2861      printf("\n");
2862   }
2863
2864   c->prog_data.total_grf = v.grf_used;
2865   c->prog_data.total_scratch = 0;
2866
2867   return GL_TRUE;
2868}
2869