brw_fs_visitor.cpp revision 2e5a1a254ed81b1d3efa6064f48183eefac784d0
1/*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24/** @file brw_fs_visitor.cpp
25 *
26 * This file supports generating the FS LIR from the GLSL IR.  The LIR
27 * makes it easier to do backend-specific optimizations than doing so
28 * in the GLSL IR or in the native code.
29 */
30extern "C" {
31
32#include <sys/types.h>
33
34#include "main/macros.h"
35#include "main/shaderobj.h"
36#include "main/uniforms.h"
37#include "program/prog_parameter.h"
38#include "program/prog_print.h"
39#include "program/prog_optimize.h"
40#include "program/register_allocate.h"
41#include "program/sampler.h"
42#include "program/hash_table.h"
43#include "brw_context.h"
44#include "brw_eu.h"
45#include "brw_wm.h"
46}
47#include "brw_shader.h"
48#include "brw_fs.h"
49#include "glsl/glsl_types.h"
50#include "glsl/ir_optimization.h"
51#include "glsl/ir_print_visitor.h"
52
53void
54fs_visitor::visit(ir_variable *ir)
55{
56   fs_reg *reg = NULL;
57
58   if (variable_storage(ir))
59      return;
60
61   if (strcmp(ir->name, "gl_FragColor") == 0) {
62      this->frag_color = ir;
63   } else if (strcmp(ir->name, "gl_FragData") == 0) {
64      this->frag_data = ir;
65   } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
66      this->frag_depth = ir;
67   }
68
69   if (ir->mode == ir_var_in) {
70      if (!strcmp(ir->name, "gl_FragCoord")) {
71	 reg = emit_fragcoord_interpolation(ir);
72      } else if (!strcmp(ir->name, "gl_FrontFacing")) {
73	 reg = emit_frontfacing_interpolation(ir);
74      } else {
75	 reg = emit_general_interpolation(ir);
76      }
77      assert(reg);
78      hash_table_insert(this->variable_ht, reg, ir);
79      return;
80   }
81
82   if (ir->mode == ir_var_uniform) {
83      int param_index = c->prog_data.nr_params;
84
85      if (c->dispatch_width == 16) {
86	 if (!variable_storage(ir)) {
87	    fail("Failed to find uniform '%s' in 16-wide\n", ir->name);
88	 }
89	 return;
90      }
91
92      if (!strncmp(ir->name, "gl_", 3)) {
93	 setup_builtin_uniform_values(ir);
94      } else {
95	 setup_uniform_values(ir->location, ir->type);
96      }
97
98      reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index);
99      reg->type = brw_type_for_base_type(ir->type);
100   }
101
102   if (!reg)
103      reg = new(this->mem_ctx) fs_reg(this, ir->type);
104
105   hash_table_insert(this->variable_ht, reg, ir);
106}
107
108void
109fs_visitor::visit(ir_dereference_variable *ir)
110{
111   fs_reg *reg = variable_storage(ir->var);
112   this->result = *reg;
113}
114
115void
116fs_visitor::visit(ir_dereference_record *ir)
117{
118   const glsl_type *struct_type = ir->record->type;
119
120   ir->record->accept(this);
121
122   unsigned int offset = 0;
123   for (unsigned int i = 0; i < struct_type->length; i++) {
124      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
125	 break;
126      offset += type_size(struct_type->fields.structure[i].type);
127   }
128   this->result.reg_offset += offset;
129   this->result.type = brw_type_for_base_type(ir->type);
130}
131
132void
133fs_visitor::visit(ir_dereference_array *ir)
134{
135   ir_constant *index;
136   int element_size;
137
138   ir->array->accept(this);
139   index = ir->array_index->as_constant();
140
141   element_size = type_size(ir->type);
142   this->result.type = brw_type_for_base_type(ir->type);
143
144   if (index) {
145      assert(this->result.file == UNIFORM || this->result.file == GRF);
146      this->result.reg_offset += index->value.i[0] * element_size;
147   } else {
148      assert(!"FINISHME: non-constant array element");
149   }
150}
151
152/* Instruction selection: Produce a MOV.sat instead of
153 * MIN(MAX(val, 0), 1) when possible.
154 */
155bool
156fs_visitor::try_emit_saturate(ir_expression *ir)
157{
158   ir_rvalue *sat_val = ir->as_rvalue_to_saturate();
159
160   if (!sat_val)
161      return false;
162
163   sat_val->accept(this);
164   fs_reg src = this->result;
165
166   this->result = fs_reg(this, ir->type);
167   fs_inst *inst = emit(BRW_OPCODE_MOV, this->result, src);
168   inst->saturate = true;
169
170   return true;
171}
172
173void
174fs_visitor::visit(ir_expression *ir)
175{
176   unsigned int operand;
177   fs_reg op[2], temp;
178   fs_inst *inst;
179
180   assert(ir->get_num_operands() <= 2);
181
182   if (try_emit_saturate(ir))
183      return;
184
185   for (operand = 0; operand < ir->get_num_operands(); operand++) {
186      ir->operands[operand]->accept(this);
187      if (this->result.file == BAD_FILE) {
188	 ir_print_visitor v;
189	 fail("Failed to get tree for expression operand:\n");
190	 ir->operands[operand]->accept(&v);
191      }
192      op[operand] = this->result;
193
194      /* Matrix expression operands should have been broken down to vector
195       * operations already.
196       */
197      assert(!ir->operands[operand]->type->is_matrix());
198      /* And then those vector operands should have been broken down to scalar.
199       */
200      assert(!ir->operands[operand]->type->is_vector());
201   }
202
203   /* Storage for our result.  If our result goes into an assignment, it will
204    * just get copy-propagated out, so no worries.
205    */
206   this->result = fs_reg(this, ir->type);
207
208   switch (ir->operation) {
209   case ir_unop_logic_not:
210      /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
211       * ones complement of the whole register, not just bit 0.
212       */
213      emit(BRW_OPCODE_XOR, this->result, op[0], fs_reg(1));
214      break;
215   case ir_unop_neg:
216      op[0].negate = !op[0].negate;
217      this->result = op[0];
218      break;
219   case ir_unop_abs:
220      op[0].abs = true;
221      op[0].negate = false;
222      this->result = op[0];
223      break;
224   case ir_unop_sign:
225      temp = fs_reg(this, ir->type);
226
227      emit(BRW_OPCODE_MOV, this->result, fs_reg(0.0f));
228
229      inst = emit(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f));
230      inst->conditional_mod = BRW_CONDITIONAL_G;
231      inst = emit(BRW_OPCODE_MOV, this->result, fs_reg(1.0f));
232      inst->predicated = true;
233
234      inst = emit(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f));
235      inst->conditional_mod = BRW_CONDITIONAL_L;
236      inst = emit(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f));
237      inst->predicated = true;
238
239      break;
240   case ir_unop_rcp:
241      emit_math(SHADER_OPCODE_RCP, this->result, op[0]);
242      break;
243
244   case ir_unop_exp2:
245      emit_math(SHADER_OPCODE_EXP2, this->result, op[0]);
246      break;
247   case ir_unop_log2:
248      emit_math(SHADER_OPCODE_LOG2, this->result, op[0]);
249      break;
250   case ir_unop_exp:
251   case ir_unop_log:
252      assert(!"not reached: should be handled by ir_explog_to_explog2");
253      break;
254   case ir_unop_sin:
255   case ir_unop_sin_reduced:
256      emit_math(SHADER_OPCODE_SIN, this->result, op[0]);
257      break;
258   case ir_unop_cos:
259   case ir_unop_cos_reduced:
260      emit_math(SHADER_OPCODE_COS, this->result, op[0]);
261      break;
262
263   case ir_unop_dFdx:
264      emit(FS_OPCODE_DDX, this->result, op[0]);
265      break;
266   case ir_unop_dFdy:
267      emit(FS_OPCODE_DDY, this->result, op[0]);
268      break;
269
270   case ir_binop_add:
271      emit(BRW_OPCODE_ADD, this->result, op[0], op[1]);
272      break;
273   case ir_binop_sub:
274      assert(!"not reached: should be handled by ir_sub_to_add_neg");
275      break;
276
277   case ir_binop_mul:
278      if (ir->type->is_integer()) {
279	 /* For integer multiplication, the MUL uses the low 16 bits
280	  * of one of the operands (src0 on gen6, src1 on gen7).  The
281	  * MACH accumulates in the contribution of the upper 16 bits
282	  * of that operand.
283	  *
284	  * FINISHME: Emit just the MUL if we know an operand is small
285	  * enough.
286	  */
287	 struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
288
289	 emit(BRW_OPCODE_MUL, acc, op[0], op[1]);
290	 emit(BRW_OPCODE_MACH, reg_null_d, op[0], op[1]);
291	 emit(BRW_OPCODE_MOV, this->result, fs_reg(acc));
292      } else {
293	 emit(BRW_OPCODE_MUL, this->result, op[0], op[1]);
294      }
295      break;
296   case ir_binop_div:
297      /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */
298      assert(ir->type->is_integer());
299      emit_math(SHADER_OPCODE_INT_QUOTIENT, this->result, op[0], op[1]);
300      break;
301   case ir_binop_mod:
302      /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
303      assert(ir->type->is_integer());
304      emit_math(SHADER_OPCODE_INT_REMAINDER, this->result, op[0], op[1]);
305      break;
306
307   case ir_binop_less:
308   case ir_binop_greater:
309   case ir_binop_lequal:
310   case ir_binop_gequal:
311   case ir_binop_equal:
312   case ir_binop_all_equal:
313   case ir_binop_nequal:
314   case ir_binop_any_nequal:
315      temp = this->result;
316      /* original gen4 does implicit conversion before comparison. */
317      if (intel->gen < 5)
318	 temp.type = op[0].type;
319
320      inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
321      inst->conditional_mod = brw_conditional_for_comparison(ir->operation);
322      emit(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1));
323      break;
324
325   case ir_binop_logic_xor:
326      emit(BRW_OPCODE_XOR, this->result, op[0], op[1]);
327      break;
328
329   case ir_binop_logic_or:
330      emit(BRW_OPCODE_OR, this->result, op[0], op[1]);
331      break;
332
333   case ir_binop_logic_and:
334      emit(BRW_OPCODE_AND, this->result, op[0], op[1]);
335      break;
336
337   case ir_binop_dot:
338   case ir_unop_any:
339      assert(!"not reached: should be handled by brw_fs_channel_expressions");
340      break;
341
342   case ir_unop_noise:
343      assert(!"not reached: should be handled by lower_noise");
344      break;
345
346   case ir_quadop_vector:
347      assert(!"not reached: should be handled by lower_quadop_vector");
348      break;
349
350   case ir_unop_sqrt:
351      emit_math(SHADER_OPCODE_SQRT, this->result, op[0]);
352      break;
353
354   case ir_unop_rsq:
355      emit_math(SHADER_OPCODE_RSQ, this->result, op[0]);
356      break;
357
358   case ir_unop_i2u:
359      op[0].type = BRW_REGISTER_TYPE_UD;
360      this->result = op[0];
361      break;
362   case ir_unop_u2i:
363      op[0].type = BRW_REGISTER_TYPE_D;
364      this->result = op[0];
365      break;
366   case ir_unop_i2f:
367   case ir_unop_u2f:
368   case ir_unop_b2f:
369   case ir_unop_b2i:
370   case ir_unop_f2i:
371      emit(BRW_OPCODE_MOV, this->result, op[0]);
372      break;
373   case ir_unop_f2b:
374   case ir_unop_i2b:
375      temp = this->result;
376      /* original gen4 does implicit conversion before comparison. */
377      if (intel->gen < 5)
378	 temp.type = op[0].type;
379
380      inst = emit(BRW_OPCODE_CMP, temp, op[0], fs_reg(0.0f));
381      inst->conditional_mod = BRW_CONDITIONAL_NZ;
382      inst = emit(BRW_OPCODE_AND, this->result, this->result, fs_reg(1));
383      break;
384
385   case ir_unop_trunc:
386      emit(BRW_OPCODE_RNDZ, this->result, op[0]);
387      break;
388   case ir_unop_ceil:
389      op[0].negate = !op[0].negate;
390      inst = emit(BRW_OPCODE_RNDD, this->result, op[0]);
391      this->result.negate = true;
392      break;
393   case ir_unop_floor:
394      inst = emit(BRW_OPCODE_RNDD, this->result, op[0]);
395      break;
396   case ir_unop_fract:
397      inst = emit(BRW_OPCODE_FRC, this->result, op[0]);
398      break;
399   case ir_unop_round_even:
400      emit(BRW_OPCODE_RNDE, this->result, op[0]);
401      break;
402
403   case ir_binop_min:
404      if (intel->gen >= 6) {
405	 inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]);
406	 inst->conditional_mod = BRW_CONDITIONAL_L;
407      } else {
408	 /* Unalias the destination */
409	 this->result = fs_reg(this, ir->type);
410
411	 inst = emit(BRW_OPCODE_CMP, this->result, op[0], op[1]);
412	 inst->conditional_mod = BRW_CONDITIONAL_L;
413
414	 inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]);
415	 inst->predicated = true;
416      }
417      break;
418   case ir_binop_max:
419      if (intel->gen >= 6) {
420	 inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]);
421	 inst->conditional_mod = BRW_CONDITIONAL_GE;
422      } else {
423	 /* Unalias the destination */
424	 this->result = fs_reg(this, ir->type);
425
426	 inst = emit(BRW_OPCODE_CMP, this->result, op[0], op[1]);
427	 inst->conditional_mod = BRW_CONDITIONAL_G;
428
429	 inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]);
430	 inst->predicated = true;
431      }
432      break;
433
434   case ir_binop_pow:
435      emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]);
436      break;
437
438   case ir_unop_bit_not:
439      inst = emit(BRW_OPCODE_NOT, this->result, op[0]);
440      break;
441   case ir_binop_bit_and:
442      inst = emit(BRW_OPCODE_AND, this->result, op[0], op[1]);
443      break;
444   case ir_binop_bit_xor:
445      inst = emit(BRW_OPCODE_XOR, this->result, op[0], op[1]);
446      break;
447   case ir_binop_bit_or:
448      inst = emit(BRW_OPCODE_OR, this->result, op[0], op[1]);
449      break;
450
451   case ir_binop_lshift:
452      inst = emit(BRW_OPCODE_SHL, this->result, op[0], op[1]);
453      break;
454
455   case ir_binop_rshift:
456      if (ir->type->base_type == GLSL_TYPE_INT)
457	 inst = emit(BRW_OPCODE_ASR, this->result, op[0], op[1]);
458      else
459	 inst = emit(BRW_OPCODE_SHR, this->result, op[0], op[1]);
460      break;
461   }
462}
463
464void
465fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r,
466				   const glsl_type *type, bool predicated)
467{
468   switch (type->base_type) {
469   case GLSL_TYPE_FLOAT:
470   case GLSL_TYPE_UINT:
471   case GLSL_TYPE_INT:
472   case GLSL_TYPE_BOOL:
473      for (unsigned int i = 0; i < type->components(); i++) {
474	 l.type = brw_type_for_base_type(type);
475	 r.type = brw_type_for_base_type(type);
476
477	 if (predicated || !l.equals(&r)) {
478	    fs_inst *inst = emit(BRW_OPCODE_MOV, l, r);
479	    inst->predicated = predicated;
480	 }
481
482	 l.reg_offset++;
483	 r.reg_offset++;
484      }
485      break;
486   case GLSL_TYPE_ARRAY:
487      for (unsigned int i = 0; i < type->length; i++) {
488	 emit_assignment_writes(l, r, type->fields.array, predicated);
489      }
490      break;
491
492   case GLSL_TYPE_STRUCT:
493      for (unsigned int i = 0; i < type->length; i++) {
494	 emit_assignment_writes(l, r, type->fields.structure[i].type,
495				predicated);
496      }
497      break;
498
499   case GLSL_TYPE_SAMPLER:
500      break;
501
502   default:
503      assert(!"not reached");
504      break;
505   }
506}
507
508/* If the RHS processing resulted in an instruction generating a
509 * temporary value, and it would be easy to rewrite the instruction to
510 * generate its result right into the LHS instead, do so.  This ends
511 * up reliably removing instructions where it can be tricky to do so
512 * later without real UD chain information.
513 */
514bool
515fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
516                                   fs_reg dst,
517                                   fs_reg src,
518                                   fs_inst *pre_rhs_inst,
519                                   fs_inst *last_rhs_inst)
520{
521   if (pre_rhs_inst == last_rhs_inst)
522      return false; /* No instructions generated to work with. */
523
524   /* Only attempt if we're doing a direct assignment. */
525   if (ir->condition ||
526       !(ir->lhs->type->is_scalar() ||
527        (ir->lhs->type->is_vector() &&
528         ir->write_mask == (1 << ir->lhs->type->vector_elements) - 1)))
529      return false;
530
531   /* Make sure the last instruction generated our source reg. */
532   if (last_rhs_inst->predicated ||
533       last_rhs_inst->force_uncompressed ||
534       last_rhs_inst->force_sechalf ||
535       !src.equals(&last_rhs_inst->dst))
536      return false;
537
538   /* Success!  Rewrite the instruction. */
539   last_rhs_inst->dst = dst;
540
541   return true;
542}
543
544void
545fs_visitor::visit(ir_assignment *ir)
546{
547   fs_reg l, r;
548   fs_inst *inst;
549
550   /* FINISHME: arrays on the lhs */
551   ir->lhs->accept(this);
552   l = this->result;
553
554   fs_inst *pre_rhs_inst = (fs_inst *) this->instructions.get_tail();
555
556   ir->rhs->accept(this);
557   r = this->result;
558
559   fs_inst *last_rhs_inst = (fs_inst *) this->instructions.get_tail();
560
561   assert(l.file != BAD_FILE);
562   assert(r.file != BAD_FILE);
563
564   if (try_rewrite_rhs_to_dst(ir, l, r, pre_rhs_inst, last_rhs_inst))
565      return;
566
567   if (ir->condition) {
568      emit_bool_to_cond_code(ir->condition);
569   }
570
571   if (ir->lhs->type->is_scalar() ||
572       ir->lhs->type->is_vector()) {
573      for (int i = 0; i < ir->lhs->type->vector_elements; i++) {
574	 if (ir->write_mask & (1 << i)) {
575	    inst = emit(BRW_OPCODE_MOV, l, r);
576	    if (ir->condition)
577	       inst->predicated = true;
578	    r.reg_offset++;
579	 }
580	 l.reg_offset++;
581      }
582   } else {
583      emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL);
584   }
585}
586
587fs_inst *
588fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
589			      int sampler)
590{
591   int mlen;
592   int base_mrf = 1;
593   bool simd16 = false;
594   fs_reg orig_dst;
595
596   /* g0 header. */
597   mlen = 1;
598
599   if (ir->shadow_comparitor && ir->op != ir_txd) {
600      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
601	 fs_inst *inst = emit(BRW_OPCODE_MOV,
602			      fs_reg(MRF, base_mrf + mlen + i), coordinate);
603	 if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
604	    inst->saturate = true;
605
606	 coordinate.reg_offset++;
607      }
608      /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
609      mlen += 3;
610
611      if (ir->op == ir_tex) {
612	 /* There's no plain shadow compare message, so we use shadow
613	  * compare with a bias of 0.0.
614	  */
615	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), fs_reg(0.0f));
616	 mlen++;
617      } else if (ir->op == ir_txb) {
618	 ir->lod_info.bias->accept(this);
619	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
620	 mlen++;
621      } else {
622	 assert(ir->op == ir_txl);
623	 ir->lod_info.lod->accept(this);
624	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
625	 mlen++;
626      }
627
628      ir->shadow_comparitor->accept(this);
629      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
630      mlen++;
631   } else if (ir->op == ir_tex) {
632      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
633	 fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i),
634			      coordinate);
635	 if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
636	    inst->saturate = true;
637	 coordinate.reg_offset++;
638      }
639      /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
640      mlen += 3;
641   } else if (ir->op == ir_txd) {
642      ir->lod_info.grad.dPdx->accept(this);
643      fs_reg dPdx = this->result;
644
645      ir->lod_info.grad.dPdy->accept(this);
646      fs_reg dPdy = this->result;
647
648      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
649	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate);
650	 coordinate.reg_offset++;
651      }
652      /* the slots for u and v are always present, but r is optional */
653      mlen += MAX2(ir->coordinate->type->vector_elements, 2);
654
655      /*  P   = u, v, r
656       * dPdx = dudx, dvdx, drdx
657       * dPdy = dudy, dvdy, drdy
658       *
659       * 1-arg: Does not exist.
660       *
661       * 2-arg: dudx   dvdx   dudy   dvdy
662       *        dPdx.x dPdx.y dPdy.x dPdy.y
663       *        m4     m5     m6     m7
664       *
665       * 3-arg: dudx   dvdx   drdx   dudy   dvdy   drdy
666       *        dPdx.x dPdx.y dPdx.z dPdy.x dPdy.y dPdy.z
667       *        m5     m6     m7     m8     m9     m10
668       */
669      for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) {
670	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdx);
671	 dPdx.reg_offset++;
672      }
673      mlen += MAX2(ir->lod_info.grad.dPdx->type->vector_elements, 2);
674
675      for (int i = 0; i < ir->lod_info.grad.dPdy->type->vector_elements; i++) {
676	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdy);
677	 dPdy.reg_offset++;
678      }
679      mlen += MAX2(ir->lod_info.grad.dPdy->type->vector_elements, 2);
680   } else if (ir->op == ir_txs) {
681      /* There's no SIMD8 resinfo message on Gen4.  Use SIMD16 instead. */
682      simd16 = true;
683      ir->lod_info.lod->accept(this);
684      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result);
685      mlen += 2;
686   } else {
687      /* Oh joy.  gen4 doesn't have SIMD8 non-shadow-compare bias/lod
688       * instructions.  We'll need to do SIMD16 here.
689       */
690      simd16 = true;
691      assert(ir->op == ir_txb || ir->op == ir_txl || ir->op == ir_txf);
692
693      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
694	 fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF,
695						     base_mrf + mlen + i * 2,
696						     coordinate.type),
697			      coordinate);
698	 if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
699	    inst->saturate = true;
700	 coordinate.reg_offset++;
701      }
702
703      /* Initialize the rest of u/v/r with 0.0.  Empirically, this seems to
704       * be necessary for TXF (ld), but seems wise to do for all messages.
705       */
706      for (int i = ir->coordinate->type->vector_elements; i < 3; i++) {
707	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * 2), fs_reg(0.0f));
708      }
709
710      /* lod/bias appears after u/v/r. */
711      mlen += 6;
712
713      if (ir->op == ir_txb) {
714	 ir->lod_info.bias->accept(this);
715	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
716	 mlen++;
717      } else {
718	 ir->lod_info.lod->accept(this);
719	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, this->result.type),
720			      this->result);
721	 mlen++;
722      }
723
724      /* The unused upper half. */
725      mlen++;
726   }
727
728   if (simd16) {
729      /* Now, since we're doing simd16, the return is 2 interleaved
730       * vec4s where the odd-indexed ones are junk. We'll need to move
731       * this weirdness around to the expected layout.
732       */
733      orig_dst = dst;
734      const glsl_type *vec_type =
735	 glsl_type::get_instance(ir->type->base_type, 4, 1);
736      dst = fs_reg(this, glsl_type::get_array_instance(vec_type, 2));
737      dst.type = intel->is_g4x ? brw_type_for_base_type(ir->type)
738			       : BRW_REGISTER_TYPE_F;
739   }
740
741   fs_inst *inst = NULL;
742   switch (ir->op) {
743   case ir_tex:
744      inst = emit(FS_OPCODE_TEX, dst);
745      break;
746   case ir_txb:
747      inst = emit(FS_OPCODE_TXB, dst);
748      break;
749   case ir_txl:
750      inst = emit(FS_OPCODE_TXL, dst);
751      break;
752   case ir_txd:
753      inst = emit(FS_OPCODE_TXD, dst);
754      break;
755   case ir_txs:
756      inst = emit(FS_OPCODE_TXS, dst);
757      break;
758   case ir_txf:
759      inst = emit(FS_OPCODE_TXF, dst);
760      break;
761   }
762   inst->base_mrf = base_mrf;
763   inst->mlen = mlen;
764   inst->header_present = true;
765
766   if (simd16) {
767      for (int i = 0; i < 4; i++) {
768	 emit(BRW_OPCODE_MOV, orig_dst, dst);
769	 orig_dst.reg_offset++;
770	 dst.reg_offset += 2;
771      }
772   }
773
774   return inst;
775}
776
777/* gen5's sampler has slots for u, v, r, array index, then optional
778 * parameters like shadow comparitor or LOD bias.  If optional
779 * parameters aren't present, those base slots are optional and don't
780 * need to be included in the message.
781 *
782 * We don't fill in the unnecessary slots regardless, which may look
783 * surprising in the disassembly.
784 */
785fs_inst *
786fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
787			      int sampler)
788{
789   int mlen = 0;
790   int base_mrf = 2;
791   int reg_width = c->dispatch_width / 8;
792   bool header_present = false;
793   const int vector_elements =
794      ir->coordinate ? ir->coordinate->type->vector_elements : 0;
795
796   if (ir->offset) {
797      /* The offsets set up by the ir_texture visitor are in the
798       * m1 header, so we can't go headerless.
799       */
800      header_present = true;
801      mlen++;
802      base_mrf--;
803   }
804
805   for (int i = 0; i < vector_elements; i++) {
806      fs_inst *inst = emit(BRW_OPCODE_MOV,
807			   fs_reg(MRF, base_mrf + mlen + i * reg_width,
808				  coordinate.type),
809			   coordinate);
810      if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
811	 inst->saturate = true;
812      coordinate.reg_offset++;
813   }
814   mlen += vector_elements * reg_width;
815
816   if (ir->shadow_comparitor && ir->op != ir_txd) {
817      mlen = MAX2(mlen, header_present + 4 * reg_width);
818
819      ir->shadow_comparitor->accept(this);
820      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
821      mlen += reg_width;
822   }
823
824   fs_inst *inst = NULL;
825   switch (ir->op) {
826   case ir_tex:
827      inst = emit(FS_OPCODE_TEX, dst);
828      break;
829   case ir_txb:
830      ir->lod_info.bias->accept(this);
831      mlen = MAX2(mlen, header_present + 4 * reg_width);
832      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
833      mlen += reg_width;
834
835      inst = emit(FS_OPCODE_TXB, dst);
836
837      break;
838   case ir_txl:
839      ir->lod_info.lod->accept(this);
840      mlen = MAX2(mlen, header_present + 4 * reg_width);
841      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
842      mlen += reg_width;
843
844      inst = emit(FS_OPCODE_TXL, dst);
845      break;
846   case ir_txd: {
847      ir->lod_info.grad.dPdx->accept(this);
848      fs_reg dPdx = this->result;
849
850      ir->lod_info.grad.dPdy->accept(this);
851      fs_reg dPdy = this->result;
852
853      mlen = MAX2(mlen, header_present + 4 * reg_width); /* skip over 'ai' */
854
855      /**
856       *  P   =  u,    v,    r
857       * dPdx = dudx, dvdx, drdx
858       * dPdy = dudy, dvdy, drdy
859       *
860       * Load up these values:
861       * - dudx   dudy   dvdx   dvdy   drdx   drdy
862       * - dPdx.x dPdy.x dPdx.y dPdy.y dPdx.z dPdy.z
863       */
864      for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) {
865	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdx);
866	 dPdx.reg_offset++;
867	 mlen += reg_width;
868
869	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdy);
870	 dPdy.reg_offset++;
871	 mlen += reg_width;
872      }
873
874      inst = emit(FS_OPCODE_TXD, dst);
875      break;
876   }
877   case ir_txs:
878      ir->lod_info.lod->accept(this);
879      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result);
880      mlen += reg_width;
881      inst = emit(FS_OPCODE_TXS, dst);
882      break;
883   case ir_txf:
884      mlen = header_present + 4 * reg_width;
885
886      ir->lod_info.lod->accept(this);
887      emit(BRW_OPCODE_MOV,
888	   fs_reg(MRF, base_mrf + mlen - reg_width, BRW_REGISTER_TYPE_UD),
889	   this->result);
890      inst = emit(FS_OPCODE_TXF, dst);
891      break;
892   }
893   inst->base_mrf = base_mrf;
894   inst->mlen = mlen;
895   inst->header_present = header_present;
896
897   if (mlen > 11) {
898      fail("Message length >11 disallowed by hardware\n");
899   }
900
901   return inst;
902}
903
904fs_inst *
905fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
906			      int sampler)
907{
908   int mlen = 0;
909   int base_mrf = 2;
910   int reg_width = c->dispatch_width / 8;
911   bool header_present = false;
912
913   if (ir->offset) {
914      /* The offsets set up by the ir_texture visitor are in the
915       * m1 header, so we can't go headerless.
916       */
917      header_present = true;
918      mlen++;
919      base_mrf--;
920   }
921
922   if (ir->shadow_comparitor && ir->op != ir_txd) {
923      ir->shadow_comparitor->accept(this);
924      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
925      mlen += reg_width;
926   }
927
928   /* Set up the LOD info */
929   switch (ir->op) {
930   case ir_tex:
931      break;
932   case ir_txb:
933      ir->lod_info.bias->accept(this);
934      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
935      mlen += reg_width;
936      break;
937   case ir_txl:
938      ir->lod_info.lod->accept(this);
939      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
940      mlen += reg_width;
941      break;
942   case ir_txd: {
943      if (c->dispatch_width == 16)
944	 fail("Gen7 does not support sample_d/sample_d_c in SIMD16 mode.");
945
946      ir->lod_info.grad.dPdx->accept(this);
947      fs_reg dPdx = this->result;
948
949      ir->lod_info.grad.dPdy->accept(this);
950      fs_reg dPdy = this->result;
951
952      /* Load dPdx and the coordinate together:
953       * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z
954       */
955      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
956	 fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
957			      coordinate);
958	 if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
959	    inst->saturate = true;
960	 coordinate.reg_offset++;
961	 mlen += reg_width;
962
963	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdx);
964	 dPdx.reg_offset++;
965	 mlen += reg_width;
966
967	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdy);
968	 dPdy.reg_offset++;
969	 mlen += reg_width;
970      }
971      break;
972   }
973   case ir_txs:
974      ir->lod_info.lod->accept(this);
975      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result);
976      mlen += reg_width;
977      break;
978   case ir_txf:
979      /* Unfortunately, the parameters for LD are intermixed: u, lod, v, r. */
980      emit(BRW_OPCODE_MOV,
981	   fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D), coordinate);
982      coordinate.reg_offset++;
983      mlen += reg_width;
984
985      ir->lod_info.lod->accept(this);
986      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D), this->result);
987      mlen += reg_width;
988
989      for (int i = 1; i < ir->coordinate->type->vector_elements; i++) {
990	 emit(BRW_OPCODE_MOV,
991	      fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D), coordinate);
992	 coordinate.reg_offset++;
993	 mlen += reg_width;
994      }
995      break;
996   }
997
998   /* Set up the coordinate (except for cases where it was done above) */
999   if (ir->op != ir_txd && ir->op != ir_txs && ir->op != ir_txf) {
1000      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
1001	 fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
1002			      coordinate);
1003	 if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
1004	    inst->saturate = true;
1005	 coordinate.reg_offset++;
1006	 mlen += reg_width;
1007      }
1008   }
1009
1010   /* Generate the SEND */
1011   fs_inst *inst = NULL;
1012   switch (ir->op) {
1013   case ir_tex: inst = emit(FS_OPCODE_TEX, dst); break;
1014   case ir_txb: inst = emit(FS_OPCODE_TXB, dst); break;
1015   case ir_txl: inst = emit(FS_OPCODE_TXL, dst); break;
1016   case ir_txd: inst = emit(FS_OPCODE_TXD, dst); break;
1017   case ir_txf: inst = emit(FS_OPCODE_TXF, dst); break;
1018   case ir_txs: inst = emit(FS_OPCODE_TXS, dst); break;
1019   }
1020   inst->base_mrf = base_mrf;
1021   inst->mlen = mlen;
1022   inst->header_present = header_present;
1023
1024   if (mlen > 11) {
1025      fail("Message length >11 disallowed by hardware\n");
1026   }
1027
1028   return inst;
1029}
1030
1031void
1032fs_visitor::visit(ir_texture *ir)
1033{
1034   fs_inst *inst = NULL;
1035
1036   int sampler = _mesa_get_sampler_uniform_value(ir->sampler, prog, &fp->Base);
1037   sampler = fp->Base.SamplerUnits[sampler];
1038
1039   /* Our hardware doesn't have a sample_d_c message, so shadow compares
1040    * for textureGrad/TXD need to be emulated with instructions.
1041    */
1042   bool hw_compare_supported = ir->op != ir_txd;
1043   if (ir->shadow_comparitor && !hw_compare_supported) {
1044      assert(c->key.compare_funcs[sampler] != GL_NONE);
1045      /* No need to even sample for GL_ALWAYS or GL_NEVER...bail early */
1046      if (c->key.compare_funcs[sampler] == GL_ALWAYS)
1047	 return swizzle_result(ir, fs_reg(1.0f), sampler);
1048      else if (c->key.compare_funcs[sampler] == GL_NEVER)
1049	 return swizzle_result(ir, fs_reg(0.0f), sampler);
1050   }
1051
1052   if (ir->coordinate)
1053      ir->coordinate->accept(this);
1054   fs_reg coordinate = this->result;
1055
1056   if (ir->offset != NULL) {
1057      ir_constant *offset = ir->offset->as_constant();
1058      assert(offset != NULL);
1059
1060      signed char offsets[3];
1061      for (unsigned i = 0; i < ir->offset->type->vector_elements; i++)
1062	 offsets[i] = (signed char) offset->value.i[i];
1063
1064      /* Combine all three offsets into a single unsigned dword:
1065       *
1066       *    bits 11:8 - U Offset (X component)
1067       *    bits  7:4 - V Offset (Y component)
1068       *    bits  3:0 - R Offset (Z component)
1069       */
1070      unsigned offset_bits = 0;
1071      for (unsigned i = 0; i < ir->offset->type->vector_elements; i++) {
1072	 const unsigned shift = 4 * (2 - i);
1073	 offset_bits |= (offsets[i] << shift) & (0xF << shift);
1074      }
1075
1076      /* Explicitly set up the message header by copying g0 to msg reg m1. */
1077      emit(BRW_OPCODE_MOV, fs_reg(MRF, 1, BRW_REGISTER_TYPE_UD),
1078	   fs_reg(GRF, 0, BRW_REGISTER_TYPE_UD));
1079
1080      /* Then set the offset bits in DWord 2 of the message header. */
1081      emit(BRW_OPCODE_MOV,
1082	   fs_reg(retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 1, 2),
1083			 BRW_REGISTER_TYPE_UD)),
1084	   fs_reg(brw_imm_uw(offset_bits)));
1085   }
1086
1087   /* Should be lowered by do_lower_texture_projection */
1088   assert(!ir->projector);
1089
1090   /* The 965 requires the EU to do the normalization of GL rectangle
1091    * texture coordinates.  We use the program parameter state
1092    * tracking to get the scaling factor.
1093    */
1094   if (intel->gen < 6 &&
1095       ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) {
1096      struct gl_program_parameter_list *params = c->fp->program.Base.Parameters;
1097      int tokens[STATE_LENGTH] = {
1098	 STATE_INTERNAL,
1099	 STATE_TEXRECT_SCALE,
1100	 sampler,
1101	 0,
1102	 0
1103      };
1104
1105      if (c->dispatch_width == 16) {
1106	 fail("rectangle scale uniform setup not supported on 16-wide\n");
1107	 this->result = fs_reg(this, ir->type);
1108	 return;
1109      }
1110
1111      c->prog_data.param_convert[c->prog_data.nr_params] =
1112	 PARAM_NO_CONVERT;
1113      c->prog_data.param_convert[c->prog_data.nr_params + 1] =
1114	 PARAM_NO_CONVERT;
1115
1116      fs_reg scale_x = fs_reg(UNIFORM, c->prog_data.nr_params);
1117      fs_reg scale_y = fs_reg(UNIFORM, c->prog_data.nr_params + 1);
1118      GLuint index = _mesa_add_state_reference(params,
1119					       (gl_state_index *)tokens);
1120
1121      this->param_index[c->prog_data.nr_params] = index;
1122      this->param_offset[c->prog_data.nr_params] = 0;
1123      c->prog_data.nr_params++;
1124      this->param_index[c->prog_data.nr_params] = index;
1125      this->param_offset[c->prog_data.nr_params] = 1;
1126      c->prog_data.nr_params++;
1127
1128      fs_reg dst = fs_reg(this, ir->coordinate->type);
1129      fs_reg src = coordinate;
1130      coordinate = dst;
1131
1132      emit(BRW_OPCODE_MUL, dst, src, scale_x);
1133      dst.reg_offset++;
1134      src.reg_offset++;
1135      emit(BRW_OPCODE_MUL, dst, src, scale_y);
1136   }
1137
1138   /* Writemasking doesn't eliminate channels on SIMD8 texture
1139    * samples, so don't worry about them.
1140    */
1141   fs_reg dst = fs_reg(this, glsl_type::get_instance(ir->type->base_type, 4, 1));
1142
1143   if (intel->gen >= 7) {
1144      inst = emit_texture_gen7(ir, dst, coordinate, sampler);
1145   } else if (intel->gen >= 5) {
1146      inst = emit_texture_gen5(ir, dst, coordinate, sampler);
1147   } else {
1148      inst = emit_texture_gen4(ir, dst, coordinate, sampler);
1149   }
1150
1151   /* If there's an offset, we already set up m1.  To avoid the implied move,
1152    * use the null register.  Otherwise, we want an implied move from g0.
1153    */
1154   if (ir->offset != NULL || !inst->header_present)
1155      inst->src[0] = reg_undef;
1156   else
1157      inst->src[0] = fs_reg(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
1158
1159   inst->sampler = sampler;
1160
1161   if (ir->shadow_comparitor) {
1162      if (hw_compare_supported) {
1163	 inst->shadow_compare = true;
1164      } else {
1165	 ir->shadow_comparitor->accept(this);
1166	 fs_reg ref = this->result;
1167
1168	 fs_reg value = dst;
1169	 dst = fs_reg(this, glsl_type::vec4_type);
1170
1171	 /* FINISHME: This needs to be done pre-filtering. */
1172
1173	 uint32_t conditional = 0;
1174	 switch (c->key.compare_funcs[sampler]) {
1175	 /* GL_ALWAYS and GL_NEVER were handled at the top of the function */
1176	 case GL_LESS:     conditional = BRW_CONDITIONAL_L;   break;
1177	 case GL_GREATER:  conditional = BRW_CONDITIONAL_G;   break;
1178	 case GL_LEQUAL:   conditional = BRW_CONDITIONAL_LE;  break;
1179	 case GL_GEQUAL:   conditional = BRW_CONDITIONAL_GE;  break;
1180	 case GL_EQUAL:    conditional = BRW_CONDITIONAL_EQ;  break;
1181	 case GL_NOTEQUAL: conditional = BRW_CONDITIONAL_NEQ; break;
1182	 default: assert(!"Should not get here: bad shadow compare function");
1183	 }
1184
1185	 /* Use conditional moves to load 0 or 1 as the result */
1186	 this->current_annotation = "manual shadow comparison";
1187	 for (int i = 0; i < 4; i++) {
1188	    inst = emit(BRW_OPCODE_MOV, dst, fs_reg(0.0f));
1189
1190	    inst = emit(BRW_OPCODE_CMP, reg_null_f, ref, value);
1191	    inst->conditional_mod = conditional;
1192
1193	    inst = emit(BRW_OPCODE_MOV, dst, fs_reg(1.0f));
1194	    inst->predicated = true;
1195
1196	    dst.reg_offset++;
1197	    value.reg_offset++;
1198	 }
1199	 dst.reg_offset = 0;
1200      }
1201   }
1202
1203   swizzle_result(ir, dst, sampler);
1204}
1205
1206/**
1207 * Swizzle the result of a texture result.  This is necessary for
1208 * EXT_texture_swizzle as well as DEPTH_TEXTURE_MODE for shadow comparisons.
1209 */
1210void
1211fs_visitor::swizzle_result(ir_texture *ir, fs_reg orig_val, int sampler)
1212{
1213   this->result = orig_val;
1214
1215   if (ir->type == glsl_type::float_type) {
1216      /* Ignore DEPTH_TEXTURE_MODE swizzling. */
1217      assert(ir->sampler->type->sampler_shadow);
1218   } else if (c->key.tex_swizzles[sampler] != SWIZZLE_NOOP) {
1219      fs_reg swizzled_result = fs_reg(this, glsl_type::vec4_type);
1220
1221      for (int i = 0; i < 4; i++) {
1222	 int swiz = GET_SWZ(c->key.tex_swizzles[sampler], i);
1223	 fs_reg l = swizzled_result;
1224	 l.reg_offset += i;
1225
1226	 if (swiz == SWIZZLE_ZERO) {
1227	    emit(BRW_OPCODE_MOV, l, fs_reg(0.0f));
1228	 } else if (swiz == SWIZZLE_ONE) {
1229	    emit(BRW_OPCODE_MOV, l, fs_reg(1.0f));
1230	 } else {
1231	    fs_reg r = orig_val;
1232	    r.reg_offset += GET_SWZ(c->key.tex_swizzles[sampler], i);
1233	    emit(BRW_OPCODE_MOV, l, r);
1234	 }
1235      }
1236      this->result = swizzled_result;
1237   }
1238}
1239
1240void
1241fs_visitor::visit(ir_swizzle *ir)
1242{
1243   ir->val->accept(this);
1244   fs_reg val = this->result;
1245
1246   if (ir->type->vector_elements == 1) {
1247      this->result.reg_offset += ir->mask.x;
1248      return;
1249   }
1250
1251   fs_reg result = fs_reg(this, ir->type);
1252   this->result = result;
1253
1254   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
1255      fs_reg channel = val;
1256      int swiz = 0;
1257
1258      switch (i) {
1259      case 0:
1260	 swiz = ir->mask.x;
1261	 break;
1262      case 1:
1263	 swiz = ir->mask.y;
1264	 break;
1265      case 2:
1266	 swiz = ir->mask.z;
1267	 break;
1268      case 3:
1269	 swiz = ir->mask.w;
1270	 break;
1271      }
1272
1273      channel.reg_offset += swiz;
1274      emit(BRW_OPCODE_MOV, result, channel);
1275      result.reg_offset++;
1276   }
1277}
1278
1279void
1280fs_visitor::visit(ir_discard *ir)
1281{
1282   assert(ir->condition == NULL); /* FINISHME */
1283
1284   emit(FS_OPCODE_DISCARD);
1285   kill_emitted = true;
1286}
1287
1288void
1289fs_visitor::visit(ir_constant *ir)
1290{
1291   /* Set this->result to reg at the bottom of the function because some code
1292    * paths will cause this visitor to be applied to other fields.  This will
1293    * cause the value stored in this->result to be modified.
1294    *
1295    * Make reg constant so that it doesn't get accidentally modified along the
1296    * way.  Yes, I actually had this problem. :(
1297    */
1298   const fs_reg reg(this, ir->type);
1299   fs_reg dst_reg = reg;
1300
1301   if (ir->type->is_array()) {
1302      const unsigned size = type_size(ir->type->fields.array);
1303
1304      for (unsigned i = 0; i < ir->type->length; i++) {
1305	 ir->array_elements[i]->accept(this);
1306	 fs_reg src_reg = this->result;
1307
1308	 dst_reg.type = src_reg.type;
1309	 for (unsigned j = 0; j < size; j++) {
1310	    emit(BRW_OPCODE_MOV, dst_reg, src_reg);
1311	    src_reg.reg_offset++;
1312	    dst_reg.reg_offset++;
1313	 }
1314      }
1315   } else if (ir->type->is_record()) {
1316      foreach_list(node, &ir->components) {
1317	 ir_instruction *const field = (ir_instruction *) node;
1318	 const unsigned size = type_size(field->type);
1319
1320	 field->accept(this);
1321	 fs_reg src_reg = this->result;
1322
1323	 dst_reg.type = src_reg.type;
1324	 for (unsigned j = 0; j < size; j++) {
1325	    emit(BRW_OPCODE_MOV, dst_reg, src_reg);
1326	    src_reg.reg_offset++;
1327	    dst_reg.reg_offset++;
1328	 }
1329      }
1330   } else {
1331      const unsigned size = type_size(ir->type);
1332
1333      for (unsigned i = 0; i < size; i++) {
1334	 switch (ir->type->base_type) {
1335	 case GLSL_TYPE_FLOAT:
1336	    emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.f[i]));
1337	    break;
1338	 case GLSL_TYPE_UINT:
1339	    emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.u[i]));
1340	    break;
1341	 case GLSL_TYPE_INT:
1342	    emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.i[i]));
1343	    break;
1344	 case GLSL_TYPE_BOOL:
1345	    emit(BRW_OPCODE_MOV, dst_reg, fs_reg((int)ir->value.b[i]));
1346	    break;
1347	 default:
1348	    assert(!"Non-float/uint/int/bool constant");
1349	 }
1350	 dst_reg.reg_offset++;
1351      }
1352   }
1353
1354   this->result = reg;
1355}
1356
1357void
1358fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
1359{
1360   ir_expression *expr = ir->as_expression();
1361
1362   if (expr) {
1363      fs_reg op[2];
1364      fs_inst *inst;
1365
1366      assert(expr->get_num_operands() <= 2);
1367      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
1368	 assert(expr->operands[i]->type->is_scalar());
1369
1370	 expr->operands[i]->accept(this);
1371	 op[i] = this->result;
1372      }
1373
1374      switch (expr->operation) {
1375      case ir_unop_logic_not:
1376	 inst = emit(BRW_OPCODE_AND, reg_null_d, op[0], fs_reg(1));
1377	 inst->conditional_mod = BRW_CONDITIONAL_Z;
1378	 break;
1379
1380      case ir_binop_logic_xor:
1381	 inst = emit(BRW_OPCODE_XOR, reg_null_d, op[0], op[1]);
1382	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
1383	 break;
1384
1385      case ir_binop_logic_or:
1386	 inst = emit(BRW_OPCODE_OR, reg_null_d, op[0], op[1]);
1387	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
1388	 break;
1389
1390      case ir_binop_logic_and:
1391	 inst = emit(BRW_OPCODE_AND, reg_null_d, op[0], op[1]);
1392	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
1393	 break;
1394
1395      case ir_unop_f2b:
1396	 if (intel->gen >= 6) {
1397	    inst = emit(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0.0f));
1398	 } else {
1399	    inst = emit(BRW_OPCODE_MOV, reg_null_f, op[0]);
1400	 }
1401	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
1402	 break;
1403
1404      case ir_unop_i2b:
1405	 if (intel->gen >= 6) {
1406	    inst = emit(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0));
1407	 } else {
1408	    inst = emit(BRW_OPCODE_MOV, reg_null_d, op[0]);
1409	 }
1410	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
1411	 break;
1412
1413      case ir_binop_greater:
1414      case ir_binop_gequal:
1415      case ir_binop_less:
1416      case ir_binop_lequal:
1417      case ir_binop_equal:
1418      case ir_binop_all_equal:
1419      case ir_binop_nequal:
1420      case ir_binop_any_nequal:
1421	 inst = emit(BRW_OPCODE_CMP, reg_null_cmp, op[0], op[1]);
1422	 inst->conditional_mod =
1423	    brw_conditional_for_comparison(expr->operation);
1424	 break;
1425
1426      default:
1427	 assert(!"not reached");
1428	 fail("bad cond code\n");
1429	 break;
1430      }
1431      return;
1432   }
1433
1434   ir->accept(this);
1435
1436   if (intel->gen >= 6) {
1437      fs_inst *inst = emit(BRW_OPCODE_AND, reg_null_d, this->result, fs_reg(1));
1438      inst->conditional_mod = BRW_CONDITIONAL_NZ;
1439   } else {
1440      fs_inst *inst = emit(BRW_OPCODE_MOV, reg_null_d, this->result);
1441      inst->conditional_mod = BRW_CONDITIONAL_NZ;
1442   }
1443}
1444
1445/**
1446 * Emit a gen6 IF statement with the comparison folded into the IF
1447 * instruction.
1448 */
1449void
1450fs_visitor::emit_if_gen6(ir_if *ir)
1451{
1452   ir_expression *expr = ir->condition->as_expression();
1453
1454   if (expr) {
1455      fs_reg op[2];
1456      fs_inst *inst;
1457      fs_reg temp;
1458
1459      assert(expr->get_num_operands() <= 2);
1460      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
1461	 assert(expr->operands[i]->type->is_scalar());
1462
1463	 expr->operands[i]->accept(this);
1464	 op[i] = this->result;
1465      }
1466
1467      switch (expr->operation) {
1468      case ir_unop_logic_not:
1469	 inst = emit(BRW_OPCODE_IF, temp, op[0], fs_reg(0));
1470	 inst->conditional_mod = BRW_CONDITIONAL_Z;
1471	 return;
1472
1473      case ir_binop_logic_xor:
1474	 inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], op[1]);
1475	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
1476	 return;
1477
1478      case ir_binop_logic_or:
1479	 temp = fs_reg(this, glsl_type::bool_type);
1480	 emit(BRW_OPCODE_OR, temp, op[0], op[1]);
1481	 inst = emit(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0));
1482	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
1483	 return;
1484
1485      case ir_binop_logic_and:
1486	 temp = fs_reg(this, glsl_type::bool_type);
1487	 emit(BRW_OPCODE_AND, temp, op[0], op[1]);
1488	 inst = emit(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0));
1489	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
1490	 return;
1491
1492      case ir_unop_f2b:
1493	 inst = emit(BRW_OPCODE_IF, reg_null_f, op[0], fs_reg(0));
1494	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
1495	 return;
1496
1497      case ir_unop_i2b:
1498	 inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0));
1499	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
1500	 return;
1501
1502      case ir_binop_greater:
1503      case ir_binop_gequal:
1504      case ir_binop_less:
1505      case ir_binop_lequal:
1506      case ir_binop_equal:
1507      case ir_binop_all_equal:
1508      case ir_binop_nequal:
1509      case ir_binop_any_nequal:
1510	 inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], op[1]);
1511	 inst->conditional_mod =
1512	    brw_conditional_for_comparison(expr->operation);
1513	 return;
1514      default:
1515	 assert(!"not reached");
1516	 inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0));
1517	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
1518	 fail("bad condition\n");
1519	 return;
1520      }
1521      return;
1522   }
1523
1524   ir->condition->accept(this);
1525
1526   fs_inst *inst = emit(BRW_OPCODE_IF, reg_null_d, this->result, fs_reg(0));
1527   inst->conditional_mod = BRW_CONDITIONAL_NZ;
1528}
1529
1530void
1531fs_visitor::visit(ir_if *ir)
1532{
1533   fs_inst *inst;
1534
1535   if (intel->gen < 6 && c->dispatch_width == 16) {
1536      fail("Can't support (non-uniform) control flow on 16-wide\n");
1537   }
1538
1539   /* Don't point the annotation at the if statement, because then it plus
1540    * the then and else blocks get printed.
1541    */
1542   this->base_ir = ir->condition;
1543
1544   if (intel->gen == 6) {
1545      emit_if_gen6(ir);
1546   } else {
1547      emit_bool_to_cond_code(ir->condition);
1548
1549      inst = emit(BRW_OPCODE_IF);
1550      inst->predicated = true;
1551   }
1552
1553   foreach_list(node, &ir->then_instructions) {
1554      ir_instruction *ir = (ir_instruction *)node;
1555      this->base_ir = ir;
1556
1557      ir->accept(this);
1558   }
1559
1560   if (!ir->else_instructions.is_empty()) {
1561      emit(BRW_OPCODE_ELSE);
1562
1563      foreach_list(node, &ir->else_instructions) {
1564	 ir_instruction *ir = (ir_instruction *)node;
1565	 this->base_ir = ir;
1566
1567	 ir->accept(this);
1568      }
1569   }
1570
1571   emit(BRW_OPCODE_ENDIF);
1572}
1573
1574void
1575fs_visitor::visit(ir_loop *ir)
1576{
1577   fs_reg counter = reg_undef;
1578
1579   if (c->dispatch_width == 16) {
1580      fail("Can't support (non-uniform) control flow on 16-wide\n");
1581   }
1582
1583   if (ir->counter) {
1584      this->base_ir = ir->counter;
1585      ir->counter->accept(this);
1586      counter = *(variable_storage(ir->counter));
1587
1588      if (ir->from) {
1589	 this->base_ir = ir->from;
1590	 ir->from->accept(this);
1591
1592	 emit(BRW_OPCODE_MOV, counter, this->result);
1593      }
1594   }
1595
1596   emit(BRW_OPCODE_DO);
1597
1598   if (ir->to) {
1599      this->base_ir = ir->to;
1600      ir->to->accept(this);
1601
1602      fs_inst *inst = emit(BRW_OPCODE_CMP, reg_null_cmp, counter, this->result);
1603      inst->conditional_mod = brw_conditional_for_comparison(ir->cmp);
1604
1605      inst = emit(BRW_OPCODE_BREAK);
1606      inst->predicated = true;
1607   }
1608
1609   foreach_list(node, &ir->body_instructions) {
1610      ir_instruction *ir = (ir_instruction *)node;
1611
1612      this->base_ir = ir;
1613      ir->accept(this);
1614   }
1615
1616   if (ir->increment) {
1617      this->base_ir = ir->increment;
1618      ir->increment->accept(this);
1619      emit(BRW_OPCODE_ADD, counter, counter, this->result);
1620   }
1621
1622   emit(BRW_OPCODE_WHILE);
1623}
1624
1625void
1626fs_visitor::visit(ir_loop_jump *ir)
1627{
1628   switch (ir->mode) {
1629   case ir_loop_jump::jump_break:
1630      emit(BRW_OPCODE_BREAK);
1631      break;
1632   case ir_loop_jump::jump_continue:
1633      emit(BRW_OPCODE_CONTINUE);
1634      break;
1635   }
1636}
1637
1638void
1639fs_visitor::visit(ir_call *ir)
1640{
1641   assert(!"FINISHME");
1642}
1643
1644void
1645fs_visitor::visit(ir_return *ir)
1646{
1647   assert(!"FINISHME");
1648}
1649
1650void
1651fs_visitor::visit(ir_function *ir)
1652{
1653   /* Ignore function bodies other than main() -- we shouldn't see calls to
1654    * them since they should all be inlined before we get to ir_to_mesa.
1655    */
1656   if (strcmp(ir->name, "main") == 0) {
1657      const ir_function_signature *sig;
1658      exec_list empty;
1659
1660      sig = ir->matching_signature(&empty);
1661
1662      assert(sig);
1663
1664      foreach_list(node, &sig->body) {
1665	 ir_instruction *ir = (ir_instruction *)node;
1666	 this->base_ir = ir;
1667
1668	 ir->accept(this);
1669      }
1670   }
1671}
1672
1673void
1674fs_visitor::visit(ir_function_signature *ir)
1675{
1676   assert(!"not reached");
1677   (void)ir;
1678}
1679
1680fs_inst *
1681fs_visitor::emit(fs_inst inst)
1682{
1683   fs_inst *list_inst = new(mem_ctx) fs_inst;
1684   *list_inst = inst;
1685
1686   if (force_uncompressed_stack > 0)
1687      list_inst->force_uncompressed = true;
1688   else if (force_sechalf_stack > 0)
1689      list_inst->force_sechalf = true;
1690
1691   list_inst->annotation = this->current_annotation;
1692   list_inst->ir = this->base_ir;
1693
1694   this->instructions.push_tail(list_inst);
1695
1696   return list_inst;
1697}
1698
1699/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
1700void
1701fs_visitor::emit_dummy_fs()
1702{
1703   /* Everyone's favorite color. */
1704   emit(BRW_OPCODE_MOV, fs_reg(MRF, 2), fs_reg(1.0f));
1705   emit(BRW_OPCODE_MOV, fs_reg(MRF, 3), fs_reg(0.0f));
1706   emit(BRW_OPCODE_MOV, fs_reg(MRF, 4), fs_reg(1.0f));
1707   emit(BRW_OPCODE_MOV, fs_reg(MRF, 5), fs_reg(0.0f));
1708
1709   fs_inst *write;
1710   write = emit(FS_OPCODE_FB_WRITE, fs_reg(0), fs_reg(0));
1711   write->base_mrf = 2;
1712}
1713
1714/* The register location here is relative to the start of the URB
1715 * data.  It will get adjusted to be a real location before
1716 * generate_code() time.
1717 */
1718struct brw_reg
1719fs_visitor::interp_reg(int location, int channel)
1720{
1721   int regnr = urb_setup[location] * 2 + channel / 2;
1722   int stride = (channel & 1) * 4;
1723
1724   assert(urb_setup[location] != -1);
1725
1726   return brw_vec1_grf(regnr, stride);
1727}
1728
1729/** Emits the interpolation for the varying inputs. */
1730void
1731fs_visitor::emit_interpolation_setup_gen4()
1732{
1733   this->current_annotation = "compute pixel centers";
1734   this->pixel_x = fs_reg(this, glsl_type::uint_type);
1735   this->pixel_y = fs_reg(this, glsl_type::uint_type);
1736   this->pixel_x.type = BRW_REGISTER_TYPE_UW;
1737   this->pixel_y.type = BRW_REGISTER_TYPE_UW;
1738
1739   emit(FS_OPCODE_PIXEL_X, this->pixel_x);
1740   emit(FS_OPCODE_PIXEL_Y, this->pixel_y);
1741
1742   this->current_annotation = "compute pixel deltas from v0";
1743   if (brw->has_pln) {
1744      this->delta_x = fs_reg(this, glsl_type::vec2_type);
1745      this->delta_y = this->delta_x;
1746      this->delta_y.reg_offset++;
1747   } else {
1748      this->delta_x = fs_reg(this, glsl_type::float_type);
1749      this->delta_y = fs_reg(this, glsl_type::float_type);
1750   }
1751   emit(BRW_OPCODE_ADD, this->delta_x,
1752	this->pixel_x, fs_reg(negate(brw_vec1_grf(1, 0))));
1753   emit(BRW_OPCODE_ADD, this->delta_y,
1754	this->pixel_y, fs_reg(negate(brw_vec1_grf(1, 1))));
1755
1756   this->current_annotation = "compute pos.w and 1/pos.w";
1757   /* Compute wpos.w.  It's always in our setup, since it's needed to
1758    * interpolate the other attributes.
1759    */
1760   this->wpos_w = fs_reg(this, glsl_type::float_type);
1761   emit(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y,
1762	interp_reg(FRAG_ATTRIB_WPOS, 3));
1763   /* Compute the pixel 1/W value from wpos.w. */
1764   this->pixel_w = fs_reg(this, glsl_type::float_type);
1765   emit_math(SHADER_OPCODE_RCP, this->pixel_w, wpos_w);
1766   this->current_annotation = NULL;
1767}
1768
1769/** Emits the interpolation for the varying inputs. */
1770void
1771fs_visitor::emit_interpolation_setup_gen6()
1772{
1773   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
1774
1775   /* If the pixel centers end up used, the setup is the same as for gen4. */
1776   this->current_annotation = "compute pixel centers";
1777   fs_reg int_pixel_x = fs_reg(this, glsl_type::uint_type);
1778   fs_reg int_pixel_y = fs_reg(this, glsl_type::uint_type);
1779   int_pixel_x.type = BRW_REGISTER_TYPE_UW;
1780   int_pixel_y.type = BRW_REGISTER_TYPE_UW;
1781   emit(BRW_OPCODE_ADD,
1782	int_pixel_x,
1783	fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
1784	fs_reg(brw_imm_v(0x10101010)));
1785   emit(BRW_OPCODE_ADD,
1786	int_pixel_y,
1787	fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
1788	fs_reg(brw_imm_v(0x11001100)));
1789
1790   /* As of gen6, we can no longer mix float and int sources.  We have
1791    * to turn the integer pixel centers into floats for their actual
1792    * use.
1793    */
1794   this->pixel_x = fs_reg(this, glsl_type::float_type);
1795   this->pixel_y = fs_reg(this, glsl_type::float_type);
1796   emit(BRW_OPCODE_MOV, this->pixel_x, int_pixel_x);
1797   emit(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y);
1798
1799   this->current_annotation = "compute pos.w";
1800   this->pixel_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0));
1801   this->wpos_w = fs_reg(this, glsl_type::float_type);
1802   emit_math(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w);
1803
1804   this->delta_x = fs_reg(brw_vec8_grf(2, 0));
1805   this->delta_y = fs_reg(brw_vec8_grf(3, 0));
1806
1807   this->current_annotation = NULL;
1808}
1809
1810void
1811fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color)
1812{
1813   int reg_width = c->dispatch_width / 8;
1814   fs_inst *inst;
1815
1816   if (c->dispatch_width == 8 || intel->gen >= 6) {
1817      /* SIMD8 write looks like:
1818       * m + 0: r0
1819       * m + 1: r1
1820       * m + 2: g0
1821       * m + 3: g1
1822       *
1823       * gen6 SIMD16 DP write looks like:
1824       * m + 0: r0
1825       * m + 1: r1
1826       * m + 2: g0
1827       * m + 3: g1
1828       * m + 4: b0
1829       * m + 5: b1
1830       * m + 6: a0
1831       * m + 7: a1
1832       */
1833      inst = emit(BRW_OPCODE_MOV,
1834		  fs_reg(MRF, first_color_mrf + index * reg_width),
1835		  color);
1836      inst->saturate = c->key.clamp_fragment_color;
1837   } else {
1838      /* pre-gen6 SIMD16 single source DP write looks like:
1839       * m + 0: r0
1840       * m + 1: g0
1841       * m + 2: b0
1842       * m + 3: a0
1843       * m + 4: r1
1844       * m + 5: g1
1845       * m + 6: b1
1846       * m + 7: a1
1847       */
1848      if (brw->has_compr4) {
1849	 /* By setting the high bit of the MRF register number, we
1850	  * indicate that we want COMPR4 mode - instead of doing the
1851	  * usual destination + 1 for the second half we get
1852	  * destination + 4.
1853	  */
1854	 inst = emit(BRW_OPCODE_MOV,
1855		     fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index),
1856		     color);
1857	 inst->saturate = c->key.clamp_fragment_color;
1858      } else {
1859	 push_force_uncompressed();
1860	 inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index),
1861		     color);
1862	 inst->saturate = c->key.clamp_fragment_color;
1863	 pop_force_uncompressed();
1864
1865	 push_force_sechalf();
1866	 color.sechalf = true;
1867	 inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4),
1868		     color);
1869	 inst->saturate = c->key.clamp_fragment_color;
1870	 pop_force_sechalf();
1871	 color.sechalf = false;
1872      }
1873   }
1874}
1875
1876void
1877fs_visitor::emit_fb_writes()
1878{
1879   this->current_annotation = "FB write header";
1880   bool header_present = true;
1881   int base_mrf = 2;
1882   int nr = base_mrf;
1883   int reg_width = c->dispatch_width / 8;
1884
1885   if (intel->gen >= 6 &&
1886       !this->kill_emitted &&
1887       c->key.nr_color_regions == 1) {
1888      header_present = false;
1889   }
1890
1891   if (header_present) {
1892      /* m2, m3 header */
1893      nr += 2;
1894   }
1895
1896   if (c->aa_dest_stencil_reg) {
1897      push_force_uncompressed();
1898      emit(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
1899	   fs_reg(brw_vec8_grf(c->aa_dest_stencil_reg, 0)));
1900      pop_force_uncompressed();
1901   }
1902
1903   /* Reserve space for color. It'll be filled in per MRT below. */
1904   int color_mrf = nr;
1905   nr += 4 * reg_width;
1906
1907   if (c->source_depth_to_render_target) {
1908      if (intel->gen == 6 && c->dispatch_width == 16) {
1909	 /* For outputting oDepth on gen6, SIMD8 writes have to be
1910	  * used.  This would require 8-wide moves of each half to
1911	  * message regs, kind of like pre-gen5 SIMD16 FB writes.
1912	  * Just bail on doing so for now.
1913	  */
1914	 fail("Missing support for simd16 depth writes on gen6\n");
1915      }
1916
1917      if (c->computes_depth) {
1918	 /* Hand over gl_FragDepth. */
1919	 assert(this->frag_depth);
1920	 fs_reg depth = *(variable_storage(this->frag_depth));
1921
1922	 emit(BRW_OPCODE_MOV, fs_reg(MRF, nr), depth);
1923      } else {
1924	 /* Pass through the payload depth. */
1925	 emit(BRW_OPCODE_MOV, fs_reg(MRF, nr),
1926	      fs_reg(brw_vec8_grf(c->source_depth_reg, 0)));
1927      }
1928      nr += reg_width;
1929   }
1930
1931   if (c->dest_depth_reg) {
1932      emit(BRW_OPCODE_MOV, fs_reg(MRF, nr),
1933	   fs_reg(brw_vec8_grf(c->dest_depth_reg, 0)));
1934      nr += reg_width;
1935   }
1936
1937   fs_reg color = reg_undef;
1938   if (this->frag_color)
1939      color = *(variable_storage(this->frag_color));
1940   else if (this->frag_data) {
1941      color = *(variable_storage(this->frag_data));
1942      color.type = BRW_REGISTER_TYPE_F;
1943   }
1944
1945   for (int target = 0; target < c->key.nr_color_regions; target++) {
1946      this->current_annotation = ralloc_asprintf(this->mem_ctx,
1947						 "FB write target %d",
1948						 target);
1949      if (this->frag_color || this->frag_data) {
1950	 for (int i = 0; i < 4; i++) {
1951	    emit_color_write(i, color_mrf, color);
1952	    color.reg_offset++;
1953	 }
1954      }
1955
1956      if (this->frag_color)
1957	 color.reg_offset -= 4;
1958
1959      fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
1960      inst->target = target;
1961      inst->base_mrf = base_mrf;
1962      inst->mlen = nr - base_mrf;
1963      if (target == c->key.nr_color_regions - 1)
1964	 inst->eot = true;
1965      inst->header_present = header_present;
1966   }
1967
1968   if (c->key.nr_color_regions == 0) {
1969      if (c->key.alpha_test && (this->frag_color || this->frag_data)) {
1970	 /* If the alpha test is enabled but there's no color buffer,
1971	  * we still need to send alpha out the pipeline to our null
1972	  * renderbuffer.
1973	  */
1974	 color.reg_offset += 3;
1975	 emit_color_write(3, color_mrf, color);
1976      }
1977
1978      fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
1979      inst->base_mrf = base_mrf;
1980      inst->mlen = nr - base_mrf;
1981      inst->eot = true;
1982      inst->header_present = header_present;
1983   }
1984
1985   this->current_annotation = NULL;
1986}
1987