brw_vec4_visitor.cpp revision 250770b74d33bb8625c780a74a89477af033d13a
1/*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "brw_vec4.h"
25extern "C" {
26#include "main/macros.h"
27#include "program/prog_parameter.h"
28}
29
30namespace brw {
31
32src_reg::src_reg(dst_reg reg)
33{
34   init();
35
36   this->file = reg.file;
37   this->reg = reg.reg;
38   this->reg_offset = reg.reg_offset;
39   this->type = reg.type;
40   this->reladdr = reg.reladdr;
41
42   int swizzles[4];
43   int next_chan = 0;
44   int last = 0;
45
46   for (int i = 0; i < 4; i++) {
47      if (!(reg.writemask & (1 << i)))
48	 continue;
49
50      swizzles[next_chan++] = last = i;
51   }
52
53   for (; next_chan < 4; next_chan++) {
54      swizzles[next_chan] = last;
55   }
56
57   this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
58				swizzles[2], swizzles[3]);
59}
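/* Example of the writemask-to-swizzle mapping above: reading back a
 * destination that was written with writemask .xz yields the swizzle
 * .xzzz -- the enabled channels are packed into the low components and
 * the last enabled channel is replicated into the remaining slots.
 */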
60
61dst_reg::dst_reg(src_reg reg)
62{
63   init();
64
65   this->file = reg.file;
66   this->reg = reg.reg;
67   this->reg_offset = reg.reg_offset;
68   this->type = reg.type;
69   this->writemask = WRITEMASK_XYZW;
70   this->reladdr = reg.reladdr;
71}
72
73vec4_instruction *
74vec4_visitor::emit(enum opcode opcode, dst_reg dst,
75		   src_reg src0, src_reg src1, src_reg src2)
76{
77   vec4_instruction *inst = new(mem_ctx) vec4_instruction();
78
79   inst->opcode = opcode;
80   inst->dst = dst;
81   inst->src[0] = src0;
82   inst->src[1] = src1;
83   inst->src[2] = src2;
84   inst->ir = this->base_ir;
85   inst->annotation = this->current_annotation;
86
87   this->instructions.push_tail(inst);
88
89   return inst;
90}
91
92
93vec4_instruction *
94vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
95{
96   return emit(opcode, dst, src0, src1, src_reg());
97}
98
99vec4_instruction *
100vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
101{
102   assert(dst.writemask != 0);
103   return emit(opcode, dst, src0, src_reg(), src_reg());
104}
105
106vec4_instruction *
107vec4_visitor::emit(enum opcode opcode)
108{
109   return emit(opcode, dst_reg(), src_reg(), src_reg(), src_reg());
110}
111
112void
113vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
114{
115   static enum opcode dot_opcodes[] = {
116      BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
117   };
118
119   emit(dot_opcodes[elements - 2], dst, src0, src1);
120}
121
122void
123vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
124{
125   /* The gen6 math instruction ignores the source modifiers --
126    * swizzle, abs, negate, and at least some parts of the register
127    * region description.
128    */
129   src_reg temp_src = src_reg(this, glsl_type::vec4_type);
130   emit(BRW_OPCODE_MOV, dst_reg(temp_src), src);
131
132   if (dst.writemask != WRITEMASK_XYZW) {
133      /* The gen6 math instruction must be align1, so we can't do
134       * writemasks.
135       */
136      dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
137
138      emit(opcode, temp_dst, temp_src);
139
140      emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst));
141   } else {
142      emit(opcode, dst, temp_src);
143   }
144}
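/* A rough illustration of the expansion above: an RSQ into dst.xy from a
 * swizzled source becomes
 *
 *    MOV tmp_src, src.zwzw     (strips swizzle/abs/negate for gen6 math)
 *    RSQ tmp_dst, tmp_src
 *    MOV dst.xy, tmp_dst       (reapplies the writemask)
 *
 * (tmp_src/tmp_dst are illustrative names; the code allocates fresh
 * virtual GRFs for them.)
 */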
145
146void
147vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
148{
149   vec4_instruction *inst = emit(opcode, dst, src);
150   inst->base_mrf = 1;
151   inst->mlen = 1;
152}
153
154void
155vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
156{
157   switch (opcode) {
158   case SHADER_OPCODE_RCP:
159   case SHADER_OPCODE_RSQ:
160   case SHADER_OPCODE_SQRT:
161   case SHADER_OPCODE_EXP2:
162   case SHADER_OPCODE_LOG2:
163   case SHADER_OPCODE_SIN:
164   case SHADER_OPCODE_COS:
165      break;
166   default:
167      assert(!"not reached: bad math opcode");
168      return;
169   }
170
171   if (intel->gen >= 6) {
172      return emit_math1_gen6(opcode, dst, src);
173   } else {
174      return emit_math1_gen4(opcode, dst, src);
175   }
176}
177
178void
179vec4_visitor::emit_math2_gen6(enum opcode opcode,
180			      dst_reg dst, src_reg src0, src_reg src1)
181{
182   src_reg expanded;
183
184   /* The gen6 math instruction ignores the source modifiers --
185    * swizzle, abs, negate, and at least some parts of the register
186    * region description.  Move the sources to temporaries to make it
187    * generally work.
188    */
189
190   expanded = src_reg(this, glsl_type::vec4_type);
191   emit(BRW_OPCODE_MOV, dst_reg(expanded), src0);
192   src0 = expanded;
193
194   expanded = src_reg(this, glsl_type::vec4_type);
195   emit(BRW_OPCODE_MOV, dst_reg(expanded), src1);
196   src1 = expanded;
197
198   if (dst.writemask != WRITEMASK_XYZW) {
199      /* The gen6 math instruction must be align1, so we can't do
200       * writemasks.
201       */
202      dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
203
204      emit(opcode, temp_dst, src0, src1);
205
206      emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst));
207   } else {
208      emit(opcode, dst, src0, src1);
209   }
210}
211
212void
213vec4_visitor::emit_math2_gen4(enum opcode opcode,
214			      dst_reg dst, src_reg src0, src_reg src1)
215{
216   vec4_instruction *inst = emit(opcode, dst, src0, src1);
217   inst->base_mrf = 1;
218   inst->mlen = 2;
219}
220
221void
222vec4_visitor::emit_math(enum opcode opcode,
223			dst_reg dst, src_reg src0, src_reg src1)
224{
225   assert(opcode == SHADER_OPCODE_POW);
226
227   if (intel->gen >= 6) {
228      return emit_math2_gen6(opcode, dst, src0, src1);
229   } else {
230      return emit_math2_gen4(opcode, dst, src0, src1);
231   }
232}
233
234void
235vec4_visitor::visit_instructions(const exec_list *list)
236{
237   foreach_iter(exec_list_iterator, iter, *list) {
238      ir_instruction *ir = (ir_instruction *)iter.get();
239
240      base_ir = ir;
241      ir->accept(this);
242   }
243}
244
245
246static int
247type_size(const struct glsl_type *type)
248{
249   unsigned int i;
250   int size;
251
252   switch (type->base_type) {
253   case GLSL_TYPE_UINT:
254   case GLSL_TYPE_INT:
255   case GLSL_TYPE_FLOAT:
256   case GLSL_TYPE_BOOL:
257      if (type->is_matrix()) {
258	 return type->matrix_columns;
259      } else {
260	 /* Regardless of the size of the vector, it gets a vec4. This is bad
261	  * packing for things like floats, but otherwise arrays become a
262	  * mess.  Hopefully a later pass over the code can pack scalars
263	  * down if appropriate.
264	  */
265	 return 1;
266      }
267   case GLSL_TYPE_ARRAY:
268      assert(type->length > 0);
269      return type_size(type->fields.array) * type->length;
270   case GLSL_TYPE_STRUCT:
271      size = 0;
272      for (i = 0; i < type->length; i++) {
273	 size += type_size(type->fields.structure[i].type);
274      }
275      return size;
276   case GLSL_TYPE_SAMPLER:
277      /* Samplers take up one slot in UNIFORMS[], but they're baked in
278       * at link time.
279       */
280      return 1;
281   default:
282      assert(0);
283      return 0;
284   }
285}
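/* Some example sizes in vec4 slots, as computed above: float, vec2 and
 * vec4 each take 1 slot; mat3 takes 3 (one per column); float[4] takes 4;
 * a struct { vec3 a; float b; } takes 2.
 */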
286
287int
288vec4_visitor::virtual_grf_alloc(int size)
289{
290   if (virtual_grf_array_size <= virtual_grf_count) {
291      if (virtual_grf_array_size == 0)
292	 virtual_grf_array_size = 16;
293      else
294	 virtual_grf_array_size *= 2;
295      virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
296				   virtual_grf_array_size);
297   }
298   virtual_grf_sizes[virtual_grf_count] = size;
299   return virtual_grf_count++;
300}
301
302src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
303{
304   init();
305
306   this->file = GRF;
307   this->reg = v->virtual_grf_alloc(type_size(type));
308
309   if (type->is_array() || type->is_record()) {
310      this->swizzle = BRW_SWIZZLE_NOOP;
311   } else {
312      this->swizzle = swizzle_for_size(type->vector_elements);
313   }
314
315   this->type = brw_type_for_base_type(type);
316}
317
318dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
319{
320   init();
321
322   this->file = GRF;
323   this->reg = v->virtual_grf_alloc(type_size(type));
324
325   if (type->is_array() || type->is_record()) {
326      this->writemask = WRITEMASK_XYZW;
327   } else {
328      this->writemask = (1 << type->vector_elements) - 1;
329   }
330
331   this->type = brw_type_for_base_type(type);
332}
333
334/* Our support for uniforms is piggy-backed on the struct
335 * gl_vertex_program, because that's where the values actually
336 * get stored, rather than in some global gl_shader_program uniform
337 * store.
338 */
339int
340vec4_visitor::setup_uniform_values(int loc, const glsl_type *type)
341{
342   unsigned int offset = 0;
343   float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f;
344
345   if (type->is_matrix()) {
346      const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
347							type->vector_elements,
348							1);
349
350      for (unsigned int i = 0; i < type->matrix_columns; i++) {
351	 offset += setup_uniform_values(loc + offset, column);
352      }
353
354      return offset;
355   }
356
357   switch (type->base_type) {
358   case GLSL_TYPE_FLOAT:
359   case GLSL_TYPE_UINT:
360   case GLSL_TYPE_INT:
361   case GLSL_TYPE_BOOL:
362      for (unsigned int i = 0; i < type->vector_elements; i++) {
363	 int slot = this->uniforms * 4 + i;
364	 switch (type->base_type) {
365	 case GLSL_TYPE_FLOAT:
366	    c->prog_data.param_convert[slot] = PARAM_NO_CONVERT;
367	    break;
368	 case GLSL_TYPE_UINT:
369	    c->prog_data.param_convert[slot] = PARAM_CONVERT_F2U;
370	    break;
371	 case GLSL_TYPE_INT:
372	    c->prog_data.param_convert[slot] = PARAM_CONVERT_F2I;
373	    break;
374	 case GLSL_TYPE_BOOL:
375	    c->prog_data.param_convert[slot] = PARAM_CONVERT_F2B;
376	    break;
377	 default:
378	    assert(!"not reached");
379	    c->prog_data.param_convert[slot] = PARAM_NO_CONVERT;
380	    break;
381	 }
382	 c->prog_data.param[slot] = &values[i];
383      }
384
385      for (unsigned int i = type->vector_elements; i < 4; i++) {
386	 c->prog_data.param_convert[this->uniforms * 4 + i] =
387	    PARAM_CONVERT_ZERO;
388	 c->prog_data.param[this->uniforms * 4 + i] = NULL;
389      }
390
391      this->uniform_size[this->uniforms] = type->vector_elements;
392      this->uniforms++;
393
394      return 1;
395
396   case GLSL_TYPE_STRUCT:
397      for (unsigned int i = 0; i < type->length; i++) {
398	 offset += setup_uniform_values(loc + offset,
399					type->fields.structure[i].type);
400      }
401      return offset;
402
403   case GLSL_TYPE_ARRAY:
404      for (unsigned int i = 0; i < type->length; i++) {
405	 offset += setup_uniform_values(loc + offset, type->fields.array);
406      }
407      return offset;
408
409   case GLSL_TYPE_SAMPLER:
410      /* The sampler takes up a slot, but we don't use any values from it. */
411      return 1;
412
413   default:
414      assert(!"not reached");
415      return 0;
416   }
417}
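/* For example, a single vec3 uniform consumes one slot here: channels
 * x/y/z point at the parameter values with PARAM_NO_CONVERT, the unused
 * w channel is padded with PARAM_CONVERT_ZERO and a NULL param pointer,
 * uniform_size[] records 3 components, and the function returns 1.
 */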
418
419/* Our support for builtin uniforms is even scarier than non-builtin.
420 * It sits on top of the PROG_STATE_VAR parameters that are
421 * automatically updated from GL context state.
422 */
423void
424vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
425{
426   const ir_state_slot *const slots = ir->state_slots;
427   assert(ir->state_slots != NULL);
428
429   for (unsigned int i = 0; i < ir->num_state_slots; i++) {
430      /* This state reference has already been set up by ir_to_mesa,
431       * but we'll get the same index back here.  We can reference
432       * ParameterValues directly, since unlike brw_fs.cpp, we never
433       * add new state references during compile.
434       */
435      int index = _mesa_add_state_reference(this->vp->Base.Parameters,
436					    (gl_state_index *)slots[i].tokens);
437      float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f;
438
439      this->uniform_size[this->uniforms] = 0;
440      /* Add each of the unique swizzled channels of the element.
441       * This will end up matching the size of the glsl_type of this field.
442       */
443      int last_swiz = -1;
444      for (unsigned int j = 0; j < 4; j++) {
445	 int swiz = GET_SWZ(slots[i].swizzle, j);
446	 if (swiz == last_swiz)
447	    break;
448	 last_swiz = swiz;
449
450	 c->prog_data.param[this->uniforms * 4 + j] = &values[swiz];
451	 c->prog_data.param_convert[this->uniforms * 4 + j] = PARAM_NO_CONVERT;
452	 this->uniform_size[this->uniforms]++;
453      }
454      this->uniforms++;
455   }
456}
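/* The inner channel loop above stops at the first repeated swizzle value,
 * so a state slot swizzled XYZW contributes four channels while one
 * swizzled XXXX contributes just one -- matching the number of components
 * the corresponding GLSL type exposes.
 */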
457
458dst_reg *
459vec4_visitor::variable_storage(ir_variable *var)
460{
461   return (dst_reg *)hash_table_find(this->variable_ht, var);
462}
463
464void
465vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
466{
467   ir_expression *expr = ir->as_expression();
468
469   if (expr) {
470      src_reg op[2];
471      vec4_instruction *inst;
472
473      assert(expr->get_num_operands() <= 2);
474      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
475	 assert(expr->operands[i]->type->is_scalar());
476
477	 expr->operands[i]->accept(this);
478	 op[i] = this->result;
479      }
480
481      switch (expr->operation) {
482      case ir_unop_logic_not:
483	 inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], src_reg(1));
484	 inst->conditional_mod = BRW_CONDITIONAL_Z;
485	 break;
486
487      case ir_binop_logic_xor:
488	 inst = emit(BRW_OPCODE_XOR, dst_null_d(), op[0], op[1]);
489	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
490	 break;
491
492      case ir_binop_logic_or:
493	 inst = emit(BRW_OPCODE_OR, dst_null_d(), op[0], op[1]);
494	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
495	 break;
496
497      case ir_binop_logic_and:
498	 inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], op[1]);
499	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
500	 break;
501
502      case ir_unop_f2b:
503	 if (intel->gen >= 6) {
504	    inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0.0f));
505	 } else {
506	    inst = emit(BRW_OPCODE_MOV, dst_null_f(), op[0]);
507	 }
508	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
509	 break;
510
511      case ir_unop_i2b:
512	 if (intel->gen >= 6) {
513	    inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
514	 } else {
515	    inst = emit(BRW_OPCODE_MOV, dst_null_d(), op[0]);
516	 }
517	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
518	 break;
519
520      case ir_binop_greater:
521      case ir_binop_gequal:
522      case ir_binop_less:
523      case ir_binop_lequal:
524      case ir_binop_equal:
525      case ir_binop_all_equal:
526      case ir_binop_nequal:
527      case ir_binop_any_nequal:
528	 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
529	 inst->conditional_mod =
530	    brw_conditional_for_comparison(expr->operation);
531	 break;
532
533      default:
534	 assert(!"not reached");
535	 break;
536      }
537      return;
538   }
539
540   ir->accept(this);
541
542   if (intel->gen >= 6) {
543      vec4_instruction *inst = emit(BRW_OPCODE_AND, dst_null_d(),
544			       this->result, src_reg(1));
545      inst->conditional_mod = BRW_CONDITIONAL_NZ;
546   } else {
547      vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst_null_d(), this->result);
548      inst->conditional_mod = BRW_CONDITIONAL_NZ;
549   }
550}
551
552/**
553 * Emit a gen6 IF statement with the comparison folded into the IF
554 * instruction.
555 */
556void
557vec4_visitor::emit_if_gen6(ir_if *ir)
558{
559   ir_expression *expr = ir->condition->as_expression();
560
561   if (expr) {
562      src_reg op[2];
563      vec4_instruction *inst;
564      dst_reg temp;
565
566      assert(expr->get_num_operands() <= 2);
567      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
568	 assert(expr->operands[i]->type->is_scalar() ||
569		expr->operation == ir_binop_any_nequal ||
570		expr->operation == ir_binop_all_equal);
571
572	 expr->operands[i]->accept(this);
573	 op[i] = this->result;
574      }
575
576      switch (expr->operation) {
577      case ir_unop_logic_not:
578	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
579	 inst->conditional_mod = BRW_CONDITIONAL_Z;
580	 return;
581
582      case ir_binop_logic_xor:
583	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
584	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
585	 return;
586
587      case ir_binop_logic_or:
588	 temp = dst_reg(this, glsl_type::bool_type);
589	 emit(BRW_OPCODE_OR, temp, op[0], op[1]);
590	 inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
591	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
592	 return;
593
594      case ir_binop_logic_and:
595	 temp = dst_reg(this, glsl_type::bool_type);
596	 emit(BRW_OPCODE_AND, temp, op[0], op[1]);
597	 inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
598	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
599	 return;
600
601      case ir_unop_f2b:
602	 inst = emit(BRW_OPCODE_IF, dst_null_f(), op[0], src_reg(0));
603	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
604	 return;
605
606      case ir_unop_i2b:
607	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
608	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
609	 return;
610
611      case ir_binop_greater:
612      case ir_binop_gequal:
613      case ir_binop_less:
614      case ir_binop_lequal:
615      case ir_binop_equal:
616      case ir_binop_nequal:
617	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
618	 inst->conditional_mod =
619	    brw_conditional_for_comparison(expr->operation);
620	 return;
621
622      case ir_binop_all_equal:
623	 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]);
624	 inst->conditional_mod = BRW_CONDITIONAL_Z;
625
626	 inst = emit(BRW_OPCODE_IF);
627	 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
628	 return;
629
630      case ir_binop_any_nequal:
631	 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]);
632	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
633
634	 inst = emit(BRW_OPCODE_IF);
635	 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
636	 return;
637
638      default:
639	 assert(!"not reached");
640	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
641	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
642	 return;
643      }
644      return;
645   }
646
647   ir->condition->accept(this);
648
649   vec4_instruction *inst = emit(BRW_OPCODE_IF, dst_null_d(),
650			    this->result, src_reg(0));
651   inst->conditional_mod = BRW_CONDITIONAL_NZ;
652}
653
654void
655vec4_visitor::visit(ir_variable *ir)
656{
657   dst_reg *reg = NULL;
658
659   if (variable_storage(ir))
660      return;
661
662   switch (ir->mode) {
663   case ir_var_in:
664      reg = new(mem_ctx) dst_reg(ATTR, ir->location);
665      break;
666
667   case ir_var_out:
668      reg = new(mem_ctx) dst_reg(this, ir->type);
669
670      for (int i = 0; i < type_size(ir->type); i++) {
671	 output_reg[ir->location + i] = *reg;
672	 output_reg[ir->location + i].reg_offset = i;
673	 output_reg[ir->location + i].type = BRW_REGISTER_TYPE_F;
674      }
675      break;
676
677   case ir_var_auto:
678   case ir_var_temporary:
679      reg = new(mem_ctx) dst_reg(this, ir->type);
680      break;
681
682   case ir_var_uniform:
683      reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
684
685      if (!strncmp(ir->name, "gl_", 3)) {
686	 setup_builtin_uniform_values(ir);
687      } else {
688	 setup_uniform_values(ir->location, ir->type);
689      }
690      break;
691
692   default:
693      assert(!"not reached");
694   }
695
696   reg->type = brw_type_for_base_type(ir->type);
697   hash_table_insert(this->variable_ht, reg, ir);
698}
699
700void
701vec4_visitor::visit(ir_loop *ir)
702{
703   ir_dereference_variable *counter = NULL;
704
705   fail("not yet\n");
706
707   /* We don't want debugging output to print the whole body of the
708    * loop as the annotation.
709    */
710   this->base_ir = NULL;
711
712   if (ir->counter != NULL)
713      counter = new(ir) ir_dereference_variable(ir->counter);
714
715   if (ir->from != NULL) {
716      assert(ir->counter != NULL);
717
718      ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL);
719
720      a->accept(this);
721      delete a;
722   }
723
724   emit(BRW_OPCODE_DO);
725
726   if (ir->to) {
727      ir_expression *e =
728	 new(ir) ir_expression(ir->cmp, glsl_type::bool_type,
729			       counter, ir->to);
730      ir_if *if_stmt =  new(ir) ir_if(e);
731
732      ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break);
733
734      if_stmt->then_instructions.push_tail(brk);
735
736      if_stmt->accept(this);
737
738      delete if_stmt;
739      delete e;
740      delete brk;
741   }
742
743   visit_instructions(&ir->body_instructions);
744
745   if (ir->increment) {
746      ir_expression *e =
747	 new(ir) ir_expression(ir_binop_add, counter->type,
748			       counter, ir->increment);
749
750      ir_assignment *a = new(ir) ir_assignment(counter, e, NULL);
751
752      a->accept(this);
753      delete a;
754      delete e;
755   }
756
757   emit(BRW_OPCODE_WHILE);
758}
759
760void
761vec4_visitor::visit(ir_loop_jump *ir)
762{
763   switch (ir->mode) {
764   case ir_loop_jump::jump_break:
765      emit(BRW_OPCODE_BREAK);
766      break;
767   case ir_loop_jump::jump_continue:
768      emit(BRW_OPCODE_CONTINUE);
769      break;
770   }
771}
772
773
774void
775vec4_visitor::visit(ir_function_signature *ir)
776{
777   assert(0);
778   (void)ir;
779}
780
781void
782vec4_visitor::visit(ir_function *ir)
783{
784   /* Ignore function bodies other than main() -- we shouldn't see calls to
785    * them since they should all be inlined.
786    */
787   if (strcmp(ir->name, "main") == 0) {
788      const ir_function_signature *sig;
789      exec_list empty;
790
791      sig = ir->matching_signature(&empty);
792
793      assert(sig);
794
795      visit_instructions(&sig->body);
796   }
797}
798
799GLboolean
800vec4_visitor::try_emit_sat(ir_expression *ir)
801{
802   ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
803   if (!sat_src)
804      return false;
805
806   sat_src->accept(this);
807   src_reg src = this->result;
808
809   this->result = src_reg(this, ir->type);
810   vec4_instruction *inst;
811   inst = emit(BRW_OPCODE_MOV, dst_reg(this->result), src);
812   inst->saturate = true;
813
814   return true;
815}
816
817void
818vec4_visitor::emit_bool_comparison(unsigned int op,
819				 dst_reg dst, src_reg src0, src_reg src1)
820{
821   /* original gen4 does destination conversion before comparison. */
822   if (intel->gen < 5)
823      dst.type = src0.type;
824
825   vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst, src0, src1);
826   inst->conditional_mod = brw_conditional_for_comparison(op);
827
828   dst.type = BRW_REGISTER_TYPE_D;
829   emit(BRW_OPCODE_AND, dst, src_reg(dst), src_reg(0x1));
830}
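/* The CMP above leaves all-ones in channels where the comparison holds and
 * zero elsewhere, so the AND with 0x1 that follows canonicalizes the result
 * into the 0/1 booleans the rest of the backend expects.
 */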
831
832void
833vec4_visitor::visit(ir_expression *ir)
834{
835   unsigned int operand;
836   src_reg op[Elements(ir->operands)];
837   src_reg result_src;
838   dst_reg result_dst;
839   vec4_instruction *inst;
840
841   if (try_emit_sat(ir))
842      return;
843
844   for (operand = 0; operand < ir->get_num_operands(); operand++) {
845      this->result.file = BAD_FILE;
846      ir->operands[operand]->accept(this);
847      if (this->result.file == BAD_FILE) {
848	 printf("Failed to get tree for expression operand:\n");
849	 ir->operands[operand]->print();
850	 exit(1);
851      }
852      op[operand] = this->result;
853
854      /* Matrix expression operands should have been broken down to vector
855       * operations already.
856       */
857      assert(!ir->operands[operand]->type->is_matrix());
858   }
859
860   int vector_elements = ir->operands[0]->type->vector_elements;
861   if (ir->operands[1]) {
862      vector_elements = MAX2(vector_elements,
863			     ir->operands[1]->type->vector_elements);
864   }
865
866   this->result.file = BAD_FILE;
867
868   /* Storage for our result.  Ideally for an assignment we'd be using
869    * the actual storage for the result here, instead.
870    */
871   result_src = src_reg(this, ir->type);
872   /* convenience for the emit functions below. */
873   result_dst = dst_reg(result_src);
874   /* If nothing special happens, this is the result. */
875   this->result = result_src;
876   /* Limit writes to the channels that will be used by result_src later.
877    * This does limit this temp's use as a temporary for multi-instruction
878    * sequences.
879    */
880   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
881
882   switch (ir->operation) {
883   case ir_unop_logic_not:
884      /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
885       * one's complement of the whole register, not just bit 0.
886       */
887      emit(BRW_OPCODE_XOR, result_dst, op[0], src_reg(1));
888      break;
889   case ir_unop_neg:
890      op[0].negate = !op[0].negate;
891      this->result = op[0];
892      break;
893   case ir_unop_abs:
894      op[0].abs = true;
895      op[0].negate = false;
896      this->result = op[0];
897      break;
898
899   case ir_unop_sign:
900      emit(BRW_OPCODE_MOV, result_dst, src_reg(0.0f));
901
902      inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
903      inst->conditional_mod = BRW_CONDITIONAL_G;
904      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1.0f));
905      inst->predicate = BRW_PREDICATE_NORMAL;
906
907      inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
908      inst->conditional_mod = BRW_CONDITIONAL_L;
909      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(-1.0f));
910      inst->predicate = BRW_PREDICATE_NORMAL;
911
912      break;
913
914   case ir_unop_rcp:
915      emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
916      break;
917
918   case ir_unop_exp2:
919      emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
920      break;
921   case ir_unop_log2:
922      emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
923      break;
924   case ir_unop_exp:
925   case ir_unop_log:
926      assert(!"not reached: should be handled by ir_explog_to_explog2");
927      break;
928   case ir_unop_sin:
929   case ir_unop_sin_reduced:
930      emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
931      break;
932   case ir_unop_cos:
933   case ir_unop_cos_reduced:
934      emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
935      break;
936
937   case ir_unop_dFdx:
938   case ir_unop_dFdy:
939      assert(!"derivatives not valid in vertex shader");
940      break;
941
942   case ir_unop_noise:
943      assert(!"not reached: should be handled by lower_noise");
944      break;
945
946   case ir_binop_add:
947      emit(BRW_OPCODE_ADD, result_dst, op[0], op[1]);
948      break;
949   case ir_binop_sub:
950      assert(!"not reached: should be handled by ir_sub_to_add_neg");
951      break;
952
953   case ir_binop_mul:
954      emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]);
955      break;
956   case ir_binop_div:
957      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
958   case ir_binop_mod:
959      assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
960      break;
961
962   case ir_binop_less:
963   case ir_binop_greater:
964   case ir_binop_lequal:
965   case ir_binop_gequal:
966   case ir_binop_equal:
967   case ir_binop_nequal: {
968      dst_reg temp = result_dst;
969      /* original gen4 does implicit conversion before comparison. */
970      if (intel->gen < 5)
971	 temp.type = op[0].type;
972
973      inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
974      inst->conditional_mod = brw_conditional_for_comparison(ir->operation);
975      emit(BRW_OPCODE_AND, result_dst, this->result, src_reg(0x1));
976      break;
977   }
978
979   case ir_binop_all_equal:
980      /* "==" operator producing a scalar boolean. */
981      if (ir->operands[0]->type->is_vector() ||
982	  ir->operands[1]->type->is_vector()) {
983	 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
984	 inst->conditional_mod = BRW_CONDITIONAL_Z;
985
986	 emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
987	 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
988	 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
989      } else {
990	 dst_reg temp = result_dst;
991	 /* original gen4 does implicit conversion before comparison. */
992	 if (intel->gen < 5)
993	    temp.type = op[0].type;
994
995	 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
996	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
997	 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
998      }
999      break;
1000   case ir_binop_any_nequal:
1001      /* "!=" operator producing a scalar boolean. */
1002      if (ir->operands[0]->type->is_vector() ||
1003	  ir->operands[1]->type->is_vector()) {
1004	 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
1005	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
1006
1007	 emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
1008	 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
1009	 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1010      } else {
1011	 dst_reg temp = result_dst;
1012	 /* original gen4 does implicit conversion before comparison. */
1013	 if (intel->gen < 5)
1014	    temp.type = op[0].type;
1015
1016	 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
1017	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
1018	 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
1019      }
1020      break;
1021
1022   case ir_unop_any:
1023      inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
1024      inst->conditional_mod = BRW_CONDITIONAL_NZ;
1025
1026      emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
1027
1028      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
1029      inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
1030      break;
1031
1032   case ir_binop_logic_xor:
1033      emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
1034      break;
1035
1036   case ir_binop_logic_or:
1037      emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
1038      break;
1039
1040   case ir_binop_logic_and:
1041      emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
1042      break;
1043
1044   case ir_binop_dot:
1045      assert(ir->operands[0]->type->is_vector());
1046      assert(ir->operands[0]->type == ir->operands[1]->type);
1047      emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
1048      break;
1049
1050   case ir_unop_sqrt:
1051      emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
1052      break;
1053   case ir_unop_rsq:
1054      emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
1055      break;
1056   case ir_unop_i2f:
1057   case ir_unop_i2u:
1058   case ir_unop_u2i:
1059   case ir_unop_u2f:
1060   case ir_unop_b2f:
1061   case ir_unop_b2i:
1062   case ir_unop_f2i:
1063      emit(BRW_OPCODE_MOV, result_dst, op[0]);
1064      break;
1065   case ir_unop_f2b:
1066   case ir_unop_i2b: {
1067      dst_reg temp = result_dst;
1068      /* original gen4 does implicit conversion before comparison. */
1069      if (intel->gen < 5)
1070	 temp.type = op[0].type;
1071
1072      inst = emit(BRW_OPCODE_CMP, temp, op[0], src_reg(0.0f));
1073      inst->conditional_mod = BRW_CONDITIONAL_NZ;
1074      inst = emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(1));
1075      break;
1076   }
1077
1078   case ir_unop_trunc:
1079      emit(BRW_OPCODE_RNDZ, result_dst, op[0]);
1080      break;
1081   case ir_unop_ceil:
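      /* ceil(x) is computed here as -RNDD(-x): negate the operand, round
       * down, then mark the result register as negated.
       */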
1082      op[0].negate = !op[0].negate;
1083      inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
1084      this->result.negate = true;
1085      break;
1086   case ir_unop_floor:
1087      inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
1088      break;
1089   case ir_unop_fract:
1090      inst = emit(BRW_OPCODE_FRC, result_dst, op[0]);
1091      break;
1092   case ir_unop_round_even:
1093      emit(BRW_OPCODE_RNDE, result_dst, op[0]);
1094      break;
1095
1096   case ir_binop_min:
1097      inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
1098      inst->conditional_mod = BRW_CONDITIONAL_L;
1099
1100      inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
1101      inst->predicate = BRW_PREDICATE_NORMAL;
1102      break;
1103   case ir_binop_max:
1104      inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
1105      inst->conditional_mod = BRW_CONDITIONAL_G;
1106
1107      inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
1108      inst->predicate = BRW_PREDICATE_NORMAL;
1109      break;
1110
1111   case ir_binop_pow:
1112      emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
1113      break;
1114
1115   case ir_unop_bit_not:
1116      inst = emit(BRW_OPCODE_NOT, result_dst, op[0]);
1117      break;
1118   case ir_binop_bit_and:
1119      inst = emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
1120      break;
1121   case ir_binop_bit_xor:
1122      inst = emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
1123      break;
1124   case ir_binop_bit_or:
1125      inst = emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
1126      break;
1127
1128   case ir_binop_lshift:
1129   case ir_binop_rshift:
1130      assert(!"GLSL 1.30 features unsupported");
1131      break;
1132
1133   case ir_quadop_vector:
1134      assert(!"not reached: should be handled by lower_quadop_vector");
1135      break;
1136   }
1137}
1138
1139
1140void
1141vec4_visitor::visit(ir_swizzle *ir)
1142{
1143   src_reg src;
1144   int i = 0;
1145   int swizzle[4];
1146
1147   /* Note that this only handles swizzles in expressions, not those on the left
1148    * hand side of an assignment, which do write masking.  See ir_assignment
1149    * for that.
1150    */
1151
1152   ir->val->accept(this);
1153   src = this->result;
1154   assert(src.file != BAD_FILE);
1155
1156   for (i = 0; i < ir->type->vector_elements; i++) {
1157      switch (i) {
1158      case 0:
1159	 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
1160	 break;
1161      case 1:
1162	 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
1163	 break;
1164      case 2:
1165	 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
1166	 break;
1167      case 3:
1168	 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
1169	    break;
1170      }
1171   }
1172   for (; i < 4; i++) {
1173      /* Replicate the last channel out. */
1174      swizzle[i] = swizzle[ir->type->vector_elements - 1];
1175   }
1176
1177   src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1178
1179   this->result = src;
1180}
1181
1182void
1183vec4_visitor::visit(ir_dereference_variable *ir)
1184{
1185   const struct glsl_type *type = ir->type;
1186   dst_reg *reg = variable_storage(ir->var);
1187
1188   if (!reg) {
1189      fail("Failed to find variable storage for %s\n", ir->var->name);
1190      this->result = src_reg(brw_null_reg());
1191      return;
1192   }
1193
1194   this->result = src_reg(*reg);
1195
1196   if (type->is_scalar() || type->is_vector() || type->is_matrix())
1197      this->result.swizzle = swizzle_for_size(type->vector_elements);
1198}
1199
1200void
1201vec4_visitor::visit(ir_dereference_array *ir)
1202{
1203   ir_constant *constant_index;
1204   src_reg src;
1205   int element_size = type_size(ir->type);
1206
1207   constant_index = ir->array_index->constant_expression_value();
1208
1209   ir->array->accept(this);
1210   src = this->result;
1211
1212   if (constant_index) {
1213      src.reg_offset += constant_index->value.i[0] * element_size;
1214   } else {
1215      /* Variable index array dereference.  It eats the "vec4" of the
1216       * base of the array and an index that offsets the Mesa register
1217       * index.
1218       */
1219      ir->array_index->accept(this);
1220
1221      src_reg index_reg;
1222
1223      if (element_size == 1) {
1224	 index_reg = this->result;
1225      } else {
1226	 index_reg = src_reg(this, glsl_type::int_type);
1227
1228	 emit(BRW_OPCODE_MUL, dst_reg(index_reg),
1229	      this->result, src_reg(element_size));
1230      }
1231
1232      if (src.reladdr) {
1233	 src_reg temp = src_reg(this, glsl_type::int_type);
1234
1235	 emit(BRW_OPCODE_ADD, dst_reg(temp), *src.reladdr, index_reg);
1236
1237	 index_reg = temp;
1238      }
1239
1240      src.reladdr = ralloc(mem_ctx, src_reg);
1241      memcpy(src.reladdr, &index_reg, sizeof(index_reg));
1242   }
1243
1244   /* If the type is smaller than a vec4, replicate the last channel out. */
1245   if (ir->type->is_scalar() || ir->type->is_vector())
1246      src.swizzle = swizzle_for_size(ir->type->vector_elements);
1247   else
1248      src.swizzle = BRW_SWIZZLE_NOOP;
1249   src.type = brw_type_for_base_type(ir->type);
1250
1251   this->result = src;
1252}
1253
1254void
1255vec4_visitor::visit(ir_dereference_record *ir)
1256{
1257   unsigned int i;
1258   const glsl_type *struct_type = ir->record->type;
1259   int offset = 0;
1260
1261   ir->record->accept(this);
1262
1263   for (i = 0; i < struct_type->length; i++) {
1264      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1265	 break;
1266      offset += type_size(struct_type->fields.structure[i].type);
1267   }
1268
1269   /* If the type is smaller than a vec4, replicate the last channel out. */
1270   if (ir->type->is_scalar() || ir->type->is_vector())
1271      this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1272   else
1273      this->result.swizzle = BRW_SWIZZLE_NOOP;
1274   this->result.type = brw_type_for_base_type(ir->type);
1275
1276   this->result.reg_offset += offset;
1277}
1278
1279/**
1280 * We want to be careful in assignment setup to hit the actual storage
1281 * instead of potentially using a temporary like we might with the
1282 * ir_dereference handler.
1283 */
1284static dst_reg
1285get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
1286{
1287   /* The LHS must be a dereference.  If the LHS is a variable indexed array
1288    * access of a vector, it must be separated into a series conditional moves
1289    * before reaching this point (see ir_vec_index_to_cond_assign).
1290    */
1291   assert(ir->as_dereference());
1292   ir_dereference_array *deref_array = ir->as_dereference_array();
1293   if (deref_array) {
1294      assert(!deref_array->array->type->is_vector());
1295   }
1296
1297   /* Use the rvalue deref handler for the most part.  We'll ignore
1298    * swizzles in it and write swizzles using writemask, though.
1299    */
1300   ir->accept(v);
1301   return dst_reg(v->result);
1302}
1303
1304void
1305vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
1306			      const struct glsl_type *type, bool predicated)
1307{
1308   if (type->base_type == GLSL_TYPE_STRUCT) {
1309      for (unsigned int i = 0; i < type->length; i++) {
1310	 emit_block_move(dst, src, type->fields.structure[i].type, predicated);
1311      }
1312      return;
1313   }
1314
1315   if (type->is_array()) {
1316      for (unsigned int i = 0; i < type->length; i++) {
1317	 emit_block_move(dst, src, type->fields.array, predicated);
1318      }
1319      return;
1320   }
1321
1322   if (type->is_matrix()) {
1323      const struct glsl_type *vec_type;
1324
1325      vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
1326					 type->vector_elements, 1);
1327
1328      for (int i = 0; i < type->matrix_columns; i++) {
1329	 emit_block_move(dst, src, vec_type, predicated);
1330      }
1331      return;
1332   }
1333
1334   assert(type->is_scalar() || type->is_vector());
1335
1336   dst->type = brw_type_for_base_type(type);
1337   src->type = dst->type;
1338
1339   dst->writemask = (1 << type->vector_elements) - 1;
1340
1341   /* Do we need to worry about swizzling a swizzle? */
1342   assert(src->swizzle == BRW_SWIZZLE_NOOP);
1343   src->swizzle = swizzle_for_size(type->vector_elements);
1344
1345   vec4_instruction *inst = emit(BRW_OPCODE_MOV, *dst, *src);
1346   if (predicated)
1347      inst->predicate = BRW_PREDICATE_NORMAL;
1348
1349   dst->reg_offset++;
1350   src->reg_offset++;
1351}
1352
1353void
1354vec4_visitor::visit(ir_assignment *ir)
1355{
1356   dst_reg dst = get_assignment_lhs(ir->lhs, this);
1357
1358   if (!ir->lhs->type->is_scalar() &&
1359       !ir->lhs->type->is_vector()) {
1360      ir->rhs->accept(this);
1361      src_reg src = this->result;
1362
1363      if (ir->condition) {
1364	 emit_bool_to_cond_code(ir->condition);
1365      }
1366
1367      emit_block_move(&dst, &src, ir->rhs->type, ir->condition != NULL);
1368      return;
1369   }
1370
1371   /* Now we're down to just a scalar/vector with writemasks. */
1372   int i;
1373
1374   ir->rhs->accept(this);
1375   src_reg src = this->result;
1376
1377   int swizzles[4];
1378   int first_enabled_chan = 0;
1379   int src_chan = 0;
1380
1381   assert(ir->lhs->type->is_vector() ||
1382	  ir->lhs->type->is_scalar());
1383   dst.writemask = ir->write_mask;
1384
1385   for (int i = 0; i < 4; i++) {
1386      if (dst.writemask & (1 << i)) {
1387	 first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
1388	 break;
1389      }
1390   }
1391
1392   /* Swizzle a small RHS vector into the channels being written.
1393    *
1394    * glsl ir treats write_mask as dictating how many channels are
1395    * present on the RHS while in our instructions we need to make
1396    * those channels appear in the slots of the vec4 they're written to.
1397    */
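   /* For instance, for "v.yz = <vec2 expr>" (assuming the RHS arrives with
    * the default .xyyy swizzle for a vec2), the loop below produces the
    * swizzle .yxyy: channel y of the destination reads component x of the
    * RHS and channel z reads component y.
    */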
1398   for (int i = 0; i < 4; i++) {
1399      if (dst.writemask & (1 << i))
1400	 swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
1401      else
1402	 swizzles[i] = first_enabled_chan;
1403   }
1404   src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
1405			      swizzles[2], swizzles[3]);
1406
1407   if (ir->condition) {
1408      emit_bool_to_cond_code(ir->condition);
1409   }
1410
1411   for (i = 0; i < type_size(ir->lhs->type); i++) {
1412      vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src);
1413
1414      if (ir->condition)
1415	 inst->predicate = BRW_PREDICATE_NORMAL;
1416
1417      dst.reg_offset++;
1418      src.reg_offset++;
1419   }
1420}
1421
1422void
1423vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
1424{
1425   if (ir->type->base_type == GLSL_TYPE_STRUCT) {
1426      foreach_list(node, &ir->components) {
1427	 ir_constant *field_value = (ir_constant *)node;
1428
1429	 emit_constant_values(dst, field_value);
1430      }
1431      return;
1432   }
1433
1434   if (ir->type->is_array()) {
1435      for (unsigned int i = 0; i < ir->type->length; i++) {
1436	 emit_constant_values(dst, ir->array_elements[i]);
1437      }
1438      return;
1439   }
1440
1441   if (ir->type->is_matrix()) {
1442      for (int i = 0; i < ir->type->matrix_columns; i++) {
1443	 for (int j = 0; j < ir->type->vector_elements; j++) {
1444	    dst->writemask = 1 << j;
1445	    dst->type = BRW_REGISTER_TYPE_F;
1446
1447	    emit(BRW_OPCODE_MOV, *dst,
1448		 src_reg(ir->value.f[i * ir->type->vector_elements + j]));
1449	 }
1450	 dst->reg_offset++;
1451      }
1452      return;
1453   }
1454
1455   for (int i = 0; i < ir->type->vector_elements; i++) {
1456      dst->writemask = 1 << i;
1457      dst->type = brw_type_for_base_type(ir->type);
1458
1459      switch (ir->type->base_type) {
1460      case GLSL_TYPE_FLOAT:
1461	 emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.f[i]));
1462	 break;
1463      case GLSL_TYPE_INT:
1464	 emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.i[i]));
1465	 break;
1466      case GLSL_TYPE_UINT:
1467	 emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.u[i]));
1468	 break;
1469      case GLSL_TYPE_BOOL:
1470	 emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.b[i]));
1471	 break;
1472      default:
1473	 assert(!"Non-float/uint/int/bool constant");
1474	 break;
1475      }
1476   }
1477   dst->reg_offset++;
1478}
1479
1480void
1481vec4_visitor::visit(ir_constant *ir)
1482{
1483   dst_reg dst = dst_reg(this, ir->type);
1484   this->result = src_reg(dst);
1485
1486   emit_constant_values(&dst, ir);
1487}
1488
1489void
1490vec4_visitor::visit(ir_call *ir)
1491{
1492   assert(!"not reached");
1493}
1494
1495void
1496vec4_visitor::visit(ir_texture *ir)
1497{
1498   assert(!"not reached");
1499}
1500
1501void
1502vec4_visitor::visit(ir_return *ir)
1503{
1504   assert(!"not reached");
1505}
1506
1507void
1508vec4_visitor::visit(ir_discard *ir)
1509{
1510   assert(!"not reached");
1511}
1512
1513void
1514vec4_visitor::visit(ir_if *ir)
1515{
1516   /* Don't point the annotation at the if statement, because then it plus
1517    * the then and else blocks get printed.
1518    */
1519   this->base_ir = ir->condition;
1520
1521   if (intel->gen == 6) {
1522      emit_if_gen6(ir);
1523   } else {
1524      emit_bool_to_cond_code(ir->condition);
1525      vec4_instruction *inst = emit(BRW_OPCODE_IF);
1526      inst->predicate = BRW_PREDICATE_NORMAL;
1527   }
1528
1529   visit_instructions(&ir->then_instructions);
1530
1531   if (!ir->else_instructions.is_empty()) {
1532      this->base_ir = ir->condition;
1533      emit(BRW_OPCODE_ELSE);
1534
1535      visit_instructions(&ir->else_instructions);
1536   }
1537
1538   this->base_ir = ir->condition;
1539   emit(BRW_OPCODE_ENDIF);
1540}
1541
1542int
1543vec4_visitor::emit_vue_header_gen4(int header_mrf)
1544{
1545   /* Get the position */
1546   src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]);
1547
1548   /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
1549   dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
1550
1551   current_annotation = "NDC";
1552   dst_reg ndc_w = ndc;
1553   ndc_w.writemask = WRITEMASK_W;
1554   src_reg pos_w = pos;
1555   pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
1556   emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
1557
1558   dst_reg ndc_xyz = ndc;
1559   ndc_xyz.writemask = WRITEMASK_XYZ;
1560
1561   emit(BRW_OPCODE_MUL, ndc_xyz, pos, src_reg(ndc_w));
1562
1563   if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
1564       c->key.nr_userclip || brw->has_negative_rhw_bug) {
1565      dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
1566      GLuint i;
1567
1568      emit(BRW_OPCODE_MOV, header1, 0u);
1569
1570      if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
1571	 assert(!"finishme: psiz");
1572	 src_reg psiz;
1573
1574	 header1.writemask = WRITEMASK_W;
1575	 emit(BRW_OPCODE_MUL, header1, psiz, 1u << 11);
1576	 emit(BRW_OPCODE_AND, header1, src_reg(header1), 0x7ff << 8);
1577      }
1578
1579      for (i = 0; i < c->key.nr_userclip; i++) {
1580	 vec4_instruction *inst;
1581
1582	 inst = emit(BRW_OPCODE_DP4, dst_reg(brw_null_reg()),
1583		     pos, src_reg(c->userplane[i]));
1584	 inst->conditional_mod = BRW_CONDITIONAL_L;
1585
1586	 inst = emit(BRW_OPCODE_OR, header1, src_reg(header1), 1u << i);
1587	 inst->predicate = BRW_PREDICATE_NORMAL;
1588      }
1589
1590      /* i965 clipping workaround:
1591       * 1) Test for -ve rhw
1592       * 2) If set,
1593       *      set ndc = (0,0,0,0)
1594       *      set ucp[6] = 1
1595       *
1596       * Later, clipping will detect ucp[6] and ensure the primitive is
1597       * clipped against all fixed planes.
1598       */
1599      if (brw->has_negative_rhw_bug) {
1600#if 0
1601	 /* FINISHME */
1602	 brw_CMP(p,
1603		 vec8(brw_null_reg()),
1604		 BRW_CONDITIONAL_L,
1605		 brw_swizzle1(ndc, 3),
1606		 brw_imm_f(0));
1607
1608	 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
1609	 brw_MOV(p, ndc, brw_imm_f(0));
1610	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1611#endif
1612      }
1613
1614      header1.writemask = WRITEMASK_XYZW;
1615      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(header1));
1616   } else {
1617      emit(BRW_OPCODE_MOV, retype(brw_message_reg(header_mrf++),
1618				  BRW_REGISTER_TYPE_UD), 0u);
1619   }
1620
1621   if (intel->gen == 5) {
1622      /* There are 20 DWs (D0-D19) in VUE header on Ironlake:
1623       * dword 0-3 (m1) of the header is indices, point width, clip flags.
1624       * dword 4-7 (m2) is the ndc position (set above)
1625       * dword 8-11 (m3) of the vertex header is the 4D space position
1626       * dword 12-19 (m4,m5) of the vertex header is the user clip distance.
1627       * m6 is a pad so that the vertex element data is aligned
1628       * m7 is the first vertex data we fill.
1629       */
1630      current_annotation = "NDC";
1631      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));
1632
1633      current_annotation = "gl_Position";
1634      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);
1635
1636      /* user clip distance. */
1637      header_mrf += 2;
1638
1639      /* Pad so that vertex element data is aligned. */
1640      header_mrf++;
1641   } else {
1642      /* There are 8 dwords in VUE header pre-Ironlake:
1643       * dword 0-3 (m1) is indices, point width, clip flags.
1644       * dword 4-7 (m2) is ndc position (set above)
1645       *
1646       * dword 8-11 (m3) is the first vertex data.
1647       */
1648      current_annotation = "NDC";
1649      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));
1650
1651      current_annotation = "gl_Position";
1652      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);
1653   }
1654
1655   return header_mrf;
1656}
1657
1658int
1659vec4_visitor::emit_vue_header_gen6(int header_mrf)
1660{
1661   struct brw_reg reg;
1662
1663   /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
1664    * dword 0-3 (m2) of the header is indices, point width, clip flags.
1665    * dword 4-7 (m3) is the 4D space position
1666    * dword 8-15 (m4,m5) of the vertex header is the user clip distance if
1667    * enabled.
1668    *
1669    * m4 or 6 is the first vertex element data we fill.
1670    */
1671
1672   current_annotation = "indices, point width, clip flags";
1673   reg = brw_message_reg(header_mrf++);
1674   emit(BRW_OPCODE_MOV, retype(reg, BRW_REGISTER_TYPE_D), src_reg(0));
1675   if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
1676      emit(BRW_OPCODE_MOV, brw_writemask(reg, WRITEMASK_W),
1677	   src_reg(output_reg[VERT_RESULT_PSIZ]));
1678   }
1679
1680   current_annotation = "gl_Position";
1681   emit(BRW_OPCODE_MOV,
1682	brw_message_reg(header_mrf++), src_reg(output_reg[VERT_RESULT_HPOS]));
1683
1684   current_annotation = "user clip distances";
1685   if (c->key.nr_userclip) {
1686      for (int i = 0; i < c->key.nr_userclip; i++) {
1687	 struct brw_reg m;
1688	 if (i < 4)
1689	    m = brw_message_reg(header_mrf);
1690	 else
1691	    m = brw_message_reg(header_mrf + 1);
1692
1693	 emit(BRW_OPCODE_DP4,
1694	      dst_reg(brw_writemask(m, 1 << (i & 3))),
1695	      src_reg(output_reg[VERT_RESULT_HPOS]), src_reg(c->userplane[i]));
1696      }
1697      header_mrf += 2;
1698   }
1699
1700   current_annotation = NULL;
1701
1702   return header_mrf;
1703}
1704
1705static int
1706align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
1707{
1708   struct intel_context *intel = &brw->intel;
1709
1710   if (intel->gen >= 6) {
1711      /* URB data written (does not include the message header reg) must
1712       * be a multiple of 256 bits, or 2 VS registers.  See vol5c.5,
1713       * section 5.4.3.2.2: URB_INTERLEAVED.
1714       *
1715       * URB entries are allocated on a multiple of 1024 bits, so an
1716       * extra 128 bits written here to make the end align to 256 is
1717       * no problem.
1718       */
1719      if ((mlen % 2) != 1)
1720	 mlen++;
1721   }
1722
1723   return mlen;
1724}
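/* For example, a message of 1 header reg plus 3 data regs (mlen 4) gets
 * bumped to mlen 5 on gen6+, so the data portion becomes an even number of
 * registers, as the interleaved URB write requires.
 */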
1725
1726/**
1727 * Generates the VUE payload plus the 1 or 2 URB write instructions to
1728 * complete the VS thread.
1729 *
1730 * The VUE layout is documented in Volume 2a.
1731 */
1732void
1733vec4_visitor::emit_urb_writes()
1734{
1735   /* MRF 0 is reserved for the debugger, so start with message header
1736    * in MRF 1.
1737    */
1738   int base_mrf = 1;
1739   int mrf = base_mrf;
1740   int urb_entry_size;
1741   uint64_t outputs_remaining = c->prog_data.outputs_written;
1742   /* In the process of generating our URB write message contents, we
1743    * may need to unspill a register or load from an array.  Those
1744    * reads would use MRFs 14-15.
1745    */
1746   int max_usable_mrf = 13;
1747
1748   /* FINISHME: edgeflag */
1749
1750   /* First mrf is the g0-based message header containing URB handles and such,
1751    * which is implied in VS_OPCODE_URB_WRITE.
1752    */
1753   mrf++;
1754
1755   if (intel->gen >= 6) {
1756      mrf = emit_vue_header_gen6(mrf);
1757   } else {
1758      mrf = emit_vue_header_gen4(mrf);
1759   }
1760
1761   /* Set up the VUE data for the first URB write */
1762   int attr;
1763   for (attr = 0; attr < VERT_RESULT_MAX; attr++) {
1764      if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
1765	 continue;
1766
1767      outputs_remaining &= ~BITFIELD64_BIT(attr);
1768
1769      /* This is set up in the VUE header. */
1770      if (attr == VERT_RESULT_HPOS)
1771	 continue;
1772
1773      /* This is loaded into the VUE header, and thus doesn't occupy
1774       * an attribute slot.
1775       */
1776      if (attr == VERT_RESULT_PSIZ)
1777	 continue;
1778
1779      emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr]));
1780
1781      /* If this was the last usable MRF, we can't fit anything more into
1782       * this URB WRITE.  Note that with a base_mrf of 1, stopping after
1783       * MRF 13 leaves an even-numbered amount of URB write data, which
1784       * meets gen6's requirements for length alignment.
1785       */
1786      if (mrf > max_usable_mrf) {
1787	 attr++;
1788	 break;
1789      }
1790   }
1791
1792   vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
1793   inst->base_mrf = base_mrf;
1794   inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
1795   inst->eot = !outputs_remaining;
1796
1797   urb_entry_size = mrf - base_mrf;
1798
1799   /* Optional second URB write */
1800   if (outputs_remaining) {
1801      mrf = base_mrf + 1;
1802
1803      for (; attr < VERT_RESULT_MAX; attr++) {
1804	 if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
1805	    continue;
1806
1807	 assert(mrf < max_usable_mrf);
1808
1809	 emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr]));
1810      }
1811
1812      inst = emit(VS_OPCODE_URB_WRITE);
1813      inst->base_mrf = base_mrf;
1814      inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
1815      inst->eot = true;
1816      /* URB destination offset.  The previous write used MRFs 2-13 for
1817       * data (MRF 1 being the header), so 12 regs.  URB offset is in
1818       * URB row increments, and each of our MRFs is half of one of
1819       * those, since we're doing interleaved writes.
1820       */
1821      inst->offset = (max_usable_mrf - base_mrf) / 2;
1822
1823      urb_entry_size += mrf - base_mrf;
1824   }
1825
1826   if (intel->gen == 6)
1827      c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 8) / 8;
1828   else
1829      c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4;
1830}
1831
1832src_reg
1833vec4_visitor::get_scratch_offset(vec4_instruction *inst,
1834				 src_reg *reladdr, int reg_offset)
1835{
1836   /* Because we store the values to scratch interleaved like our
1837    * vertex data, we need to scale the vec4 index by 2.
1838    */
1839   int message_header_scale = 2;
1840
1841   /* Pre-gen6, the message header uses byte offsets instead of vec4
1842    * (16-byte) offset units.
1843    */
1844   if (intel->gen < 6)
1845      message_header_scale *= 16;
1846
1847   if (reladdr) {
1848      src_reg index = src_reg(this, glsl_type::int_type);
1849
1850      vec4_instruction *add = emit(BRW_OPCODE_ADD,
1851				   dst_reg(index),
1852				   *reladdr,
1853				   src_reg(reg_offset));
1854      /* Move our new instruction from the tail to its correct place. */
1855      add->remove();
1856      inst->insert_before(add);
1857
1858      vec4_instruction *mul = emit(BRW_OPCODE_MUL, dst_reg(index),
1859				   index, src_reg(message_header_scale));
1860      mul->remove();
1861      inst->insert_before(mul);
1862
1863      return index;
1864   } else {
1865      return src_reg(reg_offset * message_header_scale);
1866   }
1867}
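/* For example, a constant reg_offset of 3 with no reladdr becomes the
 * immediate 6 on gen6+ (the vec4 index scaled by 2 for the interleaved
 * layout) or 96 on earlier gens (further scaled by 16 into a byte offset).
 */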
1868
1869/**
1870 * Emits an instruction before @inst to load the value named by @orig_src
1871 * from scratch space at @base_offset to @temp.
1872 */
1873void
1874vec4_visitor::emit_scratch_read(vec4_instruction *inst,
1875				dst_reg temp, src_reg orig_src,
1876				int base_offset)
1877{
1878   int reg_offset = base_offset + orig_src.reg_offset;
1879   src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset);
1880
1881   vec4_instruction *scratch_read_inst = emit(VS_OPCODE_SCRATCH_READ,
1882					      temp, index);
1883
1884   scratch_read_inst->base_mrf = 14;
1885   scratch_read_inst->mlen = 1;
1886   /* Move our instruction from the tail to its correct place. */
1887   scratch_read_inst->remove();
1888   inst->insert_before(scratch_read_inst);
1889}
1890
1891/**
1892 * Emits an instruction after @inst to store the value to be written
1893 * to @orig_dst to scratch space at @base_offset, from @temp.
1894 */
1895void
1896vec4_visitor::emit_scratch_write(vec4_instruction *inst,
1897				 src_reg temp, dst_reg orig_dst,
1898				 int base_offset)
1899{
1900   int reg_offset = base_offset + orig_dst.reg_offset;
1901   src_reg index = get_scratch_offset(inst, orig_dst.reladdr, reg_offset);
1902
1903   dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
1904				       orig_dst.writemask));
1905   vec4_instruction *scratch_write_inst = emit(VS_OPCODE_SCRATCH_WRITE,
1906					       dst, temp, index);
1907   scratch_write_inst->base_mrf = 13;
1908   scratch_write_inst->mlen = 2;
1909   scratch_write_inst->predicate = inst->predicate;
1910   /* Move our instruction from the tail to its correct place. */
1911   scratch_write_inst->remove();
1912   inst->insert_after(scratch_write_inst);
1913}
1914
1915/**
1916 * We can't generally support array access in GRF space, because a
1917 * single instruction's destination can only span 2 contiguous
1918 * registers.  So, we send all GRF arrays that get variable index
1919 * access to scratch space.
1920 */
1921void
1922vec4_visitor::move_grf_array_access_to_scratch()
1923{
1924   int scratch_loc[this->virtual_grf_count];
1925
1926   for (int i = 0; i < this->virtual_grf_count; i++) {
1927      scratch_loc[i] = -1;
1928   }
1929
1930   /* First, calculate the set of virtual GRFs that need to be punted
1931    * to scratch due to having any array access on them, and where in
1932    * scratch.
1933    */
1934   foreach_list(node, &this->instructions) {
1935      vec4_instruction *inst = (vec4_instruction *)node;
1936
1937      if (inst->dst.file == GRF && inst->dst.reladdr &&
1938	  scratch_loc[inst->dst.reg] == -1) {
1939	 scratch_loc[inst->dst.reg] = c->last_scratch;
1940	 c->last_scratch += this->virtual_grf_sizes[inst->dst.reg] * 8 * 4;
1941      }
1942
1943      for (int i = 0 ; i < 3; i++) {
1944	 src_reg *src = &inst->src[i];
1945
1946	 if (src->file == GRF && src->reladdr &&
1947	     scratch_loc[src->reg] == -1) {
1948	    scratch_loc[src->reg] = c->last_scratch;
1949	    c->last_scratch += this->virtual_grf_sizes[src->reg] * 8 * 4;
1950	 }
1951      }
1952   }
1953
1954   /* Now, for anything that will be accessed through scratch, rewrite
1955    * it to load/store.  Note that this is a _safe list walk, because
1956    * we may generate a new scratch_write instruction after the one
1957    * we're processing.
1958    */
1959   foreach_list_safe(node, &this->instructions) {
1960      vec4_instruction *inst = (vec4_instruction *)node;
1961
1962      /* Set up the annotation tracking for new generated instructions. */
1963      base_ir = inst->ir;
1964      current_annotation = inst->annotation;
1965
1966      if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) {
1967	 src_reg temp = src_reg(this, glsl_type::vec4_type);
1968
1969	 emit_scratch_write(inst, temp, inst->dst, scratch_loc[inst->dst.reg]);
1970
1971	 inst->dst.file = temp.file;
1972	 inst->dst.reg = temp.reg;
1973	 inst->dst.reg_offset = temp.reg_offset;
1974	 inst->dst.reladdr = NULL;
1975      }
1976
1977      for (int i = 0 ; i < 3; i++) {
1978	 if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1)
1979	    continue;
1980
1981	 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
1982
1983	 emit_scratch_read(inst, temp, inst->src[i],
1984			   scratch_loc[inst->src[i].reg]);
1985
1986	 inst->src[i].file = temp.file;
1987	 inst->src[i].reg = temp.reg;
1988	 inst->src[i].reg_offset = temp.reg_offset;
1989	 inst->src[i].reladdr = NULL;
1990      }
1991   }
1992}
1993
1994
1995vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
1996			   struct gl_shader_program *prog,
1997			   struct brw_shader *shader)
1998{
1999   this->c = c;
2000   this->p = &c->func;
2001   this->brw = p->brw;
2002   this->intel = &brw->intel;
2003   this->ctx = &intel->ctx;
2004   this->prog = prog;
2005   this->shader = shader;
2006
2007   this->mem_ctx = ralloc_context(NULL);
2008   this->failed = false;
2009
2010   this->base_ir = NULL;
2011   this->current_annotation = NULL;
2012
2013   this->c = c;
2014   this->vp = brw->vertex_program; /* FINISHME: change for precompile */
2015   this->prog_data = &c->prog_data;
2016
2017   this->variable_ht = hash_table_ctor(0,
2018				       hash_table_pointer_hash,
2019				       hash_table_pointer_compare);
2020
2021   this->virtual_grf_sizes = NULL;
2022   this->virtual_grf_count = 0;
2023   this->virtual_grf_array_size = 0;
2024
2025   this->uniforms = 0;
2026
2031
2032vec4_visitor::~vec4_visitor()
2033{
2034   hash_table_dtor(this->variable_ht);
2035}
2036
2037
2038void
2039vec4_visitor::fail(const char *format, ...)
2040{
2041   va_list va;
2042   char *msg;
2043
2044   if (failed)
2045      return;
2046
2047   failed = true;
2048
2049   va_start(va, format);
2050   msg = ralloc_vasprintf(mem_ctx, format, va);
2051   va_end(va);
2052   msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);
2053
2054   this->fail_msg = msg;
2055
2056   if (INTEL_DEBUG & DEBUG_VS) {
2057      fprintf(stderr, "%s",  msg);
2058   }
2059}
2060
2061} /* namespace brw */
2062