brw_vec4_visitor.cpp revision fea7d34b3545878ce00914f388e1eeebf55f7748
/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "brw_vec4.h"
extern "C" {
#include "main/macros.h"
#include "program/prog_parameter.h"
}

namespace brw {

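/* Convert a dst_reg into a src_reg that reads back what was just written.
 * The writemask is translated into a read swizzle, replicating the last
 * enabled channel into any disabled slots.  A worked example, traced from
 * the loop below: a writemask of .xz gives
 *
 *    swizzles = {X, Z, Z, Z}  ->  BRW_SWIZZLE4(X, Z, Z, Z)
 */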
src_reg::src_reg(dst_reg reg)
{
   init();

   this->file = reg.file;
   this->reg = reg.reg;
   this->reg_offset = reg.reg_offset;
   this->type = reg.type;
   this->reladdr = reg.reladdr;

   int swizzles[4];
   int next_chan = 0;
   int last = 0;

   for (int i = 0; i < 4; i++) {
      if (!(reg.writemask & (1 << i)))
	 continue;

      swizzles[next_chan++] = last = i;
   }

   for (; next_chan < 4; next_chan++) {
      swizzles[next_chan] = last;
   }

   this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
				swizzles[2], swizzles[3]);
}

dst_reg::dst_reg(src_reg reg)
{
   init();

   this->file = reg.file;
   this->reg = reg.reg;
   this->reg_offset = reg.reg_offset;
   this->type = reg.type;
   this->writemask = WRITEMASK_XYZW;
   this->reladdr = reg.reladdr;
}

vec4_instruction *
vec4_visitor::emit(enum opcode opcode, dst_reg dst,
		   src_reg src0, src_reg src1, src_reg src2)
{
   vec4_instruction *inst = new(mem_ctx) vec4_instruction();

   inst->opcode = opcode;
   inst->dst = dst;
   inst->src[0] = src0;
   inst->src[1] = src1;
   inst->src[2] = src2;
   inst->ir = this->base_ir;
   inst->annotation = this->current_annotation;

   this->instructions.push_tail(inst);

   return inst;
}


vec4_instruction *
vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
{
   return emit(opcode, dst, src0, src1, src_reg());
}

vec4_instruction *
vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
{
   assert(dst.writemask != 0);
   return emit(opcode, dst, src0, src_reg(), src_reg());
}

vec4_instruction *
vec4_visitor::emit(enum opcode opcode)
{
   return emit(opcode, dst_reg(), src_reg(), src_reg(), src_reg());
}

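/* Emit a dot product, selecting the opcode by operand width: elements of
 * 2, 3, and 4 map to DP2, DP3, and DP4 respectively, so only values in
 * [2, 4] are valid.  A hypothetical call site:
 *
 *    emit_dp(dst, op[0], op[1], 3);   /* emits BRW_OPCODE_DP3 *​/
 */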
void
vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
{
   static enum opcode dot_opcodes[] = {
      BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
   };

   emit(dot_opcodes[elements - 2], dst, src0, src1);
}

void
vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
{
   /* The gen6 math instruction ignores the source modifiers --
    * swizzle, abs, negate, and at least some parts of the register
    * region description.
    */
   src_reg temp_src = src_reg(this, glsl_type::vec4_type);
   emit(BRW_OPCODE_MOV, dst_reg(temp_src), src);

   if (dst.writemask != WRITEMASK_XYZW) {
      /* The gen6 math instruction must be align1, so we can't do
       * writemasks.
       */
      dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);

      emit(opcode, temp_dst, temp_src);

      emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst));
   } else {
      emit(opcode, dst, temp_src);
   }
}

void
vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
{
   vec4_instruction *inst = emit(opcode, dst, src);
   inst->base_mrf = 1;
   inst->mlen = 1;
}

void
vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
{
   switch (opcode) {
   case SHADER_OPCODE_RCP:
   case SHADER_OPCODE_RSQ:
   case SHADER_OPCODE_SQRT:
   case SHADER_OPCODE_EXP2:
   case SHADER_OPCODE_LOG2:
   case SHADER_OPCODE_SIN:
   case SHADER_OPCODE_COS:
      break;
   default:
      assert(!"not reached: bad math opcode");
      return;
   }

   if (intel->gen >= 6) {
      return emit_math1_gen6(opcode, dst, src);
   } else {
      return emit_math1_gen4(opcode, dst, src);
   }
}

void
vec4_visitor::emit_math2_gen6(enum opcode opcode,
			      dst_reg dst, src_reg src0, src_reg src1)
{
   src_reg expanded;

   /* The gen6 math instruction ignores the source modifiers --
    * swizzle, abs, negate, and at least some parts of the register
    * region description.  Move the sources to temporaries to make it
    * generally work.
    */

   expanded = src_reg(this, glsl_type::vec4_type);
   emit(BRW_OPCODE_MOV, dst_reg(expanded), src0);
   src0 = expanded;

   expanded = src_reg(this, glsl_type::vec4_type);
   emit(BRW_OPCODE_MOV, dst_reg(expanded), src1);
   src1 = expanded;

   if (dst.writemask != WRITEMASK_XYZW) {
      /* The gen6 math instruction must be align1, so we can't do
       * writemasks.
       */
      dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);

      emit(opcode, temp_dst, src0, src1);

      emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst));
   } else {
      emit(opcode, dst, src0, src1);
   }
}

void
vec4_visitor::emit_math2_gen4(enum opcode opcode,
			      dst_reg dst, src_reg src0, src_reg src1)
{
   vec4_instruction *inst = emit(opcode, dst, src0, src1);
   inst->base_mrf = 1;
   inst->mlen = 2;
}

void
vec4_visitor::emit_math(enum opcode opcode,
			dst_reg dst, src_reg src0, src_reg src1)
{
   assert(opcode == SHADER_OPCODE_POW);

   if (intel->gen >= 6) {
      return emit_math2_gen6(opcode, dst, src0, src1);
   } else {
      return emit_math2_gen4(opcode, dst, src0, src1);
   }
}

void
vec4_visitor::visit_instructions(const exec_list *list)
{
   foreach_iter(exec_list_iterator, iter, *list) {
      ir_instruction *ir = (ir_instruction *)iter.get();

      base_ir = ir;
      ir->accept(this);
   }
}


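/* Measure a type in units of vec4 slots.  Every scalar or vector occupies
 * a full slot, which packs floats poorly but keeps array indexing simple.
 * Some worked examples from the cases below:
 *
 *    float    -> 1 slot     mat3                         -> 3 slots
 *    vec4[4]  -> 4 slots    struct { vec3 v; float f; }  -> 2 slots
 */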
static int
type_size(const struct glsl_type *type)
{
   unsigned int i;
   int size;

   switch (type->base_type) {
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_BOOL:
      if (type->is_matrix()) {
	 return type->matrix_columns;
      } else {
	 /* Regardless of size of vector, it gets a vec4. This is bad
	  * packing for things like floats, but otherwise arrays become a
	  * mess.  Hopefully a later pass over the code can pack scalars
	  * down if appropriate.
	  */
	 return 1;
      }
   case GLSL_TYPE_ARRAY:
      assert(type->length > 0);
      return type_size(type->fields.array) * type->length;
   case GLSL_TYPE_STRUCT:
      size = 0;
      for (i = 0; i < type->length; i++) {
	 size += type_size(type->fields.structure[i].type);
      }
      return size;
   case GLSL_TYPE_SAMPLER:
      /* Samplers take up one slot in UNIFORMS[], but they're baked in
       * at link time.
       */
      return 1;
   default:
      assert(0);
      return 0;
   }
}

int
vec4_visitor::virtual_grf_alloc(int size)
{
   if (virtual_grf_array_size <= virtual_grf_count) {
      if (virtual_grf_array_size == 0)
	 virtual_grf_array_size = 16;
      else
	 virtual_grf_array_size *= 2;
      virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
				   virtual_grf_array_size);
   }
   virtual_grf_sizes[virtual_grf_count] = size;
   return virtual_grf_count++;
}

src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
{
   init();

   this->file = GRF;
   this->reg = v->virtual_grf_alloc(type_size(type));

   if (type->is_array() || type->is_record()) {
      this->swizzle = BRW_SWIZZLE_NOOP;
   } else {
      this->swizzle = swizzle_for_size(type->vector_elements);
   }

   this->type = brw_type_for_base_type(type);
}

dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
{
   init();

   this->file = GRF;
   this->reg = v->virtual_grf_alloc(type_size(type));

   if (type->is_array() || type->is_record()) {
      this->writemask = WRITEMASK_XYZW;
   } else {
      this->writemask = (1 << type->vector_elements) - 1;
   }

   this->type = brw_type_for_base_type(type);
}

/* Our support for uniforms is piggy-backed on the struct
 * gl_vertex_program, because that's where the values actually
 * get stored, rather than in some global gl_shader_program uniform
 * store.
 */
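/* For example, following the vector case below, a vec2 uniform fills param
 * slots 0-1 with pointers into ParameterValues[], pads slots 2-3 with
 * PARAM_CONVERT_ZERO/NULL, records a uniform_size of 2, and returns 1 (one
 * ParameterValues row consumed).
 */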
int
vec4_visitor::setup_uniform_values(int loc, const glsl_type *type)
{
   unsigned int offset = 0;
   float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f;

   if (type->is_matrix()) {
      const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
							type->vector_elements,
							1);

      for (unsigned int i = 0; i < type->matrix_columns; i++) {
	 offset += setup_uniform_values(loc + offset, column);
      }

      return offset;
   }

   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      for (unsigned int i = 0; i < type->vector_elements; i++) {
	 int slot = this->uniforms * 4 + i;
	 switch (type->base_type) {
	 case GLSL_TYPE_FLOAT:
	    c->prog_data.param_convert[slot] = PARAM_NO_CONVERT;
	    break;
	 case GLSL_TYPE_UINT:
	    c->prog_data.param_convert[slot] = PARAM_CONVERT_F2U;
	    break;
	 case GLSL_TYPE_INT:
	    c->prog_data.param_convert[slot] = PARAM_CONVERT_F2I;
	    break;
	 case GLSL_TYPE_BOOL:
	    c->prog_data.param_convert[slot] = PARAM_CONVERT_F2B;
	    break;
	 default:
	    assert(!"not reached");
	    c->prog_data.param_convert[slot] = PARAM_NO_CONVERT;
	    break;
	 }
	 c->prog_data.param[slot] = &values[i];
      }

      for (unsigned int i = type->vector_elements; i < 4; i++) {
	 c->prog_data.param_convert[this->uniforms * 4 + i] =
	    PARAM_CONVERT_ZERO;
	 c->prog_data.param[this->uniforms * 4 + i] = NULL;
      }

      this->uniform_size[this->uniforms] = type->vector_elements;
      this->uniforms++;

      return 1;

   case GLSL_TYPE_STRUCT:
      for (unsigned int i = 0; i < type->length; i++) {
	 offset += setup_uniform_values(loc + offset,
					type->fields.structure[i].type);
      }
      return offset;

   case GLSL_TYPE_ARRAY:
      for (unsigned int i = 0; i < type->length; i++) {
	 offset += setup_uniform_values(loc + offset, type->fields.array);
      }
      return offset;

   case GLSL_TYPE_SAMPLER:
      /* The sampler takes up a slot, but we don't use any values from it. */
      return 1;

   default:
      assert(!"not reached");
      return 0;
   }
}

/* Our support for builtin uniforms is even scarier than non-builtin.
 * It sits on top of the PROG_STATE_VAR parameters that are
 * automatically updated from GL context state.
 */
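/* For example, a state slot swizzled XYZW contributes four channels below,
 * while a scalar state value swizzled XXXX stops after one: the repeated
 * channel ends the loop, so the count matches the glsl_type of the field.
 */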
void
vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
{
   const ir_state_slot *const slots = ir->state_slots;
   assert(ir->state_slots != NULL);

   for (unsigned int i = 0; i < ir->num_state_slots; i++) {
      /* This state reference has already been set up by ir_to_mesa,
       * but we'll get the same index back here.  We can reference
       * ParameterValues directly, since unlike brw_fs.cpp, we never
       * add new state references during compile.
       */
      int index = _mesa_add_state_reference(this->vp->Base.Parameters,
					    (gl_state_index *)slots[i].tokens);
      float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f;

      this->uniform_size[this->uniforms] = 0;
      /* Add each of the unique swizzled channels of the element.
       * This will end up matching the size of the glsl_type of this field.
       */
      int last_swiz = -1;
      for (unsigned int j = 0; j < 4; j++) {
	 int swiz = GET_SWZ(slots[i].swizzle, j);
	 if (swiz == last_swiz)
	    break;
	 last_swiz = swiz;

	 c->prog_data.param[this->uniforms * 4 + j] = &values[swiz];
	 c->prog_data.param_convert[this->uniforms * 4 + j] = PARAM_NO_CONVERT;
	 this->uniform_size[this->uniforms]++;
      }
      this->uniforms++;
   }
}

dst_reg *
vec4_visitor::variable_storage(ir_variable *var)
{
   return (dst_reg *)hash_table_find(this->variable_ht, var);
}

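/* Evaluate a boolean rvalue and leave the result in the flag register by
 * setting conditional_mod on the generating instruction.  Recognized logic
 * and comparison expressions fold directly into condition codes; any other
 * expression is evaluated to a value whose low bit (gen6+) or whole
 * register (gen4/5) is then tested for nonzero.
 */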
void
vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
{
   ir_expression *expr = ir->as_expression();

   if (expr) {
      src_reg op[2];
      vec4_instruction *inst;

      assert(expr->get_num_operands() <= 2);
      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
	 assert(expr->operands[i]->type->is_scalar());

	 expr->operands[i]->accept(this);
	 op[i] = this->result;
      }

      switch (expr->operation) {
      case ir_unop_logic_not:
	 inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], src_reg(1));
	 inst->conditional_mod = BRW_CONDITIONAL_Z;
	 break;

      case ir_binop_logic_xor:
	 inst = emit(BRW_OPCODE_XOR, dst_null_d(), op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 break;

      case ir_binop_logic_or:
	 inst = emit(BRW_OPCODE_OR, dst_null_d(), op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 break;

      case ir_binop_logic_and:
	 inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 break;

      case ir_unop_f2b:
	 if (intel->gen >= 6) {
	    inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0.0f));
	 } else {
	    inst = emit(BRW_OPCODE_MOV, dst_null_f(), op[0]);
	 }
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 break;

      case ir_unop_i2b:
	 if (intel->gen >= 6) {
	    inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
	 } else {
	    inst = emit(BRW_OPCODE_MOV, dst_null_d(), op[0]);
	 }
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 break;

      case ir_binop_greater:
      case ir_binop_gequal:
      case ir_binop_less:
      case ir_binop_lequal:
      case ir_binop_equal:
      case ir_binop_all_equal:
      case ir_binop_nequal:
      case ir_binop_any_nequal:
	 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
	 inst->conditional_mod =
	    brw_conditional_for_comparison(expr->operation);
	 break;

      default:
	 assert(!"not reached");
	 break;
      }
      return;
   }

   ir->accept(this);

   if (intel->gen >= 6) {
      vec4_instruction *inst = emit(BRW_OPCODE_AND, dst_null_d(),
				    this->result, src_reg(1));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   } else {
      vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst_null_d(), this->result);
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   }
}

/**
 * Emit a gen6 IF statement with the comparison folded into the IF
 * instruction.
 */
void
vec4_visitor::emit_if_gen6(ir_if *ir)
{
   ir_expression *expr = ir->condition->as_expression();

   if (expr) {
      src_reg op[2];
      vec4_instruction *inst;
      dst_reg temp;

      assert(expr->get_num_operands() <= 2);
      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
	 assert(expr->operands[i]->type->is_scalar() ||
		expr->operation == ir_binop_any_nequal ||
		expr->operation == ir_binop_all_equal);

	 expr->operands[i]->accept(this);
	 op[i] = this->result;
      }

      switch (expr->operation) {
      case ir_unop_logic_not:
	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
	 inst->conditional_mod = BRW_CONDITIONAL_Z;
	 return;

      case ir_binop_logic_xor:
	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 return;

      case ir_binop_logic_or:
	 temp = dst_reg(this, glsl_type::bool_type);
	 emit(BRW_OPCODE_OR, temp, op[0], op[1]);
	 inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 return;

      case ir_binop_logic_and:
	 temp = dst_reg(this, glsl_type::bool_type);
	 emit(BRW_OPCODE_AND, temp, op[0], op[1]);
	 inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 return;

      case ir_unop_f2b:
	 inst = emit(BRW_OPCODE_IF, dst_null_f(), op[0], src_reg(0));
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 return;

      case ir_unop_i2b:
	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 return;

      case ir_binop_greater:
      case ir_binop_gequal:
      case ir_binop_less:
      case ir_binop_lequal:
      case ir_binop_equal:
      case ir_binop_nequal:
	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
	 inst->conditional_mod =
	    brw_conditional_for_comparison(expr->operation);
	 return;

      case ir_binop_all_equal:
	 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_Z;

	 inst = emit(BRW_OPCODE_IF);
	 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
	 return;

      case ir_binop_any_nequal:
	 inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;

	 inst = emit(BRW_OPCODE_IF);
	 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
	 return;

      default:
	 assert(!"not reached");
	 inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 return;
      }
      return;
   }

   ir->condition->accept(this);

   vec4_instruction *inst = emit(BRW_OPCODE_IF, dst_null_d(),
				 this->result, src_reg(0));
   inst->conditional_mod = BRW_CONDITIONAL_NZ;
}

void
vec4_visitor::visit(ir_variable *ir)
{
   dst_reg *reg = NULL;

   if (variable_storage(ir))
      return;

   switch (ir->mode) {
   case ir_var_in:
      reg = new(mem_ctx) dst_reg(ATTR, ir->location);
      break;

   case ir_var_out:
      reg = new(mem_ctx) dst_reg(this, ir->type);

      for (int i = 0; i < type_size(ir->type); i++) {
	 output_reg[ir->location + i] = *reg;
	 output_reg[ir->location + i].reg_offset = i;
	 output_reg[ir->location + i].type = BRW_REGISTER_TYPE_F;
      }
      break;

   case ir_var_auto:
   case ir_var_temporary:
      reg = new(mem_ctx) dst_reg(this, ir->type);
      break;

   case ir_var_uniform:
      reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);

      if (!strncmp(ir->name, "gl_", 3)) {
	 setup_builtin_uniform_values(ir);
      } else {
	 setup_uniform_values(ir->location, ir->type);
      }
      break;

   default:
      assert(!"not reached");
   }

   reg->type = brw_type_for_base_type(ir->type);
   hash_table_insert(this->variable_ht, reg, ir);
}

void
vec4_visitor::visit(ir_loop *ir)
{
   dst_reg counter;

   /* We don't want debugging output to print the whole body of the
    * loop as the annotation.
    */
   this->base_ir = NULL;

   if (ir->counter != NULL) {
      this->base_ir = ir->counter;
      ir->counter->accept(this);
      counter = *(variable_storage(ir->counter));

      if (ir->from != NULL) {
	 this->base_ir = ir->from;
	 ir->from->accept(this);

	 emit(BRW_OPCODE_MOV, counter, this->result);
      }
   }

   emit(BRW_OPCODE_DO);

   if (ir->to) {
      this->base_ir = ir->to;
      ir->to->accept(this);

      vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst_null_d(),
				    src_reg(counter), this->result);
      inst->conditional_mod = brw_conditional_for_comparison(ir->cmp);

      inst = emit(BRW_OPCODE_BREAK);
      inst->predicate = BRW_PREDICATE_NORMAL;
   }

   visit_instructions(&ir->body_instructions);


   if (ir->increment) {
      this->base_ir = ir->increment;
      ir->increment->accept(this);
      emit(BRW_OPCODE_ADD, counter, src_reg(counter), this->result);
   }

   emit(BRW_OPCODE_WHILE);
}

void
vec4_visitor::visit(ir_loop_jump *ir)
{
   switch (ir->mode) {
   case ir_loop_jump::jump_break:
      emit(BRW_OPCODE_BREAK);
      break;
   case ir_loop_jump::jump_continue:
      emit(BRW_OPCODE_CONTINUE);
      break;
   }
}


void
vec4_visitor::visit(ir_function_signature *ir)
{
   assert(0);
   (void)ir;
}

void
vec4_visitor::visit(ir_function *ir)
{
   /* Ignore function bodies other than main() -- we shouldn't see calls to
    * them since they should all be inlined.
    */
   if (strcmp(ir->name, "main") == 0) {
      const ir_function_signature *sig;
      exec_list empty;

      sig = ir->matching_signature(&empty);

      assert(sig);

      visit_instructions(&sig->body);
   }
}

GLboolean
vec4_visitor::try_emit_sat(ir_expression *ir)
{
   ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
   if (!sat_src)
      return false;

   sat_src->accept(this);
   src_reg src = this->result;

   this->result = src_reg(this, ir->type);
   vec4_instruction *inst;
   inst = emit(BRW_OPCODE_MOV, dst_reg(this->result), src);
   inst->saturate = true;

   return true;
}

void
vec4_visitor::emit_bool_comparison(unsigned int op,
				 dst_reg dst, src_reg src0, src_reg src1)
{
   /* original gen4 does destination conversion before comparison. */
   if (intel->gen < 5)
      dst.type = src0.type;

   vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst, src0, src1);
   inst->conditional_mod = brw_conditional_for_comparison(op);

   dst.type = BRW_REGISTER_TYPE_D;
   emit(BRW_OPCODE_AND, dst, src_reg(dst), src_reg(0x1));
}

void
vec4_visitor::visit(ir_expression *ir)
{
   unsigned int operand;
   src_reg op[Elements(ir->operands)];
   src_reg result_src;
   dst_reg result_dst;
   vec4_instruction *inst;

   if (try_emit_sat(ir))
      return;

   for (operand = 0; operand < ir->get_num_operands(); operand++) {
      this->result.file = BAD_FILE;
      ir->operands[operand]->accept(this);
      if (this->result.file == BAD_FILE) {
	 printf("Failed to get tree for expression operand:\n");
	 ir->operands[operand]->print();
	 exit(1);
      }
      op[operand] = this->result;

      /* Matrix expression operands should have been broken down to vector
       * operations already.
       */
      assert(!ir->operands[operand]->type->is_matrix());
   }

   int vector_elements = ir->operands[0]->type->vector_elements;
   if (ir->operands[1]) {
      vector_elements = MAX2(vector_elements,
			     ir->operands[1]->type->vector_elements);
   }

   this->result.file = BAD_FILE;

   /* Storage for our result.  Ideally for an assignment we'd be using
    * the actual storage for the result here, instead.
    */
   result_src = src_reg(this, ir->type);
   /* convenience for the emit functions below. */
   result_dst = dst_reg(result_src);
   /* If nothing special happens, this is the result. */
   this->result = result_src;
   /* Limit writes to the channels that will be used by result_src later.
    * This does limit this temp's use as a temporary for multi-instruction
    * sequences.
    */
   result_dst.writemask = (1 << ir->type->vector_elements) - 1;

   switch (ir->operation) {
   case ir_unop_logic_not:
      /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
       * ones complement of the whole register, not just bit 0.
       */
      emit(BRW_OPCODE_XOR, result_dst, op[0], src_reg(1));
      break;
   case ir_unop_neg:
      op[0].negate = !op[0].negate;
      this->result = op[0];
      break;
   case ir_unop_abs:
      op[0].abs = true;
      op[0].negate = false;
      this->result = op[0];
      break;

   case ir_unop_sign:
      emit(BRW_OPCODE_MOV, result_dst, src_reg(0.0f));

      inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
      inst->conditional_mod = BRW_CONDITIONAL_G;
      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1.0f));
      inst->predicate = BRW_PREDICATE_NORMAL;

      inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
      inst->conditional_mod = BRW_CONDITIONAL_L;
      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(-1.0f));
      inst->predicate = BRW_PREDICATE_NORMAL;

      break;

   case ir_unop_rcp:
      emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
      break;

   case ir_unop_exp2:
      emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
      break;
   case ir_unop_log2:
      emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
      break;
   case ir_unop_exp:
   case ir_unop_log:
      assert(!"not reached: should be handled by ir_explog_to_explog2");
      break;
   case ir_unop_sin:
   case ir_unop_sin_reduced:
      emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
      break;
   case ir_unop_cos:
   case ir_unop_cos_reduced:
      emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
      break;

   case ir_unop_dFdx:
   case ir_unop_dFdy:
      assert(!"derivatives not valid in vertex shader");
      break;

   case ir_unop_noise:
      assert(!"not reached: should be handled by lower_noise");
      break;

   case ir_binop_add:
      emit(BRW_OPCODE_ADD, result_dst, op[0], op[1]);
      break;
   case ir_binop_sub:
      assert(!"not reached: should be handled by ir_sub_to_add_neg");
      break;

   case ir_binop_mul:
      emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]);
      break;
   case ir_binop_div:
      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
   case ir_binop_mod:
      assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
      break;

   case ir_binop_less:
   case ir_binop_greater:
   case ir_binop_lequal:
   case ir_binop_gequal:
   case ir_binop_equal:
   case ir_binop_nequal: {
      dst_reg temp = result_dst;
      /* original gen4 does implicit conversion before comparison. */
      if (intel->gen < 5)
	 temp.type = op[0].type;

      inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
      inst->conditional_mod = brw_conditional_for_comparison(ir->operation);
      emit(BRW_OPCODE_AND, result_dst, this->result, src_reg(0x1));
      break;
   }

   case ir_binop_all_equal:
      /* "==" operator producing a scalar boolean. */
      if (ir->operands[0]->type->is_vector() ||
	  ir->operands[1]->type->is_vector()) {
	 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_Z;

	 emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
	 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
	 inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
      } else {
	 dst_reg temp = result_dst;
	 /* original gen4 does implicit conversion before comparison. */
	 if (intel->gen < 5)
	    temp.type = op[0].type;

	 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
      }
      break;
   case ir_binop_any_nequal:
      /* "!=" operator producing a scalar boolean. */
      if (ir->operands[0]->type->is_vector() ||
	  ir->operands[1]->type->is_vector()) {
	 inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;

	 emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
	 inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
	 inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
      } else {
	 dst_reg temp = result_dst;
	 /* original gen4 does implicit conversion before comparison. */
	 if (intel->gen < 5)
	    temp.type = op[0].type;

	 inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
      }
      break;

   case ir_unop_any:
      inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;

      emit(BRW_OPCODE_MOV, result_dst, src_reg(0));

      inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
      inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
      break;

   case ir_binop_logic_xor:
      emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
      break;

   case ir_binop_logic_or:
      emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
      break;

   case ir_binop_logic_and:
      emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
      break;

   case ir_binop_dot:
      assert(ir->operands[0]->type->is_vector());
      assert(ir->operands[0]->type == ir->operands[1]->type);
      emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
      break;

   case ir_unop_sqrt:
      emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
      break;
   case ir_unop_rsq:
      emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
      break;
   case ir_unop_i2f:
   case ir_unop_i2u:
   case ir_unop_u2i:
   case ir_unop_u2f:
   case ir_unop_b2f:
   case ir_unop_b2i:
   case ir_unop_f2i:
      emit(BRW_OPCODE_MOV, result_dst, op[0]);
      break;
   case ir_unop_f2b:
   case ir_unop_i2b: {
      dst_reg temp = result_dst;
      /* original gen4 does implicit conversion before comparison. */
      if (intel->gen < 5)
	 temp.type = op[0].type;

      inst = emit(BRW_OPCODE_CMP, temp, op[0], src_reg(0.0f));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
      inst = emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(1));
      break;
   }

   case ir_unop_trunc:
      emit(BRW_OPCODE_RNDZ, result_dst, op[0]);
      break;
   case ir_unop_ceil:
      op[0].negate = !op[0].negate;
      inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
      this->result.negate = true;
      break;
   case ir_unop_floor:
      inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
      break;
   case ir_unop_fract:
      inst = emit(BRW_OPCODE_FRC, result_dst, op[0]);
      break;
   case ir_unop_round_even:
      emit(BRW_OPCODE_RNDE, result_dst, op[0]);
      break;

   case ir_binop_min:
      inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
      inst->conditional_mod = BRW_CONDITIONAL_L;

      inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
      inst->predicate = BRW_PREDICATE_NORMAL;
      break;
   case ir_binop_max:
      inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
      inst->conditional_mod = BRW_CONDITIONAL_G;

      inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
      inst->predicate = BRW_PREDICATE_NORMAL;
      break;

   case ir_binop_pow:
      emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
      break;

   case ir_unop_bit_not:
      inst = emit(BRW_OPCODE_NOT, result_dst, op[0]);
      break;
   case ir_binop_bit_and:
      inst = emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
      break;
   case ir_binop_bit_xor:
      inst = emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
      break;
   case ir_binop_bit_or:
      inst = emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
      break;

   case ir_binop_lshift:
   case ir_binop_rshift:
      assert(!"GLSL 1.30 features unsupported");
      break;

   case ir_quadop_vector:
      assert(!"not reached: should be handled by lower_quadop_vector");
      break;
   }
}


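/* Swizzles compose here rather than chain: each output channel looks up the
 * inner swizzle through the outer mask.  E.g. if the value arrives swizzled
 * YZWX and this node applies .xy, the loops below produce
 * BRW_SWIZZLE4(Y, Z, Z, Z) -- channel 0 reads inner[x] = Y, channel 1 reads
 * inner[y] = Z, and the last channel is replicated out.
 */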
void
vec4_visitor::visit(ir_swizzle *ir)
{
   src_reg src;
   int i = 0;
   int swizzle[4];

   /* Note that this is only swizzles in expressions, not those on the left
    * hand side of an assignment, which do write masking.  See ir_assignment
    * for that.
    */

   ir->val->accept(this);
   src = this->result;
   assert(src.file != BAD_FILE);

   for (i = 0; i < ir->type->vector_elements; i++) {
      switch (i) {
      case 0:
	 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
	 break;
      case 1:
	 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
	 break;
      case 2:
	 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
	 break;
      case 3:
	 swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
	 break;
      }
   }
   for (; i < 4; i++) {
      /* Replicate the last channel out. */
      swizzle[i] = swizzle[ir->type->vector_elements - 1];
   }

   src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);

   this->result = src;
}

void
vec4_visitor::visit(ir_dereference_variable *ir)
{
   const struct glsl_type *type = ir->type;
   dst_reg *reg = variable_storage(ir->var);

   if (!reg) {
      fail("Failed to find variable storage for %s\n", ir->var->name);
      this->result = src_reg(brw_null_reg());
      return;
   }

   this->result = src_reg(*reg);

   if (type->is_scalar() || type->is_vector() || type->is_matrix())
      this->result.swizzle = swizzle_for_size(type->vector_elements);
}

void
vec4_visitor::visit(ir_dereference_array *ir)
{
   ir_constant *constant_index;
   src_reg src;
   int element_size = type_size(ir->type);

   constant_index = ir->array_index->constant_expression_value();

   ir->array->accept(this);
   src = this->result;

   if (constant_index) {
      src.reg_offset += constant_index->value.i[0] * element_size;
   } else {
      /* Variable index array dereference.  It eats the "vec4" of the
       * base of the array and an index that offsets the Mesa register
       * index.
       */
      ir->array_index->accept(this);

      src_reg index_reg;

      if (element_size == 1) {
	 index_reg = this->result;
      } else {
	 index_reg = src_reg(this, glsl_type::int_type);

	 emit(BRW_OPCODE_MUL, dst_reg(index_reg),
	      this->result, src_reg(element_size));
      }

      if (src.reladdr) {
	 src_reg temp = src_reg(this, glsl_type::int_type);

	 emit(BRW_OPCODE_ADD, dst_reg(temp), *src.reladdr, index_reg);

	 index_reg = temp;
      }

      src.reladdr = ralloc(mem_ctx, src_reg);
      memcpy(src.reladdr, &index_reg, sizeof(index_reg));
   }

   /* If the type is smaller than a vec4, replicate the last channel out. */
   if (ir->type->is_scalar() || ir->type->is_vector())
      src.swizzle = swizzle_for_size(ir->type->vector_elements);
   else
      src.swizzle = BRW_SWIZZLE_NOOP;
   src.type = brw_type_for_base_type(ir->type);

   this->result = src;
}

void
vec4_visitor::visit(ir_dereference_record *ir)
{
   unsigned int i;
   const glsl_type *struct_type = ir->record->type;
   int offset = 0;

   ir->record->accept(this);

   for (i = 0; i < struct_type->length; i++) {
      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
	 break;
      offset += type_size(struct_type->fields.structure[i].type);
   }

   /* If the type is smaller than a vec4, replicate the last channel out. */
   if (ir->type->is_scalar() || ir->type->is_vector())
      this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
   else
      this->result.swizzle = BRW_SWIZZLE_NOOP;
   this->result.type = brw_type_for_base_type(ir->type);

   this->result.reg_offset += offset;
}

/**
 * We want to be careful in assignment setup to hit the actual storage
 * instead of potentially using a temporary like we might with the
 * ir_dereference handler.
 */
static dst_reg
get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
{
   /* The LHS must be a dereference.  If the LHS is a variable indexed array
    * access of a vector, it must be separated into a series of conditional
    * moves before reaching this point (see ir_vec_index_to_cond_assign).
    */
   assert(ir->as_dereference());
   ir_dereference_array *deref_array = ir->as_dereference_array();
   if (deref_array) {
      assert(!deref_array->array->type->is_vector());
   }

   /* Use the rvalue deref handler for the most part.  We'll ignore
    * swizzles in it and write swizzles using writemask, though.
    */
   ir->accept(v);
   return dst_reg(v->result);
}

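/* Copy a value of arbitrary composite type by recursing down to its
 * scalar/vector leaves and emitting one (possibly predicated) MOV per
 * vec4-sized slot, advancing reg_offset as it goes.  A mat2x3, for
 * instance, decomposes into two vec3 moves.
 */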
void
vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
			      const struct glsl_type *type, bool predicated)
{
   if (type->base_type == GLSL_TYPE_STRUCT) {
      for (unsigned int i = 0; i < type->length; i++) {
	 emit_block_move(dst, src, type->fields.structure[i].type, predicated);
      }
      return;
   }

   if (type->is_array()) {
      for (unsigned int i = 0; i < type->length; i++) {
	 emit_block_move(dst, src, type->fields.array, predicated);
      }
      return;
   }

   if (type->is_matrix()) {
      const struct glsl_type *vec_type;

      vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
					 type->vector_elements, 1);

      for (int i = 0; i < type->matrix_columns; i++) {
	 emit_block_move(dst, src, vec_type, predicated);
      }
      return;
   }

   assert(type->is_scalar() || type->is_vector());

   dst->type = brw_type_for_base_type(type);
   src->type = dst->type;

   dst->writemask = (1 << type->vector_elements) - 1;

   /* Do we need to worry about swizzling a swizzle? */
   assert(src->swizzle == BRW_SWIZZLE_NOOP);
   src->swizzle = swizzle_for_size(type->vector_elements);

   vec4_instruction *inst = emit(BRW_OPCODE_MOV, *dst, *src);
   if (predicated)
      inst->predicate = BRW_PREDICATE_NORMAL;

   dst->reg_offset++;
   src->reg_offset++;
}

void
vec4_visitor::visit(ir_assignment *ir)
{
   dst_reg dst = get_assignment_lhs(ir->lhs, this);

   if (!ir->lhs->type->is_scalar() &&
       !ir->lhs->type->is_vector()) {
      ir->rhs->accept(this);
      src_reg src = this->result;

      if (ir->condition) {
	 emit_bool_to_cond_code(ir->condition);
      }

      emit_block_move(&dst, &src, ir->rhs->type, ir->condition != NULL);
      return;
   }

   /* Now we're down to just a scalar/vector with writemasks. */
   int i;

   ir->rhs->accept(this);
   src_reg src = this->result;

   int swizzles[4];
   int first_enabled_chan = 0;
   int src_chan = 0;

   assert(ir->lhs->type->is_vector() ||
	  ir->lhs->type->is_scalar());
   dst.writemask = ir->write_mask;

   for (int i = 0; i < 4; i++) {
      if (dst.writemask & (1 << i)) {
	 first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
	 break;
      }
   }

   /* Swizzle a small RHS vector into the channels being written.
    *
    * glsl ir treats write_mask as dictating how many channels are
    * present on the RHS while in our instructions we need to make
    * those channels appear in the slots of the vec4 they're written to.
    */
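   /* A trace of the loop below: "v.yw = u" with a vec2 RHS arrives with
    * write_mask 0b1010 and src.swizzle XYYY, yielding swizzles =
    * {Y, X, Y, Y} -- u.x lands in .y, u.y lands in .w, and the disabled
    * channels just replicate the first enabled channel's source.
    */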
   for (int i = 0; i < 4; i++) {
      if (dst.writemask & (1 << i))
	 swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
      else
	 swizzles[i] = first_enabled_chan;
   }
   src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
			      swizzles[2], swizzles[3]);

   if (ir->condition) {
      emit_bool_to_cond_code(ir->condition);
   }

   for (i = 0; i < type_size(ir->lhs->type); i++) {
      vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src);

      if (ir->condition)
	 inst->predicate = BRW_PREDICATE_NORMAL;

      dst.reg_offset++;
      src.reg_offset++;
   }
}

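/* Expand an ir_constant into immediate MOVs, one channel at a time so each
 * component can carry its own immediate.  E.g. a mat2 constant becomes four
 * MOVs, two per column writemasked to .x and .y, with reg_offset advancing
 * once per column.
 */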
void
vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
{
   if (ir->type->base_type == GLSL_TYPE_STRUCT) {
      foreach_list(node, &ir->components) {
	 ir_constant *field_value = (ir_constant *)node;

	 emit_constant_values(dst, field_value);
      }
      return;
   }

   if (ir->type->is_array()) {
      for (unsigned int i = 0; i < ir->type->length; i++) {
	 emit_constant_values(dst, ir->array_elements[i]);
      }
      return;
   }

   if (ir->type->is_matrix()) {
      for (int i = 0; i < ir->type->matrix_columns; i++) {
	 for (int j = 0; j < ir->type->vector_elements; j++) {
	    dst->writemask = 1 << j;
	    dst->type = BRW_REGISTER_TYPE_F;

	    emit(BRW_OPCODE_MOV, *dst,
		 src_reg(ir->value.f[i * ir->type->vector_elements + j]));
	 }
	 dst->reg_offset++;
      }
      return;
   }

   for (int i = 0; i < ir->type->vector_elements; i++) {
      dst->writemask = 1 << i;
      dst->type = brw_type_for_base_type(ir->type);

      switch (ir->type->base_type) {
      case GLSL_TYPE_FLOAT:
	 emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.f[i]));
	 break;
      case GLSL_TYPE_INT:
	 emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.i[i]));
	 break;
      case GLSL_TYPE_UINT:
	 emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.u[i]));
	 break;
      case GLSL_TYPE_BOOL:
	 emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.b[i]));
	 break;
      default:
	 assert(!"Non-float/uint/int/bool constant");
	 break;
      }
   }
   dst->reg_offset++;
}

void
vec4_visitor::visit(ir_constant *ir)
{
   dst_reg dst = dst_reg(this, ir->type);
   this->result = src_reg(dst);

   emit_constant_values(&dst, ir);
}

void
vec4_visitor::visit(ir_call *ir)
{
   assert(!"not reached");
}

void
vec4_visitor::visit(ir_texture *ir)
{
   assert(!"not reached");
}

void
vec4_visitor::visit(ir_return *ir)
{
   assert(!"not reached");
}

void
vec4_visitor::visit(ir_discard *ir)
{
   assert(!"not reached");
}

void
vec4_visitor::visit(ir_if *ir)
{
   /* Don't point the annotation at the if statement, because then it plus
    * the then and else blocks get printed.
    */
   this->base_ir = ir->condition;

   if (intel->gen == 6) {
      emit_if_gen6(ir);
   } else {
      emit_bool_to_cond_code(ir->condition);
      vec4_instruction *inst = emit(BRW_OPCODE_IF);
      inst->predicate = BRW_PREDICATE_NORMAL;
   }

   visit_instructions(&ir->then_instructions);

   if (!ir->else_instructions.is_empty()) {
      this->base_ir = ir->condition;
      emit(BRW_OPCODE_ELSE);

      visit_instructions(&ir->else_instructions);
   }

   this->base_ir = ir->condition;
   emit(BRW_OPCODE_ENDIF);
}

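/**
 * Set up the pre-gen6 VUE header: compute the NDC position, and emit the
 * clip-flag/point-size dword when point size, user clip planes, or the
 * negative-rhw workaround require it.  Returns the next free MRF.
 */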
int
vec4_visitor::emit_vue_header_gen4(int header_mrf)
{
   /* Get the position */
   src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]);

   /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
   dst_reg ndc = dst_reg(this, glsl_type::vec4_type);

   current_annotation = "NDC";
   dst_reg ndc_w = ndc;
   ndc_w.writemask = WRITEMASK_W;
   src_reg pos_w = pos;
   pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
   emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);

   dst_reg ndc_xyz = ndc;
   ndc_xyz.writemask = WRITEMASK_XYZ;

   emit(BRW_OPCODE_MUL, ndc_xyz, pos, src_reg(ndc_w));

   if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
       c->key.nr_userclip || brw->has_negative_rhw_bug) {
      dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
      GLuint i;

      emit(BRW_OPCODE_MOV, header1, 0u);

      if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
	 assert(!"finishme: psiz");
	 src_reg psiz;

	 header1.writemask = WRITEMASK_W;
	 emit(BRW_OPCODE_MUL, header1, psiz, 1u << 11);
	 emit(BRW_OPCODE_AND, header1, src_reg(header1), 0x7ff << 8);
      }

      for (i = 0; i < c->key.nr_userclip; i++) {
	 vec4_instruction *inst;

	 inst = emit(BRW_OPCODE_DP4, dst_reg(brw_null_reg()),
		     pos, src_reg(c->userplane[i]));
	 inst->conditional_mod = BRW_CONDITIONAL_L;

	 inst = emit(BRW_OPCODE_OR, header1, src_reg(header1), 1u << i);
	 inst->predicate = BRW_PREDICATE_NORMAL;
      }

      /* i965 clipping workaround:
       * 1) Test for -ve rhw
       * 2) If set,
       *      set ndc = (0,0,0,0)
       *      set ucp[6] = 1
       *
       * Later, clipping will detect ucp[6] and ensure the primitive is
       * clipped against all fixed planes.
       */
      if (brw->has_negative_rhw_bug) {
#if 0
	 /* FINISHME */
	 brw_CMP(p,
		 vec8(brw_null_reg()),
		 BRW_CONDITIONAL_L,
		 brw_swizzle1(ndc, 3),
		 brw_imm_f(0));

	 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
	 brw_MOV(p, ndc, brw_imm_f(0));
	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
#endif
      }

      header1.writemask = WRITEMASK_XYZW;
      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(header1));
   } else {
      emit(BRW_OPCODE_MOV, retype(brw_message_reg(header_mrf++),
				  BRW_REGISTER_TYPE_UD), 0u);
   }

   if (intel->gen == 5) {
      /* There are 20 DWs (D0-D19) in VUE header on Ironlake:
       * dword 0-3 (m1) of the header is indices, point width, clip flags.
       * dword 4-7 (m2) is the ndc position (set above)
       * dword 8-11 (m3) of the vertex header is the 4D space position
       * dword 12-19 (m4,m5) of the vertex header is the user clip distance.
       * m6 is a pad so that the vertex element data is aligned
       * m7 is the first vertex data we fill.
       */
      current_annotation = "NDC";
      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));

      current_annotation = "gl_Position";
      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);

      /* user clip distance. */
      header_mrf += 2;

      /* Pad so that vertex element data is aligned. */
      header_mrf++;
   } else {
      /* There are 8 dwords in VUE header pre-Ironlake:
       * dword 0-3 (m1) is indices, point width, clip flags.
       * dword 4-7 (m2) is ndc position (set above)
       *
       * dword 8-11 (m3) is the first vertex data.
       */
      current_annotation = "NDC";
      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));

      current_annotation = "gl_Position";
      emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);
   }

   return header_mrf;
}

int
vec4_visitor::emit_vue_header_gen6(int header_mrf)
{
   struct brw_reg reg;

   /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
    * dword 0-3 (m2) of the header is indices, point width, clip flags.
    * dword 4-7 (m3) is the 4D space position
    * dword 8-15 (m4,m5) of the vertex header is the user clip distance if
    * enabled.
    *
    * m4 or 6 is the first vertex element data we fill.
    */

   current_annotation = "indices, point width, clip flags";
   reg = brw_message_reg(header_mrf++);
   emit(BRW_OPCODE_MOV, retype(reg, BRW_REGISTER_TYPE_D), src_reg(0));
   if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
      emit(BRW_OPCODE_MOV, brw_writemask(reg, WRITEMASK_W),
	   src_reg(output_reg[VERT_RESULT_PSIZ]));
   }

   current_annotation = "gl_Position";
   emit(BRW_OPCODE_MOV,
	brw_message_reg(header_mrf++), src_reg(output_reg[VERT_RESULT_HPOS]));

   current_annotation = "user clip distances";
   if (c->key.nr_userclip) {
      for (int i = 0; i < c->key.nr_userclip; i++) {
	 struct brw_reg m;
	 if (i < 4)
	    m = brw_message_reg(header_mrf);
	 else
	    m = brw_message_reg(header_mrf + 1);

	 emit(BRW_OPCODE_DP4,
	      dst_reg(brw_writemask(m, 1 << (i & 3))),
	      src_reg(output_reg[VERT_RESULT_HPOS]),
	      src_reg(c->userplane[i]));
      }
      header_mrf += 2;
   }

   current_annotation = NULL;

   return header_mrf;
}

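/* E.g. an mlen of 4 (one header MRF plus three data MRFs) is bumped to 5 so
 * the data portion is a whole number of 256-bit units; an already-odd mlen
 * passes through unchanged.
 */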
static int
align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
{
   struct intel_context *intel = &brw->intel;

   if (intel->gen >= 6) {
      /* URB data written (does not include the message header reg) must
       * be a multiple of 256 bits, or 2 VS registers.  See vol5c.5,
       * section 5.4.3.2.2: URB_INTERLEAVED.
       *
       * URB entries are allocated on a multiple of 1024 bits, so an
       * extra 128 bits written here to make the end align to 256 is
       * no problem.
       */
      if ((mlen % 2) != 1)
	 mlen++;
   }

   return mlen;
}

/**
 * Generates the VUE payload plus the 1 or 2 URB write instructions to
 * complete the VS thread.
 *
 * The VUE layout is documented in Volume 2a.
 */
void
vec4_visitor::emit_urb_writes()
{
   /* MRF 0 is reserved for the debugger, so start with message header
    * in MRF 1.
    */
   int base_mrf = 1;
   int mrf = base_mrf;
   int urb_entry_size;
   uint64_t outputs_remaining = c->prog_data.outputs_written;
   /* In the process of generating our URB write message contents, we
    * may need to unspill a register or load from an array.  Those
    * reads would use MRFs 14-15.
    */
   int max_usable_mrf = 13;

   /* FINISHME: edgeflag */

   /* First mrf is the g0-based message header containing URB handles and such,
    * which is implied in VS_OPCODE_URB_WRITE.
    */
   mrf++;

   if (intel->gen >= 6) {
      mrf = emit_vue_header_gen6(mrf);
   } else {
      mrf = emit_vue_header_gen4(mrf);
   }

   /* Set up the VUE data for the first URB write */
   int attr;
   for (attr = 0; attr < VERT_RESULT_MAX; attr++) {
      if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
	 continue;

      outputs_remaining &= ~BITFIELD64_BIT(attr);

      /* This is set up in the VUE header. */
      if (attr == VERT_RESULT_HPOS)
	 continue;

      /* This is loaded into the VUE header, and thus doesn't occupy
       * an attribute slot.
       */
      if (attr == VERT_RESULT_PSIZ)
	 continue;

      emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr]));

      /* If we've used up the last usable MRF (13), we can't fit anything
       * more into this URB WRITE.  Note that with a base_mrf of 1, MRF 13
       * as the last data reg gives an even-numbered amount of URB write
       * data, which meets gen6's requirements for length alignment.
       */
      if (mrf > max_usable_mrf) {
	 attr++;
	 break;
      }
   }

   vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
   inst->base_mrf = base_mrf;
   inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
   inst->eot = !outputs_remaining;

   urb_entry_size = mrf - base_mrf;

   /* Optional second URB write */
   if (outputs_remaining) {
      mrf = base_mrf + 1;

      for (; attr < VERT_RESULT_MAX; attr++) {
	 if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
	    continue;

	 assert(mrf < max_usable_mrf);

	 emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr]));
      }

      inst = emit(VS_OPCODE_URB_WRITE);
      inst->base_mrf = base_mrf;
      inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
      inst->eot = true;
      /* URB destination offset.  In the previous write, we got MRFs
       * 2-13 minus the one header MRF, so 12 regs.  URB offset is in
       * URB row increments, and each of our MRFs is half of one of
       * those, since we're doing interleaved writes.
       */
      inst->offset = (max_usable_mrf - base_mrf) / 2;

      urb_entry_size += mrf - base_mrf;
   }

   if (intel->gen == 6)
      c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 8) / 8;
   else
      c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4;
}

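/* Scratch offsets are scaled by 2 because vec4 slots are stored interleaved
 * (two vertices per scratch row), and pre-gen6 additionally by 16 because
 * the message header wants byte offsets rather than 16-byte units.  E.g. a
 * reg_offset of 3 becomes index 6 on gen6+, or 96 bytes on gen4/5.
 */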
src_reg
vec4_visitor::get_scratch_offset(vec4_instruction *inst,
				 src_reg *reladdr, int reg_offset)
{
   /* Because we store the values to scratch interleaved like our
    * vertex data, we need to scale the vec4 index by 2.
    */
   int message_header_scale = 2;

   /* Pre-gen6, the message header uses byte offsets instead of vec4
    * (16-byte) offset units.
    */
   if (intel->gen < 6)
      message_header_scale *= 16;

   if (reladdr) {
      src_reg index = src_reg(this, glsl_type::int_type);

      vec4_instruction *add = emit(BRW_OPCODE_ADD,
				   dst_reg(index),
				   *reladdr,
				   src_reg(reg_offset));
      /* Move our new instruction from the tail to its correct place. */
      add->remove();
      inst->insert_before(add);

      vec4_instruction *mul = emit(BRW_OPCODE_MUL, dst_reg(index),
				   index, src_reg(message_header_scale));
      mul->remove();
      inst->insert_before(mul);

      return index;
   } else {
      return src_reg(reg_offset * message_header_scale);
   }
}

/**
 * Emits an instruction before @inst to load the value named by @orig_src
 * from scratch space at @base_offset to @temp.
 */
void
vec4_visitor::emit_scratch_read(vec4_instruction *inst,
				dst_reg temp, src_reg orig_src,
				int base_offset)
{
   int reg_offset = base_offset + orig_src.reg_offset;
   src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset);

   vec4_instruction *scratch_read_inst = emit(VS_OPCODE_SCRATCH_READ,
					      temp, index);

   scratch_read_inst->base_mrf = 14;
   scratch_read_inst->mlen = 1;
   /* Move our instruction from the tail to its correct place. */
   scratch_read_inst->remove();
   inst->insert_before(scratch_read_inst);
}

/**
 * Emits an instruction after @inst to store the value to be written
 * to @orig_dst to scratch space at @base_offset, from @temp.
 */
void
vec4_visitor::emit_scratch_write(vec4_instruction *inst,
				 src_reg temp, dst_reg orig_dst,
				 int base_offset)
{
   int reg_offset = base_offset + orig_dst.reg_offset;
   src_reg index = get_scratch_offset(inst, orig_dst.reladdr, reg_offset);

   dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
				       orig_dst.writemask));
   vec4_instruction *scratch_write_inst = emit(VS_OPCODE_SCRATCH_WRITE,
					       dst, temp, index);
   scratch_write_inst->base_mrf = 13;
   scratch_write_inst->mlen = 2;
   scratch_write_inst->predicate = inst->predicate;
   /* Move our instruction from the tail to its correct place. */
   scratch_write_inst->remove();
   inst->insert_after(scratch_write_inst);
}

/**
 * We can't generally support array access in GRF space, because a
 * single instruction's destination can only span 2 contiguous
 * registers.  So, we send all GRF arrays that get variable index
 * access to scratch space.
 */
void
vec4_visitor::move_grf_array_access_to_scratch()
{
   int scratch_loc[this->virtual_grf_count];

   for (int i = 0; i < this->virtual_grf_count; i++) {
      scratch_loc[i] = -1;
   }

   /* First, calculate the set of virtual GRFs that need to be punted
    * to scratch due to having any array access on them, and where in
    * scratch.
    */
   foreach_list(node, &this->instructions) {
      vec4_instruction *inst = (vec4_instruction *)node;

      if (inst->dst.file == GRF && inst->dst.reladdr &&
	  scratch_loc[inst->dst.reg] == -1) {
	 scratch_loc[inst->dst.reg] = c->last_scratch;
	 c->last_scratch += this->virtual_grf_sizes[inst->dst.reg] * 8 * 4;
      }

      for (int i = 0 ; i < 3; i++) {
	 src_reg *src = &inst->src[i];

	 if (src->file == GRF && src->reladdr &&
	     scratch_loc[src->reg] == -1) {
	    scratch_loc[src->reg] = c->last_scratch;
	    c->last_scratch += this->virtual_grf_sizes[src->reg] * 8 * 4;
	 }
      }
   }

   /* Now, for anything that will be accessed through scratch, rewrite
    * it to load/store.  Note that this is a _safe list walk, because
    * we may generate a new scratch_write instruction after the one
    * we're processing.
    */
   foreach_list_safe(node, &this->instructions) {
      vec4_instruction *inst = (vec4_instruction *)node;

      /* Set up the annotation tracking for new generated instructions. */
      base_ir = inst->ir;
      current_annotation = inst->annotation;

      if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) {
	 src_reg temp = src_reg(this, glsl_type::vec4_type);

	 emit_scratch_write(inst, temp, inst->dst, scratch_loc[inst->dst.reg]);

	 inst->dst.file = temp.file;
	 inst->dst.reg = temp.reg;
	 inst->dst.reg_offset = temp.reg_offset;
	 inst->dst.reladdr = NULL;
      }

      for (int i = 0 ; i < 3; i++) {
	 if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1)
	    continue;

	 dst_reg temp = dst_reg(this, glsl_type::vec4_type);

	 emit_scratch_read(inst, temp, inst->src[i],
			   scratch_loc[inst->src[i].reg]);

	 inst->src[i].file = temp.file;
	 inst->src[i].reg = temp.reg;
	 inst->src[i].reg_offset = temp.reg_offset;
	 inst->src[i].reladdr = NULL;
      }
   }
}


vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
			   struct gl_shader_program *prog,
			   struct brw_shader *shader)
{
   this->c = c;
   this->p = &c->func;
   this->brw = p->brw;
   this->intel = &brw->intel;
   this->ctx = &intel->ctx;
   this->prog = prog;
   this->shader = shader;

   this->mem_ctx = ralloc_context(NULL);
   this->failed = false;

   this->base_ir = NULL;
   this->current_annotation = NULL;

   this->vp = brw->vertex_program; /* FINISHME: change for precompile */
   this->prog_data = &c->prog_data;

   this->variable_ht = hash_table_ctor(0,
				       hash_table_pointer_hash,
				       hash_table_pointer_compare);

   this->virtual_grf_sizes = NULL;
   this->virtual_grf_count = 0;
   this->virtual_grf_array_size = 0;

   this->uniforms = 0;
}

vec4_visitor::~vec4_visitor()
{
   hash_table_dtor(this->variable_ht);
}


void
vec4_visitor::fail(const char *format, ...)
{
   va_list va;
   char *msg;

   if (failed)
      return;

   failed = true;

   va_start(va, format);
   msg = ralloc_vasprintf(mem_ctx, format, va);
   va_end(va);
   msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);

   this->fail_msg = msg;

   if (INTEL_DEBUG & DEBUG_VS) {
      fprintf(stderr, "%s", msg);
   }
}

} /* namespace brw */