brw_vec4_emit.cpp revision 584ff407482fd3baf5ce081dbbf9653eb76c40f1
/* Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "brw_vec4.h"
#include "../glsl/ir_print_visitor.h"

extern "C" {
#include "brw_eu.h"
}

using namespace brw;

namespace brw {

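/* Map each vertex attribute the shader reads onto the payload GRF the
 * vertex fetcher delivers it in: active slots of prog_data->inputs_read
 * are packed densely starting at payload_reg, and every ATTR source is
 * rewritten to the matching fixed hardware register.  Returns the first
 * GRF past the attribute section.
 */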
int
vec4_visitor::setup_attributes(int payload_reg)
{
   int nr_attributes;
   int attribute_map[VERT_ATTRIB_MAX];

   nr_attributes = 0;
   for (int i = 0; i < VERT_ATTRIB_MAX; i++) {
      if (prog_data->inputs_read & BITFIELD64_BIT(i)) {
         attribute_map[i] = payload_reg + nr_attributes;
         nr_attributes++;
      }
   }

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      vec4_instruction *inst = (vec4_instruction *)iter.get();

      for (int i = 0; i < 3; i++) {
         if (inst->src[i].file != ATTR)
            continue;

         inst->src[i].file = HW_REG;
         inst->src[i].fixed_hw_reg =
            brw_vec8_grf(attribute_map[inst->src[i].reg], 0);
         inst->src[i].fixed_hw_reg.dw1.bits.swizzle = inst->src[i].swizzle;
      }
   }

   /* The BSpec says we always have to read at least one thing from
    * the VF, and it appears that the hardware wedges otherwise.
    */
   if (nr_attributes == 0)
      nr_attributes = 1;

   prog_data->urb_read_length = (nr_attributes + 1) / 2;

   return payload_reg + nr_attributes;
}

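/* Set up the push-constant (curbe) part of the payload: the user clip
 * plane registers, then the uniform values packed two vec4 slots per
 * GRF.  Returns the first payload register past the push constants.
 */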
int
vec4_visitor::setup_uniforms(int reg)
{
   /* User clip planes from curbe:
    */
   if (c->key.nr_userclip) {
      if (intel->gen >= 6) {
         for (int i = 0; i < c->key.nr_userclip; i++) {
            c->userplane[i] = stride(brw_vec4_grf(reg + i / 2,
                                                  (i % 2) * 4), 0, 4, 1);
         }
         reg += ALIGN(c->key.nr_userclip, 2) / 2;
      } else {
         for (int i = 0; i < c->key.nr_userclip; i++) {
            c->userplane[i] = stride(brw_vec4_grf(reg + (6 + i) / 2,
                                                  (i % 2) * 4), 0, 4, 1);
         }
         reg += (ALIGN(6 + c->key.nr_userclip, 4) / 4) * 2;
      }
   }

   /* The pre-gen6 VS requires that some push constants get loaded no
    * matter what, or the GPU would hang.
    */
   if (intel->gen < 6 && this->uniforms == 0) {
      this->uniform_size[this->uniforms] = 1;

      for (unsigned int i = 0; i < 4; i++) {
         unsigned int slot = this->uniforms * 4 + i;

         c->prog_data.param[slot] = NULL;
         c->prog_data.param_convert[slot] = PARAM_CONVERT_ZERO;
      }

      this->uniforms++;
      reg++;
   } else {
      reg += ALIGN(uniforms, 2) / 2;
   }

   /* For now, we are not doing any elimination of unused slots, nor
    * are we packing our uniforms.
    */
   c->prog_data.nr_params = this->uniforms * 4;

   c->prog_data.curb_read_length = reg - 1;
   c->prog_data.uses_new_param_layout = true;

   return reg;
}

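/* Lay out the whole thread payload: g0 (URB handles), then push
 * constants, then vertex attributes.  Everything past
 * first_non_payload_grf is left for the register allocator.
 */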
void
vec4_visitor::setup_payload(void)
{
   int reg = 0;

   /* The payload always contains important data in g0, which contains
    * the URB handles that are passed on to the URB write at the end
    * of the thread.  So, we always start push constants at g1.
    */
   reg++;

   reg = setup_uniforms(reg);

   reg = setup_attributes(reg);

   this->first_non_payload_grf = reg;
}

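/* Convert this instruction's abstract destination into a fixed brw_reg
 * encoding, applying the writemask for GRF destinations.
 */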
struct brw_reg
vec4_instruction::get_dst(void)
{
   struct brw_reg brw_reg;

   switch (dst.file) {
   case GRF:
      brw_reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0);
      brw_reg = retype(brw_reg, dst.type);
      brw_reg.dw1.bits.writemask = dst.writemask;
      break;

   case HW_REG:
      brw_reg = dst.fixed_hw_reg;
      break;

   case BAD_FILE:
      brw_reg = brw_null_reg();
      break;

   default:
      assert(!"not reached");
      brw_reg = brw_null_reg();
      break;
   }

   return brw_reg;
}

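/* Convert source i into a fixed brw_reg encoding, applying swizzle,
 * absolute value, and negation.  UNIFORM sources index into the push
 * constant registers, which start at g1.
 */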
struct brw_reg
vec4_instruction::get_src(int i)
{
   struct brw_reg brw_reg;

   switch (src[i].file) {
   case GRF:
      brw_reg = brw_vec8_grf(src[i].reg + src[i].reg_offset, 0);
      brw_reg = retype(brw_reg, src[i].type);
      brw_reg.dw1.bits.swizzle = src[i].swizzle;
      if (src[i].abs)
         brw_reg = brw_abs(brw_reg);
      if (src[i].negate)
         brw_reg = negate(brw_reg);
      break;

   case IMM:
      switch (src[i].type) {
      case BRW_REGISTER_TYPE_F:
         brw_reg = brw_imm_f(src[i].imm.f);
         break;
      case BRW_REGISTER_TYPE_D:
         brw_reg = brw_imm_d(src[i].imm.i);
         break;
      case BRW_REGISTER_TYPE_UD:
         brw_reg = brw_imm_ud(src[i].imm.u);
         break;
      default:
         assert(!"not reached");
         brw_reg = brw_null_reg();
         break;
      }
      break;

   case UNIFORM:
      brw_reg = stride(brw_vec4_grf(1 + (src[i].reg + src[i].reg_offset) / 2,
                                    ((src[i].reg + src[i].reg_offset) % 2) * 4),
                       0, 4, 1);
      brw_reg = retype(brw_reg, src[i].type);
      brw_reg.dw1.bits.swizzle = src[i].swizzle;
      if (src[i].abs)
         brw_reg = brw_abs(brw_reg);
      if (src[i].negate)
         brw_reg = negate(brw_reg);
      break;

   case HW_REG:
      brw_reg = src[i].fixed_hw_reg;
      break;

   case BAD_FILE:
      /* Probably unused. */
      brw_reg = brw_null_reg();
      break;

   case ATTR:
   default:
      assert(!"not reached");
      brw_reg = brw_null_reg();
      break;
   }

   return brw_reg;
}

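/* Single-operand math on gen4/gen5 is a message to the shared math
 * function unit, staged through the MRF at inst->base_mrf.
 */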
void
vec4_visitor::generate_math1_gen4(vec4_instruction *inst,
                                  struct brw_reg dst,
                                  struct brw_reg src)
{
   brw_math(p,
            dst,
            brw_math_function(inst->opcode),
            BRW_MATH_SATURATE_NONE,
            inst->base_mrf,
            src,
            BRW_MATH_DATA_SCALAR,
            BRW_MATH_PRECISION_FULL);
}

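/* On gen6, math is an ordinary execution-unit instruction and no MRF is
 * involved; brw_math() picks the right encoding per generation, so this
 * body is currently identical to the gen4 path.
 */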
void
vec4_visitor::generate_math1_gen6(vec4_instruction *inst,
                                  struct brw_reg dst,
                                  struct brw_reg src)
{
   brw_math(p,
            dst,
            brw_math_function(inst->opcode),
            BRW_MATH_SATURATE_NONE,
            inst->base_mrf,
            src,
            BRW_MATH_DATA_SCALAR,
            BRW_MATH_PRECISION_FULL);
}

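/* Emit the URB write delivering this thread's vertex results,
 * interleaving the two vertices of the vec4 pair into adjacent URB rows
 * (BRW_URB_SWIZZLE_INTERLEAVE), with inst->eot terminating the thread
 * on the final write.
 */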
void
vec4_visitor::generate_urb_write(vec4_instruction *inst)
{
   brw_urb_WRITE(p,
                 brw_null_reg(),     /* dest */
                 inst->base_mrf,     /* starting mrf reg nr */
                 brw_vec8_grf(0, 0), /* src */
                 false,              /* allocate */
                 true,               /* used */
                 inst->mlen,
                 0,                  /* response len */
                 inst->eot,          /* eot */
                 inst->eot,          /* writes complete */
                 inst->offset,       /* urb destination offset */
                 BRW_URB_SWIZZLE_INTERLEAVE);
}

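/* Fill in the per-vertex block offsets (M1.0 and M1.4) of an OWord dual
 * block scratch message.  The second vertex's data lives one OWord past
 * the first; presumably gen6 expresses these offsets in OWords while
 * earlier parts use bytes, hence second_vertex_offset of 1 vs. 16.
 */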
void
vec4_visitor::generate_oword_dual_block_offsets(struct brw_reg m1,
                                                struct brw_reg index)
{
   int second_vertex_offset;

   if (intel->gen >= 6)
      second_vertex_offset = 1;
   else
      second_vertex_offset = 16;

   m1 = retype(m1, BRW_REGISTER_TYPE_D);

   /* Set up M1 (message payload).  Only the block offsets in M1.0 and
    * M1.4 are used, and the rest are ignored.
    */
   struct brw_reg m1_0 = suboffset(vec1(m1), 0);
   struct brw_reg m1_4 = suboffset(vec1(m1), 4);
   struct brw_reg index_0 = suboffset(vec1(index), 0);
   struct brw_reg index_4 = suboffset(vec1(index), 4);

   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_access_mode(p, BRW_ALIGN_1);

   brw_MOV(p, m1_0, index_0);

   if (index.file == BRW_IMMEDIATE_VALUE) {
      index_4.dw1.ud += second_vertex_offset;
      brw_MOV(p, m1_4, index_4);
   } else {
      brw_ADD(p, m1_4, index_4, brw_imm_d(second_vertex_offset));
   }

   brw_pop_insn_state(p);
}

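/* Read back one vec4 per vertex from scratch space using an OWord dual
 * block read through the render cache; binding table index 255 selects
 * stateless access, and gen6 additionally needs a header copied from g0.
 */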
void
vec4_visitor::generate_scratch_read(vec4_instruction *inst,
                                    struct brw_reg dst,
                                    struct brw_reg index)
{
   if (intel->gen >= 6) {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_MOV(p,
              retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_D),
              retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_D));
      brw_pop_insn_state(p);
   }

   generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1),
                                     index);

   uint32_t msg_type;

   if (intel->gen >= 6)
      msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else if (intel->gen == 5 || intel->is_g4x)
      msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else
      msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;

   /* Each of the 8 channel enables is considered for whether each
    * dword is written.
    */
   struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, send, dst);
   brw_set_src0(p, send, brw_message_reg(inst->base_mrf));
   brw_set_dp_read_message(p, send,
                           255, /* binding table index: stateless access */
                           BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
                           msg_type,
                           BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
                           2, /* mlen */
                           1 /* rlen */);
}

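/* Spill one vec4 per vertex to scratch space with an OWord dual block
 * write.  The header and offset setup must not be predicated, so any
 * predicate on the instruction is applied only to the send itself.
 */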
void
vec4_visitor::generate_scratch_write(vec4_instruction *inst,
                                     struct brw_reg dst,
                                     struct brw_reg src,
                                     struct brw_reg index)
{
   /* If the instruction is predicated, we'll predicate the send, not
    * the header setup.
    */
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);

   if (intel->gen >= 6) {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_MOV(p,
              retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_D),
              retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_D));
      brw_pop_insn_state(p);
   }

   generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1),
                                     index);

   brw_MOV(p,
           retype(brw_message_reg(inst->base_mrf + 2), BRW_REGISTER_TYPE_D),
           retype(src, BRW_REGISTER_TYPE_D));

   uint32_t msg_type;

   if (intel->gen >= 6)
      msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;
   else
      msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;

   brw_set_predicate_control(p, inst->predicate);

   /* Each of the 8 channel enables is considered for whether each
    * dword is written.
    */
   struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, send, dst);
   brw_set_src0(p, send, brw_message_reg(inst->base_mrf));
   brw_set_dp_write_message(p, send,
                            255, /* binding table index: stateless access */
                            BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
                            msg_type,
                            3, /* mlen */
                            true, /* header present */
                            false, /* pixel scoreboard */
                            0, /* rlen */
                            false, /* eot */
                            false /* commit */);
}

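/* Generate code for the opcodes the generic switch in generate_code()
 * doesn't handle: math and the VS-specific URB/scratch messages.
 */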
void
vec4_visitor::generate_vs_instruction(vec4_instruction *inst,
                                      struct brw_reg dst,
                                      struct brw_reg *src)
{
   switch (inst->opcode) {
   case SHADER_OPCODE_RCP:
   case SHADER_OPCODE_RSQ:
   case SHADER_OPCODE_SQRT:
   case SHADER_OPCODE_EXP2:
   case SHADER_OPCODE_LOG2:
   case SHADER_OPCODE_SIN:
   case SHADER_OPCODE_COS:
      if (intel->gen >= 6) {
         generate_math1_gen6(inst, dst, src[0]);
      } else {
         generate_math1_gen4(inst, dst, src[0]);
      }
      break;

   case SHADER_OPCODE_POW:
      assert(!"finishme");
      break;

   case VS_OPCODE_URB_WRITE:
      generate_urb_write(inst);
      break;

   case VS_OPCODE_SCRATCH_READ:
      generate_scratch_read(inst, dst, src[0]);
      break;

   case VS_OPCODE_SCRATCH_WRITE:
      generate_scratch_write(inst, dst, src[0], src[1]);
      break;

   default:
      if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
         fail("unsupported opcode `%s' in VS\n",
              brw_opcodes[inst->opcode].name);
      } else {
         fail("unsupported opcode %d in VS\n", inst->opcode);
      }
   }
}

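/* Visit the linked GLSL IR for main() and generate native code from the
 * resulting vec4 IR, returning false on failure.
 */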
bool
vec4_visitor::run()
{
   /* Generate VS IR for main().  (The visitor only descends into
    * functions called "main".)
    */
   foreach_iter(exec_list_iterator, iter, *shader->ir) {
      ir_instruction *ir = (ir_instruction *)iter.get();
      base_ir = ir;
      ir->accept(this);
   }

   emit_urb_writes();

   /* Before any optimization, push array accesses out to scratch
    * space where we need them to be.  This pass may allocate new
    * virtual GRFs, so we want to do it early.  It also makes sure
    * that we have reladdr computations available for CSE, since we'll
    * often do repeated subexpressions for those.
    */
   move_grf_array_access_to_scratch();

   if (failed)
      return false;

   setup_payload();
   reg_allocate();

   brw_set_access_mode(p, BRW_ALIGN_16);

   generate_code();

   return !failed;
}

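/* Walk the vec4 IR and emit native instructions, tracking loop nesting
 * so that pre-gen6 BREAK/CONTINUE jump counts can be patched at the
 * matching WHILE.
 */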
void
vec4_visitor::generate_code()
{
   int last_native_inst = p->nr_insn;
   const char *last_annotation_string = NULL;
   ir_instruction *last_annotation_ir = NULL;

   int loop_stack_array_size = 16;
   int loop_stack_depth = 0;
   brw_instruction **loop_stack =
      rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size);
   int *if_depth_in_loop =
      rzalloc_array(this->mem_ctx, int, loop_stack_array_size);

   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
      printf("Native code for vertex shader %d:\n", prog->Name);
   }

   foreach_list(node, &this->instructions) {
      vec4_instruction *inst = (vec4_instruction *)node;
      struct brw_reg src[3], dst;

      if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
         if (last_annotation_ir != inst->ir) {
            last_annotation_ir = inst->ir;
            if (last_annotation_ir) {
               printf("   ");
               last_annotation_ir->print();
               printf("\n");
            }
         }
         if (last_annotation_string != inst->annotation) {
            last_annotation_string = inst->annotation;
            if (last_annotation_string)
               printf("   %s\n", last_annotation_string);
         }
      }

      for (unsigned int i = 0; i < 3; i++) {
         src[i] = inst->get_src(i);
      }
      dst = inst->get_dst();

      brw_set_conditionalmod(p, inst->conditional_mod);
      brw_set_predicate_control(p, inst->predicate);
      brw_set_predicate_inverse(p, inst->predicate_inverse);
      brw_set_saturate(p, inst->saturate);

      switch (inst->opcode) {
      case BRW_OPCODE_MOV:
         brw_MOV(p, dst, src[0]);
         break;
      case BRW_OPCODE_ADD:
         brw_ADD(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_MUL:
         brw_MUL(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_FRC:
         brw_FRC(p, dst, src[0]);
         break;
      case BRW_OPCODE_RNDD:
         brw_RNDD(p, dst, src[0]);
         break;
      case BRW_OPCODE_RNDE:
         brw_RNDE(p, dst, src[0]);
         break;
      case BRW_OPCODE_RNDZ:
         brw_RNDZ(p, dst, src[0]);
         break;

      case BRW_OPCODE_AND:
         brw_AND(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_OR:
         brw_OR(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_XOR:
         brw_XOR(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_NOT:
         brw_NOT(p, dst, src[0]);
         break;
      case BRW_OPCODE_ASR:
         brw_ASR(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_SHR:
         brw_SHR(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_SHL:
         brw_SHL(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_CMP:
         brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
         break;
      case BRW_OPCODE_SEL:
         brw_SEL(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_DP4:
         brw_DP4(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_DP3:
         brw_DP3(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_DP2:
         brw_DP2(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_IF:
         if (inst->src[0].file != BAD_FILE) {
            /* The instruction has an embedded compare (only allowed on gen6) */
            assert(intel->gen == 6);
            gen6_IF(p, inst->conditional_mod, src[0], src[1]);
         } else {
            struct brw_instruction *brw_inst = brw_IF(p, BRW_EXECUTE_8);
            brw_inst->header.predicate_control = inst->predicate;
         }
         if_depth_in_loop[loop_stack_depth]++;
         break;

      case BRW_OPCODE_ELSE:
         brw_ELSE(p);
         break;
      case BRW_OPCODE_ENDIF:
         brw_ENDIF(p);
         if_depth_in_loop[loop_stack_depth]--;
         break;

      case BRW_OPCODE_DO:
         loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
         if (loop_stack_array_size <= loop_stack_depth) {
            loop_stack_array_size *= 2;
            loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *,
                                  loop_stack_array_size);
            if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int,
                                        loop_stack_array_size);
         }
         if_depth_in_loop[loop_stack_depth] = 0;
         break;

      case BRW_OPCODE_BREAK:
         brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
         break;
      case BRW_OPCODE_CONTINUE:
         /* FINISHME: We need to write the loop instruction support still. */
         if (intel->gen >= 6)
            gen6_CONT(p, loop_stack[loop_stack_depth - 1]);
         else
            brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
         break;

      case BRW_OPCODE_WHILE: {
         struct brw_instruction *inst0, *inst1;
         GLuint br = 1;

         if (intel->gen >= 5)
            br = 2;

         assert(loop_stack_depth > 0);
         loop_stack_depth--;
         inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
         if (intel->gen < 6) {
            /* Patch all the BREAK/CONT instructions from the last BGNLOOP. */
            while (inst0 > loop_stack[loop_stack_depth]) {
               inst0--;
               if (inst0->header.opcode == BRW_OPCODE_BREAK &&
                   inst0->bits3.if_else.jump_count == 0) {
                  inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
               } else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
                          inst0->bits3.if_else.jump_count == 0) {
                  inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
               }
            }
         }
         break;
      }

      default:
         generate_vs_instruction(inst, dst, src);
         break;
      }

      if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
         for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
            if (0) {
               printf("0x%08x 0x%08x 0x%08x 0x%08x ",
                      ((uint32_t *)&p->store[i])[3],
                      ((uint32_t *)&p->store[i])[2],
                      ((uint32_t *)&p->store[i])[1],
                      ((uint32_t *)&p->store[i])[0]);
            }
            brw_disasm(stdout, &p->store[i], intel->gen);
         }
      }

      last_native_inst = p->nr_insn;
   }

   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
      printf("\n");
   }

   ralloc_free(loop_stack);
   ralloc_free(if_depth_in_loop);

   brw_set_uip_jip(p);

   /* OK, while the INTEL_DEBUG=vs above is very nice for debugging VS
    * emit issues, it doesn't get the jump distances into the output,
    * which is often something we want to debug.  So this is here in
    * case you're doing that.
    */
   if (0) {
      if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
         for (unsigned int i = 0; i < p->nr_insn; i++) {
            printf("0x%08x 0x%08x 0x%08x 0x%08x ",
                   ((uint32_t *)&p->store[i])[3],
                   ((uint32_t *)&p->store[i])[2],
                   ((uint32_t *)&p->store[i])[1],
                   ((uint32_t *)&p->store[i])[0]);
            brw_disasm(stdout, &p->store[i], intel->gen);
         }
      }
   }
}

extern "C" {

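/* C-linkage entry point: compile the currently bound GLSL vertex shader
 * with the vec4 backend.  Returns false if no GLSL vertex shader is in
 * use or if code generation fails.
 */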
bool
brw_vs_emit(struct brw_vs_compile *c)
{
   struct brw_compile *p = &c->func;
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
   struct gl_shader_program *prog = ctx->Shader.CurrentVertexProgram;

   if (!prog)
      return false;

   struct brw_shader *shader =
      (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
   if (!shader)
      return false;

   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
      printf("GLSL IR for native vertex shader %d:\n", prog->Name);
      _mesa_print_ir(shader->ir, NULL);
      printf("\n\n");
   }

   vec4_visitor v(c, prog, shader);
   if (!v.run()) {
      /* FINISHME: Cleanly fail, test at link time, etc. */
      assert(!"not reached");
      return false;
   }

   return true;
}

} /* extern "C" */

} /* namespace brw */