/* brw_vec4_emit.cpp revision abf843a797876b5e3c5c91dbec25b6553d2cc281 */
1/* Copyright © 2011 Intel Corporation
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a
4 * copy of this software and associated documentation files (the "Software"),
5 * to deal in the Software without restriction, including without limitation
6 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
7 * and/or sell copies of the Software, and to permit persons to whom the
8 * Software is furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice (including the next
11 * paragraph) shall be included in all copies or substantial portions of the
12 * Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20 * IN THE SOFTWARE.
21 */
22
23#include "brw_vec4.h"
24#include "../glsl/ir_print_visitor.h"
25
26extern "C" {
27#include "brw_eu.h"
28};
29
30using namespace brw;
31
32namespace brw {
33
34int
35vec4_visitor::setup_attributes(int payload_reg)
36{
37   int nr_attributes;
38   int attribute_map[VERT_ATTRIB_MAX];
39
40   nr_attributes = 0;
41   for (int i = 0; i < VERT_ATTRIB_MAX; i++) {
42      if (prog_data->inputs_read & BITFIELD64_BIT(i)) {
43	 attribute_map[i] = payload_reg + nr_attributes;
44	 nr_attributes++;
45      }
46   }
47
48   foreach_iter(exec_list_iterator, iter, this->instructions) {
49      vec4_instruction *inst = (vec4_instruction *)iter.get();
50
51      for (int i = 0; i < 3; i++) {
52	 if (inst->src[i].file != ATTR)
53	    continue;
54
55	 inst->src[i].file = HW_REG;
56	 inst->src[i].fixed_hw_reg = brw_vec8_grf(attribute_map[inst->src[i].reg], 0);
57	 inst->src[i].fixed_hw_reg.dw1.bits.swizzle = inst->src[i].swizzle;
58      }
59   }
60
61   /* The BSpec says we always have to read at least one thing from
62    * the VF, and it appears that the hardware wedges otherwise.
63    */
64   if (nr_attributes == 0)
65      nr_attributes = 1;
66
67   prog_data->urb_read_length = (nr_attributes + 1) / 2;
68
69   return payload_reg + nr_attributes;
70}
71
int
vec4_visitor::setup_uniforms(int reg)
{
   /* Lay out the push-constant (curbe) registers starting at GRF
    * `reg`, record where user clip planes live, and return the first
    * GRF past the uniform payload.
    */

   /* User clip planes from curbe:
    */
   if (c->key.nr_userclip) {
      if (intel->gen >= 6) {
         /* Gen6+: clip planes are packed two vec4 planes per GRF. */
         for (int i = 0; i < c->key.nr_userclip; i++) {
            c->userplane[i] = stride(brw_vec4_grf(reg + i / 2,
                                                  (i % 2) * 4), 0, 4, 1);
         }
         reg += ALIGN(c->key.nr_userclip, 2) / 2;
      } else {
         /* Pre-gen6: clip planes start 6 vec4 slots into the curbe.
          * NOTE(review): the 6-slot skip presumably accounts for fixed
          * curbe contents laid out elsewhere — confirm against the
          * pre-gen6 curbe setup code.
          */
         for (int i = 0; i < c->key.nr_userclip; i++) {
            c->userplane[i] = stride(brw_vec4_grf(reg + (6 + i) / 2,
                                                  (i % 2) * 4), 0, 4, 1);
         }
         reg += (ALIGN(6 + c->key.nr_userclip, 4) / 4) * 2;
      }
   }

   /* The pre-gen6 VS requires that some push constants get loaded no
    * matter what, or the GPU would hang.
    */
   if (intel->gen < 6 && this->uniforms == 0) {
      /* Synthesize one dummy uniform whose four components are zero. */
      this->uniform_size[this->uniforms] = 1;

      for (unsigned int i = 0; i < 4; i++) {
         unsigned int slot = this->uniforms * 4 + i;

         c->prog_data.param[slot] = NULL;
         c->prog_data.param_convert[slot] = PARAM_CONVERT_ZERO;
      }

      this->uniforms++;
      reg++;
   } else {
      /* Two vec4 uniforms fit per GRF; round up. */
      reg += ALIGN(uniforms, 2) / 2;
   }

   /* for now, we are not doing any elimination of unused slots, nor
    * are we packing our uniforms.
    */
   c->prog_data.nr_params = this->uniforms * 4;

   /* Curbe read length is in GRFs, excluding g0. */
   c->prog_data.curb_read_length = reg - 1;
   c->prog_data.uses_new_param_layout = true;

   return reg;
}
122
123void
124vec4_visitor::setup_payload(void)
125{
126   int reg = 0;
127
128   /* The payload always contains important data in g0, which contains
129    * the URB handles that are passed on to the URB write at the end
130    * of the thread.  So, we always start push constants at g1.
131    */
132   reg++;
133
134   reg = setup_uniforms(reg);
135
136   reg = setup_attributes(reg);
137
138   this->first_non_payload_grf = reg;
139}
140
141struct brw_reg
142vec4_instruction::get_dst(void)
143{
144   struct brw_reg brw_reg;
145
146   switch (dst.file) {
147   case GRF:
148      brw_reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0);
149      brw_reg = retype(brw_reg, dst.type);
150      brw_reg.dw1.bits.writemask = dst.writemask;
151      break;
152
153   case HW_REG:
154      brw_reg = dst.fixed_hw_reg;
155      break;
156
157   case BAD_FILE:
158      brw_reg = brw_null_reg();
159      break;
160
161   default:
162      assert(!"not reached");
163      brw_reg = brw_null_reg();
164      break;
165   }
166   return brw_reg;
167}
168
169struct brw_reg
170vec4_instruction::get_src(int i)
171{
172   struct brw_reg brw_reg;
173
174   switch (src[i].file) {
175   case GRF:
176      brw_reg = brw_vec8_grf(src[i].reg + src[i].reg_offset, 0);
177      brw_reg = retype(brw_reg, src[i].type);
178      brw_reg.dw1.bits.swizzle = src[i].swizzle;
179      if (src[i].abs)
180	 brw_reg = brw_abs(brw_reg);
181      if (src[i].negate)
182	 brw_reg = negate(brw_reg);
183      break;
184
185   case IMM:
186      switch (src[i].type) {
187      case BRW_REGISTER_TYPE_F:
188	 brw_reg = brw_imm_f(src[i].imm.f);
189	 break;
190      case BRW_REGISTER_TYPE_D:
191	 brw_reg = brw_imm_d(src[i].imm.i);
192	 break;
193      case BRW_REGISTER_TYPE_UD:
194	 brw_reg = brw_imm_ud(src[i].imm.u);
195	 break;
196      default:
197	 assert(!"not reached");
198	 brw_reg = brw_null_reg();
199	 break;
200      }
201      break;
202
203   case UNIFORM:
204      brw_reg = stride(brw_vec4_grf(1 + (src[i].reg + src[i].reg_offset) / 2,
205				    ((src[i].reg + src[i].reg_offset) % 2) * 4),
206		       0, 4, 1);
207      brw_reg = retype(brw_reg, src[i].type);
208      brw_reg.dw1.bits.swizzle = src[i].swizzle;
209      if (src[i].abs)
210	 brw_reg = brw_abs(brw_reg);
211      if (src[i].negate)
212	 brw_reg = negate(brw_reg);
213      break;
214
215   case HW_REG:
216      brw_reg = src[i].fixed_hw_reg;
217      break;
218
219   case BAD_FILE:
220      /* Probably unused. */
221      brw_reg = brw_null_reg();
222      break;
223   case ATTR:
224   default:
225      assert(!"not reached");
226      brw_reg = brw_null_reg();
227      break;
228   }
229
230   return brw_reg;
231}
232
void
vec4_visitor::generate_math1_gen4(vec4_instruction *inst,
                                  struct brw_reg dst,
                                  struct brw_reg src)
{
   /* Emit a single-operand math operation for gen4/5, where math goes
    * through the shared math unit as a message starting at base_mrf.
    */
   brw_math(p,
            dst,
            brw_math_function(inst->opcode),
            BRW_MATH_SATURATE_NONE,
            inst->base_mrf,
            src,
            BRW_MATH_DATA_SCALAR,
            BRW_MATH_PRECISION_FULL);
}
247
248static void
249check_gen6_math_src_arg(struct brw_reg src)
250{
251   /* Source swizzles are ignored. */
252   assert(!src.abs);
253   assert(!src.negate);
254   assert(src.dw1.bits.swizzle = BRW_SWIZZLE_XYZW);
255}
256
void
vec4_visitor::generate_math1_gen6(vec4_instruction *inst,
                                  struct brw_reg dst,
                                  struct brw_reg src)
{
   /* Gen6 math is a regular EU instruction, but it only operates in
    * align1 mode, so bracket the emission with access-mode switches.
    */

   /* Can't do writemask because math can't be align16. */
   assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
   check_gen6_math_src_arg(src);

   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_math(p,
            dst,
            brw_math_function(inst->opcode),
            BRW_MATH_SATURATE_NONE,
            inst->base_mrf,
            src,
            BRW_MATH_DATA_SCALAR,
            BRW_MATH_PRECISION_FULL);
   /* Restore the vec4 backend's default access mode. */
   brw_set_access_mode(p, BRW_ALIGN_16);
}
277
void
vec4_visitor::generate_math2_gen6(vec4_instruction *inst,
                                  struct brw_reg dst,
                                  struct brw_reg src0,
                                  struct brw_reg src1)
{
   /* Two-operand gen6 math (POW): both sources are register operands
    * of the align1 math instruction, no message payload needed.
    */

   /* Can't do writemask because math can't be align16. */
   assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
   /* Source swizzles are ignored. */
   check_gen6_math_src_arg(src0);
   check_gen6_math_src_arg(src1);

   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_math2(p,
             dst,
             brw_math_function(inst->opcode),
             src0, src1);
   /* Restore the vec4 backend's default access mode. */
   brw_set_access_mode(p, BRW_ALIGN_16);
}
297
void
vec4_visitor::generate_math2_gen4(vec4_instruction *inst,
                                  struct brw_reg dst,
                                  struct brw_reg src0,
                                  struct brw_reg src1)
{
   /* Two-operand math for gen4/5: the second operand travels in the
    * message payload, in the MRF following base_mrf.
    */

   /* Can't do writemask because math can't be align16. */
   assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);

   brw_MOV(p, brw_message_reg(inst->base_mrf + 1), src1);

   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_math(p,
            dst,
            brw_math_function(inst->opcode),
            BRW_MATH_SATURATE_NONE,
            inst->base_mrf,
            src0,
            BRW_MATH_DATA_VECTOR,
            BRW_MATH_PRECISION_FULL);
   /* Restore the vec4 backend's default access mode. */
   brw_set_access_mode(p, BRW_ALIGN_16);
}
320
void
vec4_visitor::generate_urb_write(vec4_instruction *inst)
{
   /* Emit the URB write delivering shaded vertex results; inst->eot
    * marks the final write, which also terminates the thread.
    */
   brw_urb_WRITE(p,
                 brw_null_reg(), /* dest */
                 inst->base_mrf, /* starting mrf reg nr */
                 brw_vec8_grf(0, 0), /* src */
                 false,         /* allocate */
                 true,          /* used */
                 inst->mlen,
                 0,             /* response len */
                 inst->eot,     /* eot */
                 inst->eot,     /* writes complete */
                 inst->offset,  /* urb destination offset */
                 BRW_URB_SWIZZLE_INTERLEAVE);
}
337
338void
339vec4_visitor::generate_oword_dual_block_offsets(struct brw_reg m1,
340						struct brw_reg index)
341{
342   int second_vertex_offset;
343
344   if (intel->gen >= 6)
345      second_vertex_offset = 1;
346   else
347      second_vertex_offset = 16;
348
349   m1 = retype(m1, BRW_REGISTER_TYPE_D);
350
351   /* Set up M1 (message payload).  Only the block offsets in M1.0 and
352    * M1.4 are used, and the rest are ignored.
353    */
354   struct brw_reg m1_0 = suboffset(vec1(m1), 0);
355   struct brw_reg m1_4 = suboffset(vec1(m1), 4);
356   struct brw_reg index_0 = suboffset(vec1(index), 0);
357   struct brw_reg index_4 = suboffset(vec1(index), 4);
358
359   brw_push_insn_state(p);
360   brw_set_mask_control(p, BRW_MASK_DISABLE);
361   brw_set_access_mode(p, BRW_ALIGN_1);
362
363   brw_MOV(p, m1_0, index_0);
364
365   brw_set_predicate_inverse(p, true);
366   if (index.file == BRW_IMMEDIATE_VALUE) {
367      index_4.dw1.ud++;
368      brw_MOV(p, m1_4, index_4);
369   } else {
370      brw_ADD(p, m1_4, index_4, brw_imm_d(second_vertex_offset));
371   }
372
373   brw_pop_insn_state(p);
374}
375
void
vec4_visitor::generate_scratch_read(vec4_instruction *inst,
                                    struct brw_reg dst,
                                    struct brw_reg index)
{
   /* Read one vec4 per interleaved vertex back from scratch space
    * with an oword dual block read; `index` holds the block offsets.
    */
   if (intel->gen >= 6) {
      /* Gen6+ needs a message header: copy g0 into m(base_mrf). */
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_MOV(p,
              retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_D),
              retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_D));
      brw_pop_insn_state(p);
   }

   generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1),
                                     index);

   uint32_t msg_type;

   if (intel->gen >= 6)
      msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else if (intel->gen == 5 || intel->is_g4x)
      msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else
      msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;

   /* Each of the 8 channel enables is considered for whether each
    * dword is written.
    */
   struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, send, dst);
   brw_set_src0(p, send, brw_message_reg(inst->base_mrf));
   brw_set_dp_read_message(p, send,
                           255, /* binding table index: stateless access */
                           BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
                           msg_type,
                           BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
                           2, /* mlen */
                           1 /* rlen */);
}
416
void
vec4_visitor::generate_scratch_write(vec4_instruction *inst,
                                     struct brw_reg dst,
                                     struct brw_reg src,
                                     struct brw_reg index)
{
   /* Write one vec4 per interleaved vertex to scratch space with an
    * oword dual block write; `index` holds the block offsets.
    */

   /* If the instruction is predicated, we'll predicate the send, not
    * the header setup.
    */
   brw_set_predicate_control(p, false);

   if (intel->gen >= 6) {
      /* Gen6+ needs a message header: copy g0 into m(base_mrf). */
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_MOV(p,
              retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_D),
              retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_D));
      brw_pop_insn_state(p);
   }

   generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1),
                                     index);

   /* The data being written goes in the MRF after the offsets. */
   brw_MOV(p,
           retype(brw_message_reg(inst->base_mrf + 2), BRW_REGISTER_TYPE_D),
           retype(src, BRW_REGISTER_TYPE_D));

   uint32_t msg_type;

   if (intel->gen >= 6)
      msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;
   else
      msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;

   /* Re-apply the instruction's predicate for the send itself. */
   brw_set_predicate_control(p, inst->predicate);

   /* Each of the 8 channel enables is considered for whether each
    * dword is written.
    */
   struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, send, dst);
   brw_set_src0(p, send, brw_message_reg(inst->base_mrf));
   brw_set_dp_write_message(p, send,
                            255, /* binding table index: stateless access */
                            BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
                            msg_type,
                            3, /* mlen */
                            true, /* header present */
                            false, /* pixel scoreboard */
                            0, /* rlen */
                            false, /* eot */
                            false /* commit */);
}
470
471void
472vec4_visitor::generate_vs_instruction(vec4_instruction *instruction,
473				      struct brw_reg dst,
474				      struct brw_reg *src)
475{
476   vec4_instruction *inst = (vec4_instruction *)instruction;
477
478   switch (inst->opcode) {
479   case SHADER_OPCODE_RCP:
480   case SHADER_OPCODE_RSQ:
481   case SHADER_OPCODE_SQRT:
482   case SHADER_OPCODE_EXP2:
483   case SHADER_OPCODE_LOG2:
484   case SHADER_OPCODE_SIN:
485   case SHADER_OPCODE_COS:
486      if (intel->gen >= 6) {
487	 generate_math1_gen6(inst, dst, src[0]);
488      } else {
489	 generate_math1_gen4(inst, dst, src[0]);
490      }
491      break;
492
493   case SHADER_OPCODE_POW:
494      if (intel->gen >= 6) {
495	 generate_math2_gen6(inst, dst, src[0], src[1]);
496      } else {
497	 generate_math2_gen4(inst, dst, src[0], src[1]);
498      }
499      break;
500
501   case VS_OPCODE_URB_WRITE:
502      generate_urb_write(inst);
503      break;
504
505   case VS_OPCODE_SCRATCH_READ:
506      generate_scratch_read(inst, dst, src[0]);
507      break;
508
509   case VS_OPCODE_SCRATCH_WRITE:
510      generate_scratch_write(inst, dst, src[0], src[1]);
511      break;
512
513   default:
514      if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
515	 fail("unsupported opcode in `%s' in VS\n",
516	      brw_opcodes[inst->opcode].name);
517      } else {
518	 fail("Unsupported opcode %d in VS", inst->opcode);
519      }
520   }
521}
522
bool
vec4_visitor::run()
{
   /* Top-level compile driver: build vec4 IR from the GLSL IR, lower
    * array access, allocate registers, and emit native code.
    * Returns false if compilation failed along the way.
    */

   /* Generate VS IR for main().  (the visitor only descends into
    * functions called "main").
    */
   foreach_iter(exec_list_iterator, iter, *shader->ir) {
      ir_instruction *ir = (ir_instruction *)iter.get();
      base_ir = ir;
      ir->accept(this);
   }

   emit_urb_writes();

   /* Before any optimization, push array accesses out to scratch
    * space where we need them to be.  This pass may allocate new
    * virtual GRFs, so we want to do it early.  It also makes sure
    * that we have reladdr computations available for CSE, since we'll
    * often do repeated subexpressions for those.
    */
   move_grf_array_access_to_scratch();

   if (failed)
      return false;

   setup_payload();
   reg_allocate();

   /* vec4 code is emitted in align16 mode by default. */
   brw_set_access_mode(p, BRW_ALIGN_16);

   generate_code();

   return !failed;
}
557
void
vec4_visitor::generate_code()
{
   /* Walk the vec4 IR instruction list and emit native gen
    * instructions through the brw_eu emitter, tracking control-flow
    * state (per-loop if-depth, stack of open loops) and optionally
    * disassembling as we go under INTEL_DEBUG=vs.
    */
   int last_native_inst = p->nr_insn;
   const char *last_annotation_string = NULL;
   ir_instruction *last_annotation_ir = NULL;

   /* Stack of open DO instructions, grown on demand in the DO case. */
   int loop_stack_array_size = 16;
   int loop_stack_depth = 0;
   brw_instruction **loop_stack =
      rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size);
   int *if_depth_in_loop =
      rzalloc_array(this->mem_ctx, int, loop_stack_array_size);


   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
      printf("Native code for vertex shader %d:\n", prog->Name);
   }

   foreach_list(node, &this->instructions) {
      vec4_instruction *inst = (vec4_instruction *)node;
      struct brw_reg src[3], dst;

      /* When debugging, print the originating IR node / annotation,
       * but only when it changes between instructions.
       */
      if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
	 if (last_annotation_ir != inst->ir) {
	    last_annotation_ir = inst->ir;
	    if (last_annotation_ir) {
	       printf("   ");
	       last_annotation_ir->print();
	       printf("\n");
	    }
	 }
	 if (last_annotation_string != inst->annotation) {
	    last_annotation_string = inst->annotation;
	    if (last_annotation_string)
	       printf("   %s\n", last_annotation_string);
	 }
      }

      /* Resolve abstract operands to hardware registers. */
      for (unsigned int i = 0; i < 3; i++) {
	 src[i] = inst->get_src(i);
      }
      dst = inst->get_dst();

      /* Per-instruction emitter state shared by every opcode below. */
      brw_set_conditionalmod(p, inst->conditional_mod);
      brw_set_predicate_control(p, inst->predicate);
      brw_set_predicate_inverse(p, inst->predicate_inverse);
      brw_set_saturate(p, inst->saturate);

      switch (inst->opcode) {
      case BRW_OPCODE_MOV:
	 brw_MOV(p, dst, src[0]);
	 break;
      case BRW_OPCODE_ADD:
	 brw_ADD(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_MUL:
	 brw_MUL(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_FRC:
	 brw_FRC(p, dst, src[0]);
	 break;
      case BRW_OPCODE_RNDD:
	 brw_RNDD(p, dst, src[0]);
	 break;
      case BRW_OPCODE_RNDE:
	 brw_RNDE(p, dst, src[0]);
	 break;
      case BRW_OPCODE_RNDZ:
	 brw_RNDZ(p, dst, src[0]);
	 break;

      case BRW_OPCODE_AND:
	 brw_AND(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_OR:
	 brw_OR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_XOR:
	 brw_XOR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_NOT:
	 brw_NOT(p, dst, src[0]);
	 break;
      case BRW_OPCODE_ASR:
	 brw_ASR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_SHR:
	 brw_SHR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_SHL:
	 brw_SHL(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_CMP:
	 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
	 break;
      case BRW_OPCODE_SEL:
	 brw_SEL(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_DP4:
	 brw_DP4(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_DP3:
	 brw_DP3(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_DP2:
	 brw_DP2(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_IF:
	 if (inst->src[0].file != BAD_FILE) {
	    /* The instruction has an embedded compare (only allowed on gen6) */
	    assert(intel->gen == 6);
	    gen6_IF(p, inst->conditional_mod, src[0], src[1]);
	 } else {
	    struct brw_instruction *brw_inst = brw_IF(p, BRW_EXECUTE_8);
	    brw_inst->header.predicate_control = inst->predicate;
	 }
	 if_depth_in_loop[loop_stack_depth]++;
	 break;

      case BRW_OPCODE_ELSE:
	 brw_ELSE(p);
	 break;
      case BRW_OPCODE_ENDIF:
	 brw_ENDIF(p);
	 if_depth_in_loop[loop_stack_depth]--;
	 break;

      case BRW_OPCODE_DO:
	 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
	 /* Grow both control-flow arrays together when the stack fills. */
	 if (loop_stack_array_size <= loop_stack_depth) {
	    loop_stack_array_size *= 2;
	    loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *,
				  loop_stack_array_size);
	    if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int,
				        loop_stack_array_size);
	 }
	 if_depth_in_loop[loop_stack_depth] = 0;
	 break;

      case BRW_OPCODE_BREAK:
	 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
	 break;
      case BRW_OPCODE_CONTINUE:
	 /* FINISHME: We need to write the loop instruction support still. */
	 if (intel->gen >= 6)
	    gen6_CONT(p, loop_stack[loop_stack_depth - 1]);
	 else
	    brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
	 break;

      case BRW_OPCODE_WHILE: {
	 struct brw_instruction *inst0, *inst1;
	 GLuint br = 1;

	 /* br scales jump counts; 2 on gen5+ per its encoding. */
	 if (intel->gen >= 5)
	    br = 2;

	 assert(loop_stack_depth > 0);
	 loop_stack_depth--;
	 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
	 if (intel->gen < 6) {
	    /* patch all the BREAK/CONT instructions from last BGNLOOP */
	    while (inst0 > loop_stack[loop_stack_depth]) {
	       inst0--;
	       if (inst0->header.opcode == BRW_OPCODE_BREAK &&
		   inst0->bits3.if_else.jump_count == 0) {
		  inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
	       }
	       else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
			inst0->bits3.if_else.jump_count == 0) {
		  inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
	       }
	    }
	 }
      }
	 break;

      default:
	 /* Anything else is a VS/shader-specific opcode. */
	 generate_vs_instruction(inst, dst, src);
	 break;
      }

      /* Disassemble whatever native instructions this IR produced. */
      if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
	 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
	    if (0) {
	       printf("0x%08x 0x%08x 0x%08x 0x%08x ",
		      ((uint32_t *)&p->store[i])[3],
		      ((uint32_t *)&p->store[i])[2],
		      ((uint32_t *)&p->store[i])[1],
		      ((uint32_t *)&p->store[i])[0]);
	    }
	    brw_disasm(stdout, &p->store[i], intel->gen);
	 }
      }

      last_native_inst = p->nr_insn;
   }

   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
      printf("\n");
   }

   ralloc_free(loop_stack);
   ralloc_free(if_depth_in_loop);

   /* Patch final jump targets now that all instructions are emitted. */
   brw_set_uip_jip(p);

   /* OK, while the INTEL_DEBUG=vs above is very nice for debugging VS
    * emit issues, it doesn't get the jump distances into the output,
    * which is often something we want to debug.  So this is here in
    * case you're doing that.
    */
   if (0) {
      if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
	 for (unsigned int i = 0; i < p->nr_insn; i++) {
	    printf("0x%08x 0x%08x 0x%08x 0x%08x ",
		   ((uint32_t *)&p->store[i])[3],
		   ((uint32_t *)&p->store[i])[2],
		   ((uint32_t *)&p->store[i])[1],
		   ((uint32_t *)&p->store[i])[0]);
	    brw_disasm(stdout, &p->store[i], intel->gen);
	 }
      }
   }
}
792
793extern "C" {
794
795bool
796brw_vs_emit(struct brw_vs_compile *c)
797{
798   struct brw_compile *p = &c->func;
799   struct brw_context *brw = p->brw;
800   struct intel_context *intel = &brw->intel;
801   struct gl_context *ctx = &intel->ctx;
802   struct gl_shader_program *prog = ctx->Shader.CurrentVertexProgram;
803
804   if (!prog)
805      return false;
806
807   struct brw_shader *shader =
808     (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
809   if (!shader)
810      return false;
811
812   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
813      printf("GLSL IR for native vertex shader %d:\n", prog->Name);
814      _mesa_print_ir(shader->ir, NULL);
815      printf("\n\n");
816   }
817
818   vec4_visitor v(c, prog, shader);
819   if (!v.run()) {
820      /* FINISHME: Cleanly fail, test at link time, etc. */
821      assert(!"not reached");
822      return false;
823   }
824
825   return true;
826}
827
828} /* extern "C" */
829
830} /* namespace brw */
831