brw_vec4_emit.cpp revision af3c9803d818fd33139f1247a387d64b967b8992
1/*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "brw_vec4.h"
25#include "../glsl/ir_print_visitor.h"
26
27extern "C" {
28#include "brw_eu.h"
29};
30
31using namespace brw;
32
33namespace brw {
34
35int
36vec4_visitor::setup_attributes(int payload_reg)
37{
38   int nr_attributes;
39   int attribute_map[VERT_ATTRIB_MAX];
40
41   nr_attributes = 0;
42   for (int i = 0; i < VERT_ATTRIB_MAX; i++) {
43      if (prog_data->inputs_read & BITFIELD64_BIT(i)) {
44	 attribute_map[i] = payload_reg + nr_attributes;
45	 nr_attributes++;
46      }
47   }
48
49   foreach_iter(exec_list_iterator, iter, this->instructions) {
50      vec4_instruction *inst = (vec4_instruction *)iter.get();
51
52      for (int i = 0; i < 3; i++) {
53	 if (inst->src[i].file != ATTR)
54	    continue;
55
56	 inst->src[i].file = HW_REG;
57	 inst->src[i].fixed_hw_reg = brw_vec8_grf(attribute_map[inst->src[i].reg], 0);
58	 inst->src[i].fixed_hw_reg.dw1.bits.swizzle = inst->src[i].swizzle;
59      }
60   }
61
62   /* The BSpec says we always have to read at least one thing from
63    * the VF, and it appears that the hardware wedges otherwise.
64    */
65   if (nr_attributes == 0)
66      nr_attributes = 1;
67
68   prog_data->urb_read_length = (nr_attributes + 1) / 2;
69
70   return nr_attributes;
71}
72
73void
74vec4_visitor::setup_payload(void)
75{
76   int reg = 0;
77
78   /* r0 is always reserved, as it contains the payload with the URB
79    * handles that are passed on to the URB write at the end of the
80    * thread.
81    */
82   reg++;
83
84   /* User clip planes from curbe:
85    */
86   if (c->key.nr_userclip) {
87      if (intel->gen >= 6) {
88	 for (int i = 0; i < c->key.nr_userclip; i++) {
89	    c->userplane[i] = stride(brw_vec4_grf(reg + i / 2,
90						  (i % 2) * 4), 0, 4, 1);
91	 }
92	 reg += ALIGN(c->key.nr_userclip, 2) / 2;
93      } else {
94	 for (int i = 0; i < c->key.nr_userclip; i++) {
95	    c->userplane[i] = stride(brw_vec4_grf(reg + (6 + i) / 2,
96						  (i % 2) * 4), 0, 4, 1);
97	 }
98	 reg += (ALIGN(6 + c->key.nr_userclip, 4) / 4) * 2;
99      }
100   }
101
102   /* FINISHME: push constants */
103   c->prog_data.curb_read_length = reg - 1;
104   c->prog_data.nr_params = 0;
105   /* XXX 0 causes a bug elsewhere... */
106   if (intel->gen < 6 && c->prog_data.nr_params == 0)
107      c->prog_data.nr_params = 4;
108
109   reg += setup_attributes(reg);
110
111   this->first_non_payload_grf = reg;
112}
113
114struct brw_reg
115vec4_instruction::get_dst(void)
116{
117   struct brw_reg brw_reg;
118
119   switch (dst.file) {
120   case GRF:
121      assert(dst.reg_offset == 0);
122      brw_reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0);
123      brw_reg = retype(brw_reg, dst.type);
124      brw_reg.dw1.bits.writemask = dst.writemask;
125      break;
126
127   case HW_REG:
128      brw_reg = dst.fixed_hw_reg;
129      break;
130
131   case BAD_FILE:
132      brw_reg = brw_null_reg();
133      break;
134
135   default:
136      assert(!"not reached");
137      brw_reg = brw_null_reg();
138      break;
139   }
140   return brw_reg;
141}
142
143struct brw_reg
144vec4_instruction::get_src(int i)
145{
146   struct brw_reg brw_reg;
147
148   switch (src[i].file) {
149   case GRF:
150      brw_reg = brw_vec8_grf(src[i].reg + src[i].reg_offset, 0);
151      brw_reg = retype(brw_reg, src[i].type);
152      brw_reg.dw1.bits.swizzle = src[i].swizzle;
153      if (src[i].abs)
154	 brw_reg = brw_abs(brw_reg);
155      if (src[i].negate)
156	 brw_reg = negate(brw_reg);
157      break;
158
159   case IMM:
160      switch (src[i].type) {
161      case BRW_REGISTER_TYPE_F:
162	 brw_reg = brw_imm_f(src[i].imm.f);
163	 break;
164      case BRW_REGISTER_TYPE_D:
165	 brw_reg = brw_imm_d(src[i].imm.i);
166	 break;
167      case BRW_REGISTER_TYPE_UD:
168	 brw_reg = brw_imm_ud(src[i].imm.u);
169	 break;
170      default:
171	 assert(!"not reached");
172	 brw_reg = brw_null_reg();
173	 break;
174      }
175      break;
176
177   case HW_REG:
178      brw_reg = src[i].fixed_hw_reg;
179      break;
180
181   case BAD_FILE:
182      /* Probably unused. */
183      brw_reg = brw_null_reg();
184      break;
185   case ATTR:
186   default:
187      assert(!"not reached");
188      brw_reg = brw_null_reg();
189      break;
190   }
191
192   return brw_reg;
193}
194
195void
196vec4_visitor::generate_math1_gen4(vec4_instruction *inst,
197				  struct brw_reg dst,
198				  struct brw_reg src)
199{
200   brw_math(p,
201	    dst,
202	    brw_math_function(inst->opcode),
203	    BRW_MATH_SATURATE_NONE,
204	    inst->base_mrf,
205	    src,
206	    BRW_MATH_DATA_SCALAR,
207	    BRW_MATH_PRECISION_FULL);
208}
209
210void
211vec4_visitor::generate_math1_gen6(vec4_instruction *inst,
212				  struct brw_reg dst,
213				  struct brw_reg src)
214{
215   brw_math(p,
216	    dst,
217	    brw_math_function(inst->opcode),
218	    BRW_MATH_SATURATE_NONE,
219	    inst->base_mrf,
220	    src,
221	    BRW_MATH_DATA_SCALAR,
222	    BRW_MATH_PRECISION_FULL);
223}
224
225void
226vec4_visitor::generate_urb_write(vec4_instruction *inst)
227{
228   brw_urb_WRITE(p,
229		 brw_null_reg(), /* dest */
230		 inst->base_mrf, /* starting mrf reg nr */
231		 brw_vec8_grf(0, 0), /* src */
232		 false,		/* allocate */
233		 true,		/* used */
234		 inst->mlen,
235		 0,		/* response len */
236		 inst->eot,	/* eot */
237		 inst->eot,	/* writes complete */
238		 inst->offset,	/* urb destination offset */
239		 BRW_URB_SWIZZLE_INTERLEAVE);
240}
241
242void
243vec4_visitor::generate_vs_instruction(vec4_instruction *instruction,
244				      struct brw_reg dst,
245				      struct brw_reg *src)
246{
247   vec4_instruction *inst = (vec4_instruction *)instruction;
248
249   switch (inst->opcode) {
250   case SHADER_OPCODE_RCP:
251   case SHADER_OPCODE_RSQ:
252   case SHADER_OPCODE_SQRT:
253   case SHADER_OPCODE_EXP2:
254   case SHADER_OPCODE_LOG2:
255   case SHADER_OPCODE_SIN:
256   case SHADER_OPCODE_COS:
257      if (intel->gen >= 6) {
258	 generate_math1_gen6(inst, dst, src[0]);
259      } else {
260	 generate_math1_gen4(inst, dst, src[0]);
261      }
262      break;
263
264   case SHADER_OPCODE_POW:
265      assert(!"finishme");
266      break;
267
268   case VS_OPCODE_URB_WRITE:
269      generate_urb_write(inst);
270      break;
271
272   default:
273      if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
274	 fail("unsupported opcode in `%s' in VS\n",
275	      brw_opcodes[inst->opcode].name);
276      } else {
277	 fail("Unsupported opcode %d in VS", inst->opcode);
278      }
279   }
280}
281
282bool
283vec4_visitor::run()
284{
285   /* Generate FS IR for main().  (the visitor only descends into
286    * functions called "main").
287    */
288   foreach_iter(exec_list_iterator, iter, *shader->ir) {
289      ir_instruction *ir = (ir_instruction *)iter.get();
290      base_ir = ir;
291      ir->accept(this);
292   }
293
294   emit_urb_writes();
295
296   if (failed)
297      return false;
298
299   setup_payload();
300   reg_allocate();
301
302   brw_set_access_mode(p, BRW_ALIGN_16);
303
304   generate_code();
305
306   return !failed;
307}
308
309void
310vec4_visitor::generate_code()
311{
312   int last_native_inst = p->nr_insn;
313   const char *last_annotation_string = NULL;
314   ir_instruction *last_annotation_ir = NULL;
315
316   int loop_stack_array_size = 16;
317   int loop_stack_depth = 0;
318   brw_instruction **loop_stack =
319      rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size);
320   int *if_depth_in_loop =
321      rzalloc_array(this->mem_ctx, int, loop_stack_array_size);
322
323
324   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
325      printf("Native code for vertex shader %d:\n", prog->Name);
326   }
327
328   foreach_list(node, &this->instructions) {
329      vec4_instruction *inst = (vec4_instruction *)node;
330      struct brw_reg src[3], dst;
331
332      if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
333	 if (last_annotation_ir != inst->ir) {
334	    last_annotation_ir = inst->ir;
335	    if (last_annotation_ir) {
336	       printf("   ");
337	       last_annotation_ir->print();
338	       printf("\n");
339	    }
340	 }
341	 if (last_annotation_string != inst->annotation) {
342	    last_annotation_string = inst->annotation;
343	    if (last_annotation_string)
344	       printf("   %s\n", last_annotation_string);
345	 }
346      }
347
348      for (unsigned int i = 0; i < 3; i++) {
349	 src[i] = inst->get_src(i);
350      }
351      dst = inst->get_dst();
352
353      brw_set_conditionalmod(p, inst->conditional_mod);
354      brw_set_predicate_control(p, inst->predicate);
355      brw_set_predicate_inverse(p, inst->predicate_inverse);
356      brw_set_saturate(p, inst->saturate);
357
358      switch (inst->opcode) {
359      case BRW_OPCODE_MOV:
360	 brw_MOV(p, dst, src[0]);
361	 break;
362      case BRW_OPCODE_ADD:
363	 brw_ADD(p, dst, src[0], src[1]);
364	 break;
365      case BRW_OPCODE_MUL:
366	 brw_MUL(p, dst, src[0], src[1]);
367	 break;
368
369      case BRW_OPCODE_FRC:
370	 brw_FRC(p, dst, src[0]);
371	 break;
372      case BRW_OPCODE_RNDD:
373	 brw_RNDD(p, dst, src[0]);
374	 break;
375      case BRW_OPCODE_RNDE:
376	 brw_RNDE(p, dst, src[0]);
377	 break;
378      case BRW_OPCODE_RNDZ:
379	 brw_RNDZ(p, dst, src[0]);
380	 break;
381
382      case BRW_OPCODE_AND:
383	 brw_AND(p, dst, src[0], src[1]);
384	 break;
385      case BRW_OPCODE_OR:
386	 brw_OR(p, dst, src[0], src[1]);
387	 break;
388      case BRW_OPCODE_XOR:
389	 brw_XOR(p, dst, src[0], src[1]);
390	 break;
391      case BRW_OPCODE_NOT:
392	 brw_NOT(p, dst, src[0]);
393	 break;
394      case BRW_OPCODE_ASR:
395	 brw_ASR(p, dst, src[0], src[1]);
396	 break;
397      case BRW_OPCODE_SHR:
398	 brw_SHR(p, dst, src[0], src[1]);
399	 break;
400      case BRW_OPCODE_SHL:
401	 brw_SHL(p, dst, src[0], src[1]);
402	 break;
403
404      case BRW_OPCODE_CMP:
405	 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
406	 break;
407      case BRW_OPCODE_SEL:
408	 brw_SEL(p, dst, src[0], src[1]);
409	 break;
410
411      case BRW_OPCODE_IF:
412	 if (inst->src[0].file != BAD_FILE) {
413	    /* The instruction has an embedded compare (only allowed on gen6) */
414	    assert(intel->gen == 6);
415	    gen6_IF(p, inst->conditional_mod, src[0], src[1]);
416	 } else {
417	    brw_IF(p, BRW_EXECUTE_8);
418	 }
419	 if_depth_in_loop[loop_stack_depth]++;
420	 break;
421
422      case BRW_OPCODE_ELSE:
423	 brw_ELSE(p);
424	 break;
425      case BRW_OPCODE_ENDIF:
426	 brw_ENDIF(p);
427	 if_depth_in_loop[loop_stack_depth]--;
428	 break;
429
430      case BRW_OPCODE_DO:
431	 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
432	 if (loop_stack_array_size <= loop_stack_depth) {
433	    loop_stack_array_size *= 2;
434	    loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *,
435				  loop_stack_array_size);
436	    if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int,
437				        loop_stack_array_size);
438	 }
439	 if_depth_in_loop[loop_stack_depth] = 0;
440	 break;
441
442      case BRW_OPCODE_BREAK:
443	 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
444	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
445	 break;
446      case BRW_OPCODE_CONTINUE:
447	 /* FINISHME: We need to write the loop instruction support still. */
448	 if (intel->gen >= 6)
449	    gen6_CONT(p, loop_stack[loop_stack_depth - 1]);
450	 else
451	    brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
452	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
453	 break;
454
455      case BRW_OPCODE_WHILE: {
456	 struct brw_instruction *inst0, *inst1;
457	 GLuint br = 1;
458
459	 if (intel->gen >= 5)
460	    br = 2;
461
462	 assert(loop_stack_depth > 0);
463	 loop_stack_depth--;
464	 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
465	 if (intel->gen < 6) {
466	    /* patch all the BREAK/CONT instructions from last BGNLOOP */
467	    while (inst0 > loop_stack[loop_stack_depth]) {
468	       inst0--;
469	       if (inst0->header.opcode == BRW_OPCODE_BREAK &&
470		   inst0->bits3.if_else.jump_count == 0) {
471		  inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
472	    }
473	       else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
474			inst0->bits3.if_else.jump_count == 0) {
475		  inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
476	       }
477	    }
478	 }
479      }
480	 break;
481
482      default:
483	 generate_vs_instruction(inst, dst, src);
484	 break;
485      }
486
487      if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
488	 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
489	    if (0) {
490	       printf("0x%08x 0x%08x 0x%08x 0x%08x ",
491		      ((uint32_t *)&p->store[i])[3],
492		      ((uint32_t *)&p->store[i])[2],
493		      ((uint32_t *)&p->store[i])[1],
494		      ((uint32_t *)&p->store[i])[0]);
495	    }
496	    brw_disasm(stdout, &p->store[i], intel->gen);
497	 }
498      }
499
500      last_native_inst = p->nr_insn;
501   }
502
503   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
504      printf("\n");
505   }
506
507   ralloc_free(loop_stack);
508   ralloc_free(if_depth_in_loop);
509
510   brw_set_uip_jip(p);
511
512   /* OK, while the INTEL_DEBUG=vs above is very nice for debugging VS
513    * emit issues, it doesn't get the jump distances into the output,
514    * which is often something we want to debug.  So this is here in
515    * case you're doing that.
516    */
517   if (0) {
518      if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
519	 for (unsigned int i = 0; i < p->nr_insn; i++) {
520	    printf("0x%08x 0x%08x 0x%08x 0x%08x ",
521		   ((uint32_t *)&p->store[i])[3],
522		   ((uint32_t *)&p->store[i])[2],
523		   ((uint32_t *)&p->store[i])[1],
524		   ((uint32_t *)&p->store[i])[0]);
525	    brw_disasm(stdout, &p->store[i], intel->gen);
526	 }
527      }
528   }
529}
530
531extern "C" {
532
533bool
534brw_vs_emit(struct brw_vs_compile *c)
535{
536   struct brw_compile *p = &c->func;
537   struct brw_context *brw = p->brw;
538   struct intel_context *intel = &brw->intel;
539   struct gl_context *ctx = &intel->ctx;
540   struct gl_shader_program *prog = ctx->Shader.CurrentVertexProgram;
541
542   if (!prog)
543      return false;
544
545   struct brw_shader *shader =
546     (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
547   if (!shader)
548      return false;
549
550   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
551      printf("GLSL IR for native vertex shader %d:\n", prog->Name);
552      _mesa_print_ir(shader->ir, NULL);
553      printf("\n\n");
554   }
555
556   vec4_visitor v(c, prog, shader);
557   if (!v.run()) {
558      /* FINISHME: Cleanly fail, test at link time, etc. */
559      assert(!"not reached");
560      return false;
561   }
562
563   return true;
564}
565
566} /* extern "C" */
567
568} /* namespace brw */
569