brw_vec4_emit.cpp revision af3c9803d818fd33139f1247a387d64b967b8992
1/* 2 * Copyright © 2011 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "brw_vec4.h" 25#include "../glsl/ir_print_visitor.h" 26 27extern "C" { 28#include "brw_eu.h" 29}; 30 31using namespace brw; 32 33namespace brw { 34 35int 36vec4_visitor::setup_attributes(int payload_reg) 37{ 38 int nr_attributes; 39 int attribute_map[VERT_ATTRIB_MAX]; 40 41 nr_attributes = 0; 42 for (int i = 0; i < VERT_ATTRIB_MAX; i++) { 43 if (prog_data->inputs_read & BITFIELD64_BIT(i)) { 44 attribute_map[i] = payload_reg + nr_attributes; 45 nr_attributes++; 46 } 47 } 48 49 foreach_iter(exec_list_iterator, iter, this->instructions) { 50 vec4_instruction *inst = (vec4_instruction *)iter.get(); 51 52 for (int i = 0; i < 3; i++) { 53 if (inst->src[i].file != ATTR) 54 continue; 55 56 inst->src[i].file = HW_REG; 57 inst->src[i].fixed_hw_reg = brw_vec8_grf(attribute_map[inst->src[i].reg], 0); 58 inst->src[i].fixed_hw_reg.dw1.bits.swizzle = inst->src[i].swizzle; 59 } 60 } 61 62 /* The BSpec says we always have to read at least one thing from 63 * the VF, and it appears that the hardware wedges otherwise. 64 */ 65 if (nr_attributes == 0) 66 nr_attributes = 1; 67 68 prog_data->urb_read_length = (nr_attributes + 1) / 2; 69 70 return nr_attributes; 71} 72 73void 74vec4_visitor::setup_payload(void) 75{ 76 int reg = 0; 77 78 /* r0 is always reserved, as it contains the payload with the URB 79 * handles that are passed on to the URB write at the end of the 80 * thread. 81 */ 82 reg++; 83 84 /* User clip planes from curbe: 85 */ 86 if (c->key.nr_userclip) { 87 if (intel->gen >= 6) { 88 for (int i = 0; i < c->key.nr_userclip; i++) { 89 c->userplane[i] = stride(brw_vec4_grf(reg + i / 2, 90 (i % 2) * 4), 0, 4, 1); 91 } 92 reg += ALIGN(c->key.nr_userclip, 2) / 2; 93 } else { 94 for (int i = 0; i < c->key.nr_userclip; i++) { 95 c->userplane[i] = stride(brw_vec4_grf(reg + (6 + i) / 2, 96 (i % 2) * 4), 0, 4, 1); 97 } 98 reg += (ALIGN(6 + c->key.nr_userclip, 4) / 4) * 2; 99 } 100 } 101 102 /* FINISHME: push constants */ 103 c->prog_data.curb_read_length = reg - 1; 104 c->prog_data.nr_params = 0; 105 /* XXX 0 causes a bug elsewhere... */ 106 if (intel->gen < 6 && c->prog_data.nr_params == 0) 107 c->prog_data.nr_params = 4; 108 109 reg += setup_attributes(reg); 110 111 this->first_non_payload_grf = reg; 112} 113 114struct brw_reg 115vec4_instruction::get_dst(void) 116{ 117 struct brw_reg brw_reg; 118 119 switch (dst.file) { 120 case GRF: 121 assert(dst.reg_offset == 0); 122 brw_reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0); 123 brw_reg = retype(brw_reg, dst.type); 124 brw_reg.dw1.bits.writemask = dst.writemask; 125 break; 126 127 case HW_REG: 128 brw_reg = dst.fixed_hw_reg; 129 break; 130 131 case BAD_FILE: 132 brw_reg = brw_null_reg(); 133 break; 134 135 default: 136 assert(!"not reached"); 137 brw_reg = brw_null_reg(); 138 break; 139 } 140 return brw_reg; 141} 142 143struct brw_reg 144vec4_instruction::get_src(int i) 145{ 146 struct brw_reg brw_reg; 147 148 switch (src[i].file) { 149 case GRF: 150 brw_reg = brw_vec8_grf(src[i].reg + src[i].reg_offset, 0); 151 brw_reg = retype(brw_reg, src[i].type); 152 brw_reg.dw1.bits.swizzle = src[i].swizzle; 153 if (src[i].abs) 154 brw_reg = brw_abs(brw_reg); 155 if (src[i].negate) 156 brw_reg = negate(brw_reg); 157 break; 158 159 case IMM: 160 switch (src[i].type) { 161 case BRW_REGISTER_TYPE_F: 162 brw_reg = brw_imm_f(src[i].imm.f); 163 break; 164 case BRW_REGISTER_TYPE_D: 165 brw_reg = brw_imm_d(src[i].imm.i); 166 break; 167 case BRW_REGISTER_TYPE_UD: 168 brw_reg = brw_imm_ud(src[i].imm.u); 169 break; 170 default: 171 assert(!"not reached"); 172 brw_reg = brw_null_reg(); 173 break; 174 } 175 break; 176 177 case HW_REG: 178 brw_reg = src[i].fixed_hw_reg; 179 break; 180 181 case BAD_FILE: 182 /* Probably unused. */ 183 brw_reg = brw_null_reg(); 184 break; 185 case ATTR: 186 default: 187 assert(!"not reached"); 188 brw_reg = brw_null_reg(); 189 break; 190 } 191 192 return brw_reg; 193} 194 195void 196vec4_visitor::generate_math1_gen4(vec4_instruction *inst, 197 struct brw_reg dst, 198 struct brw_reg src) 199{ 200 brw_math(p, 201 dst, 202 brw_math_function(inst->opcode), 203 BRW_MATH_SATURATE_NONE, 204 inst->base_mrf, 205 src, 206 BRW_MATH_DATA_SCALAR, 207 BRW_MATH_PRECISION_FULL); 208} 209 210void 211vec4_visitor::generate_math1_gen6(vec4_instruction *inst, 212 struct brw_reg dst, 213 struct brw_reg src) 214{ 215 brw_math(p, 216 dst, 217 brw_math_function(inst->opcode), 218 BRW_MATH_SATURATE_NONE, 219 inst->base_mrf, 220 src, 221 BRW_MATH_DATA_SCALAR, 222 BRW_MATH_PRECISION_FULL); 223} 224 225void 226vec4_visitor::generate_urb_write(vec4_instruction *inst) 227{ 228 brw_urb_WRITE(p, 229 brw_null_reg(), /* dest */ 230 inst->base_mrf, /* starting mrf reg nr */ 231 brw_vec8_grf(0, 0), /* src */ 232 false, /* allocate */ 233 true, /* used */ 234 inst->mlen, 235 0, /* response len */ 236 inst->eot, /* eot */ 237 inst->eot, /* writes complete */ 238 inst->offset, /* urb destination offset */ 239 BRW_URB_SWIZZLE_INTERLEAVE); 240} 241 242void 243vec4_visitor::generate_vs_instruction(vec4_instruction *instruction, 244 struct brw_reg dst, 245 struct brw_reg *src) 246{ 247 vec4_instruction *inst = (vec4_instruction *)instruction; 248 249 switch (inst->opcode) { 250 case SHADER_OPCODE_RCP: 251 case SHADER_OPCODE_RSQ: 252 case SHADER_OPCODE_SQRT: 253 case SHADER_OPCODE_EXP2: 254 case SHADER_OPCODE_LOG2: 255 case SHADER_OPCODE_SIN: 256 case SHADER_OPCODE_COS: 257 if (intel->gen >= 6) { 258 generate_math1_gen6(inst, dst, src[0]); 259 } else { 260 generate_math1_gen4(inst, dst, src[0]); 261 } 262 break; 263 264 case SHADER_OPCODE_POW: 265 assert(!"finishme"); 266 break; 267 268 case VS_OPCODE_URB_WRITE: 269 generate_urb_write(inst); 270 break; 271 272 default: 273 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) { 274 fail("unsupported opcode in `%s' in VS\n", 275 brw_opcodes[inst->opcode].name); 276 } else { 277 fail("Unsupported opcode %d in VS", inst->opcode); 278 } 279 } 280} 281 282bool 283vec4_visitor::run() 284{ 285 /* Generate FS IR for main(). (the visitor only descends into 286 * functions called "main"). 287 */ 288 foreach_iter(exec_list_iterator, iter, *shader->ir) { 289 ir_instruction *ir = (ir_instruction *)iter.get(); 290 base_ir = ir; 291 ir->accept(this); 292 } 293 294 emit_urb_writes(); 295 296 if (failed) 297 return false; 298 299 setup_payload(); 300 reg_allocate(); 301 302 brw_set_access_mode(p, BRW_ALIGN_16); 303 304 generate_code(); 305 306 return !failed; 307} 308 309void 310vec4_visitor::generate_code() 311{ 312 int last_native_inst = p->nr_insn; 313 const char *last_annotation_string = NULL; 314 ir_instruction *last_annotation_ir = NULL; 315 316 int loop_stack_array_size = 16; 317 int loop_stack_depth = 0; 318 brw_instruction **loop_stack = 319 rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size); 320 int *if_depth_in_loop = 321 rzalloc_array(this->mem_ctx, int, loop_stack_array_size); 322 323 324 if (unlikely(INTEL_DEBUG & DEBUG_VS)) { 325 printf("Native code for vertex shader %d:\n", prog->Name); 326 } 327 328 foreach_list(node, &this->instructions) { 329 vec4_instruction *inst = (vec4_instruction *)node; 330 struct brw_reg src[3], dst; 331 332 if (unlikely(INTEL_DEBUG & DEBUG_VS)) { 333 if (last_annotation_ir != inst->ir) { 334 last_annotation_ir = inst->ir; 335 if (last_annotation_ir) { 336 printf(" "); 337 last_annotation_ir->print(); 338 printf("\n"); 339 } 340 } 341 if (last_annotation_string != inst->annotation) { 342 last_annotation_string = inst->annotation; 343 if (last_annotation_string) 344 printf(" %s\n", last_annotation_string); 345 } 346 } 347 348 for (unsigned int i = 0; i < 3; i++) { 349 src[i] = inst->get_src(i); 350 } 351 dst = inst->get_dst(); 352 353 brw_set_conditionalmod(p, inst->conditional_mod); 354 brw_set_predicate_control(p, inst->predicate); 355 brw_set_predicate_inverse(p, inst->predicate_inverse); 356 brw_set_saturate(p, inst->saturate); 357 358 switch (inst->opcode) { 359 case BRW_OPCODE_MOV: 360 brw_MOV(p, dst, src[0]); 361 break; 362 case BRW_OPCODE_ADD: 363 brw_ADD(p, dst, src[0], src[1]); 364 break; 365 case BRW_OPCODE_MUL: 366 brw_MUL(p, dst, src[0], src[1]); 367 break; 368 369 case BRW_OPCODE_FRC: 370 brw_FRC(p, dst, src[0]); 371 break; 372 case BRW_OPCODE_RNDD: 373 brw_RNDD(p, dst, src[0]); 374 break; 375 case BRW_OPCODE_RNDE: 376 brw_RNDE(p, dst, src[0]); 377 break; 378 case BRW_OPCODE_RNDZ: 379 brw_RNDZ(p, dst, src[0]); 380 break; 381 382 case BRW_OPCODE_AND: 383 brw_AND(p, dst, src[0], src[1]); 384 break; 385 case BRW_OPCODE_OR: 386 brw_OR(p, dst, src[0], src[1]); 387 break; 388 case BRW_OPCODE_XOR: 389 brw_XOR(p, dst, src[0], src[1]); 390 break; 391 case BRW_OPCODE_NOT: 392 brw_NOT(p, dst, src[0]); 393 break; 394 case BRW_OPCODE_ASR: 395 brw_ASR(p, dst, src[0], src[1]); 396 break; 397 case BRW_OPCODE_SHR: 398 brw_SHR(p, dst, src[0], src[1]); 399 break; 400 case BRW_OPCODE_SHL: 401 brw_SHL(p, dst, src[0], src[1]); 402 break; 403 404 case BRW_OPCODE_CMP: 405 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]); 406 break; 407 case BRW_OPCODE_SEL: 408 brw_SEL(p, dst, src[0], src[1]); 409 break; 410 411 case BRW_OPCODE_IF: 412 if (inst->src[0].file != BAD_FILE) { 413 /* The instruction has an embedded compare (only allowed on gen6) */ 414 assert(intel->gen == 6); 415 gen6_IF(p, inst->conditional_mod, src[0], src[1]); 416 } else { 417 brw_IF(p, BRW_EXECUTE_8); 418 } 419 if_depth_in_loop[loop_stack_depth]++; 420 break; 421 422 case BRW_OPCODE_ELSE: 423 brw_ELSE(p); 424 break; 425 case BRW_OPCODE_ENDIF: 426 brw_ENDIF(p); 427 if_depth_in_loop[loop_stack_depth]--; 428 break; 429 430 case BRW_OPCODE_DO: 431 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8); 432 if (loop_stack_array_size <= loop_stack_depth) { 433 loop_stack_array_size *= 2; 434 loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *, 435 loop_stack_array_size); 436 if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int, 437 loop_stack_array_size); 438 } 439 if_depth_in_loop[loop_stack_depth] = 0; 440 break; 441 442 case BRW_OPCODE_BREAK: 443 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]); 444 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 445 break; 446 case BRW_OPCODE_CONTINUE: 447 /* FINISHME: We need to write the loop instruction support still. */ 448 if (intel->gen >= 6) 449 gen6_CONT(p, loop_stack[loop_stack_depth - 1]); 450 else 451 brw_CONT(p, if_depth_in_loop[loop_stack_depth]); 452 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 453 break; 454 455 case BRW_OPCODE_WHILE: { 456 struct brw_instruction *inst0, *inst1; 457 GLuint br = 1; 458 459 if (intel->gen >= 5) 460 br = 2; 461 462 assert(loop_stack_depth > 0); 463 loop_stack_depth--; 464 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]); 465 if (intel->gen < 6) { 466 /* patch all the BREAK/CONT instructions from last BGNLOOP */ 467 while (inst0 > loop_stack[loop_stack_depth]) { 468 inst0--; 469 if (inst0->header.opcode == BRW_OPCODE_BREAK && 470 inst0->bits3.if_else.jump_count == 0) { 471 inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); 472 } 473 else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && 474 inst0->bits3.if_else.jump_count == 0) { 475 inst0->bits3.if_else.jump_count = br * (inst1 - inst0); 476 } 477 } 478 } 479 } 480 break; 481 482 default: 483 generate_vs_instruction(inst, dst, src); 484 break; 485 } 486 487 if (unlikely(INTEL_DEBUG & DEBUG_VS)) { 488 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) { 489 if (0) { 490 printf("0x%08x 0x%08x 0x%08x 0x%08x ", 491 ((uint32_t *)&p->store[i])[3], 492 ((uint32_t *)&p->store[i])[2], 493 ((uint32_t *)&p->store[i])[1], 494 ((uint32_t *)&p->store[i])[0]); 495 } 496 brw_disasm(stdout, &p->store[i], intel->gen); 497 } 498 } 499 500 last_native_inst = p->nr_insn; 501 } 502 503 if (unlikely(INTEL_DEBUG & DEBUG_VS)) { 504 printf("\n"); 505 } 506 507 ralloc_free(loop_stack); 508 ralloc_free(if_depth_in_loop); 509 510 brw_set_uip_jip(p); 511 512 /* OK, while the INTEL_DEBUG=vs above is very nice for debugging VS 513 * emit issues, it doesn't get the jump distances into the output, 514 * which is often something we want to debug. So this is here in 515 * case you're doing that. 516 */ 517 if (0) { 518 if (unlikely(INTEL_DEBUG & DEBUG_VS)) { 519 for (unsigned int i = 0; i < p->nr_insn; i++) { 520 printf("0x%08x 0x%08x 0x%08x 0x%08x ", 521 ((uint32_t *)&p->store[i])[3], 522 ((uint32_t *)&p->store[i])[2], 523 ((uint32_t *)&p->store[i])[1], 524 ((uint32_t *)&p->store[i])[0]); 525 brw_disasm(stdout, &p->store[i], intel->gen); 526 } 527 } 528 } 529} 530 531extern "C" { 532 533bool 534brw_vs_emit(struct brw_vs_compile *c) 535{ 536 struct brw_compile *p = &c->func; 537 struct brw_context *brw = p->brw; 538 struct intel_context *intel = &brw->intel; 539 struct gl_context *ctx = &intel->ctx; 540 struct gl_shader_program *prog = ctx->Shader.CurrentVertexProgram; 541 542 if (!prog) 543 return false; 544 545 struct brw_shader *shader = 546 (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX]; 547 if (!shader) 548 return false; 549 550 if (unlikely(INTEL_DEBUG & DEBUG_VS)) { 551 printf("GLSL IR for native vertex shader %d:\n", prog->Name); 552 _mesa_print_ir(shader->ir, NULL); 553 printf("\n\n"); 554 } 555 556 vec4_visitor v(c, prog, shader); 557 if (!v.run()) { 558 /* FINISHME: Cleanly fail, test at link time, etc. */ 559 assert(!"not reached"); 560 return false; 561 } 562 563 return true; 564} 565 566} /* extern "C" */ 567 568} /* namespace brw */ 569