// brw_vec4_emit.cpp — revision c3752b399ab376aa53392afb8f2d4b526054f0a8
/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "brw_vec4.h"
#include "../glsl/ir_print_visitor.h"

extern "C" {
#include "brw_eu.h"
};

using namespace brw;

namespace brw {

/* Assign a payload GRF to each vertex attribute the program reads, and
 * rewrite every ATTR source in the instruction list into the corresponding
 * fixed HW register (carrying the source's swizzle along).  Also records
 * the resulting URB read length (in pairs of attributes) in prog_data.
 *
 * \param payload_reg first GRF available for attribute data.
 * \return the first GRF after the attribute payload.
 */
int
vec4_visitor::setup_attributes(int payload_reg)
{
   int nr_attributes;
   int attribute_map[VERT_ATTRIB_MAX];

   /* Build the VERT_ATTRIB index -> GRF number map, packing only the
    * attributes actually read by the program.
    */
   nr_attributes = 0;
   for (int i = 0; i < VERT_ATTRIB_MAX; i++) {
      if (prog_data->inputs_read & BITFIELD64_BIT(i)) {
         attribute_map[i] = payload_reg + nr_attributes;
         nr_attributes++;
      }
   }

   /* Patch every ATTR source to its fixed payload GRF. */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      vec4_instruction *inst = (vec4_instruction *)iter.get();

      for (int i = 0; i < 3; i++) {
         if (inst->src[i].file != ATTR)
            continue;

         inst->src[i].file = HW_REG;
         inst->src[i].fixed_hw_reg = brw_vec8_grf(attribute_map[inst->src[i].reg], 0);
         inst->src[i].fixed_hw_reg.dw1.bits.swizzle = inst->src[i].swizzle;
      }
   }

   /* The BSpec says we always have to read at least one thing from
    * the VF, and it appears that the hardware wedges otherwise.
    */
   if (nr_attributes == 0)
      nr_attributes = 1;

   /* URB reads are counted in pairs of vec4 attributes; round up. */
   prog_data->urb_read_length = (nr_attributes + 1) / 2;

   return payload_reg + nr_attributes;
}

/* Lay out the push-constant (curbe) section of the payload: user clip
 * planes first (when enabled), then the shader uniforms.  Two vec4 slots
 * share each GRF, hence the /2 arithmetic throughout.
 *
 * \param reg first GRF available for push constants.
 * \return the first GRF after the push-constant section.
 */
int
vec4_visitor::setup_uniforms(int reg)
{
   /* User clip planes from curbe:
    */
   if (c->key.nr_userclip) {
      if (intel->gen >= 6) {
         for (int i = 0; i < c->key.nr_userclip; i++) {
            c->userplane[i] = stride(brw_vec4_grf(reg + i / 2,
                                                  (i % 2) * 4), 0, 4, 1);
         }
         reg += ALIGN(c->key.nr_userclip, 2) / 2;
      } else {
         /* Pre-gen6 the clip planes live at a fixed 6-slot offset into
          * the curbe section.
          */
         for (int i = 0; i < c->key.nr_userclip; i++) {
            c->userplane[i] = stride(brw_vec4_grf(reg + (6 + i) / 2,
                                                  (i % 2) * 4), 0, 4, 1);
         }
         reg += (ALIGN(6 + c->key.nr_userclip, 4) / 4) * 2;
      }
   }

   /* The pre-gen6 VS requires that some push constants get loaded no
    * matter what, or the GPU would hang.
    */
   if (intel->gen < 6 && this->uniforms == 0) {
      /* Synthesize one dummy vec4 uniform whose four components are
       * converted to zero at upload time.
       */
      this->uniform_size[this->uniforms] = 1;

      for (unsigned int i = 0; i < 4; i++) {
         unsigned int slot = this->uniforms * 4 + i;

         c->prog_data.param[slot] = NULL;
         c->prog_data.param_convert[slot] = PARAM_CONVERT_ZERO;
      }

      this->uniforms++;
      reg++;
   } else {
      reg += ALIGN(uniforms, 2) / 2;
   }

   /* for now, we are not doing any elimination of unused slots, nor
    * are we packing our uniforms.
    */
   c->prog_data.nr_params = this->uniforms * 4;

   /* curbe_read_length excludes g0, which reg still counts. */
   c->prog_data.curb_read_length = reg - 1;
   c->prog_data.uses_new_param_layout = true;

   return reg;
}

/* Lay out the whole thread payload — g0, push constants, then vertex
 * attributes — and record where general-purpose registers may start.
 */
void
vec4_visitor::setup_payload(void)
{
   int reg = 0;

   /* The payload always contains important data in g0, which contains
    * the URB handles that are passed on to the URB write at the end
    * of the thread.  So, we always start push constants at g1.
    */
   reg++;

   reg = setup_uniforms(reg);

   reg = setup_attributes(reg);

   this->first_non_payload_grf = reg;
}

/* Translate this instruction's destination into the struct brw_reg form
 * consumed by the brw_eu emission helpers, applying type and writemask.
 */
struct brw_reg
vec4_instruction::get_dst(void)
{
   struct brw_reg brw_reg;

   switch (dst.file) {
   case GRF:
      brw_reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0);
      brw_reg = retype(brw_reg, dst.type);
      brw_reg.dw1.bits.writemask = dst.writemask;
      break;

   case HW_REG:
      brw_reg = dst.fixed_hw_reg;
      break;

   case BAD_FILE:
      /* No destination: write to the null register. */
      brw_reg = brw_null_reg();
      break;

   default:
      assert(!"not reached");
      brw_reg = brw_null_reg();
      break;
   }
   return brw_reg;
}

/* Translate source i into the struct brw_reg form consumed by the brw_eu
 * emission helpers, applying type, swizzle, abs and negate.  ATTR sources
 * must already have been rewritten to HW_REG by setup_attributes().
 */
struct brw_reg
vec4_instruction::get_src(int i)
{
   struct brw_reg brw_reg;

   switch (src[i].file) {
   case GRF:
      brw_reg = brw_vec8_grf(src[i].reg + src[i].reg_offset, 0);
      brw_reg = retype(brw_reg, src[i].type);
      brw_reg.dw1.bits.swizzle = src[i].swizzle;
      if (src[i].abs)
         brw_reg = brw_abs(brw_reg);
      if (src[i].negate)
         brw_reg = negate(brw_reg);
      break;

   case IMM:
      switch (src[i].type) {
      case BRW_REGISTER_TYPE_F:
         brw_reg = brw_imm_f(src[i].imm.f);
         break;
      case BRW_REGISTER_TYPE_D:
         brw_reg = brw_imm_d(src[i].imm.i);
         break;
      case BRW_REGISTER_TYPE_UD:
         brw_reg = brw_imm_ud(src[i].imm.u);
         break;
      default:
         assert(!"not reached");
         brw_reg = brw_null_reg();
         break;
      }
      break;

   case UNIFORM:
      /* Push constants start at g1 (after g0), two vec4 slots per GRF;
       * mirror of the layout produced by setup_uniforms().
       */
      brw_reg = stride(brw_vec4_grf(1 + (src[i].reg + src[i].reg_offset) / 2,
                                    ((src[i].reg + src[i].reg_offset) % 2) * 4),
                       0, 4, 1);
      brw_reg = retype(brw_reg, src[i].type);
      brw_reg.dw1.bits.swizzle = src[i].swizzle;
      if (src[i].abs)
         brw_reg = brw_abs(brw_reg);
      if (src[i].negate)
         brw_reg = negate(brw_reg);
      break;

   case HW_REG:
      brw_reg = src[i].fixed_hw_reg;
      break;

   case BAD_FILE:
      /* Probably unused. */
      brw_reg = brw_null_reg();
      break;
   case ATTR:
   default:
      assert(!"not reached");
      brw_reg = brw_null_reg();
      break;
   }

   return brw_reg;
}

/* Emit a single-source math instruction (RCP/RSQ/SQRT/EXP2/LOG2/SIN/COS)
 * for pre-gen6, using the math message with base_mrf as the message
 * register.
 */
void
vec4_visitor::generate_math1_gen4(vec4_instruction *inst,
                                  struct brw_reg dst,
                                  struct brw_reg src)
{
   brw_math(p,
            dst,
            brw_math_function(inst->opcode),
            BRW_MATH_SATURATE_NONE,
            inst->base_mrf,
            src,
            BRW_MATH_DATA_SCALAR,
            BRW_MATH_PRECISION_FULL);
}

/* Gen6 variant of the single-source math emission.
 *
 * NOTE(review): at this revision the body is identical to
 * generate_math1_gen4(); presumably kept separate so gen6-specific
 * operand restrictions can be handled here later — confirm against
 * later revisions before merging the two.
 */
void
vec4_visitor::generate_math1_gen6(vec4_instruction *inst,
                                  struct brw_reg dst,
                                  struct brw_reg src)
{
   brw_math(p,
            dst,
            brw_math_function(inst->opcode),
            BRW_MATH_SATURATE_NONE,
            inst->base_mrf,
            src,
            BRW_MATH_DATA_SCALAR,
            BRW_MATH_PRECISION_FULL);
}

/* Emit the URB write message delivering this thread's vertex results,
 * using the MRFs starting at inst->base_mrf and the URB handles in g0.
 */
void
vec4_visitor::generate_urb_write(vec4_instruction *inst)
{
   brw_urb_WRITE(p,
                 brw_null_reg(), /* dest */
                 inst->base_mrf, /* starting mrf reg nr */
                 brw_vec8_grf(0, 0), /* src */
                 false,         /* allocate */
                 true,          /* used */
                 inst->mlen,
                 0,             /* response len */
                 inst->eot,     /* eot */
                 inst->eot,     /* writes complete */
                 inst->offset,  /* urb destination offset */
                 BRW_URB_SWIZZLE_INTERLEAVE);
}

/* Emit code for the opcodes that aren't single native ALU instructions:
 * math-unit operations and URB writes.  Called from generate_code()'s
 * default switch case; records failure for anything unsupported.
 */
void
vec4_visitor::generate_vs_instruction(vec4_instruction *instruction,
                                      struct brw_reg dst,
                                      struct brw_reg *src)
{
   vec4_instruction *inst = (vec4_instruction *)instruction;

   switch (inst->opcode) {
   case SHADER_OPCODE_RCP:
   case SHADER_OPCODE_RSQ:
   case SHADER_OPCODE_SQRT:
   case SHADER_OPCODE_EXP2:
   case SHADER_OPCODE_LOG2:
   case SHADER_OPCODE_SIN:
   case SHADER_OPCODE_COS:
      if (intel->gen >= 6) {
         generate_math1_gen6(inst, dst, src[0]);
      } else {
         generate_math1_gen4(inst, dst, src[0]);
      }
      break;

   case SHADER_OPCODE_POW:
      assert(!"finishme");
      break;

   case VS_OPCODE_URB_WRITE:
      generate_urb_write(inst);
      break;

   default:
      if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
         fail("unsupported opcode in `%s' in VS\n",
              brw_opcodes[inst->opcode].name);
      } else {
         fail("Unsupported opcode %d in VS", inst->opcode);
      }
   }
}

/* Top-level driver for the vec4 backend: visit the GLSL IR to build vec4
 * IR, append the URB writes, lay out the payload, allocate registers and
 * generate native code.
 *
 * \return false if any stage recorded a failure.
 */
bool
vec4_visitor::run()
{
   /* Generate vec4 IR for main().  (the visitor only descends into
    * functions called "main").
    */
   foreach_iter(exec_list_iterator, iter, *shader->ir) {
      ir_instruction *ir = (ir_instruction *)iter.get();
      base_ir = ir;
      ir->accept(this);
   }

   emit_urb_writes();

   if (failed)
      return false;

   setup_payload();
   reg_allocate();

   /* Vec4 code is emitted in Align16 mode throughout. */
   brw_set_access_mode(p, BRW_ALIGN_16);

   generate_code();

   return !failed;
}

/* Main code-generation loop: walk the vec4 instruction list and emit
 * native instructions through the brw_eu helpers.  Maintains a dynamic
 * stack of open loops (loop_stack) and, per nesting level, the count of
 * IFs currently open inside that loop (if_depth_in_loop), which pre-gen6
 * BREAK/CONT emission needs.  With INTEL_DEBUG=vs, disassembles as it
 * goes.
 */
void
vec4_visitor::generate_code()
{
   int last_native_inst = p->nr_insn;
   const char *last_annotation_string = NULL;
   ir_instruction *last_annotation_ir = NULL;

   int loop_stack_array_size = 16;
   int loop_stack_depth = 0;
   brw_instruction **loop_stack =
      rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size);
   int *if_depth_in_loop =
      rzalloc_array(this->mem_ctx, int, loop_stack_array_size);


   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
      printf("Native code for vertex shader %d:\n", prog->Name);
   }

   foreach_list(node, &this->instructions) {
      vec4_instruction *inst = (vec4_instruction *)node;
      struct brw_reg src[3], dst;

      /* Print the source IR / annotation once per change, so the
       * disassembly below is grouped under it.
       */
      if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
         if (last_annotation_ir != inst->ir) {
            last_annotation_ir = inst->ir;
            if (last_annotation_ir) {
               printf(" ");
               last_annotation_ir->print();
               printf("\n");
            }
         }
         if (last_annotation_string != inst->annotation) {
            last_annotation_string = inst->annotation;
            if (last_annotation_string)
               printf(" %s\n", last_annotation_string);
         }
      }

      for (unsigned int i = 0; i < 3; i++) {
         src[i] = inst->get_src(i);
      }
      dst = inst->get_dst();

      /* Set up the EU state that applies to the next emitted
       * instruction(s).
       */
      brw_set_conditionalmod(p, inst->conditional_mod);
      brw_set_predicate_control(p, inst->predicate);
      brw_set_predicate_inverse(p, inst->predicate_inverse);
      brw_set_saturate(p, inst->saturate);

      switch (inst->opcode) {
      case BRW_OPCODE_MOV:
         brw_MOV(p, dst, src[0]);
         break;
      case BRW_OPCODE_ADD:
         brw_ADD(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_MUL:
         brw_MUL(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_FRC:
         brw_FRC(p, dst, src[0]);
         break;
      case BRW_OPCODE_RNDD:
         brw_RNDD(p, dst, src[0]);
         break;
      case BRW_OPCODE_RNDE:
         brw_RNDE(p, dst, src[0]);
         break;
      case BRW_OPCODE_RNDZ:
         brw_RNDZ(p, dst, src[0]);
         break;

      case BRW_OPCODE_AND:
         brw_AND(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_OR:
         brw_OR(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_XOR:
         brw_XOR(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_NOT:
         brw_NOT(p, dst, src[0]);
         break;
      case BRW_OPCODE_ASR:
         brw_ASR(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_SHR:
         brw_SHR(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_SHL:
         brw_SHL(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_CMP:
         brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
         break;
      case BRW_OPCODE_SEL:
         brw_SEL(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_DP4:
         brw_DP4(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_DP3:
         brw_DP3(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_DP2:
         brw_DP2(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_IF:
         if (inst->src[0].file != BAD_FILE) {
            /* The instruction has an embedded compare (only allowed on gen6) */
            assert(intel->gen == 6);
            gen6_IF(p, inst->conditional_mod, src[0], src[1]);
         } else {
            brw_IF(p, BRW_EXECUTE_8);
         }
         if_depth_in_loop[loop_stack_depth]++;
         break;

      case BRW_OPCODE_ELSE:
         brw_ELSE(p);
         break;
      case BRW_OPCODE_ENDIF:
         brw_ENDIF(p);
         if_depth_in_loop[loop_stack_depth]--;
         break;

      case BRW_OPCODE_DO:
         loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
         /* Grow both per-loop arrays on demand.  The store above indexed
          * the pre-increment depth, which is always in bounds because we
          * grow as soon as depth reaches the capacity.
          */
         if (loop_stack_array_size <= loop_stack_depth) {
            loop_stack_array_size *= 2;
            loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *,
                                  loop_stack_array_size);
            if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int,
                                        loop_stack_array_size);
         }
         if_depth_in_loop[loop_stack_depth] = 0;
         break;

      case BRW_OPCODE_BREAK:
         /* Pass the number of IFs currently open inside this loop so the
          * BREAK can pop through them.
          */
         brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
         break;
      case BRW_OPCODE_CONTINUE:
         /* FINISHME: We need to write the loop instruction support still. */
         if (intel->gen >= 6)
            gen6_CONT(p, loop_stack[loop_stack_depth - 1]);
         else
            brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
         break;

      case BRW_OPCODE_WHILE: {
         struct brw_instruction *inst0, *inst1;
         /* Jump counts are in units of 64-bit chunks pre-gen5 and whole
          * 128-bit instructions from gen5 on.
          */
         GLuint br = 1;

         if (intel->gen >= 5)
            br = 2;

         assert(loop_stack_depth > 0);
         loop_stack_depth--;
         inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
         if (intel->gen < 6) {
            /* patch all the BREAK/CONT instructions from last BGNLOOP */
            while (inst0 > loop_stack[loop_stack_depth]) {
               inst0--;
               if (inst0->header.opcode == BRW_OPCODE_BREAK &&
                   inst0->bits3.if_else.jump_count == 0) {
                  inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
               }
               else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
                        inst0->bits3.if_else.jump_count == 0) {
                  inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
               }
            }
         }
      }
         break;

      default:
         generate_vs_instruction(inst, dst, src);
         break;
      }

      /* Disassemble whatever native instructions this vec4 instruction
       * expanded to.
       */
      if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
         for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
            if (0) {
               printf("0x%08x 0x%08x 0x%08x 0x%08x ",
                      ((uint32_t *)&p->store[i])[3],
                      ((uint32_t *)&p->store[i])[2],
                      ((uint32_t *)&p->store[i])[1],
                      ((uint32_t *)&p->store[i])[0]);
            }
            brw_disasm(stdout, &p->store[i], intel->gen);
         }
      }

      last_native_inst = p->nr_insn;
   }

   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
      printf("\n");
   }

   ralloc_free(loop_stack);
   ralloc_free(if_depth_in_loop);

   brw_set_uip_jip(p);

   /* OK, while the INTEL_DEBUG=vs above is very nice for debugging VS
    * emit issues, it doesn't get the jump distances into the output,
    * which is often something we want to debug.  So this is here in
    * case you're doing that.
    */
   if (0) {
      if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
         for (unsigned int i = 0; i < p->nr_insn; i++) {
            printf("0x%08x 0x%08x 0x%08x 0x%08x ",
                   ((uint32_t *)&p->store[i])[3],
                   ((uint32_t *)&p->store[i])[2],
                   ((uint32_t *)&p->store[i])[1],
                   ((uint32_t *)&p->store[i])[0]);
            brw_disasm(stdout, &p->store[i], intel->gen);
         }
      }
   }
}

extern "C" {

/* C-callable entry point: compile the currently-bound GLSL vertex shader
 * with the vec4 backend.
 *
 * \return true on success; false when there is no linked GLSL vertex
 *         shader to compile (or, once failure is handled cleanly, when
 *         compilation fails).
 */
bool
brw_vs_emit(struct brw_vs_compile *c)
{
   struct brw_compile *p = &c->func;
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
   struct gl_shader_program *prog = ctx->Shader.CurrentVertexProgram;

   if (!prog)
      return false;

   struct brw_shader *shader =
      (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
   if (!shader)
      return false;

   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
      printf("GLSL IR for native vertex shader %d:\n", prog->Name);
      _mesa_print_ir(shader->ir, NULL);
      printf("\n\n");
   }

   vec4_visitor v(c, prog, shader);
   if (!v.run()) {
      /* FINISHME: Cleanly fail, test at link time, etc. */
      assert(!"not reached");
      return false;
   }

   return true;
}

} /* extern "C" */

} /* namespace brw */