brw_vec4_emit.cpp revision 814a9bef30beda427e8fbf6f3b8abb6a45f0e2e4
1/* 2 * Copyright © 2011 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "brw_vec4.h" 25#include "../glsl/ir_print_visitor.h" 26 27extern "C" { 28#include "brw_eu.h" 29}; 30 31using namespace brw; 32 33namespace brw { 34 35int 36vec4_visitor::setup_attributes(int payload_reg) 37{ 38 int nr_attributes; 39 int attribute_map[VERT_ATTRIB_MAX]; 40 41 nr_attributes = 0; 42 for (int i = 0; i < VERT_ATTRIB_MAX; i++) { 43 if (prog_data->inputs_read & BITFIELD64_BIT(i)) { 44 attribute_map[i] = payload_reg + nr_attributes; 45 nr_attributes++; 46 } 47 } 48 49 foreach_iter(exec_list_iterator, iter, this->instructions) { 50 vec4_instruction *inst = (vec4_instruction *)iter.get(); 51 52 for (int i = 0; i < 3; i++) { 53 if (inst->src[i].file != ATTR) 54 continue; 55 56 inst->src[i].file = HW_REG; 57 inst->src[i].fixed_hw_reg = brw_vec8_grf(attribute_map[inst->src[i].reg], 0); 58 inst->src[i].fixed_hw_reg.dw1.bits.swizzle = inst->src[i].swizzle; 59 } 60 } 61 62 /* The BSpec says we always have to read at least one thing from 63 * the VF, and it appears that the hardware wedges otherwise. 64 */ 65 if (nr_attributes == 0) 66 nr_attributes = 1; 67 68 prog_data->urb_read_length = (nr_attributes + 1) / 2; 69 70 return payload_reg + nr_attributes; 71} 72 73int 74vec4_visitor::setup_uniforms(int reg) 75{ 76 /* User clip planes from curbe: 77 */ 78 if (c->key.nr_userclip) { 79 if (intel->gen >= 6) { 80 for (int i = 0; i < c->key.nr_userclip; i++) { 81 c->userplane[i] = stride(brw_vec4_grf(reg + i / 2, 82 (i % 2) * 4), 0, 4, 1); 83 } 84 reg += ALIGN(c->key.nr_userclip, 2) / 2; 85 } else { 86 for (int i = 0; i < c->key.nr_userclip; i++) { 87 c->userplane[i] = stride(brw_vec4_grf(reg + (6 + i) / 2, 88 (i % 2) * 4), 0, 4, 1); 89 } 90 reg += (ALIGN(6 + c->key.nr_userclip, 4) / 4) * 2; 91 } 92 } 93 94 /* The pre-gen6 VS requires that some push constants get loaded no 95 * matter what, or the GPU would hang. 96 */ 97 if (intel->gen < 6 && this->uniforms == 0) { 98 this->uniform_size[this->uniforms] = 1; 99 100 for (unsigned int i = 0; i < 4; i++) { 101 unsigned int slot = this->uniforms * 4 + i; 102 103 c->prog_data.param[slot] = NULL; 104 c->prog_data.param_convert[slot] = PARAM_CONVERT_ZERO; 105 } 106 107 this->uniforms++; 108 reg++; 109 } else { 110 reg += ALIGN(uniforms, 2) / 2; 111 } 112 113 /* for now, we are not doing any elimination of unused slots, nor 114 * are we packing our uniforms. 115 */ 116 c->prog_data.nr_params = this->uniforms * 4; 117 118 c->prog_data.curb_read_length = reg - 1; 119 c->prog_data.uses_new_param_layout = true; 120 121 return reg; 122} 123 124void 125vec4_visitor::setup_payload(void) 126{ 127 int reg = 0; 128 129 /* The payload always contains important data in g0, which contains 130 * the URB handles that are passed on to the URB write at the end 131 * of the thread. So, we always start push constants at g1. 132 */ 133 reg++; 134 135 reg = setup_uniforms(reg); 136 137 reg = setup_attributes(reg); 138 139 this->first_non_payload_grf = reg; 140} 141 142struct brw_reg 143vec4_instruction::get_dst(void) 144{ 145 struct brw_reg brw_reg; 146 147 switch (dst.file) { 148 case GRF: 149 brw_reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0); 150 brw_reg = retype(brw_reg, dst.type); 151 brw_reg.dw1.bits.writemask = dst.writemask; 152 break; 153 154 case HW_REG: 155 brw_reg = dst.fixed_hw_reg; 156 break; 157 158 case BAD_FILE: 159 brw_reg = brw_null_reg(); 160 break; 161 162 default: 163 assert(!"not reached"); 164 brw_reg = brw_null_reg(); 165 break; 166 } 167 return brw_reg; 168} 169 170struct brw_reg 171vec4_instruction::get_src(int i) 172{ 173 struct brw_reg brw_reg; 174 175 switch (src[i].file) { 176 case GRF: 177 brw_reg = brw_vec8_grf(src[i].reg + src[i].reg_offset, 0); 178 brw_reg = retype(brw_reg, src[i].type); 179 brw_reg.dw1.bits.swizzle = src[i].swizzle; 180 if (src[i].abs) 181 brw_reg = brw_abs(brw_reg); 182 if (src[i].negate) 183 brw_reg = negate(brw_reg); 184 break; 185 186 case IMM: 187 switch (src[i].type) { 188 case BRW_REGISTER_TYPE_F: 189 brw_reg = brw_imm_f(src[i].imm.f); 190 break; 191 case BRW_REGISTER_TYPE_D: 192 brw_reg = brw_imm_d(src[i].imm.i); 193 break; 194 case BRW_REGISTER_TYPE_UD: 195 brw_reg = brw_imm_ud(src[i].imm.u); 196 break; 197 default: 198 assert(!"not reached"); 199 brw_reg = brw_null_reg(); 200 break; 201 } 202 break; 203 204 case UNIFORM: 205 brw_reg = stride(brw_vec4_grf(1 + (src[i].reg + src[i].reg_offset) / 2, 206 ((src[i].reg + src[i].reg_offset) % 2) * 4), 207 0, 4, 1); 208 brw_reg = retype(brw_reg, src[i].type); 209 brw_reg.dw1.bits.swizzle = src[i].swizzle; 210 if (src[i].abs) 211 brw_reg = brw_abs(brw_reg); 212 if (src[i].negate) 213 brw_reg = negate(brw_reg); 214 break; 215 216 case HW_REG: 217 brw_reg = src[i].fixed_hw_reg; 218 break; 219 220 case BAD_FILE: 221 /* Probably unused. */ 222 brw_reg = brw_null_reg(); 223 break; 224 case ATTR: 225 default: 226 assert(!"not reached"); 227 brw_reg = brw_null_reg(); 228 break; 229 } 230 231 return brw_reg; 232} 233 234void 235vec4_visitor::generate_math1_gen4(vec4_instruction *inst, 236 struct brw_reg dst, 237 struct brw_reg src) 238{ 239 brw_math(p, 240 dst, 241 brw_math_function(inst->opcode), 242 BRW_MATH_SATURATE_NONE, 243 inst->base_mrf, 244 src, 245 BRW_MATH_DATA_SCALAR, 246 BRW_MATH_PRECISION_FULL); 247} 248 249void 250vec4_visitor::generate_math1_gen6(vec4_instruction *inst, 251 struct brw_reg dst, 252 struct brw_reg src) 253{ 254 brw_math(p, 255 dst, 256 brw_math_function(inst->opcode), 257 BRW_MATH_SATURATE_NONE, 258 inst->base_mrf, 259 src, 260 BRW_MATH_DATA_SCALAR, 261 BRW_MATH_PRECISION_FULL); 262} 263 264void 265vec4_visitor::generate_urb_write(vec4_instruction *inst) 266{ 267 brw_urb_WRITE(p, 268 brw_null_reg(), /* dest */ 269 inst->base_mrf, /* starting mrf reg nr */ 270 brw_vec8_grf(0, 0), /* src */ 271 false, /* allocate */ 272 true, /* used */ 273 inst->mlen, 274 0, /* response len */ 275 inst->eot, /* eot */ 276 inst->eot, /* writes complete */ 277 inst->offset, /* urb destination offset */ 278 BRW_URB_SWIZZLE_INTERLEAVE); 279} 280 281void 282vec4_visitor::generate_vs_instruction(vec4_instruction *instruction, 283 struct brw_reg dst, 284 struct brw_reg *src) 285{ 286 vec4_instruction *inst = (vec4_instruction *)instruction; 287 288 switch (inst->opcode) { 289 case SHADER_OPCODE_RCP: 290 case SHADER_OPCODE_RSQ: 291 case SHADER_OPCODE_SQRT: 292 case SHADER_OPCODE_EXP2: 293 case SHADER_OPCODE_LOG2: 294 case SHADER_OPCODE_SIN: 295 case SHADER_OPCODE_COS: 296 if (intel->gen >= 6) { 297 generate_math1_gen6(inst, dst, src[0]); 298 } else { 299 generate_math1_gen4(inst, dst, src[0]); 300 } 301 break; 302 303 case SHADER_OPCODE_POW: 304 assert(!"finishme"); 305 break; 306 307 case VS_OPCODE_URB_WRITE: 308 generate_urb_write(inst); 309 break; 310 311 default: 312 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) { 313 fail("unsupported opcode in `%s' in VS\n", 314 brw_opcodes[inst->opcode].name); 315 } else { 316 fail("Unsupported opcode %d in VS", inst->opcode); 317 } 318 } 319} 320 321bool 322vec4_visitor::run() 323{ 324 /* Generate FS IR for main(). (the visitor only descends into 325 * functions called "main"). 326 */ 327 foreach_iter(exec_list_iterator, iter, *shader->ir) { 328 ir_instruction *ir = (ir_instruction *)iter.get(); 329 base_ir = ir; 330 ir->accept(this); 331 } 332 333 emit_urb_writes(); 334 335 if (failed) 336 return false; 337 338 setup_payload(); 339 reg_allocate(); 340 341 brw_set_access_mode(p, BRW_ALIGN_16); 342 343 generate_code(); 344 345 return !failed; 346} 347 348void 349vec4_visitor::generate_code() 350{ 351 int last_native_inst = p->nr_insn; 352 const char *last_annotation_string = NULL; 353 ir_instruction *last_annotation_ir = NULL; 354 355 int loop_stack_array_size = 16; 356 int loop_stack_depth = 0; 357 brw_instruction **loop_stack = 358 rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size); 359 int *if_depth_in_loop = 360 rzalloc_array(this->mem_ctx, int, loop_stack_array_size); 361 362 363 if (unlikely(INTEL_DEBUG & DEBUG_VS)) { 364 printf("Native code for vertex shader %d:\n", prog->Name); 365 } 366 367 foreach_list(node, &this->instructions) { 368 vec4_instruction *inst = (vec4_instruction *)node; 369 struct brw_reg src[3], dst; 370 371 if (unlikely(INTEL_DEBUG & DEBUG_VS)) { 372 if (last_annotation_ir != inst->ir) { 373 last_annotation_ir = inst->ir; 374 if (last_annotation_ir) { 375 printf(" "); 376 last_annotation_ir->print(); 377 printf("\n"); 378 } 379 } 380 if (last_annotation_string != inst->annotation) { 381 last_annotation_string = inst->annotation; 382 if (last_annotation_string) 383 printf(" %s\n", last_annotation_string); 384 } 385 } 386 387 for (unsigned int i = 0; i < 3; i++) { 388 src[i] = inst->get_src(i); 389 } 390 dst = inst->get_dst(); 391 392 brw_set_conditionalmod(p, inst->conditional_mod); 393 brw_set_predicate_control(p, inst->predicate); 394 brw_set_predicate_inverse(p, inst->predicate_inverse); 395 brw_set_saturate(p, inst->saturate); 396 397 switch (inst->opcode) { 398 case BRW_OPCODE_MOV: 399 brw_MOV(p, dst, src[0]); 400 break; 401 case BRW_OPCODE_ADD: 402 brw_ADD(p, dst, src[0], src[1]); 403 break; 404 case BRW_OPCODE_MUL: 405 brw_MUL(p, dst, src[0], src[1]); 406 break; 407 408 case BRW_OPCODE_FRC: 409 brw_FRC(p, dst, src[0]); 410 break; 411 case BRW_OPCODE_RNDD: 412 brw_RNDD(p, dst, src[0]); 413 break; 414 case BRW_OPCODE_RNDE: 415 brw_RNDE(p, dst, src[0]); 416 break; 417 case BRW_OPCODE_RNDZ: 418 brw_RNDZ(p, dst, src[0]); 419 break; 420 421 case BRW_OPCODE_AND: 422 brw_AND(p, dst, src[0], src[1]); 423 break; 424 case BRW_OPCODE_OR: 425 brw_OR(p, dst, src[0], src[1]); 426 break; 427 case BRW_OPCODE_XOR: 428 brw_XOR(p, dst, src[0], src[1]); 429 break; 430 case BRW_OPCODE_NOT: 431 brw_NOT(p, dst, src[0]); 432 break; 433 case BRW_OPCODE_ASR: 434 brw_ASR(p, dst, src[0], src[1]); 435 break; 436 case BRW_OPCODE_SHR: 437 brw_SHR(p, dst, src[0], src[1]); 438 break; 439 case BRW_OPCODE_SHL: 440 brw_SHL(p, dst, src[0], src[1]); 441 break; 442 443 case BRW_OPCODE_CMP: 444 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]); 445 break; 446 case BRW_OPCODE_SEL: 447 brw_SEL(p, dst, src[0], src[1]); 448 break; 449 450 case BRW_OPCODE_IF: 451 if (inst->src[0].file != BAD_FILE) { 452 /* The instruction has an embedded compare (only allowed on gen6) */ 453 assert(intel->gen == 6); 454 gen6_IF(p, inst->conditional_mod, src[0], src[1]); 455 } else { 456 brw_IF(p, BRW_EXECUTE_8); 457 } 458 if_depth_in_loop[loop_stack_depth]++; 459 break; 460 461 case BRW_OPCODE_ELSE: 462 brw_ELSE(p); 463 break; 464 case BRW_OPCODE_ENDIF: 465 brw_ENDIF(p); 466 if_depth_in_loop[loop_stack_depth]--; 467 break; 468 469 case BRW_OPCODE_DO: 470 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8); 471 if (loop_stack_array_size <= loop_stack_depth) { 472 loop_stack_array_size *= 2; 473 loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *, 474 loop_stack_array_size); 475 if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int, 476 loop_stack_array_size); 477 } 478 if_depth_in_loop[loop_stack_depth] = 0; 479 break; 480 481 case BRW_OPCODE_BREAK: 482 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]); 483 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 484 break; 485 case BRW_OPCODE_CONTINUE: 486 /* FINISHME: We need to write the loop instruction support still. */ 487 if (intel->gen >= 6) 488 gen6_CONT(p, loop_stack[loop_stack_depth - 1]); 489 else 490 brw_CONT(p, if_depth_in_loop[loop_stack_depth]); 491 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 492 break; 493 494 case BRW_OPCODE_WHILE: { 495 struct brw_instruction *inst0, *inst1; 496 GLuint br = 1; 497 498 if (intel->gen >= 5) 499 br = 2; 500 501 assert(loop_stack_depth > 0); 502 loop_stack_depth--; 503 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]); 504 if (intel->gen < 6) { 505 /* patch all the BREAK/CONT instructions from last BGNLOOP */ 506 while (inst0 > loop_stack[loop_stack_depth]) { 507 inst0--; 508 if (inst0->header.opcode == BRW_OPCODE_BREAK && 509 inst0->bits3.if_else.jump_count == 0) { 510 inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); 511 } 512 else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && 513 inst0->bits3.if_else.jump_count == 0) { 514 inst0->bits3.if_else.jump_count = br * (inst1 - inst0); 515 } 516 } 517 } 518 } 519 break; 520 521 default: 522 generate_vs_instruction(inst, dst, src); 523 break; 524 } 525 526 if (unlikely(INTEL_DEBUG & DEBUG_VS)) { 527 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) { 528 if (0) { 529 printf("0x%08x 0x%08x 0x%08x 0x%08x ", 530 ((uint32_t *)&p->store[i])[3], 531 ((uint32_t *)&p->store[i])[2], 532 ((uint32_t *)&p->store[i])[1], 533 ((uint32_t *)&p->store[i])[0]); 534 } 535 brw_disasm(stdout, &p->store[i], intel->gen); 536 } 537 } 538 539 last_native_inst = p->nr_insn; 540 } 541 542 if (unlikely(INTEL_DEBUG & DEBUG_VS)) { 543 printf("\n"); 544 } 545 546 ralloc_free(loop_stack); 547 ralloc_free(if_depth_in_loop); 548 549 brw_set_uip_jip(p); 550 551 /* OK, while the INTEL_DEBUG=vs above is very nice for debugging VS 552 * emit issues, it doesn't get the jump distances into the output, 553 * which is often something we want to debug. So this is here in 554 * case you're doing that. 555 */ 556 if (0) { 557 if (unlikely(INTEL_DEBUG & DEBUG_VS)) { 558 for (unsigned int i = 0; i < p->nr_insn; i++) { 559 printf("0x%08x 0x%08x 0x%08x 0x%08x ", 560 ((uint32_t *)&p->store[i])[3], 561 ((uint32_t *)&p->store[i])[2], 562 ((uint32_t *)&p->store[i])[1], 563 ((uint32_t *)&p->store[i])[0]); 564 brw_disasm(stdout, &p->store[i], intel->gen); 565 } 566 } 567 } 568} 569 570extern "C" { 571 572bool 573brw_vs_emit(struct brw_vs_compile *c) 574{ 575 struct brw_compile *p = &c->func; 576 struct brw_context *brw = p->brw; 577 struct intel_context *intel = &brw->intel; 578 struct gl_context *ctx = &intel->ctx; 579 struct gl_shader_program *prog = ctx->Shader.CurrentVertexProgram; 580 581 if (!prog) 582 return false; 583 584 struct brw_shader *shader = 585 (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX]; 586 if (!shader) 587 return false; 588 589 if (unlikely(INTEL_DEBUG & DEBUG_VS)) { 590 printf("GLSL IR for native vertex shader %d:\n", prog->Name); 591 _mesa_print_ir(shader->ir, NULL); 592 printf("\n\n"); 593 } 594 595 vec4_visitor v(c, prog, shader); 596 if (!v.run()) { 597 /* FINISHME: Cleanly fail, test at link time, etc. */ 598 assert(!"not reached"); 599 return false; 600 } 601 602 return true; 603} 604 605} /* extern "C" */ 606 607} /* namespace brw */ 608