brw_vec4_emit.cpp revision eca762d831e099b549dafa0be896eac82b3fceb9
/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
22 */ 23 24#include "brw_vec4.h" 25#include "../glsl/ir_print_visitor.h" 26 27extern "C" { 28#include "brw_eu.h" 29}; 30 31using namespace brw; 32 33namespace brw { 34 35int 36vec4_visitor::setup_attributes(int payload_reg) 37{ 38 int nr_attributes; 39 int attribute_map[VERT_ATTRIB_MAX]; 40 41 nr_attributes = 0; 42 for (int i = 0; i < VERT_ATTRIB_MAX; i++) { 43 if (prog_data->inputs_read & BITFIELD64_BIT(i)) { 44 attribute_map[i] = payload_reg + nr_attributes; 45 nr_attributes++; 46 } 47 } 48 49 foreach_iter(exec_list_iterator, iter, this->instructions) { 50 vec4_instruction *inst = (vec4_instruction *)iter.get(); 51 52 for (int i = 0; i < 3; i++) { 53 if (inst->src[i].file != ATTR) 54 continue; 55 56 inst->src[i].file = HW_REG; 57 inst->src[i].fixed_hw_reg = brw_vec8_grf(attribute_map[inst->src[i].reg], 0); 58 inst->src[i].fixed_hw_reg.dw1.bits.swizzle = inst->src[i].swizzle; 59 } 60 } 61 62 /* The BSpec says we always have to read at least one thing from 63 * the VF, and it appears that the hardware wedges otherwise. 64 */ 65 if (nr_attributes == 0) 66 nr_attributes = 1; 67 68 prog_data->urb_read_length = (nr_attributes + 1) / 2; 69 70 return payload_reg + nr_attributes; 71} 72 73int 74vec4_visitor::setup_uniforms(int reg) 75{ 76 /* User clip planes from curbe: 77 */ 78 if (c->key.nr_userclip) { 79 if (intel->gen >= 6) { 80 for (int i = 0; i < c->key.nr_userclip; i++) { 81 c->userplane[i] = stride(brw_vec4_grf(reg + i / 2, 82 (i % 2) * 4), 0, 4, 1); 83 } 84 reg += ALIGN(c->key.nr_userclip, 2) / 2; 85 } else { 86 for (int i = 0; i < c->key.nr_userclip; i++) { 87 c->userplane[i] = stride(brw_vec4_grf(reg + (6 + i) / 2, 88 (i % 2) * 4), 0, 4, 1); 89 } 90 reg += (ALIGN(6 + c->key.nr_userclip, 4) / 4) * 2; 91 } 92 } 93 94 /* The pre-gen6 VS requires that some push constants get loaded no 95 * matter what, or the GPU would hang. 
96 */ 97 if (intel->gen < 6 && this->uniforms == 0) { 98 this->uniform_size[this->uniforms] = 1; 99 100 for (unsigned int i = 0; i < 4; i++) { 101 unsigned int slot = this->uniforms * 4 + i; 102 103 c->prog_data.param[slot] = NULL; 104 c->prog_data.param_convert[slot] = PARAM_CONVERT_ZERO; 105 } 106 107 this->uniforms++; 108 reg++; 109 } else { 110 reg += ALIGN(uniforms, 2) / 2; 111 } 112 113 /* for now, we are not doing any elimination of unused slots, nor 114 * are we packing our uniforms. 115 */ 116 c->prog_data.nr_params = this->uniforms * 4; 117 118 c->prog_data.curb_read_length = reg - 1; 119 c->prog_data.uses_new_param_layout = true; 120 121 return reg; 122} 123 124void 125vec4_visitor::setup_payload(void) 126{ 127 int reg = 0; 128 129 /* The payload always contains important data in g0, which contains 130 * the URB handles that are passed on to the URB write at the end 131 * of the thread. So, we always start push constants at g1. 132 */ 133 reg++; 134 135 reg = setup_uniforms(reg); 136 137 reg = setup_attributes(reg); 138 139 this->first_non_payload_grf = reg; 140} 141 142struct brw_reg 143vec4_instruction::get_dst(void) 144{ 145 struct brw_reg brw_reg; 146 147 switch (dst.file) { 148 case GRF: 149 assert(dst.reg_offset == 0); 150 brw_reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0); 151 brw_reg = retype(brw_reg, dst.type); 152 brw_reg.dw1.bits.writemask = dst.writemask; 153 break; 154 155 case HW_REG: 156 brw_reg = dst.fixed_hw_reg; 157 break; 158 159 case BAD_FILE: 160 brw_reg = brw_null_reg(); 161 break; 162 163 default: 164 assert(!"not reached"); 165 brw_reg = brw_null_reg(); 166 break; 167 } 168 return brw_reg; 169} 170 171struct brw_reg 172vec4_instruction::get_src(int i) 173{ 174 struct brw_reg brw_reg; 175 176 switch (src[i].file) { 177 case GRF: 178 brw_reg = brw_vec8_grf(src[i].reg + src[i].reg_offset, 0); 179 brw_reg = retype(brw_reg, src[i].type); 180 brw_reg.dw1.bits.swizzle = src[i].swizzle; 181 if (src[i].abs) 182 brw_reg = brw_abs(brw_reg); 
183 if (src[i].negate) 184 brw_reg = negate(brw_reg); 185 break; 186 187 case IMM: 188 switch (src[i].type) { 189 case BRW_REGISTER_TYPE_F: 190 brw_reg = brw_imm_f(src[i].imm.f); 191 break; 192 case BRW_REGISTER_TYPE_D: 193 brw_reg = brw_imm_d(src[i].imm.i); 194 break; 195 case BRW_REGISTER_TYPE_UD: 196 brw_reg = brw_imm_ud(src[i].imm.u); 197 break; 198 default: 199 assert(!"not reached"); 200 brw_reg = brw_null_reg(); 201 break; 202 } 203 break; 204 205 case UNIFORM: 206 brw_reg = stride(brw_vec4_grf(1 + (src[i].reg + src[i].reg_offset) / 2, 207 ((src[i].reg + src[i].reg_offset) % 2) * 4), 208 0, 4, 1); 209 brw_reg = retype(brw_reg, src[i].type); 210 brw_reg.dw1.bits.swizzle = src[i].swizzle; 211 if (src[i].abs) 212 brw_reg = brw_abs(brw_reg); 213 if (src[i].negate) 214 brw_reg = negate(brw_reg); 215 break; 216 217 case HW_REG: 218 brw_reg = src[i].fixed_hw_reg; 219 break; 220 221 case BAD_FILE: 222 /* Probably unused. */ 223 brw_reg = brw_null_reg(); 224 break; 225 case ATTR: 226 default: 227 assert(!"not reached"); 228 brw_reg = brw_null_reg(); 229 break; 230 } 231 232 return brw_reg; 233} 234 235void 236vec4_visitor::generate_math1_gen4(vec4_instruction *inst, 237 struct brw_reg dst, 238 struct brw_reg src) 239{ 240 brw_math(p, 241 dst, 242 brw_math_function(inst->opcode), 243 BRW_MATH_SATURATE_NONE, 244 inst->base_mrf, 245 src, 246 BRW_MATH_DATA_SCALAR, 247 BRW_MATH_PRECISION_FULL); 248} 249 250void 251vec4_visitor::generate_math1_gen6(vec4_instruction *inst, 252 struct brw_reg dst, 253 struct brw_reg src) 254{ 255 brw_math(p, 256 dst, 257 brw_math_function(inst->opcode), 258 BRW_MATH_SATURATE_NONE, 259 inst->base_mrf, 260 src, 261 BRW_MATH_DATA_SCALAR, 262 BRW_MATH_PRECISION_FULL); 263} 264 265void 266vec4_visitor::generate_urb_write(vec4_instruction *inst) 267{ 268 brw_urb_WRITE(p, 269 brw_null_reg(), /* dest */ 270 inst->base_mrf, /* starting mrf reg nr */ 271 brw_vec8_grf(0, 0), /* src */ 272 false, /* allocate */ 273 true, /* used */ 274 inst->mlen, 275 
0, /* response len */ 276 inst->eot, /* eot */ 277 inst->eot, /* writes complete */ 278 inst->offset, /* urb destination offset */ 279 BRW_URB_SWIZZLE_INTERLEAVE); 280} 281 282void 283vec4_visitor::generate_vs_instruction(vec4_instruction *instruction, 284 struct brw_reg dst, 285 struct brw_reg *src) 286{ 287 vec4_instruction *inst = (vec4_instruction *)instruction; 288 289 switch (inst->opcode) { 290 case SHADER_OPCODE_RCP: 291 case SHADER_OPCODE_RSQ: 292 case SHADER_OPCODE_SQRT: 293 case SHADER_OPCODE_EXP2: 294 case SHADER_OPCODE_LOG2: 295 case SHADER_OPCODE_SIN: 296 case SHADER_OPCODE_COS: 297 if (intel->gen >= 6) { 298 generate_math1_gen6(inst, dst, src[0]); 299 } else { 300 generate_math1_gen4(inst, dst, src[0]); 301 } 302 break; 303 304 case SHADER_OPCODE_POW: 305 assert(!"finishme"); 306 break; 307 308 case VS_OPCODE_URB_WRITE: 309 generate_urb_write(inst); 310 break; 311 312 default: 313 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) { 314 fail("unsupported opcode in `%s' in VS\n", 315 brw_opcodes[inst->opcode].name); 316 } else { 317 fail("Unsupported opcode %d in VS", inst->opcode); 318 } 319 } 320} 321 322bool 323vec4_visitor::run() 324{ 325 /* Generate FS IR for main(). (the visitor only descends into 326 * functions called "main"). 
327 */ 328 foreach_iter(exec_list_iterator, iter, *shader->ir) { 329 ir_instruction *ir = (ir_instruction *)iter.get(); 330 base_ir = ir; 331 ir->accept(this); 332 } 333 334 emit_urb_writes(); 335 336 if (failed) 337 return false; 338 339 setup_payload(); 340 reg_allocate(); 341 342 brw_set_access_mode(p, BRW_ALIGN_16); 343 344 generate_code(); 345 346 return !failed; 347} 348 349void 350vec4_visitor::generate_code() 351{ 352 int last_native_inst = p->nr_insn; 353 const char *last_annotation_string = NULL; 354 ir_instruction *last_annotation_ir = NULL; 355 356 int loop_stack_array_size = 16; 357 int loop_stack_depth = 0; 358 brw_instruction **loop_stack = 359 rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size); 360 int *if_depth_in_loop = 361 rzalloc_array(this->mem_ctx, int, loop_stack_array_size); 362 363 364 if (unlikely(INTEL_DEBUG & DEBUG_VS)) { 365 printf("Native code for vertex shader %d:\n", prog->Name); 366 } 367 368 foreach_list(node, &this->instructions) { 369 vec4_instruction *inst = (vec4_instruction *)node; 370 struct brw_reg src[3], dst; 371 372 if (unlikely(INTEL_DEBUG & DEBUG_VS)) { 373 if (last_annotation_ir != inst->ir) { 374 last_annotation_ir = inst->ir; 375 if (last_annotation_ir) { 376 printf(" "); 377 last_annotation_ir->print(); 378 printf("\n"); 379 } 380 } 381 if (last_annotation_string != inst->annotation) { 382 last_annotation_string = inst->annotation; 383 if (last_annotation_string) 384 printf(" %s\n", last_annotation_string); 385 } 386 } 387 388 for (unsigned int i = 0; i < 3; i++) { 389 src[i] = inst->get_src(i); 390 } 391 dst = inst->get_dst(); 392 393 brw_set_conditionalmod(p, inst->conditional_mod); 394 brw_set_predicate_control(p, inst->predicate); 395 brw_set_predicate_inverse(p, inst->predicate_inverse); 396 brw_set_saturate(p, inst->saturate); 397 398 switch (inst->opcode) { 399 case BRW_OPCODE_MOV: 400 brw_MOV(p, dst, src[0]); 401 break; 402 case BRW_OPCODE_ADD: 403 brw_ADD(p, dst, src[0], src[1]); 404 
break; 405 case BRW_OPCODE_MUL: 406 brw_MUL(p, dst, src[0], src[1]); 407 break; 408 409 case BRW_OPCODE_FRC: 410 brw_FRC(p, dst, src[0]); 411 break; 412 case BRW_OPCODE_RNDD: 413 brw_RNDD(p, dst, src[0]); 414 break; 415 case BRW_OPCODE_RNDE: 416 brw_RNDE(p, dst, src[0]); 417 break; 418 case BRW_OPCODE_RNDZ: 419 brw_RNDZ(p, dst, src[0]); 420 break; 421 422 case BRW_OPCODE_AND: 423 brw_AND(p, dst, src[0], src[1]); 424 break; 425 case BRW_OPCODE_OR: 426 brw_OR(p, dst, src[0], src[1]); 427 break; 428 case BRW_OPCODE_XOR: 429 brw_XOR(p, dst, src[0], src[1]); 430 break; 431 case BRW_OPCODE_NOT: 432 brw_NOT(p, dst, src[0]); 433 break; 434 case BRW_OPCODE_ASR: 435 brw_ASR(p, dst, src[0], src[1]); 436 break; 437 case BRW_OPCODE_SHR: 438 brw_SHR(p, dst, src[0], src[1]); 439 break; 440 case BRW_OPCODE_SHL: 441 brw_SHL(p, dst, src[0], src[1]); 442 break; 443 444 case BRW_OPCODE_CMP: 445 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]); 446 break; 447 case BRW_OPCODE_SEL: 448 brw_SEL(p, dst, src[0], src[1]); 449 break; 450 451 case BRW_OPCODE_IF: 452 if (inst->src[0].file != BAD_FILE) { 453 /* The instruction has an embedded compare (only allowed on gen6) */ 454 assert(intel->gen == 6); 455 gen6_IF(p, inst->conditional_mod, src[0], src[1]); 456 } else { 457 brw_IF(p, BRW_EXECUTE_8); 458 } 459 if_depth_in_loop[loop_stack_depth]++; 460 break; 461 462 case BRW_OPCODE_ELSE: 463 brw_ELSE(p); 464 break; 465 case BRW_OPCODE_ENDIF: 466 brw_ENDIF(p); 467 if_depth_in_loop[loop_stack_depth]--; 468 break; 469 470 case BRW_OPCODE_DO: 471 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8); 472 if (loop_stack_array_size <= loop_stack_depth) { 473 loop_stack_array_size *= 2; 474 loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *, 475 loop_stack_array_size); 476 if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int, 477 loop_stack_array_size); 478 } 479 if_depth_in_loop[loop_stack_depth] = 0; 480 break; 481 482 case BRW_OPCODE_BREAK: 483 brw_BREAK(p, 
if_depth_in_loop[loop_stack_depth]); 484 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 485 break; 486 case BRW_OPCODE_CONTINUE: 487 /* FINISHME: We need to write the loop instruction support still. */ 488 if (intel->gen >= 6) 489 gen6_CONT(p, loop_stack[loop_stack_depth - 1]); 490 else 491 brw_CONT(p, if_depth_in_loop[loop_stack_depth]); 492 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 493 break; 494 495 case BRW_OPCODE_WHILE: { 496 struct brw_instruction *inst0, *inst1; 497 GLuint br = 1; 498 499 if (intel->gen >= 5) 500 br = 2; 501 502 assert(loop_stack_depth > 0); 503 loop_stack_depth--; 504 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]); 505 if (intel->gen < 6) { 506 /* patch all the BREAK/CONT instructions from last BGNLOOP */ 507 while (inst0 > loop_stack[loop_stack_depth]) { 508 inst0--; 509 if (inst0->header.opcode == BRW_OPCODE_BREAK && 510 inst0->bits3.if_else.jump_count == 0) { 511 inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); 512 } 513 else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && 514 inst0->bits3.if_else.jump_count == 0) { 515 inst0->bits3.if_else.jump_count = br * (inst1 - inst0); 516 } 517 } 518 } 519 } 520 break; 521 522 default: 523 generate_vs_instruction(inst, dst, src); 524 break; 525 } 526 527 if (unlikely(INTEL_DEBUG & DEBUG_VS)) { 528 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) { 529 if (0) { 530 printf("0x%08x 0x%08x 0x%08x 0x%08x ", 531 ((uint32_t *)&p->store[i])[3], 532 ((uint32_t *)&p->store[i])[2], 533 ((uint32_t *)&p->store[i])[1], 534 ((uint32_t *)&p->store[i])[0]); 535 } 536 brw_disasm(stdout, &p->store[i], intel->gen); 537 } 538 } 539 540 last_native_inst = p->nr_insn; 541 } 542 543 if (unlikely(INTEL_DEBUG & DEBUG_VS)) { 544 printf("\n"); 545 } 546 547 ralloc_free(loop_stack); 548 ralloc_free(if_depth_in_loop); 549 550 brw_set_uip_jip(p); 551 552 /* OK, while the INTEL_DEBUG=vs above is very nice for debugging VS 553 * emit issues, it doesn't get the jump distances into 
the output, 554 * which is often something we want to debug. So this is here in 555 * case you're doing that. 556 */ 557 if (0) { 558 if (unlikely(INTEL_DEBUG & DEBUG_VS)) { 559 for (unsigned int i = 0; i < p->nr_insn; i++) { 560 printf("0x%08x 0x%08x 0x%08x 0x%08x ", 561 ((uint32_t *)&p->store[i])[3], 562 ((uint32_t *)&p->store[i])[2], 563 ((uint32_t *)&p->store[i])[1], 564 ((uint32_t *)&p->store[i])[0]); 565 brw_disasm(stdout, &p->store[i], intel->gen); 566 } 567 } 568 } 569} 570 571extern "C" { 572 573bool 574brw_vs_emit(struct brw_vs_compile *c) 575{ 576 struct brw_compile *p = &c->func; 577 struct brw_context *brw = p->brw; 578 struct intel_context *intel = &brw->intel; 579 struct gl_context *ctx = &intel->ctx; 580 struct gl_shader_program *prog = ctx->Shader.CurrentVertexProgram; 581 582 if (!prog) 583 return false; 584 585 struct brw_shader *shader = 586 (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX]; 587 if (!shader) 588 return false; 589 590 if (unlikely(INTEL_DEBUG & DEBUG_VS)) { 591 printf("GLSL IR for native vertex shader %d:\n", prog->Name); 592 _mesa_print_ir(shader->ir, NULL); 593 printf("\n\n"); 594 } 595 596 vec4_visitor v(c, prog, shader); 597 if (!v.run()) { 598 /* FINISHME: Cleanly fail, test at link time, etc. */ 599 assert(!"not reached"); 600 return false; 601 } 602 603 return true; 604} 605 606} /* extern "C" */ 607 608} /* namespace brw */ 609