brw_eu_emit.c revision 34b11334d417fae65ebe9cf96980aea581e24893
1/* 2 Copyright (C) Intel Corp. 2006. All Rights Reserved. 3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to 4 develop this 3D driver. 5 6 Permission is hereby granted, free of charge, to any person obtaining 7 a copy of this software and associated documentation files (the 8 "Software"), to deal in the Software without restriction, including 9 without limitation the rights to use, copy, modify, merge, publish, 10 distribute, sublicense, and/or sell copies of the Software, and to 11 permit persons to whom the Software is furnished to do so, subject to 12 the following conditions: 13 14 The above copyright notice and this permission notice (including the 15 next paragraph) shall be included in all copies or substantial 16 portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
25 26 **********************************************************************/ 27 /* 28 * Authors: 29 * Keith Whitwell <keith@tungstengraphics.com> 30 */ 31 32 33#include "brw_context.h" 34#include "brw_defines.h" 35#include "brw_eu.h" 36 37#include "glsl/ralloc.h" 38 39/*********************************************************************** 40 * Internal helper for constructing instructions 41 */ 42 43static void guess_execution_size(struct brw_compile *p, 44 struct brw_instruction *insn, 45 struct brw_reg reg) 46{ 47 if (reg.width == BRW_WIDTH_8 && p->compressed) 48 insn->header.execution_size = BRW_EXECUTE_16; 49 else 50 insn->header.execution_size = reg.width; /* note - definitions are compatible */ 51} 52 53 54/** 55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source 56 * registers, implicitly moving the operand to a message register. 57 * 58 * On Sandybridge, this is no longer the case. This function performs the 59 * explicit move; it should be called before emitting a SEND instruction. 60 */ 61void 62gen6_resolve_implied_move(struct brw_compile *p, 63 struct brw_reg *src, 64 GLuint msg_reg_nr) 65{ 66 struct brw_context *brw = p->brw; 67 if (brw->gen < 6) 68 return; 69 70 if (src->file == BRW_MESSAGE_REGISTER_FILE) 71 return; 72 73 if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) { 74 brw_push_insn_state(p); 75 brw_set_mask_control(p, BRW_MASK_DISABLE); 76 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 77 brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD), 78 retype(*src, BRW_REGISTER_TYPE_UD)); 79 brw_pop_insn_state(p); 80 } 81 *src = brw_message_reg(msg_reg_nr); 82} 83 84static void 85gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg) 86{ 87 /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"): 88 * "The send with EOT should use register space R112-R127 for <src>. 
This is 89 * to enable loading of a new thread into the same slot while the message 90 * with EOT for current thread is pending dispatch." 91 * 92 * Since we're pretending to have 16 MRFs anyway, we may as well use the 93 * registers required for messages with EOT. 94 */ 95 struct brw_context *brw = p->brw; 96 if (brw->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) { 97 reg->file = BRW_GENERAL_REGISTER_FILE; 98 reg->nr += GEN7_MRF_HACK_START; 99 } 100} 101 102 103void 104brw_set_dest(struct brw_compile *p, struct brw_instruction *insn, 105 struct brw_reg dest) 106{ 107 if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && 108 dest.file != BRW_MESSAGE_REGISTER_FILE) 109 assert(dest.nr < 128); 110 111 gen7_convert_mrf_to_grf(p, &dest); 112 113 insn->bits1.da1.dest_reg_file = dest.file; 114 insn->bits1.da1.dest_reg_type = dest.type; 115 insn->bits1.da1.dest_address_mode = dest.address_mode; 116 117 if (dest.address_mode == BRW_ADDRESS_DIRECT) { 118 insn->bits1.da1.dest_reg_nr = dest.nr; 119 120 if (insn->header.access_mode == BRW_ALIGN_1) { 121 insn->bits1.da1.dest_subreg_nr = dest.subnr; 122 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) 123 dest.hstride = BRW_HORIZONTAL_STRIDE_1; 124 insn->bits1.da1.dest_horiz_stride = dest.hstride; 125 } 126 else { 127 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; 128 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; 129 if (dest.file == BRW_GENERAL_REGISTER_FILE || 130 dest.file == BRW_MESSAGE_REGISTER_FILE) { 131 assert(dest.dw1.bits.writemask != 0); 132 } 133 /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1: 134 * Although Dst.HorzStride is a don't care for Align16, HW needs 135 * this to be programmed as "01". 
136 */ 137 insn->bits1.da16.dest_horiz_stride = 1; 138 } 139 } 140 else { 141 insn->bits1.ia1.dest_subreg_nr = dest.subnr; 142 143 /* These are different sizes in align1 vs align16: 144 */ 145 if (insn->header.access_mode == BRW_ALIGN_1) { 146 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; 147 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) 148 dest.hstride = BRW_HORIZONTAL_STRIDE_1; 149 insn->bits1.ia1.dest_horiz_stride = dest.hstride; 150 } 151 else { 152 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; 153 /* even ignored in da16, still need to set as '01' */ 154 insn->bits1.ia16.dest_horiz_stride = 1; 155 } 156 } 157 158 /* NEW: Set the execution size based on dest.width and 159 * insn->compression_control: 160 */ 161 guess_execution_size(p, insn, dest); 162} 163 164extern int reg_type_size[]; 165 166static void 167validate_reg(struct brw_instruction *insn, struct brw_reg reg) 168{ 169 int hstride_for_reg[] = {0, 1, 2, 4}; 170 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256}; 171 int width_for_reg[] = {1, 2, 4, 8, 16}; 172 int execsize_for_reg[] = {1, 2, 4, 8, 16}; 173 int width, hstride, vstride, execsize; 174 175 if (reg.file == BRW_IMMEDIATE_VALUE) { 176 /* 3.3.6: Region Parameters. Restriction: Immediate vectors 177 * mean the destination has to be 128-bit aligned and the 178 * destination horiz stride has to be a word. 
179 */ 180 if (reg.type == BRW_REGISTER_TYPE_V) { 181 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] * 182 reg_type_size[insn->bits1.da1.dest_reg_type] == 2); 183 } 184 185 return; 186 } 187 188 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE && 189 reg.file == BRW_ARF_NULL) 190 return; 191 192 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg)); 193 hstride = hstride_for_reg[reg.hstride]; 194 195 if (reg.vstride == 0xf) { 196 vstride = -1; 197 } else { 198 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg)); 199 vstride = vstride_for_reg[reg.vstride]; 200 } 201 202 assert(reg.width >= 0 && reg.width < Elements(width_for_reg)); 203 width = width_for_reg[reg.width]; 204 205 assert(insn->header.execution_size >= 0 && 206 insn->header.execution_size < Elements(execsize_for_reg)); 207 execsize = execsize_for_reg[insn->header.execution_size]; 208 209 /* Restrictions from 3.3.10: Register Region Restrictions. */ 210 /* 3. */ 211 assert(execsize >= width); 212 213 /* 4. */ 214 if (execsize == width && hstride != 0) { 215 assert(vstride == -1 || vstride == width * hstride); 216 } 217 218 /* 5. */ 219 if (execsize == width && hstride == 0) { 220 /* no restriction on vstride. */ 221 } 222 223 /* 6. */ 224 if (width == 1) { 225 assert(hstride == 0); 226 } 227 228 /* 7. */ 229 if (execsize == 1 && width == 1) { 230 assert(hstride == 0); 231 assert(vstride == 0); 232 } 233 234 /* 8. */ 235 if (vstride == 0 && hstride == 0) { 236 assert(width == 1); 237 } 238 239 /* 10. Check destination issues. 
*/ 240} 241 242void 243brw_set_src0(struct brw_compile *p, struct brw_instruction *insn, 244 struct brw_reg reg) 245{ 246 struct brw_context *brw = p->brw; 247 248 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) 249 assert(reg.nr < 128); 250 251 gen7_convert_mrf_to_grf(p, ®); 252 253 if (brw->gen >= 6 && (insn->header.opcode == BRW_OPCODE_SEND || 254 insn->header.opcode == BRW_OPCODE_SENDC)) { 255 /* Any source modifiers or regions will be ignored, since this just 256 * identifies the MRF/GRF to start reading the message contents from. 257 * Check for some likely failures. 258 */ 259 assert(!reg.negate); 260 assert(!reg.abs); 261 assert(reg.address_mode == BRW_ADDRESS_DIRECT); 262 } 263 264 validate_reg(insn, reg); 265 266 insn->bits1.da1.src0_reg_file = reg.file; 267 insn->bits1.da1.src0_reg_type = reg.type; 268 insn->bits2.da1.src0_abs = reg.abs; 269 insn->bits2.da1.src0_negate = reg.negate; 270 insn->bits2.da1.src0_address_mode = reg.address_mode; 271 272 if (reg.file == BRW_IMMEDIATE_VALUE) { 273 insn->bits3.ud = reg.dw1.ud; 274 275 /* Required to set some fields in src1 as well: 276 */ 277 insn->bits1.da1.src1_reg_file = 0; /* arf */ 278 insn->bits1.da1.src1_reg_type = reg.type; 279 } 280 else 281 { 282 if (reg.address_mode == BRW_ADDRESS_DIRECT) { 283 if (insn->header.access_mode == BRW_ALIGN_1) { 284 insn->bits2.da1.src0_subreg_nr = reg.subnr; 285 insn->bits2.da1.src0_reg_nr = reg.nr; 286 } 287 else { 288 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16; 289 insn->bits2.da16.src0_reg_nr = reg.nr; 290 } 291 } 292 else { 293 insn->bits2.ia1.src0_subreg_nr = reg.subnr; 294 295 if (insn->header.access_mode == BRW_ALIGN_1) { 296 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; 297 } 298 else { 299 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; 300 } 301 } 302 303 if (insn->header.access_mode == BRW_ALIGN_1) { 304 if (reg.width == BRW_WIDTH_1 && 305 insn->header.execution_size == BRW_EXECUTE_1) { 306 
insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0; 307 insn->bits2.da1.src0_width = BRW_WIDTH_1; 308 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0; 309 } 310 else { 311 insn->bits2.da1.src0_horiz_stride = reg.hstride; 312 insn->bits2.da1.src0_width = reg.width; 313 insn->bits2.da1.src0_vert_stride = reg.vstride; 314 } 315 } 316 else { 317 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); 318 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); 319 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); 320 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); 321 322 /* This is an oddity of the fact we're using the same 323 * descriptions for registers in align_16 as align_1: 324 */ 325 if (reg.vstride == BRW_VERTICAL_STRIDE_8) 326 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4; 327 else 328 insn->bits2.da16.src0_vert_stride = reg.vstride; 329 } 330 } 331} 332 333 334void brw_set_src1(struct brw_compile *p, 335 struct brw_instruction *insn, 336 struct brw_reg reg) 337{ 338 assert(reg.file != BRW_MESSAGE_REGISTER_FILE); 339 340 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) 341 assert(reg.nr < 128); 342 343 gen7_convert_mrf_to_grf(p, ®); 344 345 validate_reg(insn, reg); 346 347 insn->bits1.da1.src1_reg_file = reg.file; 348 insn->bits1.da1.src1_reg_type = reg.type; 349 insn->bits3.da1.src1_abs = reg.abs; 350 insn->bits3.da1.src1_negate = reg.negate; 351 352 /* Only src1 can be immediate in two-argument instructions. 
353 */ 354 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE); 355 356 if (reg.file == BRW_IMMEDIATE_VALUE) { 357 insn->bits3.ud = reg.dw1.ud; 358 } 359 else { 360 /* This is a hardware restriction, which may or may not be lifted 361 * in the future: 362 */ 363 assert (reg.address_mode == BRW_ADDRESS_DIRECT); 364 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */ 365 366 if (insn->header.access_mode == BRW_ALIGN_1) { 367 insn->bits3.da1.src1_subreg_nr = reg.subnr; 368 insn->bits3.da1.src1_reg_nr = reg.nr; 369 } 370 else { 371 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16; 372 insn->bits3.da16.src1_reg_nr = reg.nr; 373 } 374 375 if (insn->header.access_mode == BRW_ALIGN_1) { 376 if (reg.width == BRW_WIDTH_1 && 377 insn->header.execution_size == BRW_EXECUTE_1) { 378 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0; 379 insn->bits3.da1.src1_width = BRW_WIDTH_1; 380 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0; 381 } 382 else { 383 insn->bits3.da1.src1_horiz_stride = reg.hstride; 384 insn->bits3.da1.src1_width = reg.width; 385 insn->bits3.da1.src1_vert_stride = reg.vstride; 386 } 387 } 388 else { 389 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); 390 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); 391 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); 392 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); 393 394 /* This is an oddity of the fact we're using the same 395 * descriptions for registers in align_16 as align_1: 396 */ 397 if (reg.vstride == BRW_VERTICAL_STRIDE_8) 398 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4; 399 else 400 insn->bits3.da16.src1_vert_stride = reg.vstride; 401 } 402 } 403} 404 405/** 406 * Set the Message Descriptor and Extended Message Descriptor fields 407 * for SEND messages. 
408 * 409 * \note This zeroes out the Function Control bits, so it must be called 410 * \b before filling out any message-specific data. Callers can 411 * choose not to fill in irrelevant bits; they will be zero. 412 */ 413static void 414brw_set_message_descriptor(struct brw_compile *p, 415 struct brw_instruction *inst, 416 enum brw_message_target sfid, 417 unsigned msg_length, 418 unsigned response_length, 419 bool header_present, 420 bool end_of_thread) 421{ 422 struct brw_context *brw = p->brw; 423 424 brw_set_src1(p, inst, brw_imm_d(0)); 425 426 if (brw->gen >= 5) { 427 inst->bits3.generic_gen5.header_present = header_present; 428 inst->bits3.generic_gen5.response_length = response_length; 429 inst->bits3.generic_gen5.msg_length = msg_length; 430 inst->bits3.generic_gen5.end_of_thread = end_of_thread; 431 432 if (brw->gen >= 6) { 433 /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */ 434 inst->header.destreg__conditionalmod = sfid; 435 } else { 436 /* Set Extended Message Descriptor (ex_desc) */ 437 inst->bits2.send_gen5.sfid = sfid; 438 inst->bits2.send_gen5.end_of_thread = end_of_thread; 439 } 440 } else { 441 inst->bits3.generic.response_length = response_length; 442 inst->bits3.generic.msg_length = msg_length; 443 inst->bits3.generic.msg_target = sfid; 444 inst->bits3.generic.end_of_thread = end_of_thread; 445 } 446} 447 448static void brw_set_math_message( struct brw_compile *p, 449 struct brw_instruction *insn, 450 GLuint function, 451 GLuint integer_type, 452 bool low_precision, 453 GLuint dataType ) 454{ 455 struct brw_context *brw = p->brw; 456 unsigned msg_length; 457 unsigned response_length; 458 459 /* Infer message length from the function */ 460 switch (function) { 461 case BRW_MATH_FUNCTION_POW: 462 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT: 463 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER: 464 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER: 465 msg_length = 2; 466 break; 467 default: 468 msg_length = 1; 469 break; 470 } 471 472 
/* Infer response length from the function */ 473 switch (function) { 474 case BRW_MATH_FUNCTION_SINCOS: 475 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER: 476 response_length = 2; 477 break; 478 default: 479 response_length = 1; 480 break; 481 } 482 483 484 brw_set_message_descriptor(p, insn, BRW_SFID_MATH, 485 msg_length, response_length, false, false); 486 if (brw->gen == 5) { 487 insn->bits3.math_gen5.function = function; 488 insn->bits3.math_gen5.int_type = integer_type; 489 insn->bits3.math_gen5.precision = low_precision; 490 insn->bits3.math_gen5.saturate = insn->header.saturate; 491 insn->bits3.math_gen5.data_type = dataType; 492 insn->bits3.math_gen5.snapshot = 0; 493 } else { 494 insn->bits3.math.function = function; 495 insn->bits3.math.int_type = integer_type; 496 insn->bits3.math.precision = low_precision; 497 insn->bits3.math.saturate = insn->header.saturate; 498 insn->bits3.math.data_type = dataType; 499 } 500 insn->header.saturate = 0; 501} 502 503 504static void brw_set_ff_sync_message(struct brw_compile *p, 505 struct brw_instruction *insn, 506 bool allocate, 507 GLuint response_length, 508 bool end_of_thread) 509{ 510 brw_set_message_descriptor(p, insn, BRW_SFID_URB, 511 1, response_length, true, end_of_thread); 512 insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */ 513 insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */ 514 insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */ 515 insn->bits3.urb_gen5.allocate = allocate; 516 insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */ 517 insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */ 518} 519 520static void brw_set_urb_message( struct brw_compile *p, 521 struct brw_instruction *insn, 522 enum brw_urb_write_flags flags, 523 GLuint msg_length, 524 GLuint response_length, 525 GLuint offset, 526 GLuint swizzle_control ) 527{ 528 struct brw_context *brw = p->brw; 529 530 brw_set_message_descriptor(p, insn, BRW_SFID_URB, 531 msg_length, response_length, true, 532 
flags & BRW_URB_WRITE_EOT); 533 if (brw->gen == 7) { 534 if (flags & BRW_URB_WRITE_OWORD) { 535 assert(msg_length == 2); /* header + one OWORD of data */ 536 insn->bits3.urb_gen7.opcode = BRW_URB_OPCODE_WRITE_OWORD; 537 } else { 538 insn->bits3.urb_gen7.opcode = BRW_URB_OPCODE_WRITE_HWORD; 539 } 540 insn->bits3.urb_gen7.offset = offset; 541 assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE); 542 insn->bits3.urb_gen7.swizzle_control = swizzle_control; 543 insn->bits3.urb_gen7.per_slot_offset = 544 flags & BRW_URB_WRITE_PER_SLOT_OFFSET ? 1 : 0; 545 insn->bits3.urb_gen7.complete = flags & BRW_URB_WRITE_COMPLETE ? 1 : 0; 546 } else if (brw->gen >= 5) { 547 insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */ 548 insn->bits3.urb_gen5.offset = offset; 549 insn->bits3.urb_gen5.swizzle_control = swizzle_control; 550 insn->bits3.urb_gen5.allocate = flags & BRW_URB_WRITE_ALLOCATE ? 1 : 0; 551 insn->bits3.urb_gen5.used = flags & BRW_URB_WRITE_UNUSED ? 0 : 1; 552 insn->bits3.urb_gen5.complete = flags & BRW_URB_WRITE_COMPLETE ? 1 : 0; 553 } else { 554 insn->bits3.urb.opcode = 0; /* ? */ 555 insn->bits3.urb.offset = offset; 556 insn->bits3.urb.swizzle_control = swizzle_control; 557 insn->bits3.urb.allocate = flags & BRW_URB_WRITE_ALLOCATE ? 1 : 0; 558 insn->bits3.urb.used = flags & BRW_URB_WRITE_UNUSED ? 0 : 1; 559 insn->bits3.urb.complete = flags & BRW_URB_WRITE_COMPLETE ? 
1 : 0; 560 } 561} 562 563void 564brw_set_dp_write_message(struct brw_compile *p, 565 struct brw_instruction *insn, 566 GLuint binding_table_index, 567 GLuint msg_control, 568 GLuint msg_type, 569 GLuint msg_length, 570 bool header_present, 571 GLuint last_render_target, 572 GLuint response_length, 573 GLuint end_of_thread, 574 GLuint send_commit_msg) 575{ 576 struct brw_context *brw = p->brw; 577 unsigned sfid; 578 579 if (brw->gen >= 7) { 580 /* Use the Render Cache for RT writes; otherwise use the Data Cache */ 581 if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE) 582 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; 583 else 584 sfid = GEN7_SFID_DATAPORT_DATA_CACHE; 585 } else if (brw->gen == 6) { 586 /* Use the render cache for all write messages. */ 587 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; 588 } else { 589 sfid = BRW_SFID_DATAPORT_WRITE; 590 } 591 592 brw_set_message_descriptor(p, insn, sfid, msg_length, response_length, 593 header_present, end_of_thread); 594 595 if (brw->gen >= 7) { 596 insn->bits3.gen7_dp.binding_table_index = binding_table_index; 597 insn->bits3.gen7_dp.msg_control = msg_control; 598 insn->bits3.gen7_dp.last_render_target = last_render_target; 599 insn->bits3.gen7_dp.msg_type = msg_type; 600 } else if (brw->gen == 6) { 601 insn->bits3.gen6_dp.binding_table_index = binding_table_index; 602 insn->bits3.gen6_dp.msg_control = msg_control; 603 insn->bits3.gen6_dp.last_render_target = last_render_target; 604 insn->bits3.gen6_dp.msg_type = msg_type; 605 insn->bits3.gen6_dp.send_commit_msg = send_commit_msg; 606 } else if (brw->gen == 5) { 607 insn->bits3.dp_write_gen5.binding_table_index = binding_table_index; 608 insn->bits3.dp_write_gen5.msg_control = msg_control; 609 insn->bits3.dp_write_gen5.last_render_target = last_render_target; 610 insn->bits3.dp_write_gen5.msg_type = msg_type; 611 insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg; 612 } else { 613 insn->bits3.dp_write.binding_table_index = binding_table_index; 614 
insn->bits3.dp_write.msg_control = msg_control; 615 insn->bits3.dp_write.last_render_target = last_render_target; 616 insn->bits3.dp_write.msg_type = msg_type; 617 insn->bits3.dp_write.send_commit_msg = send_commit_msg; 618 } 619} 620 621void 622brw_set_dp_read_message(struct brw_compile *p, 623 struct brw_instruction *insn, 624 GLuint binding_table_index, 625 GLuint msg_control, 626 GLuint msg_type, 627 GLuint target_cache, 628 GLuint msg_length, 629 bool header_present, 630 GLuint response_length) 631{ 632 struct brw_context *brw = p->brw; 633 unsigned sfid; 634 635 if (brw->gen >= 7) { 636 sfid = GEN7_SFID_DATAPORT_DATA_CACHE; 637 } else if (brw->gen == 6) { 638 if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE) 639 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; 640 else 641 sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE; 642 } else { 643 sfid = BRW_SFID_DATAPORT_READ; 644 } 645 646 brw_set_message_descriptor(p, insn, sfid, msg_length, response_length, 647 header_present, false); 648 649 if (brw->gen >= 7) { 650 insn->bits3.gen7_dp.binding_table_index = binding_table_index; 651 insn->bits3.gen7_dp.msg_control = msg_control; 652 insn->bits3.gen7_dp.last_render_target = 0; 653 insn->bits3.gen7_dp.msg_type = msg_type; 654 } else if (brw->gen == 6) { 655 insn->bits3.gen6_dp.binding_table_index = binding_table_index; 656 insn->bits3.gen6_dp.msg_control = msg_control; 657 insn->bits3.gen6_dp.last_render_target = 0; 658 insn->bits3.gen6_dp.msg_type = msg_type; 659 insn->bits3.gen6_dp.send_commit_msg = 0; 660 } else if (brw->gen == 5) { 661 insn->bits3.dp_read_gen5.binding_table_index = binding_table_index; 662 insn->bits3.dp_read_gen5.msg_control = msg_control; 663 insn->bits3.dp_read_gen5.msg_type = msg_type; 664 insn->bits3.dp_read_gen5.target_cache = target_cache; 665 } else if (brw->is_g4x) { 666 insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/ 667 insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/ 668 insn->bits3.dp_read_g4x.msg_type = 
msg_type; /*11:13*/ 669 insn->bits3.dp_read_g4x.target_cache = target_cache; /*14:15*/ 670 } else { 671 insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/ 672 insn->bits3.dp_read.msg_control = msg_control; /*8:11*/ 673 insn->bits3.dp_read.msg_type = msg_type; /*12:13*/ 674 insn->bits3.dp_read.target_cache = target_cache; /*14:15*/ 675 } 676} 677 678void 679brw_set_sampler_message(struct brw_compile *p, 680 struct brw_instruction *insn, 681 GLuint binding_table_index, 682 GLuint sampler, 683 GLuint msg_type, 684 GLuint response_length, 685 GLuint msg_length, 686 GLuint header_present, 687 GLuint simd_mode, 688 GLuint return_format) 689{ 690 struct brw_context *brw = p->brw; 691 692 brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, msg_length, 693 response_length, header_present, false); 694 695 if (brw->gen >= 7) { 696 insn->bits3.sampler_gen7.binding_table_index = binding_table_index; 697 insn->bits3.sampler_gen7.sampler = sampler; 698 insn->bits3.sampler_gen7.msg_type = msg_type; 699 insn->bits3.sampler_gen7.simd_mode = simd_mode; 700 } else if (brw->gen >= 5) { 701 insn->bits3.sampler_gen5.binding_table_index = binding_table_index; 702 insn->bits3.sampler_gen5.sampler = sampler; 703 insn->bits3.sampler_gen5.msg_type = msg_type; 704 insn->bits3.sampler_gen5.simd_mode = simd_mode; 705 } else if (brw->is_g4x) { 706 insn->bits3.sampler_g4x.binding_table_index = binding_table_index; 707 insn->bits3.sampler_g4x.sampler = sampler; 708 insn->bits3.sampler_g4x.msg_type = msg_type; 709 } else { 710 insn->bits3.sampler.binding_table_index = binding_table_index; 711 insn->bits3.sampler.sampler = sampler; 712 insn->bits3.sampler.msg_type = msg_type; 713 insn->bits3.sampler.return_format = return_format; 714 } 715} 716 717 718#define next_insn brw_next_insn 719struct brw_instruction * 720brw_next_insn(struct brw_compile *p, GLuint opcode) 721{ 722 struct brw_instruction *insn; 723 724 if (p->nr_insn + 1 > p->store_size) { 725 if (0) 726 
printf("incresing the store size to %d\n", p->store_size << 1); 727 p->store_size <<= 1; 728 p->store = reralloc(p->mem_ctx, p->store, 729 struct brw_instruction, p->store_size); 730 if (!p->store) 731 assert(!"realloc eu store memeory failed"); 732 } 733 734 p->next_insn_offset += 16; 735 insn = &p->store[p->nr_insn++]; 736 memcpy(insn, p->current, sizeof(*insn)); 737 738 /* Reset this one-shot flag: 739 */ 740 741 if (p->current->header.destreg__conditionalmod) { 742 p->current->header.destreg__conditionalmod = 0; 743 p->current->header.predicate_control = BRW_PREDICATE_NORMAL; 744 } 745 746 insn->header.opcode = opcode; 747 return insn; 748} 749 750static struct brw_instruction *brw_alu1( struct brw_compile *p, 751 GLuint opcode, 752 struct brw_reg dest, 753 struct brw_reg src ) 754{ 755 struct brw_instruction *insn = next_insn(p, opcode); 756 brw_set_dest(p, insn, dest); 757 brw_set_src0(p, insn, src); 758 return insn; 759} 760 761static struct brw_instruction *brw_alu2(struct brw_compile *p, 762 GLuint opcode, 763 struct brw_reg dest, 764 struct brw_reg src0, 765 struct brw_reg src1 ) 766{ 767 struct brw_instruction *insn = next_insn(p, opcode); 768 brw_set_dest(p, insn, dest); 769 brw_set_src0(p, insn, src0); 770 brw_set_src1(p, insn, src1); 771 return insn; 772} 773 774static int 775get_3src_subreg_nr(struct brw_reg reg) 776{ 777 if (reg.vstride == BRW_VERTICAL_STRIDE_0) { 778 assert(brw_is_single_value_swizzle(reg.dw1.bits.swizzle)); 779 return reg.subnr / 4 + BRW_GET_SWZ(reg.dw1.bits.swizzle, 0); 780 } else { 781 return reg.subnr / 4; 782 } 783} 784 785static struct brw_instruction *brw_alu3(struct brw_compile *p, 786 GLuint opcode, 787 struct brw_reg dest, 788 struct brw_reg src0, 789 struct brw_reg src1, 790 struct brw_reg src2) 791{ 792 struct brw_context *brw = p->brw; 793 struct brw_instruction *insn = next_insn(p, opcode); 794 795 gen7_convert_mrf_to_grf(p, &dest); 796 797 assert(insn->header.access_mode == BRW_ALIGN_16); 798 799 assert(dest.file == 
BRW_GENERAL_REGISTER_FILE || 800 dest.file == BRW_MESSAGE_REGISTER_FILE); 801 assert(dest.nr < 128); 802 assert(dest.address_mode == BRW_ADDRESS_DIRECT); 803 assert(dest.type == BRW_REGISTER_TYPE_F || 804 dest.type == BRW_REGISTER_TYPE_D || 805 dest.type == BRW_REGISTER_TYPE_UD); 806 insn->bits1.da3src.dest_reg_file = (dest.file == BRW_MESSAGE_REGISTER_FILE); 807 insn->bits1.da3src.dest_reg_nr = dest.nr; 808 insn->bits1.da3src.dest_subreg_nr = dest.subnr / 16; 809 insn->bits1.da3src.dest_writemask = dest.dw1.bits.writemask; 810 guess_execution_size(p, insn, dest); 811 812 assert(src0.file == BRW_GENERAL_REGISTER_FILE); 813 assert(src0.address_mode == BRW_ADDRESS_DIRECT); 814 assert(src0.nr < 128); 815 insn->bits2.da3src.src0_swizzle = src0.dw1.bits.swizzle; 816 insn->bits2.da3src.src0_subreg_nr = get_3src_subreg_nr(src0); 817 insn->bits2.da3src.src0_reg_nr = src0.nr; 818 insn->bits1.da3src.src0_abs = src0.abs; 819 insn->bits1.da3src.src0_negate = src0.negate; 820 insn->bits2.da3src.src0_rep_ctrl = src0.vstride == BRW_VERTICAL_STRIDE_0; 821 822 assert(src1.file == BRW_GENERAL_REGISTER_FILE); 823 assert(src1.address_mode == BRW_ADDRESS_DIRECT); 824 assert(src1.nr < 128); 825 insn->bits2.da3src.src1_swizzle = src1.dw1.bits.swizzle; 826 insn->bits2.da3src.src1_subreg_nr_low = get_3src_subreg_nr(src1) & 0x3; 827 insn->bits3.da3src.src1_subreg_nr_high = get_3src_subreg_nr(src1) >> 2; 828 insn->bits2.da3src.src1_rep_ctrl = src1.vstride == BRW_VERTICAL_STRIDE_0; 829 insn->bits3.da3src.src1_reg_nr = src1.nr; 830 insn->bits1.da3src.src1_abs = src1.abs; 831 insn->bits1.da3src.src1_negate = src1.negate; 832 833 assert(src2.file == BRW_GENERAL_REGISTER_FILE); 834 assert(src2.address_mode == BRW_ADDRESS_DIRECT); 835 assert(src2.nr < 128); 836 insn->bits3.da3src.src2_swizzle = src2.dw1.bits.swizzle; 837 insn->bits3.da3src.src2_subreg_nr = get_3src_subreg_nr(src2); 838 insn->bits3.da3src.src2_rep_ctrl = src2.vstride == BRW_VERTICAL_STRIDE_0; 839 insn->bits3.da3src.src2_reg_nr = 
src2.nr; 840 insn->bits1.da3src.src2_abs = src2.abs; 841 insn->bits1.da3src.src2_negate = src2.negate; 842 843 if (brw->gen >= 7) { 844 /* Set both the source and destination types based on dest.type, 845 * ignoring the source register types. The MAD and LRP emitters ensure 846 * that all four types are float. The BFE and BFI2 emitters, however, 847 * may send us mixed D and UD types and want us to ignore that and use 848 * the destination type. 849 */ 850 switch (dest.type) { 851 case BRW_REGISTER_TYPE_F: 852 insn->bits1.da3src.src_type = BRW_3SRC_TYPE_F; 853 insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_F; 854 break; 855 case BRW_REGISTER_TYPE_D: 856 insn->bits1.da3src.src_type = BRW_3SRC_TYPE_D; 857 insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_D; 858 break; 859 case BRW_REGISTER_TYPE_UD: 860 insn->bits1.da3src.src_type = BRW_3SRC_TYPE_UD; 861 insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_UD; 862 break; 863 } 864 } 865 866 return insn; 867} 868 869 870/*********************************************************************** 871 * Convenience routines. 
872 */ 873#define ALU1(OP) \ 874struct brw_instruction *brw_##OP(struct brw_compile *p, \ 875 struct brw_reg dest, \ 876 struct brw_reg src0) \ 877{ \ 878 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \ 879} 880 881#define ALU2(OP) \ 882struct brw_instruction *brw_##OP(struct brw_compile *p, \ 883 struct brw_reg dest, \ 884 struct brw_reg src0, \ 885 struct brw_reg src1) \ 886{ \ 887 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \ 888} 889 890#define ALU3(OP) \ 891struct brw_instruction *brw_##OP(struct brw_compile *p, \ 892 struct brw_reg dest, \ 893 struct brw_reg src0, \ 894 struct brw_reg src1, \ 895 struct brw_reg src2) \ 896{ \ 897 return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \ 898} 899 900#define ALU3F(OP) \ 901struct brw_instruction *brw_##OP(struct brw_compile *p, \ 902 struct brw_reg dest, \ 903 struct brw_reg src0, \ 904 struct brw_reg src1, \ 905 struct brw_reg src2) \ 906{ \ 907 assert(dest.type == BRW_REGISTER_TYPE_F); \ 908 assert(src0.type == BRW_REGISTER_TYPE_F); \ 909 assert(src1.type == BRW_REGISTER_TYPE_F); \ 910 assert(src2.type == BRW_REGISTER_TYPE_F); \ 911 return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \ 912} 913 914/* Rounding operations (other than RNDD) require two instructions - the first 915 * stores a rounded value (possibly the wrong way) in the dest register, but 916 * also sets a per-channel "increment bit" in the flag register. A predicated 917 * add of 1.0 fixes dest to contain the desired result. 918 * 919 * Sandybridge and later appear to round correctly without an ADD. 
920 */ 921#define ROUND(OP) \ 922void brw_##OP(struct brw_compile *p, \ 923 struct brw_reg dest, \ 924 struct brw_reg src) \ 925{ \ 926 struct brw_instruction *rnd, *add; \ 927 rnd = next_insn(p, BRW_OPCODE_##OP); \ 928 brw_set_dest(p, rnd, dest); \ 929 brw_set_src0(p, rnd, src); \ 930 \ 931 if (p->brw->gen < 6) { \ 932 /* turn on round-increments */ \ 933 rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \ 934 add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \ 935 add->header.predicate_control = BRW_PREDICATE_NORMAL; \ 936 } \ 937} 938 939 940ALU1(MOV) 941ALU2(SEL) 942ALU1(NOT) 943ALU2(AND) 944ALU2(OR) 945ALU2(XOR) 946ALU2(SHR) 947ALU2(SHL) 948ALU2(ASR) 949ALU1(F32TO16) 950ALU1(F16TO32) 951ALU1(FRC) 952ALU1(RNDD) 953ALU2(MAC) 954ALU2(MACH) 955ALU1(LZD) 956ALU2(DP4) 957ALU2(DPH) 958ALU2(DP3) 959ALU2(DP2) 960ALU2(LINE) 961ALU2(PLN) 962ALU3F(MAD) 963ALU3F(LRP) 964ALU1(BFREV) 965ALU3(BFE) 966ALU2(BFI1) 967ALU3(BFI2) 968ALU1(FBH) 969ALU1(FBL) 970ALU1(CBIT) 971 972ROUND(RNDZ) 973ROUND(RNDE) 974 975 976struct brw_instruction *brw_ADD(struct brw_compile *p, 977 struct brw_reg dest, 978 struct brw_reg src0, 979 struct brw_reg src1) 980{ 981 /* 6.2.2: add */ 982 if (src0.type == BRW_REGISTER_TYPE_F || 983 (src0.file == BRW_IMMEDIATE_VALUE && 984 src0.type == BRW_REGISTER_TYPE_VF)) { 985 assert(src1.type != BRW_REGISTER_TYPE_UD); 986 assert(src1.type != BRW_REGISTER_TYPE_D); 987 } 988 989 if (src1.type == BRW_REGISTER_TYPE_F || 990 (src1.file == BRW_IMMEDIATE_VALUE && 991 src1.type == BRW_REGISTER_TYPE_VF)) { 992 assert(src0.type != BRW_REGISTER_TYPE_UD); 993 assert(src0.type != BRW_REGISTER_TYPE_D); 994 } 995 996 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1); 997} 998 999struct brw_instruction *brw_AVG(struct brw_compile *p, 1000 struct brw_reg dest, 1001 struct brw_reg src0, 1002 struct brw_reg src1) 1003{ 1004 assert(dest.type == src0.type); 1005 assert(src0.type == src1.type); 1006 switch (src0.type) { 1007 case BRW_REGISTER_TYPE_B: 1008 case 
BRW_REGISTER_TYPE_UB: 1009 case BRW_REGISTER_TYPE_W: 1010 case BRW_REGISTER_TYPE_UW: 1011 case BRW_REGISTER_TYPE_D: 1012 case BRW_REGISTER_TYPE_UD: 1013 break; 1014 default: 1015 assert(!"Bad type for brw_AVG"); 1016 } 1017 1018 return brw_alu2(p, BRW_OPCODE_AVG, dest, src0, src1); 1019} 1020 1021struct brw_instruction *brw_MUL(struct brw_compile *p, 1022 struct brw_reg dest, 1023 struct brw_reg src0, 1024 struct brw_reg src1) 1025{ 1026 /* 6.32.38: mul */ 1027 if (src0.type == BRW_REGISTER_TYPE_D || 1028 src0.type == BRW_REGISTER_TYPE_UD || 1029 src1.type == BRW_REGISTER_TYPE_D || 1030 src1.type == BRW_REGISTER_TYPE_UD) { 1031 assert(dest.type != BRW_REGISTER_TYPE_F); 1032 } 1033 1034 if (src0.type == BRW_REGISTER_TYPE_F || 1035 (src0.file == BRW_IMMEDIATE_VALUE && 1036 src0.type == BRW_REGISTER_TYPE_VF)) { 1037 assert(src1.type != BRW_REGISTER_TYPE_UD); 1038 assert(src1.type != BRW_REGISTER_TYPE_D); 1039 } 1040 1041 if (src1.type == BRW_REGISTER_TYPE_F || 1042 (src1.file == BRW_IMMEDIATE_VALUE && 1043 src1.type == BRW_REGISTER_TYPE_VF)) { 1044 assert(src0.type != BRW_REGISTER_TYPE_UD); 1045 assert(src0.type != BRW_REGISTER_TYPE_D); 1046 } 1047 1048 assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE || 1049 src0.nr != BRW_ARF_ACCUMULATOR); 1050 assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE || 1051 src1.nr != BRW_ARF_ACCUMULATOR); 1052 1053 return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1); 1054} 1055 1056 1057void brw_NOP(struct brw_compile *p) 1058{ 1059 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP); 1060 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 1061 brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 1062 brw_set_src1(p, insn, brw_imm_ud(0x0)); 1063} 1064 1065 1066 1067 1068 1069/*********************************************************************** 1070 * Comparisons, if/else/endif 1071 */ 1072 1073struct brw_instruction *brw_JMPI(struct brw_compile *p, 1074 struct brw_reg dest, 1075 
struct brw_reg src0, 1076 struct brw_reg src1) 1077{ 1078 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); 1079 1080 insn->header.execution_size = 1; 1081 insn->header.compression_control = BRW_COMPRESSION_NONE; 1082 insn->header.mask_control = BRW_MASK_DISABLE; 1083 1084 p->current->header.predicate_control = BRW_PREDICATE_NONE; 1085 1086 return insn; 1087} 1088 1089static void 1090push_if_stack(struct brw_compile *p, struct brw_instruction *inst) 1091{ 1092 p->if_stack[p->if_stack_depth] = inst - p->store; 1093 1094 p->if_stack_depth++; 1095 if (p->if_stack_array_size <= p->if_stack_depth) { 1096 p->if_stack_array_size *= 2; 1097 p->if_stack = reralloc(p->mem_ctx, p->if_stack, int, 1098 p->if_stack_array_size); 1099 } 1100} 1101 1102static struct brw_instruction * 1103pop_if_stack(struct brw_compile *p) 1104{ 1105 p->if_stack_depth--; 1106 return &p->store[p->if_stack[p->if_stack_depth]]; 1107} 1108 1109static void 1110push_loop_stack(struct brw_compile *p, struct brw_instruction *inst) 1111{ 1112 if (p->loop_stack_array_size < p->loop_stack_depth) { 1113 p->loop_stack_array_size *= 2; 1114 p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int, 1115 p->loop_stack_array_size); 1116 p->if_depth_in_loop = reralloc(p->mem_ctx, p->if_depth_in_loop, int, 1117 p->loop_stack_array_size); 1118 } 1119 1120 p->loop_stack[p->loop_stack_depth] = inst - p->store; 1121 p->loop_stack_depth++; 1122 p->if_depth_in_loop[p->loop_stack_depth] = 0; 1123} 1124 1125static struct brw_instruction * 1126get_inner_do_insn(struct brw_compile *p) 1127{ 1128 return &p->store[p->loop_stack[p->loop_stack_depth - 1]]; 1129} 1130 1131/* EU takes the value from the flag register and pushes it onto some 1132 * sort of a stack (presumably merging with any flag value already on 1133 * the stack). Within an if block, the flags at the top of the stack 1134 * control execution on each channel of the unit, eg. on each of the 1135 * 16 pixel values in our wm programs. 
1136 * 1137 * When the matching 'else' instruction is reached (presumably by 1138 * countdown of the instruction count patched in by our ELSE/ENDIF 1139 * functions), the relevent flags are inverted. 1140 * 1141 * When the matching 'endif' instruction is reached, the flags are 1142 * popped off. If the stack is now empty, normal execution resumes. 1143 */ 1144struct brw_instruction * 1145brw_IF(struct brw_compile *p, GLuint execute_size) 1146{ 1147 struct brw_context *brw = p->brw; 1148 struct brw_instruction *insn; 1149 1150 insn = next_insn(p, BRW_OPCODE_IF); 1151 1152 /* Override the defaults for this instruction: 1153 */ 1154 if (brw->gen < 6) { 1155 brw_set_dest(p, insn, brw_ip_reg()); 1156 brw_set_src0(p, insn, brw_ip_reg()); 1157 brw_set_src1(p, insn, brw_imm_d(0x0)); 1158 } else if (brw->gen == 6) { 1159 brw_set_dest(p, insn, brw_imm_w(0)); 1160 insn->bits1.branch_gen6.jump_count = 0; 1161 brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D))); 1162 brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D))); 1163 } else { 1164 brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D))); 1165 brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D))); 1166 brw_set_src1(p, insn, brw_imm_ud(0)); 1167 insn->bits3.break_cont.jip = 0; 1168 insn->bits3.break_cont.uip = 0; 1169 } 1170 1171 insn->header.execution_size = execute_size; 1172 insn->header.compression_control = BRW_COMPRESSION_NONE; 1173 insn->header.predicate_control = BRW_PREDICATE_NORMAL; 1174 insn->header.mask_control = BRW_MASK_ENABLE; 1175 if (!p->single_program_flow) 1176 insn->header.thread_control = BRW_THREAD_SWITCH; 1177 1178 p->current->header.predicate_control = BRW_PREDICATE_NONE; 1179 1180 push_if_stack(p, insn); 1181 p->if_depth_in_loop[p->loop_stack_depth]++; 1182 return insn; 1183} 1184 1185/* This function is only used for gen6-style IF instructions with an 1186 * embedded comparison (conditional modifier). 
It is not used on gen7. 1187 */ 1188struct brw_instruction * 1189gen6_IF(struct brw_compile *p, uint32_t conditional, 1190 struct brw_reg src0, struct brw_reg src1) 1191{ 1192 struct brw_instruction *insn; 1193 1194 insn = next_insn(p, BRW_OPCODE_IF); 1195 1196 brw_set_dest(p, insn, brw_imm_w(0)); 1197 if (p->compressed) { 1198 insn->header.execution_size = BRW_EXECUTE_16; 1199 } else { 1200 insn->header.execution_size = BRW_EXECUTE_8; 1201 } 1202 insn->bits1.branch_gen6.jump_count = 0; 1203 brw_set_src0(p, insn, src0); 1204 brw_set_src1(p, insn, src1); 1205 1206 assert(insn->header.compression_control == BRW_COMPRESSION_NONE); 1207 assert(insn->header.predicate_control == BRW_PREDICATE_NONE); 1208 insn->header.destreg__conditionalmod = conditional; 1209 1210 if (!p->single_program_flow) 1211 insn->header.thread_control = BRW_THREAD_SWITCH; 1212 1213 push_if_stack(p, insn); 1214 return insn; 1215} 1216 1217/** 1218 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs. 1219 */ 1220static void 1221convert_IF_ELSE_to_ADD(struct brw_compile *p, 1222 struct brw_instruction *if_inst, 1223 struct brw_instruction *else_inst) 1224{ 1225 /* The next instruction (where the ENDIF would be, if it existed) */ 1226 struct brw_instruction *next_inst = &p->store[p->nr_insn]; 1227 1228 assert(p->single_program_flow); 1229 assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF); 1230 assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE); 1231 assert(if_inst->header.execution_size == BRW_EXECUTE_1); 1232 1233 /* Convert IF to an ADD instruction that moves the instruction pointer 1234 * to the first instruction of the ELSE block. If there is no ELSE 1235 * block, point to where ENDIF would be. Reverse the predicate. 1236 * 1237 * There's no need to execute an ENDIF since we don't need to do any 1238 * stack operations, and if we're currently executing, we just want to 1239 * continue normally. 
1240 */ 1241 if_inst->header.opcode = BRW_OPCODE_ADD; 1242 if_inst->header.predicate_inverse = 1; 1243 1244 if (else_inst != NULL) { 1245 /* Convert ELSE to an ADD instruction that points where the ENDIF 1246 * would be. 1247 */ 1248 else_inst->header.opcode = BRW_OPCODE_ADD; 1249 1250 if_inst->bits3.ud = (else_inst - if_inst + 1) * 16; 1251 else_inst->bits3.ud = (next_inst - else_inst) * 16; 1252 } else { 1253 if_inst->bits3.ud = (next_inst - if_inst) * 16; 1254 } 1255} 1256 1257/** 1258 * Patch IF and ELSE instructions with appropriate jump targets. 1259 */ 1260static void 1261patch_IF_ELSE(struct brw_compile *p, 1262 struct brw_instruction *if_inst, 1263 struct brw_instruction *else_inst, 1264 struct brw_instruction *endif_inst) 1265{ 1266 struct brw_context *brw = p->brw; 1267 1268 /* We shouldn't be patching IF and ELSE instructions in single program flow 1269 * mode when gen < 6, because in single program flow mode on those 1270 * platforms, we convert flow control instructions to conditional ADDs that 1271 * operate on IP (see brw_ENDIF). 1272 * 1273 * However, on Gen6, writing to IP doesn't work in single program flow mode 1274 * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may 1275 * not be updated by non-flow control instructions."). And on later 1276 * platforms, there is no significant benefit to converting control flow 1277 * instructions to conditional ADDs. So we do patch IF and ELSE 1278 * instructions in single program flow mode on those platforms. 1279 */ 1280 if (brw->gen < 6) 1281 assert(!p->single_program_flow); 1282 1283 assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF); 1284 assert(endif_inst != NULL); 1285 assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE); 1286 1287 unsigned br = 1; 1288 /* Jump count is for 64bit data chunk each, so one 128bit instruction 1289 * requires 2 chunks. 
1290 */ 1291 if (brw->gen >= 5) 1292 br = 2; 1293 1294 assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF); 1295 endif_inst->header.execution_size = if_inst->header.execution_size; 1296 1297 if (else_inst == NULL) { 1298 /* Patch IF -> ENDIF */ 1299 if (brw->gen < 6) { 1300 /* Turn it into an IFF, which means no mask stack operations for 1301 * all-false and jumping past the ENDIF. 1302 */ 1303 if_inst->header.opcode = BRW_OPCODE_IFF; 1304 if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1); 1305 if_inst->bits3.if_else.pop_count = 0; 1306 if_inst->bits3.if_else.pad0 = 0; 1307 } else if (brw->gen == 6) { 1308 /* As of gen6, there is no IFF and IF must point to the ENDIF. */ 1309 if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst); 1310 } else { 1311 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst); 1312 if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst); 1313 } 1314 } else { 1315 else_inst->header.execution_size = if_inst->header.execution_size; 1316 1317 /* Patch IF -> ELSE */ 1318 if (brw->gen < 6) { 1319 if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst); 1320 if_inst->bits3.if_else.pop_count = 0; 1321 if_inst->bits3.if_else.pad0 = 0; 1322 } else if (brw->gen == 6) { 1323 if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1); 1324 } 1325 1326 /* Patch ELSE -> ENDIF */ 1327 if (brw->gen < 6) { 1328 /* BRW_OPCODE_ELSE pre-gen6 should point just past the 1329 * matching ENDIF. 1330 */ 1331 else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1); 1332 else_inst->bits3.if_else.pop_count = 1; 1333 else_inst->bits3.if_else.pad0 = 0; 1334 } else if (brw->gen == 6) { 1335 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. 
*/ 1336 else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst); 1337 } else { 1338 /* The IF instruction's JIP should point just past the ELSE */ 1339 if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1); 1340 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */ 1341 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst); 1342 else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst); 1343 } 1344 } 1345} 1346 1347void 1348brw_ELSE(struct brw_compile *p) 1349{ 1350 struct brw_context *brw = p->brw; 1351 struct brw_instruction *insn; 1352 1353 insn = next_insn(p, BRW_OPCODE_ELSE); 1354 1355 if (brw->gen < 6) { 1356 brw_set_dest(p, insn, brw_ip_reg()); 1357 brw_set_src0(p, insn, brw_ip_reg()); 1358 brw_set_src1(p, insn, brw_imm_d(0x0)); 1359 } else if (brw->gen == 6) { 1360 brw_set_dest(p, insn, brw_imm_w(0)); 1361 insn->bits1.branch_gen6.jump_count = 0; 1362 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1363 brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1364 } else { 1365 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1366 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1367 brw_set_src1(p, insn, brw_imm_ud(0)); 1368 insn->bits3.break_cont.jip = 0; 1369 insn->bits3.break_cont.uip = 0; 1370 } 1371 1372 insn->header.compression_control = BRW_COMPRESSION_NONE; 1373 insn->header.mask_control = BRW_MASK_ENABLE; 1374 if (!p->single_program_flow) 1375 insn->header.thread_control = BRW_THREAD_SWITCH; 1376 1377 push_if_stack(p, insn); 1378} 1379 1380void 1381brw_ENDIF(struct brw_compile *p) 1382{ 1383 struct brw_context *brw = p->brw; 1384 struct brw_instruction *insn = NULL; 1385 struct brw_instruction *else_inst = NULL; 1386 struct brw_instruction *if_inst = NULL; 1387 struct brw_instruction *tmp; 1388 bool emit_endif = true; 1389 1390 /* In single program flow mode, we can express IF and ELSE instructions 1391 * equivalently as ADD 
instructions that operate on IP. On platforms prior 1392 * to Gen6, flow control instructions cause an implied thread switch, so 1393 * this is a significant savings. 1394 * 1395 * However, on Gen6, writing to IP doesn't work in single program flow mode 1396 * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may 1397 * not be updated by non-flow control instructions."). And on later 1398 * platforms, there is no significant benefit to converting control flow 1399 * instructions to conditional ADDs. So we only do this trick on Gen4 and 1400 * Gen5. 1401 */ 1402 if (brw->gen < 6 && p->single_program_flow) 1403 emit_endif = false; 1404 1405 /* 1406 * A single next_insn() may change the base adress of instruction store 1407 * memory(p->store), so call it first before referencing the instruction 1408 * store pointer from an index 1409 */ 1410 if (emit_endif) 1411 insn = next_insn(p, BRW_OPCODE_ENDIF); 1412 1413 /* Pop the IF and (optional) ELSE instructions from the stack */ 1414 p->if_depth_in_loop[p->loop_stack_depth]--; 1415 tmp = pop_if_stack(p); 1416 if (tmp->header.opcode == BRW_OPCODE_ELSE) { 1417 else_inst = tmp; 1418 tmp = pop_if_stack(p); 1419 } 1420 if_inst = tmp; 1421 1422 if (!emit_endif) { 1423 /* ENDIF is useless; don't bother emitting it. 
*/ 1424 convert_IF_ELSE_to_ADD(p, if_inst, else_inst); 1425 return; 1426 } 1427 1428 if (brw->gen < 6) { 1429 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 1430 brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 1431 brw_set_src1(p, insn, brw_imm_d(0x0)); 1432 } else if (brw->gen == 6) { 1433 brw_set_dest(p, insn, brw_imm_w(0)); 1434 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1435 brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1436 } else { 1437 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1438 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1439 brw_set_src1(p, insn, brw_imm_ud(0)); 1440 } 1441 1442 insn->header.compression_control = BRW_COMPRESSION_NONE; 1443 insn->header.mask_control = BRW_MASK_ENABLE; 1444 insn->header.thread_control = BRW_THREAD_SWITCH; 1445 1446 /* Also pop item off the stack in the endif instruction: */ 1447 if (brw->gen < 6) { 1448 insn->bits3.if_else.jump_count = 0; 1449 insn->bits3.if_else.pop_count = 1; 1450 insn->bits3.if_else.pad0 = 0; 1451 } else if (brw->gen == 6) { 1452 insn->bits1.branch_gen6.jump_count = 2; 1453 } else { 1454 insn->bits3.break_cont.jip = 2; 1455 } 1456 patch_IF_ELSE(p, if_inst, else_inst, insn); 1457} 1458 1459struct brw_instruction *brw_BREAK(struct brw_compile *p) 1460{ 1461 struct brw_context *brw = p->brw; 1462 struct brw_instruction *insn; 1463 1464 insn = next_insn(p, BRW_OPCODE_BREAK); 1465 if (brw->gen >= 6) { 1466 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1467 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1468 brw_set_src1(p, insn, brw_imm_d(0x0)); 1469 } else { 1470 brw_set_dest(p, insn, brw_ip_reg()); 1471 brw_set_src0(p, insn, brw_ip_reg()); 1472 brw_set_src1(p, insn, brw_imm_d(0x0)); 1473 insn->bits3.if_else.pad0 = 0; 1474 insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth]; 1475 } 1476 
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;

   return insn;
}

/* Gen6 CONTINUE: the jump targets are left zero here and filled in after
 * the loop body is emitted.
 */
struct brw_instruction *gen6_CONT(struct brw_compile *p)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_CONTINUE);
   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   /* NOTE(review): the two calls above appear to be dead stores -- the
    * ip-reg calls below immediately overwrite the same fields.  Confirm
    * against the intended gen6 CONTINUE encoding before removing.
    */
   brw_set_dest(p, insn, brw_ip_reg());
   brw_set_src0(p, insn, brw_ip_reg());
   brw_set_src1(p, insn, brw_imm_d(0x0));

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;
   return insn;
}

/* Pre-gen6 CONTINUE: jump_count is left zero and patched later by
 * brw_patch_break_cont(); pop_count pops the IF entries opened inside the
 * current loop.
 */
struct brw_instruction *brw_CONT(struct brw_compile *p)
{
   struct brw_instruction *insn;
   insn = next_insn(p, BRW_OPCODE_CONTINUE);
   brw_set_dest(p, insn, brw_ip_reg());
   brw_set_src0(p, insn, brw_ip_reg());
   brw_set_src1(p, insn, brw_imm_d(0x0));
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;
   /* insn->header.mask_control = BRW_MASK_DISABLE; */
   insn->bits3.if_else.pad0 = 0;
   insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
   return insn;
}

struct brw_instruction *gen6_HALT(struct brw_compile *p)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_HALT);
   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later.
*/ 1521 1522 if (p->compressed) { 1523 insn->header.execution_size = BRW_EXECUTE_16; 1524 } else { 1525 insn->header.compression_control = BRW_COMPRESSION_NONE; 1526 insn->header.execution_size = BRW_EXECUTE_8; 1527 } 1528 return insn; 1529} 1530 1531/* DO/WHILE loop: 1532 * 1533 * The DO/WHILE is just an unterminated loop -- break or continue are 1534 * used for control within the loop. We have a few ways they can be 1535 * done. 1536 * 1537 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip, 1538 * jip and no DO instruction. 1539 * 1540 * For non-uniform control flow pre-gen6, there's a DO instruction to 1541 * push the mask, and a WHILE to jump back, and BREAK to get out and 1542 * pop the mask. 1543 * 1544 * For gen6, there's no more mask stack, so no need for DO. WHILE 1545 * just points back to the first instruction of the loop. 1546 */ 1547struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) 1548{ 1549 struct brw_context *brw = p->brw; 1550 1551 if (brw->gen >= 6 || p->single_program_flow) { 1552 push_loop_stack(p, &p->store[p->nr_insn]); 1553 return &p->store[p->nr_insn]; 1554 } else { 1555 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO); 1556 1557 push_loop_stack(p, insn); 1558 1559 /* Override the defaults for this instruction: 1560 */ 1561 brw_set_dest(p, insn, brw_null_reg()); 1562 brw_set_src0(p, insn, brw_null_reg()); 1563 brw_set_src1(p, insn, brw_null_reg()); 1564 1565 insn->header.compression_control = BRW_COMPRESSION_NONE; 1566 insn->header.execution_size = execute_size; 1567 insn->header.predicate_control = BRW_PREDICATE_NONE; 1568 /* insn->header.mask_control = BRW_MASK_ENABLE; */ 1569 /* insn->header.mask_control = BRW_MASK_DISABLE; */ 1570 1571 return insn; 1572 } 1573} 1574 1575/** 1576 * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE 1577 * instruction here. 
1578 * 1579 * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop 1580 * nesting, since it can always just point to the end of the block/current loop. 1581 */ 1582static void 1583brw_patch_break_cont(struct brw_compile *p, struct brw_instruction *while_inst) 1584{ 1585 struct brw_context *brw = p->brw; 1586 struct brw_instruction *do_inst = get_inner_do_insn(p); 1587 struct brw_instruction *inst; 1588 int br = (brw->gen == 5) ? 2 : 1; 1589 1590 for (inst = while_inst - 1; inst != do_inst; inst--) { 1591 /* If the jump count is != 0, that means that this instruction has already 1592 * been patched because it's part of a loop inside of the one we're 1593 * patching. 1594 */ 1595 if (inst->header.opcode == BRW_OPCODE_BREAK && 1596 inst->bits3.if_else.jump_count == 0) { 1597 inst->bits3.if_else.jump_count = br * ((while_inst - inst) + 1); 1598 } else if (inst->header.opcode == BRW_OPCODE_CONTINUE && 1599 inst->bits3.if_else.jump_count == 0) { 1600 inst->bits3.if_else.jump_count = br * (while_inst - inst); 1601 } 1602 } 1603} 1604 1605struct brw_instruction *brw_WHILE(struct brw_compile *p) 1606{ 1607 struct brw_context *brw = p->brw; 1608 struct brw_instruction *insn, *do_insn; 1609 GLuint br = 1; 1610 1611 if (brw->gen >= 5) 1612 br = 2; 1613 1614 if (brw->gen >= 7) { 1615 insn = next_insn(p, BRW_OPCODE_WHILE); 1616 do_insn = get_inner_do_insn(p); 1617 1618 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1619 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1620 brw_set_src1(p, insn, brw_imm_ud(0)); 1621 insn->bits3.break_cont.jip = br * (do_insn - insn); 1622 1623 insn->header.execution_size = BRW_EXECUTE_8; 1624 } else if (brw->gen == 6) { 1625 insn = next_insn(p, BRW_OPCODE_WHILE); 1626 do_insn = get_inner_do_insn(p); 1627 1628 brw_set_dest(p, insn, brw_imm_w(0)); 1629 insn->bits1.branch_gen6.jump_count = br * (do_insn - insn); 1630 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 
1631 brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1632 1633 insn->header.execution_size = BRW_EXECUTE_8; 1634 } else { 1635 if (p->single_program_flow) { 1636 insn = next_insn(p, BRW_OPCODE_ADD); 1637 do_insn = get_inner_do_insn(p); 1638 1639 brw_set_dest(p, insn, brw_ip_reg()); 1640 brw_set_src0(p, insn, brw_ip_reg()); 1641 brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16)); 1642 insn->header.execution_size = BRW_EXECUTE_1; 1643 } else { 1644 insn = next_insn(p, BRW_OPCODE_WHILE); 1645 do_insn = get_inner_do_insn(p); 1646 1647 assert(do_insn->header.opcode == BRW_OPCODE_DO); 1648 1649 brw_set_dest(p, insn, brw_ip_reg()); 1650 brw_set_src0(p, insn, brw_ip_reg()); 1651 brw_set_src1(p, insn, brw_imm_d(0)); 1652 1653 insn->header.execution_size = do_insn->header.execution_size; 1654 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1); 1655 insn->bits3.if_else.pop_count = 0; 1656 insn->bits3.if_else.pad0 = 0; 1657 1658 brw_patch_break_cont(p, insn); 1659 } 1660 } 1661 insn->header.compression_control = BRW_COMPRESSION_NONE; 1662 p->current->header.predicate_control = BRW_PREDICATE_NONE; 1663 1664 p->loop_stack_depth--; 1665 1666 return insn; 1667} 1668 1669 1670/* FORWARD JUMPS: 1671 */ 1672void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx) 1673{ 1674 struct brw_context *brw = p->brw; 1675 struct brw_instruction *jmp_insn = &p->store[jmp_insn_idx]; 1676 GLuint jmpi = 1; 1677 1678 if (brw->gen >= 5) 1679 jmpi = 2; 1680 1681 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); 1682 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE); 1683 1684 jmp_insn->bits3.ud = jmpi * (p->nr_insn - jmp_insn_idx - 1); 1685} 1686 1687 1688 1689/* To integrate with the above, it makes sense that the comparison 1690 * instruction should populate the flag register. It might be simpler 1691 * just to use the flag reg for most WM tasks? 
1692 */ 1693void brw_CMP(struct brw_compile *p, 1694 struct brw_reg dest, 1695 GLuint conditional, 1696 struct brw_reg src0, 1697 struct brw_reg src1) 1698{ 1699 struct brw_context *brw = p->brw; 1700 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP); 1701 1702 insn->header.destreg__conditionalmod = conditional; 1703 brw_set_dest(p, insn, dest); 1704 brw_set_src0(p, insn, src0); 1705 brw_set_src1(p, insn, src1); 1706 1707/* guess_execution_size(insn, src0); */ 1708 1709 1710 /* Make it so that future instructions will use the computed flag 1711 * value until brw_set_predicate_control_flag_value() is called 1712 * again. 1713 */ 1714 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE && 1715 dest.nr == 0) { 1716 p->current->header.predicate_control = BRW_PREDICATE_NORMAL; 1717 p->flag_value = 0xff; 1718 } 1719 1720 /* Item WaCMPInstNullDstForcesThreadSwitch in the Haswell Bspec workarounds 1721 * page says: 1722 * "Any CMP instruction with a null destination must use a {switch}." 1723 * 1724 * It also applies to other Gen7 platforms (IVB, BYT) even though it isn't 1725 * mentioned on their work-arounds pages. 1726 */ 1727 if (brw->gen == 7) { 1728 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE && 1729 dest.nr == BRW_ARF_NULL) { 1730 insn->header.thread_control = BRW_THREAD_SWITCH; 1731 } 1732 } 1733} 1734 1735/* Issue 'wait' instruction for n1, host could program MMIO 1736 to wake up thread. 
 */
void brw_WAIT (struct brw_compile *p)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
   /* Wait on notification register n1; it is both dest and src0. */
   struct brw_reg src = brw_notification_1_reg();

   brw_set_dest(p, insn, src);
   brw_set_src0(p, insn, src);
   brw_set_src1(p, insn, brw_null_reg());
   /* WAIT must be scalar, unpredicated, and uncompressed. */
   insn->header.execution_size = 0; /* must */
   insn->header.predicate_control = 0;
   insn->header.compression_control = 0;
}


/***********************************************************************
 * Helpers for the various SEND message types:
 */

/** Extended math function, float[8].
 */
void brw_math( struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       GLuint msg_reg_nr,
	       struct brw_reg src,
	       GLuint data_type,
	       GLuint precision )
{
   struct brw_context *brw = p->brw;

   /* Gen6+: math is a native instruction; earlier gens use a SEND to the
    * extended-math shared function (else branch below).
    */
   if (brw->gen >= 6) {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

      assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
             (brw->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
      assert(src.file == BRW_GENERAL_REGISTER_FILE);

      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
      if (brw->gen == 6)
	 assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);

      /* Source modifiers are ignored for extended math instructions on Gen6. */
      if (brw->gen == 6) {
	 assert(!src.negate);
	 assert(!src.abs);
      }

      /* Integer-divide functions take integer sources; all others float. */
      if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
	  function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
	  function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
	 assert(src.type != BRW_REGISTER_TYPE_F);
      } else {
	 assert(src.type == BRW_REGISTER_TYPE_F);
      }

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1794 */ 1795 insn->header.destreg__conditionalmod = function; 1796 1797 brw_set_dest(p, insn, dest); 1798 brw_set_src0(p, insn, src); 1799 brw_set_src1(p, insn, brw_null_reg()); 1800 } else { 1801 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1802 1803 /* Example code doesn't set predicate_control for send 1804 * instructions. 1805 */ 1806 insn->header.predicate_control = 0; 1807 insn->header.destreg__conditionalmod = msg_reg_nr; 1808 1809 brw_set_dest(p, insn, dest); 1810 brw_set_src0(p, insn, src); 1811 brw_set_math_message(p, 1812 insn, 1813 function, 1814 src.type == BRW_REGISTER_TYPE_D, 1815 precision, 1816 data_type); 1817 } 1818} 1819 1820/** Extended math function, float[8]. 1821 */ 1822void brw_math2(struct brw_compile *p, 1823 struct brw_reg dest, 1824 GLuint function, 1825 struct brw_reg src0, 1826 struct brw_reg src1) 1827{ 1828 struct brw_context *brw = p->brw; 1829 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH); 1830 1831 assert(dest.file == BRW_GENERAL_REGISTER_FILE || 1832 (brw->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE)); 1833 assert(src0.file == BRW_GENERAL_REGISTER_FILE); 1834 assert(src1.file == BRW_GENERAL_REGISTER_FILE); 1835 1836 assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1); 1837 if (brw->gen == 6) { 1838 assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1); 1839 assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1); 1840 } 1841 1842 if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT || 1843 function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER || 1844 function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) { 1845 assert(src0.type != BRW_REGISTER_TYPE_F); 1846 assert(src1.type != BRW_REGISTER_TYPE_F); 1847 } else { 1848 assert(src0.type == BRW_REGISTER_TYPE_F); 1849 assert(src1.type == BRW_REGISTER_TYPE_F); 1850 } 1851 1852 /* Source modifiers are ignored for extended math instructions on Gen6. 
*/ 1853 if (brw->gen == 6) { 1854 assert(!src0.negate); 1855 assert(!src0.abs); 1856 assert(!src1.negate); 1857 assert(!src1.abs); 1858 } 1859 1860 /* Math is the same ISA format as other opcodes, except that CondModifier 1861 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. 1862 */ 1863 insn->header.destreg__conditionalmod = function; 1864 1865 brw_set_dest(p, insn, dest); 1866 brw_set_src0(p, insn, src0); 1867 brw_set_src1(p, insn, src1); 1868} 1869 1870 1871/** 1872 * Write a block of OWORDs (half a GRF each) from the scratch buffer, 1873 * using a constant offset per channel. 1874 * 1875 * The offset must be aligned to oword size (16 bytes). Used for 1876 * register spilling. 1877 */ 1878void brw_oword_block_write_scratch(struct brw_compile *p, 1879 struct brw_reg mrf, 1880 int num_regs, 1881 GLuint offset) 1882{ 1883 struct brw_context *brw = p->brw; 1884 uint32_t msg_control, msg_type; 1885 int mlen; 1886 1887 if (brw->gen >= 6) 1888 offset /= 16; 1889 1890 mrf = retype(mrf, BRW_REGISTER_TYPE_UD); 1891 1892 if (num_regs == 1) { 1893 msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS; 1894 mlen = 2; 1895 } else { 1896 msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS; 1897 mlen = 3; 1898 } 1899 1900 /* Set up the message header. This is g0, with g0.2 filled with 1901 * the offset. We don't want to leave our offset around in g0 or 1902 * it'll screw up texture samples, so set it up inside the message 1903 * reg. 
1904 */ 1905 { 1906 brw_push_insn_state(p); 1907 brw_set_mask_control(p, BRW_MASK_DISABLE); 1908 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1909 1910 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); 1911 1912 /* set message header global offset field (reg 0, element 2) */ 1913 brw_MOV(p, 1914 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 1915 mrf.nr, 1916 2), BRW_REGISTER_TYPE_UD), 1917 brw_imm_ud(offset)); 1918 1919 brw_pop_insn_state(p); 1920 } 1921 1922 { 1923 struct brw_reg dest; 1924 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1925 int send_commit_msg; 1926 struct brw_reg src_header = retype(brw_vec8_grf(0, 0), 1927 BRW_REGISTER_TYPE_UW); 1928 1929 if (insn->header.compression_control != BRW_COMPRESSION_NONE) { 1930 insn->header.compression_control = BRW_COMPRESSION_NONE; 1931 src_header = vec16(src_header); 1932 } 1933 assert(insn->header.predicate_control == BRW_PREDICATE_NONE); 1934 insn->header.destreg__conditionalmod = mrf.nr; 1935 1936 /* Until gen6, writes followed by reads from the same location 1937 * are not guaranteed to be ordered unless write_commit is set. 1938 * If set, then a no-op write is issued to the destination 1939 * register to set a dependency, and a read from the destination 1940 * can be used to ensure the ordering. 1941 * 1942 * For gen6, only writes between different threads need ordering 1943 * protection. Our use of DP writes is all about register 1944 * spilling within a thread. 
1945 */ 1946 if (brw->gen >= 6) { 1947 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW); 1948 send_commit_msg = 0; 1949 } else { 1950 dest = src_header; 1951 send_commit_msg = 1; 1952 } 1953 1954 brw_set_dest(p, insn, dest); 1955 if (brw->gen >= 6) { 1956 brw_set_src0(p, insn, mrf); 1957 } else { 1958 brw_set_src0(p, insn, brw_null_reg()); 1959 } 1960 1961 if (brw->gen >= 6) 1962 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE; 1963 else 1964 msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE; 1965 1966 brw_set_dp_write_message(p, 1967 insn, 1968 255, /* binding table index (255=stateless) */ 1969 msg_control, 1970 msg_type, 1971 mlen, 1972 true, /* header_present */ 1973 0, /* not a render target */ 1974 send_commit_msg, /* response_length */ 1975 0, /* eot */ 1976 send_commit_msg); 1977 } 1978} 1979 1980 1981/** 1982 * Read a block of owords (half a GRF each) from the scratch buffer 1983 * using a constant index per channel. 1984 * 1985 * Offset must be aligned to oword size (16 bytes). Used for register 1986 * spilling. 
 */
void
brw_oword_block_read_scratch(struct brw_compile *p,
			     struct brw_reg dest,
			     struct brw_reg mrf,
			     int num_regs,
			     GLuint offset)
{
   struct brw_context *brw = p->brw;
   uint32_t msg_control;
   int rlen;

   /* On gen6+ the message takes the offset in owords rather than bytes
    * (see the matching comment in brw_oword_block_read).
    */
   if (brw->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
   dest = retype(dest, BRW_REGISTER_TYPE_UW);

   /* One GRF is two owords; response length follows the register count. */
   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      rlen = 1;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      rlen = 2;
   }

   /* Build the message header in the MRF: copy g0 and place the scratch
    * offset in element 2, leaving g0 itself untouched.
    */
   {
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_mask_control(p, BRW_MASK_DISABLE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
				  mrf.nr,
				  2), BRW_REGISTER_TYPE_UD),
	      brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      assert(insn->header.predicate_control == 0);
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.destreg__conditionalmod = mrf.nr;

      brw_set_dest(p, insn, dest);	/* UW? */
      if (brw->gen >= 6) {
	 brw_set_src0(p, insn, mrf);
      } else {
	 brw_set_src0(p, insn, brw_null_reg());
      }

      brw_set_dp_read_message(p,
			      insn,
			      255, /* binding table index (255=stateless) */
			      msg_control,
			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			      BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
			      1, /* msg_length */
			      true, /* header_present */
			      rlen);
   }
}

/**
 * Read a float[4] vector from the data port Data Cache (const buffer).
 * Location (in buffer) should be a multiple of 16.
 * Used for fetching shader constants.
 */
void brw_oword_block_read(struct brw_compile *p,
                          struct brw_reg dest,
                          struct brw_reg mrf,
                          uint32_t offset,
                          uint32_t bind_table_index)
{
   struct brw_context *brw = p->brw;

   /* On newer hardware, offset is in units of owords. */
   if (brw->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* Build the message header unpredicated, uncompressed, and unmasked
    * so it is written exactly once regardless of channel enables.
    */
   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);

   /* Header = r0 (thread payload) with the buffer offset patched in. */
   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

   /* set message header global offset field (reg 0, element 2) */
   brw_MOV(p,
           retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                               mrf.nr,
                               2), BRW_REGISTER_TYPE_UD),
           brw_imm_ud(offset));

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   /* For SEND, this field holds the base MRF of the message payload. */
   insn->header.destreg__conditionalmod = mrf.nr;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(p, insn, dest);
   if (brw->gen >= 6) {
      /* gen6+ takes the payload source explicitly; pre-gen6 sources it
       * implicitly from the MRF written above.
       */
      brw_set_src0(p, insn, mrf);
   } else {
      brw_set_src0(p, insn, brw_null_reg());
   }

   brw_set_dp_read_message(p,
                           insn,
                           bind_table_index,
                           BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
                           BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
                           BRW_DATAPORT_READ_TARGET_DATA_CACHE,
                           1, /* msg_length */
                           true, /* header_present */
                           1); /* response_length (1 reg, 2 owords!) */

   brw_pop_insn_state(p);
}


/**
 * Emit a render target write SEND message.
 *
 * The destination is the null register (writes return no data); its
 * width follows dispatch_width.  On gen6+ a SENDC opcode is used and
 * the color payload is sourced directly from the message registers;
 * pre-gen6 uses SEND with the implied-move MRF mechanism.
 */
void brw_fb_WRITE(struct brw_compile *p,
                  int dispatch_width,
                  GLuint msg_reg_nr,
                  struct brw_reg src0,
                  GLuint msg_control,
                  GLuint binding_table_index,
                  GLuint msg_length,
                  GLuint response_length,
                  bool eot,
                  bool header_present)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;
   GLuint msg_type;
   struct brw_reg dest;

   if (dispatch_width == 16)
      dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
   else
      dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);

   if (brw->gen >= 6) {
      insn = next_insn(p, BRW_OPCODE_SENDC);
   } else {
      insn = next_insn(p, BRW_OPCODE_SEND);
   }
   /* The execution mask is ignored for render target writes. */
   insn->header.predicate_control = 0;
   insn->header.compression_control = BRW_COMPRESSION_NONE;

   if (brw->gen >= 6) {
      /* headerless version, just submit color payload */
      src0 = brw_message_reg(msg_reg_nr);

      msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   } else {
      /* Pre-gen6: payload base MRF goes in the SEND descriptor field. */
      insn->header.destreg__conditionalmod = msg_reg_nr;

      msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   }

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   /* Note: eot doubles as the "last render target write" bit here. */
   brw_set_dp_write_message(p,
                            insn,
                            binding_table_index,
                            msg_control,
                            msg_type,
                            msg_length,
                            header_present,
                            eot, /* last render target write */
                            response_length,
                            eot,
                            0 /* send_commit_msg */);
}


/**
 * Texture sample instruction.
 * Note: the msg_type plus msg_length values determine exactly what kind
 * of sampling operation is performed.  See volume 4, page 161 of docs.
 */
void brw_SAMPLE(struct brw_compile *p,
                struct brw_reg dest,
                GLuint msg_reg_nr,
                struct brw_reg src0,
                GLuint binding_table_index,
                GLuint sampler,
                GLuint msg_type,
                GLuint response_length,
                GLuint msg_length,
                GLuint header_present,
                GLuint simd_mode,
                GLuint return_format)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   /* On gen6+ SEND requires an MRF/GRF payload source; this performs the
    * explicit move that older hardware did implicitly (no-op pre-gen6).
    */
   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.predicate_control = 0; /* XXX */
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   if (brw->gen < 6)
      /* Pre-gen6: payload base MRF goes in the SEND descriptor field. */
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_sampler_message(p, insn,
                           binding_table_index,
                           sampler,
                           msg_type,
                           response_length,
                           msg_length,
                           header_present,
                           simd_mode,
                           return_format);
}

/* All these variables are pretty confusing - we might be better off
 * using bitmasks and macros for this, in the old style.  Or perhaps
 * just having the caller instantiate the fields in dword3 itself.
 */
void brw_urb_WRITE(struct brw_compile *p,
                   struct brw_reg dest,
                   GLuint msg_reg_nr,
                   struct brw_reg src0,
                   enum brw_urb_write_flags flags,
                   GLuint msg_length,
                   GLuint response_length,
                   GLuint offset,
                   GLuint swizzle)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   /* Move src0 into the message register if needed (no-op pre-gen6). */
   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   if (brw->gen == 7 && !(flags & BRW_URB_WRITE_USE_CHANNEL_MASKS)) {
      /* Enable Channel Masks in the URB_WRITE_HWORD message header */
      brw_push_insn_state(p);
      brw_set_access_mode(p, BRW_ALIGN_1);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      /* OR 0xff00 into header dword 5 (taken from r0.5) to enable all
       * channel mask bits.
       */
      brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
                       BRW_REGISTER_TYPE_UD),
             retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
             brw_imm_ud(0xff00));
      brw_pop_insn_state(p);
   }

   insn = next_insn(p, BRW_OPCODE_SEND);

   assert(msg_length < BRW_MAX_MRF);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));

   if (brw->gen < 6)
      /* Pre-gen6: payload base MRF goes in the SEND descriptor field. */
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_urb_message(p,
                       insn,
                       flags,
                       msg_length,
                       response_length,
                       offset,
                       swizzle);
}

/* Return the byte offset of the instruction following the one at ip,
 * accounting for compacted (8-byte) vs. full (16-byte) encodings.
 */
static int
next_ip(struct brw_compile *p, int ip)
{
   struct brw_instruction *insn = (void *)p->store + ip;

   if (insn->header.cmpt_control)
      return ip + 8;
   else
      return ip + 16;
}

/* Scan forward from start for the instruction that ends the current
 * control-flow block (ENDIF/ELSE/WHILE/HALT).  Returns its byte offset,
 * or 0 if none is found before the end of the program.
 */
static int
brw_find_next_block_end(struct brw_compile *p, int start)
{
   int ip;
   void *store = p->store;

   for (ip = next_ip(p, start); ip < p->next_insn_offset; ip = next_ip(p, ip)) {
      struct brw_instruction *insn = store + ip;

      switch (insn->header.opcode) {
      case BRW_OPCODE_ENDIF:
      case BRW_OPCODE_ELSE:
      case BRW_OPCODE_WHILE:
      case BRW_OPCODE_HALT:
	 return ip;
      }
   }

   return 0;
}

/* There is no DO instruction on gen6, so to find the end of the loop
 * we have to see if the loop is jumping back before our start
 * instruction.
 */
static int
brw_find_loop_end(struct brw_compile *p, int start)
{
   struct brw_context *brw = p->brw;
   int ip;
   /* Jump offsets are encoded in units of 8 bytes. */
   int scale = 8;
   void *store = p->store;

   /* Always start after the instruction (such as a WHILE) we're trying to fix
    * up.
    */
   for (ip = next_ip(p, start); ip < p->next_insn_offset; ip = next_ip(p, ip)) {
      struct brw_instruction *insn = store + ip;

      if (insn->header.opcode == BRW_OPCODE_WHILE) {
	 /* gen6 encodes the backward jump in jump_count; gen7+ in jip. */
	 int jip = brw->gen == 6 ? insn->bits1.branch_gen6.jump_count
				 : insn->bits3.break_cont.jip;
	 /* A WHILE whose backward jump lands at or before start is the
	  * one closing the loop that contains start.
	  */
	 if (ip + jip * scale <= start)
	    return ip;
      }
   }
   assert(!"not reached");
   return start;
}

/* After program generation, go back and update the UIP and JIP of
 * BREAK, CONT, and HALT instructions to their correct locations.
 */
void
brw_set_uip_jip(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   int ip;
   /* JIP/UIP are stored in units of 8 bytes. */
   int scale = 8;
   void *store = p->store;

   /* Pre-gen6 uses a different (patched-at-emit-time) branch mechanism. */
   if (brw->gen < 6)
      return;

   for (ip = 0; ip < p->next_insn_offset; ip = next_ip(p, ip)) {
      struct brw_instruction *insn = store + ip;

      if (insn->header.cmpt_control) {
	 /* Fixups for compacted BREAK/CONTINUE not supported yet. */
	 assert(insn->header.opcode != BRW_OPCODE_BREAK &&
		insn->header.opcode != BRW_OPCODE_CONTINUE &&
		insn->header.opcode != BRW_OPCODE_HALT);
	 continue;
      }

      int block_end_ip = brw_find_next_block_end(p, ip);
      switch (insn->header.opcode) {
      case BRW_OPCODE_BREAK:
	 assert(block_end_ip != 0);
	 insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
	 /* Gen7 UIP points to WHILE; Gen6 points just after it */
	 insn->bits3.break_cont.uip =
	    (brw_find_loop_end(p, ip) - ip +
	     (brw->gen == 6 ? 16 : 0)) / scale;
	 break;
      case BRW_OPCODE_CONTINUE:
	 assert(block_end_ip != 0);
	 insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
	 insn->bits3.break_cont.uip =
	    (brw_find_loop_end(p, ip) - ip) / scale;

	 assert(insn->bits3.break_cont.uip != 0);
	 assert(insn->bits3.break_cont.jip != 0);
	 break;

      case BRW_OPCODE_ENDIF:
	 /* No following block end: jump to the next instruction
	  * (2 units of 8 bytes = one full-size instruction).
	  */
	 if (block_end_ip == 0)
	    insn->bits3.break_cont.jip = 2;
	 else
	    insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
	 break;

      case BRW_OPCODE_HALT:
	 /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
	  *
	  *    "In case of the halt instruction not inside any conditional
	  *     code block, the value of <JIP> and <UIP> should be the
	  *     same. In case of the halt instruction inside conditional code
	  *     block, the <UIP> should be the end of the program, and the
	  *     <JIP> should be end of the most inner conditional code block."
	  *
	  * The uip will have already been set by whoever set up the
	  * instruction.
	  */
	 if (block_end_ip == 0) {
	    insn->bits3.break_cont.jip = insn->bits3.break_cont.uip;
	 } else {
	    insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
	 }
	 assert(insn->bits3.break_cont.uip != 0);
	 assert(insn->bits3.break_cont.jip != 0);
	 break;
      }
   }
}

/* Emit the FF_SYNC message used by pre-gen6 geometry-pipeline threads
 * to allocate/synchronize URB handles before writing output.
 */
void brw_ff_sync(struct brw_compile *p,
		 struct brw_reg dest,
		 GLuint msg_reg_nr,
		 struct brw_reg src0,
		 bool allocate,
		 GLuint response_length,
		 bool eot)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   /* Move src0 into the message register if needed (no-op pre-gen6). */
   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   insn = next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));

   if (brw->gen < 6)
      /* Pre-gen6: payload base MRF goes in the SEND descriptor field. */
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_ff_sync_message(p,
			   insn,
			   allocate,
			   response_length,
			   eot);
}

/**
 * Emit the SEND instruction necessary to generate stream output data on Gen6
 * (for transform feedback).
 *
 * If send_commit_msg is true, this is the last piece of stream output data
 * from this thread, so send the data as a committed write.  According to the
 * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
 *
 *   "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
 *    writes are complete by sending the final write as a committed write."
 */
void
brw_svb_write(struct brw_compile *p,
              struct brw_reg dest,
              GLuint msg_reg_nr,
              struct brw_reg src0,
              GLuint binding_table_index,
              bool send_commit_msg)
{
   struct brw_instruction *insn;

   /* Move src0 into the message register if needed (no-op pre-gen6). */
   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   insn = next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));
   /* A committed write returns one register of commit data, hence
    * send_commit_msg doubling as the response length.
    */
   brw_set_dp_write_message(p, insn,
                            binding_table_index,
                            0, /* msg_control: ignored */
                            GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE,
                            1, /* msg_length */
                            true, /* header_present */
                            0, /* last_render_target: ignored */
                            send_commit_msg, /* response_length */
                            0, /* end_of_thread */
                            send_commit_msg); /* send_commit_msg */
}

/**
 * This instruction is generated as a single-channel align1 instruction by
 * both the VS and FS stages when using INTEL_DEBUG=shader_time.
 *
 * We can't use the typed atomic op in the FS because that has the execution
 * mask ANDed with the pixel mask, but we just want to write the one dword for
 * all the pixels.
 *
 * We don't use the SIMD4x2 atomic ops in the VS because want to just write
 * one u32.  So we use the same untyped atomic write message as the pixel
 * shader.
 *
 * The untyped atomic operation requires a BUFFER surface type with RAW
 * format, and is only accessible through the legacy DATA_CACHE dataport
 * messages.
 */
void brw_shader_time_add(struct brw_compile *p,
                         struct brw_reg payload,
                         uint32_t surf_index)
{
   struct brw_context *brw = p->brw;
   /* Untyped atomic messages only exist on gen7+. */
   assert(brw->gen >= 7);

   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
   brw_pop_insn_state(p);

   /* We use brw_vec1_reg and unmasked because we want to increment the given
    * offset only once.
    */
   brw_set_dest(p, send, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
                                      BRW_ARF_NULL, 0));
   brw_set_src0(p, send, brw_vec1_reg(payload.file,
                                      payload.nr, 0));

   /* Haswell moved the untyped atomic ops to data cache port 1 with a
    * different msg_type encoding.
    */
   uint32_t sfid, msg_type;
   if (brw->is_haswell) {
      sfid = HSW_SFID_DATAPORT_DATA_CACHE_1;
      msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP;
   } else {
      sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
      msg_type = GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP;
   }

   bool header_present = false;
   bool eot = false;
   uint32_t mlen = 2; /* offset, value */
   uint32_t rlen = 0;
   brw_set_message_descriptor(p, send, sfid, mlen, rlen, header_present, eot);

   /* Hand-assemble the function-control portion of the untyped atomic
    * message descriptor.  NOTE(review): bit positions presumably match
    * the gen7 DC untyped atomic layout — verify against the PRM before
    * changing any of them.
    */
   send->bits3.ud |= msg_type << 14;
   send->bits3.ud |= 0 << 13; /* no return data */
   send->bits3.ud |= 1 << 12; /* SIMD8 mode */
   send->bits3.ud |= BRW_AOP_ADD << 8;
   send->bits3.ud |= surf_index << 0;
}