brw_eu_emit.c revision 0a17093eaf84696b05d04a45d6d51281f7b2786b
1/* 2 Copyright (C) Intel Corp. 2006. All Rights Reserved. 3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to 4 develop this 3D driver. 5 6 Permission is hereby granted, free of charge, to any person obtaining 7 a copy of this software and associated documentation files (the 8 "Software"), to deal in the Software without restriction, including 9 without limitation the rights to use, copy, modify, merge, publish, 10 distribute, sublicense, and/or sell copies of the Software, and to 11 permit persons to whom the Software is furnished to do so, subject to 12 the following conditions: 13 14 The above copyright notice and this permission notice (including the 15 next paragraph) shall be included in all copies or substantial 16 portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
25 26 **********************************************************************/ 27 /* 28 * Authors: 29 * Keith Whitwell <keith@tungstengraphics.com> 30 */ 31 32 33#include "brw_context.h" 34#include "brw_defines.h" 35#include "brw_eu.h" 36 37#include "glsl/ralloc.h" 38 39/*********************************************************************** 40 * Internal helper for constructing instructions 41 */ 42 43static void guess_execution_size(struct brw_compile *p, 44 struct brw_instruction *insn, 45 struct brw_reg reg) 46{ 47 if (reg.width == BRW_WIDTH_8 && p->compressed) 48 insn->header.execution_size = BRW_EXECUTE_16; 49 else 50 insn->header.execution_size = reg.width; /* note - definitions are compatible */ 51} 52 53 54/** 55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source 56 * registers, implicitly moving the operand to a message register. 57 * 58 * On Sandybridge, this is no longer the case. This function performs the 59 * explicit move; it should be called before emitting a SEND instruction. 
60 */ 61void 62gen6_resolve_implied_move(struct brw_compile *p, 63 struct brw_reg *src, 64 GLuint msg_reg_nr) 65{ 66 struct intel_context *intel = &p->brw->intel; 67 if (intel->gen < 6) 68 return; 69 70 if (src->file == BRW_MESSAGE_REGISTER_FILE) 71 return; 72 73 if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) { 74 brw_push_insn_state(p); 75 brw_set_mask_control(p, BRW_MASK_DISABLE); 76 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 77 brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD), 78 retype(*src, BRW_REGISTER_TYPE_UD)); 79 brw_pop_insn_state(p); 80 } 81 *src = brw_message_reg(msg_reg_nr); 82} 83 84static void 85gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg) 86{ 87 struct intel_context *intel = &p->brw->intel; 88 if (intel->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) { 89 reg->file = BRW_GENERAL_REGISTER_FILE; 90 reg->nr += 111; 91 } 92} 93 94 95void 96brw_set_dest(struct brw_compile *p, struct brw_instruction *insn, 97 struct brw_reg dest) 98{ 99 if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && 100 dest.file != BRW_MESSAGE_REGISTER_FILE) 101 assert(dest.nr < 128); 102 103 gen7_convert_mrf_to_grf(p, &dest); 104 105 insn->bits1.da1.dest_reg_file = dest.file; 106 insn->bits1.da1.dest_reg_type = dest.type; 107 insn->bits1.da1.dest_address_mode = dest.address_mode; 108 109 if (dest.address_mode == BRW_ADDRESS_DIRECT) { 110 insn->bits1.da1.dest_reg_nr = dest.nr; 111 112 if (insn->header.access_mode == BRW_ALIGN_1) { 113 insn->bits1.da1.dest_subreg_nr = dest.subnr; 114 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) 115 dest.hstride = BRW_HORIZONTAL_STRIDE_1; 116 insn->bits1.da1.dest_horiz_stride = dest.hstride; 117 } 118 else { 119 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; 120 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; 121 /* even ignored in da16, still need to set as '01' */ 122 insn->bits1.da16.dest_horiz_stride = 1; 123 } 124 } 125 else { 126 
insn->bits1.ia1.dest_subreg_nr = dest.subnr; 127 128 /* These are different sizes in align1 vs align16: 129 */ 130 if (insn->header.access_mode == BRW_ALIGN_1) { 131 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; 132 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) 133 dest.hstride = BRW_HORIZONTAL_STRIDE_1; 134 insn->bits1.ia1.dest_horiz_stride = dest.hstride; 135 } 136 else { 137 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; 138 /* even ignored in da16, still need to set as '01' */ 139 insn->bits1.ia16.dest_horiz_stride = 1; 140 } 141 } 142 143 /* NEW: Set the execution size based on dest.width and 144 * insn->compression_control: 145 */ 146 guess_execution_size(p, insn, dest); 147} 148 149extern int reg_type_size[]; 150 151static void 152validate_reg(struct brw_instruction *insn, struct brw_reg reg) 153{ 154 int hstride_for_reg[] = {0, 1, 2, 4}; 155 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256}; 156 int width_for_reg[] = {1, 2, 4, 8, 16}; 157 int execsize_for_reg[] = {1, 2, 4, 8, 16}; 158 int width, hstride, vstride, execsize; 159 160 if (reg.file == BRW_IMMEDIATE_VALUE) { 161 /* 3.3.6: Region Parameters. Restriction: Immediate vectors 162 * mean the destination has to be 128-bit aligned and the 163 * destination horiz stride has to be a word. 
164 */ 165 if (reg.type == BRW_REGISTER_TYPE_V) { 166 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] * 167 reg_type_size[insn->bits1.da1.dest_reg_type] == 2); 168 } 169 170 return; 171 } 172 173 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE && 174 reg.file == BRW_ARF_NULL) 175 return; 176 177 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg)); 178 hstride = hstride_for_reg[reg.hstride]; 179 180 if (reg.vstride == 0xf) { 181 vstride = -1; 182 } else { 183 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg)); 184 vstride = vstride_for_reg[reg.vstride]; 185 } 186 187 assert(reg.width >= 0 && reg.width < Elements(width_for_reg)); 188 width = width_for_reg[reg.width]; 189 190 assert(insn->header.execution_size >= 0 && 191 insn->header.execution_size < Elements(execsize_for_reg)); 192 execsize = execsize_for_reg[insn->header.execution_size]; 193 194 /* Restrictions from 3.3.10: Register Region Restrictions. */ 195 /* 3. */ 196 assert(execsize >= width); 197 198 /* 4. */ 199 if (execsize == width && hstride != 0) { 200 assert(vstride == -1 || vstride == width * hstride); 201 } 202 203 /* 5. */ 204 if (execsize == width && hstride == 0) { 205 /* no restriction on vstride. */ 206 } 207 208 /* 6. */ 209 if (width == 1) { 210 assert(hstride == 0); 211 } 212 213 /* 7. */ 214 if (execsize == 1 && width == 1) { 215 assert(hstride == 0); 216 assert(vstride == 0); 217 } 218 219 /* 8. */ 220 if (vstride == 0 && hstride == 0) { 221 assert(width == 1); 222 } 223 224 /* 10. Check destination issues. 
*/ 225} 226 227void 228brw_set_src0(struct brw_compile *p, struct brw_instruction *insn, 229 struct brw_reg reg) 230{ 231 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) 232 assert(reg.nr < 128); 233 234 gen7_convert_mrf_to_grf(p, ®); 235 236 validate_reg(insn, reg); 237 238 insn->bits1.da1.src0_reg_file = reg.file; 239 insn->bits1.da1.src0_reg_type = reg.type; 240 insn->bits2.da1.src0_abs = reg.abs; 241 insn->bits2.da1.src0_negate = reg.negate; 242 insn->bits2.da1.src0_address_mode = reg.address_mode; 243 244 if (reg.file == BRW_IMMEDIATE_VALUE) { 245 insn->bits3.ud = reg.dw1.ud; 246 247 /* Required to set some fields in src1 as well: 248 */ 249 insn->bits1.da1.src1_reg_file = 0; /* arf */ 250 insn->bits1.da1.src1_reg_type = reg.type; 251 } 252 else 253 { 254 if (reg.address_mode == BRW_ADDRESS_DIRECT) { 255 if (insn->header.access_mode == BRW_ALIGN_1) { 256 insn->bits2.da1.src0_subreg_nr = reg.subnr; 257 insn->bits2.da1.src0_reg_nr = reg.nr; 258 } 259 else { 260 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16; 261 insn->bits2.da16.src0_reg_nr = reg.nr; 262 } 263 } 264 else { 265 insn->bits2.ia1.src0_subreg_nr = reg.subnr; 266 267 if (insn->header.access_mode == BRW_ALIGN_1) { 268 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; 269 } 270 else { 271 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; 272 } 273 } 274 275 if (insn->header.access_mode == BRW_ALIGN_1) { 276 if (reg.width == BRW_WIDTH_1 && 277 insn->header.execution_size == BRW_EXECUTE_1) { 278 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0; 279 insn->bits2.da1.src0_width = BRW_WIDTH_1; 280 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0; 281 } 282 else { 283 insn->bits2.da1.src0_horiz_stride = reg.hstride; 284 insn->bits2.da1.src0_width = reg.width; 285 insn->bits2.da1.src0_vert_stride = reg.vstride; 286 } 287 } 288 else { 289 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); 290 insn->bits2.da16.src0_swz_y = 
BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); 291 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); 292 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); 293 294 /* This is an oddity of the fact we're using the same 295 * descriptions for registers in align_16 as align_1: 296 */ 297 if (reg.vstride == BRW_VERTICAL_STRIDE_8) 298 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4; 299 else 300 insn->bits2.da16.src0_vert_stride = reg.vstride; 301 } 302 } 303} 304 305 306void brw_set_src1(struct brw_compile *p, 307 struct brw_instruction *insn, 308 struct brw_reg reg) 309{ 310 assert(reg.file != BRW_MESSAGE_REGISTER_FILE); 311 312 assert(reg.nr < 128); 313 314 gen7_convert_mrf_to_grf(p, ®); 315 316 validate_reg(insn, reg); 317 318 insn->bits1.da1.src1_reg_file = reg.file; 319 insn->bits1.da1.src1_reg_type = reg.type; 320 insn->bits3.da1.src1_abs = reg.abs; 321 insn->bits3.da1.src1_negate = reg.negate; 322 323 /* Only src1 can be immediate in two-argument instructions. 
324 */ 325 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE); 326 327 if (reg.file == BRW_IMMEDIATE_VALUE) { 328 insn->bits3.ud = reg.dw1.ud; 329 } 330 else { 331 /* This is a hardware restriction, which may or may not be lifted 332 * in the future: 333 */ 334 assert (reg.address_mode == BRW_ADDRESS_DIRECT); 335 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */ 336 337 if (insn->header.access_mode == BRW_ALIGN_1) { 338 insn->bits3.da1.src1_subreg_nr = reg.subnr; 339 insn->bits3.da1.src1_reg_nr = reg.nr; 340 } 341 else { 342 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16; 343 insn->bits3.da16.src1_reg_nr = reg.nr; 344 } 345 346 if (insn->header.access_mode == BRW_ALIGN_1) { 347 if (reg.width == BRW_WIDTH_1 && 348 insn->header.execution_size == BRW_EXECUTE_1) { 349 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0; 350 insn->bits3.da1.src1_width = BRW_WIDTH_1; 351 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0; 352 } 353 else { 354 insn->bits3.da1.src1_horiz_stride = reg.hstride; 355 insn->bits3.da1.src1_width = reg.width; 356 insn->bits3.da1.src1_vert_stride = reg.vstride; 357 } 358 } 359 else { 360 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); 361 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); 362 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); 363 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); 364 365 /* This is an oddity of the fact we're using the same 366 * descriptions for registers in align_16 as align_1: 367 */ 368 if (reg.vstride == BRW_VERTICAL_STRIDE_8) 369 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4; 370 else 371 insn->bits3.da16.src1_vert_stride = reg.vstride; 372 } 373 } 374} 375 376/** 377 * Set the Message Descriptor and Extended Message Descriptor fields 378 * for SEND messages. 
 *
 * \note This zeroes out the Function Control bits, so it must be called
 *       \b before filling out any message-specific data.  Callers can
 *       choose not to fill in irrelevant bits; they will be zero.
 */
static void
brw_set_message_descriptor(struct brw_compile *p,
                           struct brw_instruction *inst,
                           enum brw_message_target sfid,
                           unsigned msg_length,
                           unsigned response_length,
                           bool header_present,
                           bool end_of_thread)
{
   struct intel_context *intel = &p->brw->intel;

   /* src1 carries the message descriptor; start from all-zero bits. */
   brw_set_src1(p, inst, brw_imm_d(0));

   if (intel->gen >= 5) {
      inst->bits3.generic_gen5.header_present = header_present;
      inst->bits3.generic_gen5.response_length = response_length;
      inst->bits3.generic_gen5.msg_length = msg_length;
      inst->bits3.generic_gen5.end_of_thread = end_of_thread;

      if (intel->gen >= 6) {
         /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
         inst->header.destreg__conditionalmod = sfid;
      } else {
         /* Set Extended Message Descriptor (ex_desc) */
         inst->bits2.send_gen5.sfid = sfid;
         inst->bits2.send_gen5.end_of_thread = end_of_thread;
      }
   } else {
      inst->bits3.generic.response_length = response_length;
      inst->bits3.generic.msg_length = msg_length;
      inst->bits3.generic.msg_target = sfid;
      inst->bits3.generic.end_of_thread = end_of_thread;
   }
}

/* Fill out the descriptor for a math-unit SEND.  Message and response
 * lengths are inferred from the math function: two-operand functions
 * (POW, the INT_DIV family) send two registers, and SINCOS /
 * QUOTIENT_AND_REMAINDER return two.
 */
static void brw_set_math_message( struct brw_compile *p,
                                  struct brw_instruction *insn,
                                  GLuint function,
                                  GLuint integer_type,
                                  bool low_precision,
                                  bool saturate,
                                  GLuint dataType )
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   unsigned msg_length;
   unsigned response_length;

   /* Infer message length from the function */
   switch (function) {
   case BRW_MATH_FUNCTION_POW:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
   case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
      msg_length = 2;
      break;
   default:
      msg_length = 1;
      break;
   }

   /* Infer response length from the function */
   switch (function) {
   case BRW_MATH_FUNCTION_SINCOS:
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
      response_length = 2;
      break;
   default:
      response_length = 1;
      break;
   }

   brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
                              msg_length, response_length, false, false);
   if (intel->gen == 5) {
      insn->bits3.math_gen5.function = function;
      insn->bits3.math_gen5.int_type = integer_type;
      insn->bits3.math_gen5.precision = low_precision;
      insn->bits3.math_gen5.saturate = saturate;
      insn->bits3.math_gen5.data_type = dataType;
      insn->bits3.math_gen5.snapshot = 0;
   } else {
      insn->bits3.math.function = function;
      insn->bits3.math.int_type = integer_type;
      insn->bits3.math.precision = low_precision;
      insn->bits3.math.saturate = saturate;
      insn->bits3.math.data_type = dataType;
   }
}


/* Fill out the descriptor for an FF_SYNC URB message (gen5-style URB
 * layout; opcode 1).  Offset/swizzle/used/complete are unused by
 * FF_SYNC and set to zero.
 */
static void brw_set_ff_sync_message(struct brw_compile *p,
                                    struct brw_instruction *insn,
                                    bool allocate,
                                    GLuint response_length,
                                    bool end_of_thread)
{
   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
                              1, response_length, true, end_of_thread);
   insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
   insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.allocate = allocate;
   insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
}

/* Fill out the descriptor for a URB write, using whichever of the
 * gen7 / gen5+ / gen4 descriptor layouts matches the chipset.
 */
static void brw_set_urb_message( struct brw_compile *p,
                                 struct brw_instruction *insn,
                                 bool allocate,
                                 bool used,
                                 GLuint msg_length,
                                 GLuint response_length,
                                 bool end_of_thread,
                                 bool complete,
                                 GLuint offset,
                                 GLuint swizzle_control )
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;

   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
                              msg_length, response_length, true, end_of_thread);
   if (intel->gen == 7) {
      insn->bits3.urb_gen7.opcode = 0; /* URB_WRITE_HWORD */
      insn->bits3.urb_gen7.offset = offset;
      /* Gen7's URB write has no transpose mode. */
      assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
      insn->bits3.urb_gen7.swizzle_control = swizzle_control;
      /* per_slot_offset = 0 makes it ignore offsets in message header */
      insn->bits3.urb_gen7.per_slot_offset = 0;
      insn->bits3.urb_gen7.complete = complete;
   } else if (intel->gen >= 5) {
      insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */
      insn->bits3.urb_gen5.offset = offset;
      insn->bits3.urb_gen5.swizzle_control = swizzle_control;
      insn->bits3.urb_gen5.allocate = allocate;
      insn->bits3.urb_gen5.used = used; /* ? */
      insn->bits3.urb_gen5.complete = complete;
   } else {
      insn->bits3.urb.opcode = 0; /* ? */
      insn->bits3.urb.offset = offset;
      insn->bits3.urb.swizzle_control = swizzle_control;
      insn->bits3.urb.allocate = allocate;
      insn->bits3.urb.used = used; /* ? */
      insn->bits3.urb.complete = complete;
   }
}

/* Fill out the descriptor for a dataport write.  The shared function
 * (SFID) varies by generation: gen7 splits render-target writes to the
 * render cache from other writes to the data cache, gen6 sends all
 * writes to the render cache, and gen4/5 use the dedicated dataport
 * write unit.
 */
void
brw_set_dp_write_message(struct brw_compile *p,
                         struct brw_instruction *insn,
                         GLuint binding_table_index,
                         GLuint msg_control,
                         GLuint msg_type,
                         GLuint msg_length,
                         bool header_present,
                         GLuint last_render_target,
                         GLuint response_length,
                         GLuint end_of_thread,
                         GLuint send_commit_msg)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   unsigned sfid;

   if (intel->gen >= 7) {
      /* Use the Render Cache for RT writes; otherwise use the Data Cache */
      if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
         sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
      else
         sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
   } else if (intel->gen == 6) {
      /* Use the render cache for all write messages. */
      sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
   } else {
      sfid = BRW_SFID_DATAPORT_WRITE;
   }

   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
                              header_present, end_of_thread);

   if (intel->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.last_render_target = last_render_target;
      insn->bits3.gen7_dp.msg_type = msg_type;
   } else if (intel->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.last_render_target = last_render_target;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
   } else if (intel->gen == 5) {
      insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_write_gen5.msg_control = msg_control;
      insn->bits3.dp_write_gen5.last_render_target = last_render_target;
      insn->bits3.dp_write_gen5.msg_type = msg_type;
      insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
   } else {
      insn->bits3.dp_write.binding_table_index = binding_table_index;
      insn->bits3.dp_write.msg_control = msg_control;
      insn->bits3.dp_write.last_render_target = last_render_target;
      insn->bits3.dp_write.msg_type = msg_type;
      insn->bits3.dp_write.send_commit_msg = send_commit_msg;
   }
}

/* Fill out the descriptor for a dataport read.  As with writes, the
 * SFID depends on the generation; \p target_cache selects between the
 * render and sampler caches on gen6 and is encoded in the descriptor
 * on gen4/gen5/g4x.
 */
void
brw_set_dp_read_message(struct brw_compile *p,
                        struct brw_instruction *insn,
                        GLuint binding_table_index,
                        GLuint msg_control,
                        GLuint msg_type,
                        GLuint target_cache,
                        GLuint msg_length,
                        GLuint response_length)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   unsigned sfid;

   if (intel->gen >= 7) {
      sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
   } else if (intel->gen == 6) {
      if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
         sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
      else
         sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
   } else {
      sfid = BRW_SFID_DATAPORT_READ;
   }

   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
                              true, false);

   if (intel->gen >= 7) {
      insn->bits3.gen7_dp.binding_table_index = binding_table_index;
      insn->bits3.gen7_dp.msg_control = msg_control;
      insn->bits3.gen7_dp.last_render_target = 0;
      insn->bits3.gen7_dp.msg_type = msg_type;
   } else if (intel->gen == 6) {
      insn->bits3.gen6_dp.binding_table_index = binding_table_index;
      insn->bits3.gen6_dp.msg_control = msg_control;
      insn->bits3.gen6_dp.last_render_target = 0;
      insn->bits3.gen6_dp.msg_type = msg_type;
      insn->bits3.gen6_dp.send_commit_msg = 0;
   } else if (intel->gen == 5) {
      insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_read_gen5.msg_control = msg_control;
      insn->bits3.dp_read_gen5.msg_type = msg_type;
      insn->bits3.dp_read_gen5.target_cache = target_cache;
   } else if (intel->is_g4x) {
      insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/
      insn->bits3.dp_read_g4x.msg_type = msg_type; /*11:13*/
      insn->bits3.dp_read_g4x.target_cache = target_cache; /*14:15*/
   } else {
      insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
      insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
      insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
   }
}

/* Fill out the descriptor for a sampler message; the field layout
 * (and whether simd_mode / return_format exist at all) varies per
 * generation.
 */
static void brw_set_sampler_message(struct brw_compile *p,
                                    struct brw_instruction *insn,
                                    GLuint binding_table_index,
                                    GLuint sampler,
                                    GLuint msg_type,
                                    GLuint response_length,
                                    GLuint msg_length,
                                    GLuint header_present,
                                    GLuint simd_mode,
                                    GLuint return_format)
{
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;

   brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, msg_length,
                              response_length, header_present, false);

   if (intel->gen >= 7) {
      insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen7.sampler = sampler;
      insn->bits3.sampler_gen7.msg_type = msg_type;
      insn->bits3.sampler_gen7.simd_mode = simd_mode;
   } else if (intel->gen >= 5) {
      insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen5.sampler = sampler;
      insn->bits3.sampler_gen5.msg_type = msg_type;
      insn->bits3.sampler_gen5.simd_mode = simd_mode;
   } else if (intel->is_g4x) {
      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
      insn->bits3.sampler_g4x.sampler = sampler;
      insn->bits3.sampler_g4x.msg_type = msg_type;
   } else {
      insn->bits3.sampler.binding_table_index = binding_table_index;
      insn->bits3.sampler.sampler = sampler;
      insn->bits3.sampler.msg_type = msg_type;
      insn->bits3.sampler.return_format = return_format;
   }
}


#define next_insn brw_next_insn
/* Allocate the next instruction slot from p->store, seeding it with a
 * copy of the current default instruction state and the given opcode.
 * Also consumes the one-shot conditional-modifier default: once used,
 * it is cleared (and the default predicate re-armed to NORMAL) so it
 * does not leak into subsequent instructions.
 */
struct brw_instruction *
brw_next_insn(struct brw_compile *p, GLuint opcode)
{
   struct brw_instruction *insn;

   assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);

   insn = &p->store[p->nr_insn++];
   memcpy(insn, p->current, sizeof(*insn));

   /* Reset this one-shot flag:
    */

   if (p->current->header.destreg__conditionalmod) {
      p->current->header.destreg__conditionalmod = 0;
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
   }

   insn->header.opcode = opcode;
   return insn;
}

/* Emit a one-source ALU instruction. */
static struct brw_instruction *brw_alu1( struct brw_compile *p,
                                         GLuint opcode,
                                         struct brw_reg dest,
                                         struct brw_reg src )
{
   struct brw_instruction *insn = next_insn(p, opcode);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);
   return insn;
}

/* Emit a two-source ALU instruction. */
static struct brw_instruction *brw_alu2(struct brw_compile *p,
                                        GLuint opcode,
                                        struct brw_reg dest,
                                        struct brw_reg src0,
                                        struct brw_reg src1 )
{
   struct brw_instruction *insn = next_insn(p, opcode);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
   return insn;
}


/***********************************************************************
 * Convenience routines.
 */
/* Generate a public brw_<OP>(p, dest, src0) wrapper for a one-source
 * opcode.
 */
#define ALU1(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0)   			\
{							\
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);    	\
}

/* Generate a public brw_<OP>(p, dest, src0, src1) wrapper for a
 * two-source opcode.
 */
#define ALU2(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1)   			\
{							\
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
}

/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 *
 * Sandybridge and later appear to round correctly without an ADD.
762 */ 763#define ROUND(OP) \ 764void brw_##OP(struct brw_compile *p, \ 765 struct brw_reg dest, \ 766 struct brw_reg src) \ 767{ \ 768 struct brw_instruction *rnd, *add; \ 769 rnd = next_insn(p, BRW_OPCODE_##OP); \ 770 brw_set_dest(p, rnd, dest); \ 771 brw_set_src0(p, rnd, src); \ 772 \ 773 if (p->brw->intel.gen < 6) { \ 774 /* turn on round-increments */ \ 775 rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \ 776 add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \ 777 add->header.predicate_control = BRW_PREDICATE_NORMAL; \ 778 } \ 779} 780 781 782ALU1(MOV) 783ALU2(SEL) 784ALU1(NOT) 785ALU2(AND) 786ALU2(OR) 787ALU2(XOR) 788ALU2(SHR) 789ALU2(SHL) 790ALU2(RSR) 791ALU2(RSL) 792ALU2(ASR) 793ALU1(FRC) 794ALU1(RNDD) 795ALU2(MAC) 796ALU2(MACH) 797ALU1(LZD) 798ALU2(DP4) 799ALU2(DPH) 800ALU2(DP3) 801ALU2(DP2) 802ALU2(LINE) 803ALU2(PLN) 804 805 806ROUND(RNDZ) 807ROUND(RNDE) 808 809 810struct brw_instruction *brw_ADD(struct brw_compile *p, 811 struct brw_reg dest, 812 struct brw_reg src0, 813 struct brw_reg src1) 814{ 815 /* 6.2.2: add */ 816 if (src0.type == BRW_REGISTER_TYPE_F || 817 (src0.file == BRW_IMMEDIATE_VALUE && 818 src0.type == BRW_REGISTER_TYPE_VF)) { 819 assert(src1.type != BRW_REGISTER_TYPE_UD); 820 assert(src1.type != BRW_REGISTER_TYPE_D); 821 } 822 823 if (src1.type == BRW_REGISTER_TYPE_F || 824 (src1.file == BRW_IMMEDIATE_VALUE && 825 src1.type == BRW_REGISTER_TYPE_VF)) { 826 assert(src0.type != BRW_REGISTER_TYPE_UD); 827 assert(src0.type != BRW_REGISTER_TYPE_D); 828 } 829 830 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1); 831} 832 833struct brw_instruction *brw_MUL(struct brw_compile *p, 834 struct brw_reg dest, 835 struct brw_reg src0, 836 struct brw_reg src1) 837{ 838 /* 6.32.38: mul */ 839 if (src0.type == BRW_REGISTER_TYPE_D || 840 src0.type == BRW_REGISTER_TYPE_UD || 841 src1.type == BRW_REGISTER_TYPE_D || 842 src1.type == BRW_REGISTER_TYPE_UD) { 843 assert(dest.type != BRW_REGISTER_TYPE_F); 844 } 845 846 if (src0.type == 
BRW_REGISTER_TYPE_F || 847 (src0.file == BRW_IMMEDIATE_VALUE && 848 src0.type == BRW_REGISTER_TYPE_VF)) { 849 assert(src1.type != BRW_REGISTER_TYPE_UD); 850 assert(src1.type != BRW_REGISTER_TYPE_D); 851 } 852 853 if (src1.type == BRW_REGISTER_TYPE_F || 854 (src1.file == BRW_IMMEDIATE_VALUE && 855 src1.type == BRW_REGISTER_TYPE_VF)) { 856 assert(src0.type != BRW_REGISTER_TYPE_UD); 857 assert(src0.type != BRW_REGISTER_TYPE_D); 858 } 859 860 assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE || 861 src0.nr != BRW_ARF_ACCUMULATOR); 862 assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE || 863 src1.nr != BRW_ARF_ACCUMULATOR); 864 865 return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1); 866} 867 868 869void brw_NOP(struct brw_compile *p) 870{ 871 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP); 872 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 873 brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 874 brw_set_src1(p, insn, brw_imm_ud(0x0)); 875} 876 877 878 879 880 881/*********************************************************************** 882 * Comparisons, if/else/endif 883 */ 884 885struct brw_instruction *brw_JMPI(struct brw_compile *p, 886 struct brw_reg dest, 887 struct brw_reg src0, 888 struct brw_reg src1) 889{ 890 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); 891 892 insn->header.execution_size = 1; 893 insn->header.compression_control = BRW_COMPRESSION_NONE; 894 insn->header.mask_control = BRW_MASK_DISABLE; 895 896 p->current->header.predicate_control = BRW_PREDICATE_NONE; 897 898 return insn; 899} 900 901static void 902push_if_stack(struct brw_compile *p, struct brw_instruction *inst) 903{ 904 p->if_stack[p->if_stack_depth] = inst - p->store; 905 906 p->if_stack_depth++; 907 if (p->if_stack_array_size <= p->if_stack_depth) { 908 p->if_stack_array_size *= 2; 909 p->if_stack = reralloc(p->mem_ctx, p->if_stack, int, 910 p->if_stack_array_size); 911 } 912} 913 914static 
struct brw_instruction * 915pop_if_stack(struct brw_compile *p) 916{ 917 p->if_stack_depth--; 918 return &p->store[p->if_stack[p->if_stack_depth]]; 919} 920 921static void 922push_loop_stack(struct brw_compile *p, struct brw_instruction *inst) 923{ 924 if (p->loop_stack_array_size < p->loop_stack_depth) { 925 p->loop_stack_array_size *= 2; 926 p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int, 927 p->loop_stack_array_size); 928 p->if_depth_in_loop = reralloc(p->mem_ctx, p->if_depth_in_loop, int, 929 p->loop_stack_array_size); 930 } 931 932 p->loop_stack[p->loop_stack_depth] = inst - p->store; 933 p->loop_stack_depth++; 934 p->if_depth_in_loop[p->loop_stack_depth] = 0; 935} 936 937static struct brw_instruction * 938get_inner_do_insn(struct brw_compile *p) 939{ 940 return &p->store[p->loop_stack[p->loop_stack_depth - 1]]; 941} 942 943/* EU takes the value from the flag register and pushes it onto some 944 * sort of a stack (presumably merging with any flag value already on 945 * the stack). Within an if block, the flags at the top of the stack 946 * control execution on each channel of the unit, eg. on each of the 947 * 16 pixel values in our wm programs. 948 * 949 * When the matching 'else' instruction is reached (presumably by 950 * countdown of the instruction count patched in by our ELSE/ENDIF 951 * functions), the relevent flags are inverted. 952 * 953 * When the matching 'endif' instruction is reached, the flags are 954 * popped off. If the stack is now empty, normal execution resumes. 
955 */ 956struct brw_instruction * 957brw_IF(struct brw_compile *p, GLuint execute_size) 958{ 959 struct intel_context *intel = &p->brw->intel; 960 struct brw_instruction *insn; 961 962 insn = next_insn(p, BRW_OPCODE_IF); 963 964 /* Override the defaults for this instruction: 965 */ 966 if (intel->gen < 6) { 967 brw_set_dest(p, insn, brw_ip_reg()); 968 brw_set_src0(p, insn, brw_ip_reg()); 969 brw_set_src1(p, insn, brw_imm_d(0x0)); 970 } else if (intel->gen == 6) { 971 brw_set_dest(p, insn, brw_imm_w(0)); 972 insn->bits1.branch_gen6.jump_count = 0; 973 brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D))); 974 brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D))); 975 } else { 976 brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D))); 977 brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D))); 978 brw_set_src1(p, insn, brw_imm_ud(0)); 979 insn->bits3.break_cont.jip = 0; 980 insn->bits3.break_cont.uip = 0; 981 } 982 983 insn->header.execution_size = execute_size; 984 insn->header.compression_control = BRW_COMPRESSION_NONE; 985 insn->header.predicate_control = BRW_PREDICATE_NORMAL; 986 insn->header.mask_control = BRW_MASK_ENABLE; 987 if (!p->single_program_flow) 988 insn->header.thread_control = BRW_THREAD_SWITCH; 989 990 p->current->header.predicate_control = BRW_PREDICATE_NONE; 991 992 push_if_stack(p, insn); 993 p->if_depth_in_loop[p->loop_stack_depth]++; 994 return insn; 995} 996 997/* This function is only used for gen6-style IF instructions with an 998 * embedded comparison (conditional modifier). It is not used on gen7. 
999 */ 1000struct brw_instruction * 1001gen6_IF(struct brw_compile *p, uint32_t conditional, 1002 struct brw_reg src0, struct brw_reg src1) 1003{ 1004 struct brw_instruction *insn; 1005 1006 insn = next_insn(p, BRW_OPCODE_IF); 1007 1008 brw_set_dest(p, insn, brw_imm_w(0)); 1009 if (p->compressed) { 1010 insn->header.execution_size = BRW_EXECUTE_16; 1011 } else { 1012 insn->header.execution_size = BRW_EXECUTE_8; 1013 } 1014 insn->bits1.branch_gen6.jump_count = 0; 1015 brw_set_src0(p, insn, src0); 1016 brw_set_src1(p, insn, src1); 1017 1018 assert(insn->header.compression_control == BRW_COMPRESSION_NONE); 1019 assert(insn->header.predicate_control == BRW_PREDICATE_NONE); 1020 insn->header.destreg__conditionalmod = conditional; 1021 1022 if (!p->single_program_flow) 1023 insn->header.thread_control = BRW_THREAD_SWITCH; 1024 1025 push_if_stack(p, insn); 1026 return insn; 1027} 1028 1029/** 1030 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs. 1031 */ 1032static void 1033convert_IF_ELSE_to_ADD(struct brw_compile *p, 1034 struct brw_instruction *if_inst, 1035 struct brw_instruction *else_inst) 1036{ 1037 /* The next instruction (where the ENDIF would be, if it existed) */ 1038 struct brw_instruction *next_inst = &p->store[p->nr_insn]; 1039 1040 assert(p->single_program_flow); 1041 assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF); 1042 assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE); 1043 assert(if_inst->header.execution_size == BRW_EXECUTE_1); 1044 1045 /* Convert IF to an ADD instruction that moves the instruction pointer 1046 * to the first instruction of the ELSE block. If there is no ELSE 1047 * block, point to where ENDIF would be. Reverse the predicate. 1048 * 1049 * There's no need to execute an ENDIF since we don't need to do any 1050 * stack operations, and if we're currently executing, we just want to 1051 * continue normally. 
1052 */ 1053 if_inst->header.opcode = BRW_OPCODE_ADD; 1054 if_inst->header.predicate_inverse = 1; 1055 1056 if (else_inst != NULL) { 1057 /* Convert ELSE to an ADD instruction that points where the ENDIF 1058 * would be. 1059 */ 1060 else_inst->header.opcode = BRW_OPCODE_ADD; 1061 1062 if_inst->bits3.ud = (else_inst - if_inst + 1) * 16; 1063 else_inst->bits3.ud = (next_inst - else_inst) * 16; 1064 } else { 1065 if_inst->bits3.ud = (next_inst - if_inst) * 16; 1066 } 1067} 1068 1069/** 1070 * Patch IF and ELSE instructions with appropriate jump targets. 1071 */ 1072static void 1073patch_IF_ELSE(struct brw_compile *p, 1074 struct brw_instruction *if_inst, 1075 struct brw_instruction *else_inst, 1076 struct brw_instruction *endif_inst) 1077{ 1078 struct intel_context *intel = &p->brw->intel; 1079 1080 /* We shouldn't be patching IF and ELSE instructions in single program flow 1081 * mode when gen < 6, because in single program flow mode on those 1082 * platforms, we convert flow control instructions to conditional ADDs that 1083 * operate on IP (see brw_ENDIF). 1084 * 1085 * However, on Gen6, writing to IP doesn't work in single program flow mode 1086 * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may 1087 * not be updated by non-flow control instructions."). And on later 1088 * platforms, there is no significant benefit to converting control flow 1089 * instructions to conditional ADDs. So we do patch IF and ELSE 1090 * instructions in single program flow mode on those platforms. 1091 */ 1092 if (intel->gen < 6) 1093 assert(!p->single_program_flow); 1094 1095 assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF); 1096 assert(endif_inst != NULL); 1097 assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE); 1098 1099 unsigned br = 1; 1100 /* Jump count is for 64bit data chunk each, so one 128bit instruction 1101 * requires 2 chunks. 
1102 */ 1103 if (intel->gen >= 5) 1104 br = 2; 1105 1106 assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF); 1107 endif_inst->header.execution_size = if_inst->header.execution_size; 1108 1109 if (else_inst == NULL) { 1110 /* Patch IF -> ENDIF */ 1111 if (intel->gen < 6) { 1112 /* Turn it into an IFF, which means no mask stack operations for 1113 * all-false and jumping past the ENDIF. 1114 */ 1115 if_inst->header.opcode = BRW_OPCODE_IFF; 1116 if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1); 1117 if_inst->bits3.if_else.pop_count = 0; 1118 if_inst->bits3.if_else.pad0 = 0; 1119 } else if (intel->gen == 6) { 1120 /* As of gen6, there is no IFF and IF must point to the ENDIF. */ 1121 if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst); 1122 } else { 1123 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst); 1124 if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst); 1125 } 1126 } else { 1127 else_inst->header.execution_size = if_inst->header.execution_size; 1128 1129 /* Patch IF -> ELSE */ 1130 if (intel->gen < 6) { 1131 if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst); 1132 if_inst->bits3.if_else.pop_count = 0; 1133 if_inst->bits3.if_else.pad0 = 0; 1134 } else if (intel->gen == 6) { 1135 if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1); 1136 } 1137 1138 /* Patch ELSE -> ENDIF */ 1139 if (intel->gen < 6) { 1140 /* BRW_OPCODE_ELSE pre-gen6 should point just past the 1141 * matching ENDIF. 1142 */ 1143 else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1); 1144 else_inst->bits3.if_else.pop_count = 1; 1145 else_inst->bits3.if_else.pad0 = 0; 1146 } else if (intel->gen == 6) { 1147 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. 
*/ 1148 else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst); 1149 } else { 1150 /* The IF instruction's JIP should point just past the ELSE */ 1151 if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1); 1152 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */ 1153 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst); 1154 else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst); 1155 } 1156 } 1157} 1158 1159void 1160brw_ELSE(struct brw_compile *p) 1161{ 1162 struct intel_context *intel = &p->brw->intel; 1163 struct brw_instruction *insn; 1164 1165 insn = next_insn(p, BRW_OPCODE_ELSE); 1166 1167 if (intel->gen < 6) { 1168 brw_set_dest(p, insn, brw_ip_reg()); 1169 brw_set_src0(p, insn, brw_ip_reg()); 1170 brw_set_src1(p, insn, brw_imm_d(0x0)); 1171 } else if (intel->gen == 6) { 1172 brw_set_dest(p, insn, brw_imm_w(0)); 1173 insn->bits1.branch_gen6.jump_count = 0; 1174 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1175 brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1176 } else { 1177 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1178 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1179 brw_set_src1(p, insn, brw_imm_ud(0)); 1180 insn->bits3.break_cont.jip = 0; 1181 insn->bits3.break_cont.uip = 0; 1182 } 1183 1184 insn->header.compression_control = BRW_COMPRESSION_NONE; 1185 insn->header.mask_control = BRW_MASK_ENABLE; 1186 if (!p->single_program_flow) 1187 insn->header.thread_control = BRW_THREAD_SWITCH; 1188 1189 push_if_stack(p, insn); 1190} 1191 1192void 1193brw_ENDIF(struct brw_compile *p) 1194{ 1195 struct intel_context *intel = &p->brw->intel; 1196 struct brw_instruction *insn; 1197 struct brw_instruction *else_inst = NULL; 1198 struct brw_instruction *if_inst = NULL; 1199 struct brw_instruction *tmp; 1200 1201 /* Pop the IF and (optional) ELSE instructions from the stack */ 1202 p->if_depth_in_loop[p->loop_stack_depth]--; 
1203 tmp = pop_if_stack(p); 1204 if (tmp->header.opcode == BRW_OPCODE_ELSE) { 1205 else_inst = tmp; 1206 tmp = pop_if_stack(p); 1207 } 1208 if_inst = tmp; 1209 1210 /* In single program flow mode, we can express IF and ELSE instructions 1211 * equivalently as ADD instructions that operate on IP. On platforms prior 1212 * to Gen6, flow control instructions cause an implied thread switch, so 1213 * this is a significant savings. 1214 * 1215 * However, on Gen6, writing to IP doesn't work in single program flow mode 1216 * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may 1217 * not be updated by non-flow control instructions."). And on later 1218 * platforms, there is no significant benefit to converting control flow 1219 * instructions to conditional ADDs. So we only do this trick on Gen4 and 1220 * Gen5. 1221 */ 1222 if (intel->gen < 6 && p->single_program_flow) { 1223 /* ENDIF is useless; don't bother emitting it. */ 1224 convert_IF_ELSE_to_ADD(p, if_inst, else_inst); 1225 return; 1226 } 1227 1228 insn = next_insn(p, BRW_OPCODE_ENDIF); 1229 1230 if (intel->gen < 6) { 1231 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 1232 brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 1233 brw_set_src1(p, insn, brw_imm_d(0x0)); 1234 } else if (intel->gen == 6) { 1235 brw_set_dest(p, insn, brw_imm_w(0)); 1236 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1237 brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1238 } else { 1239 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1240 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1241 brw_set_src1(p, insn, brw_imm_ud(0)); 1242 } 1243 1244 insn->header.compression_control = BRW_COMPRESSION_NONE; 1245 insn->header.mask_control = BRW_MASK_ENABLE; 1246 insn->header.thread_control = BRW_THREAD_SWITCH; 1247 1248 /* Also pop item off the stack in the endif instruction: */ 1249 if (intel->gen < 
6) { 1250 insn->bits3.if_else.jump_count = 0; 1251 insn->bits3.if_else.pop_count = 1; 1252 insn->bits3.if_else.pad0 = 0; 1253 } else if (intel->gen == 6) { 1254 insn->bits1.branch_gen6.jump_count = 2; 1255 } else { 1256 insn->bits3.break_cont.jip = 2; 1257 } 1258 patch_IF_ELSE(p, if_inst, else_inst, insn); 1259} 1260 1261struct brw_instruction *brw_BREAK(struct brw_compile *p) 1262{ 1263 struct intel_context *intel = &p->brw->intel; 1264 struct brw_instruction *insn; 1265 1266 insn = next_insn(p, BRW_OPCODE_BREAK); 1267 if (intel->gen >= 6) { 1268 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1269 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1270 brw_set_src1(p, insn, brw_imm_d(0x0)); 1271 } else { 1272 brw_set_dest(p, insn, brw_ip_reg()); 1273 brw_set_src0(p, insn, brw_ip_reg()); 1274 brw_set_src1(p, insn, brw_imm_d(0x0)); 1275 insn->bits3.if_else.pad0 = 0; 1276 insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth]; 1277 } 1278 insn->header.compression_control = BRW_COMPRESSION_NONE; 1279 insn->header.execution_size = BRW_EXECUTE_8; 1280 1281 return insn; 1282} 1283 1284struct brw_instruction *gen6_CONT(struct brw_compile *p) 1285{ 1286 struct brw_instruction *insn; 1287 1288 insn = next_insn(p, BRW_OPCODE_CONTINUE); 1289 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1290 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1291 brw_set_dest(p, insn, brw_ip_reg()); 1292 brw_set_src0(p, insn, brw_ip_reg()); 1293 brw_set_src1(p, insn, brw_imm_d(0x0)); 1294 1295 insn->header.compression_control = BRW_COMPRESSION_NONE; 1296 insn->header.execution_size = BRW_EXECUTE_8; 1297 return insn; 1298} 1299 1300struct brw_instruction *brw_CONT(struct brw_compile *p) 1301{ 1302 struct brw_instruction *insn; 1303 insn = next_insn(p, BRW_OPCODE_CONTINUE); 1304 brw_set_dest(p, insn, brw_ip_reg()); 1305 brw_set_src0(p, insn, brw_ip_reg()); 1306 brw_set_src1(p, insn, brw_imm_d(0x0)); 
1307 insn->header.compression_control = BRW_COMPRESSION_NONE; 1308 insn->header.execution_size = BRW_EXECUTE_8; 1309 /* insn->header.mask_control = BRW_MASK_DISABLE; */ 1310 insn->bits3.if_else.pad0 = 0; 1311 insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth]; 1312 return insn; 1313} 1314 1315/* DO/WHILE loop: 1316 * 1317 * The DO/WHILE is just an unterminated loop -- break or continue are 1318 * used for control within the loop. We have a few ways they can be 1319 * done. 1320 * 1321 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip, 1322 * jip and no DO instruction. 1323 * 1324 * For non-uniform control flow pre-gen6, there's a DO instruction to 1325 * push the mask, and a WHILE to jump back, and BREAK to get out and 1326 * pop the mask. 1327 * 1328 * For gen6, there's no more mask stack, so no need for DO. WHILE 1329 * just points back to the first instruction of the loop. 1330 */ 1331struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) 1332{ 1333 struct intel_context *intel = &p->brw->intel; 1334 1335 if (intel->gen >= 6 || p->single_program_flow) { 1336 push_loop_stack(p, &p->store[p->nr_insn]); 1337 return &p->store[p->nr_insn]; 1338 } else { 1339 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO); 1340 1341 push_loop_stack(p, insn); 1342 1343 /* Override the defaults for this instruction: 1344 */ 1345 brw_set_dest(p, insn, brw_null_reg()); 1346 brw_set_src0(p, insn, brw_null_reg()); 1347 brw_set_src1(p, insn, brw_null_reg()); 1348 1349 insn->header.compression_control = BRW_COMPRESSION_NONE; 1350 insn->header.execution_size = execute_size; 1351 insn->header.predicate_control = BRW_PREDICATE_NONE; 1352 /* insn->header.mask_control = BRW_MASK_ENABLE; */ 1353 /* insn->header.mask_control = BRW_MASK_DISABLE; */ 1354 1355 return insn; 1356 } 1357} 1358 1359/** 1360 * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE 1361 * instruction here. 
 *
 * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
 * nesting, since it can always just point to the end of the block/current loop.
 */
static void
brw_patch_break_cont(struct brw_compile *p, struct brw_instruction *while_inst)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *do_inst = get_inner_do_insn(p);
   struct brw_instruction *inst;
   /* Jump counts are in 64-bit chunks on gen5 (2 per instruction), 128-bit
    * on gen4.
    */
   int br = (intel->gen == 5) ? 2 : 1;

   /* Walk backwards from the WHILE to the matching DO, fixing up any
    * BREAK/CONT that hasn't been patched yet.
    */
   for (inst = while_inst - 1; inst != do_inst; inst--) {
      /* If the jump count is != 0, that means that this instruction has already
       * been patched because it's part of a loop inside of the one we're
       * patching.
       */
      if (inst->header.opcode == BRW_OPCODE_BREAK &&
	  inst->bits3.if_else.jump_count == 0) {
	 /* BREAK jumps just past the WHILE (+ 1). */
	 inst->bits3.if_else.jump_count = br * ((while_inst - inst) + 1);
      } else if (inst->header.opcode == BRW_OPCODE_CONTINUE &&
		 inst->bits3.if_else.jump_count == 0) {
	 /* CONTINUE jumps to the WHILE itself. */
	 inst->bits3.if_else.jump_count = br * (while_inst - inst);
      }
   }
}

struct brw_instruction *brw_WHILE(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn, *do_insn;
   GLuint br = 1;

   do_insn = get_inner_do_insn(p);

   /* Jump counts are in 64-bit chunks from gen5 on: 2 per instruction. */
   if (intel->gen >= 5)
      br = 2;

   if (intel->gen >= 7) {
      insn = next_insn(p, BRW_OPCODE_WHILE);

      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      /* Backwards jump to the top of the loop (negative offset). */
      insn->bits3.break_cont.jip = br * (do_insn - insn);

      insn->header.execution_size = BRW_EXECUTE_8;
   } else if (intel->gen == 6) {
      insn = next_insn(p, BRW_OPCODE_WHILE);

      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));

      insn->header.execution_size = BRW_EXECUTE_8;
   } else {
      if (p->single_program_flow) {
	 /* SPF: the WHILE degenerates to a scalar backwards ADD on IP
	  * (16 bytes per instruction).
	  */
	 insn = next_insn(p, BRW_OPCODE_ADD);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
	 insn->header.execution_size = BRW_EXECUTE_1;
      } else {
	 insn = next_insn(p, BRW_OPCODE_WHILE);

	 assert(do_insn->header.opcode == BRW_OPCODE_DO);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d(0));

	 insn->header.execution_size = do_insn->header.execution_size;
	 /* Jump back just past the DO. */
	 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
	 insn->bits3.if_else.pop_count = 0;
	 insn->bits3.if_else.pad0 = 0;

	 brw_patch_break_cont(p, insn);
      }
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   p->loop_stack_depth--;

   return insn;
}


/* FORWARD JUMPS:
 */
void brw_land_fwd_jump(struct brw_compile *p,
		       struct brw_instruction *jmp_insn)
{
   struct intel_context *intel = &p->brw->intel;
   /* The landing pad is the next instruction to be emitted. */
   struct brw_instruction *landing = &p->store[p->nr_insn];
   GLuint jmpi = 1;

   /* JMPI offsets are in 64-bit chunks from gen5 on. */
   if (intel->gen >= 5)
      jmpi = 2;

   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
   assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);

   /* -1 because the jump distance is relative to the post-incremented IP. */
   jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
}



/* To integrate with the above, it makes sense that the comparison
 * instruction should populate the flag register.  It might be simpler
 * just to use the flag reg for most WM tasks?
1475 */ 1476void brw_CMP(struct brw_compile *p, 1477 struct brw_reg dest, 1478 GLuint conditional, 1479 struct brw_reg src0, 1480 struct brw_reg src1) 1481{ 1482 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP); 1483 1484 insn->header.destreg__conditionalmod = conditional; 1485 brw_set_dest(p, insn, dest); 1486 brw_set_src0(p, insn, src0); 1487 brw_set_src1(p, insn, src1); 1488 1489/* guess_execution_size(insn, src0); */ 1490 1491 1492 /* Make it so that future instructions will use the computed flag 1493 * value until brw_set_predicate_control_flag_value() is called 1494 * again. 1495 */ 1496 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE && 1497 dest.nr == 0) { 1498 p->current->header.predicate_control = BRW_PREDICATE_NORMAL; 1499 p->flag_value = 0xff; 1500 } 1501} 1502 1503/* Issue 'wait' instruction for n1, host could program MMIO 1504 to wake up thread. */ 1505void brw_WAIT (struct brw_compile *p) 1506{ 1507 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT); 1508 struct brw_reg src = brw_notification_1_reg(); 1509 1510 brw_set_dest(p, insn, src); 1511 brw_set_src0(p, insn, src); 1512 brw_set_src1(p, insn, brw_null_reg()); 1513 insn->header.execution_size = 0; /* must */ 1514 insn->header.predicate_control = 0; 1515 insn->header.compression_control = 0; 1516} 1517 1518 1519/*********************************************************************** 1520 * Helpers for the various SEND message types: 1521 */ 1522 1523/** Extended math function, float[8]. 
1524 */ 1525void brw_math( struct brw_compile *p, 1526 struct brw_reg dest, 1527 GLuint function, 1528 GLuint saturate, 1529 GLuint msg_reg_nr, 1530 struct brw_reg src, 1531 GLuint data_type, 1532 GLuint precision ) 1533{ 1534 struct intel_context *intel = &p->brw->intel; 1535 1536 if (intel->gen >= 6) { 1537 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH); 1538 1539 assert(dest.file == BRW_GENERAL_REGISTER_FILE); 1540 assert(src.file == BRW_GENERAL_REGISTER_FILE); 1541 1542 assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1); 1543 if (intel->gen == 6) 1544 assert(src.hstride == BRW_HORIZONTAL_STRIDE_1); 1545 1546 /* Source modifiers are ignored for extended math instructions on Gen6. */ 1547 if (intel->gen == 6) { 1548 assert(!src.negate); 1549 assert(!src.abs); 1550 } 1551 1552 if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT || 1553 function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER || 1554 function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) { 1555 assert(src.type != BRW_REGISTER_TYPE_F); 1556 } else { 1557 assert(src.type == BRW_REGISTER_TYPE_F); 1558 } 1559 1560 /* Math is the same ISA format as other opcodes, except that CondModifier 1561 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. 1562 */ 1563 insn->header.destreg__conditionalmod = function; 1564 insn->header.saturate = saturate; 1565 1566 brw_set_dest(p, insn, dest); 1567 brw_set_src0(p, insn, src); 1568 brw_set_src1(p, insn, brw_null_reg()); 1569 } else { 1570 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1571 1572 /* Example code doesn't set predicate_control for send 1573 * instructions. 1574 */ 1575 insn->header.predicate_control = 0; 1576 insn->header.destreg__conditionalmod = msg_reg_nr; 1577 1578 brw_set_dest(p, insn, dest); 1579 brw_set_src0(p, insn, src); 1580 brw_set_math_message(p, 1581 insn, 1582 function, 1583 src.type == BRW_REGISTER_TYPE_D, 1584 precision, 1585 saturate, 1586 data_type); 1587 } 1588} 1589 1590/** Extended math function, float[8]. 
1591 */ 1592void brw_math2(struct brw_compile *p, 1593 struct brw_reg dest, 1594 GLuint function, 1595 struct brw_reg src0, 1596 struct brw_reg src1) 1597{ 1598 struct intel_context *intel = &p->brw->intel; 1599 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH); 1600 1601 assert(intel->gen >= 6); 1602 (void) intel; 1603 1604 1605 assert(dest.file == BRW_GENERAL_REGISTER_FILE); 1606 assert(src0.file == BRW_GENERAL_REGISTER_FILE); 1607 assert(src1.file == BRW_GENERAL_REGISTER_FILE); 1608 1609 assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1); 1610 if (intel->gen == 6) { 1611 assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1); 1612 assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1); 1613 } 1614 1615 if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT || 1616 function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER || 1617 function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) { 1618 assert(src0.type != BRW_REGISTER_TYPE_F); 1619 assert(src1.type != BRW_REGISTER_TYPE_F); 1620 } else { 1621 assert(src0.type == BRW_REGISTER_TYPE_F); 1622 assert(src1.type == BRW_REGISTER_TYPE_F); 1623 } 1624 1625 /* Source modifiers are ignored for extended math instructions on Gen6. */ 1626 if (intel->gen == 6) { 1627 assert(!src0.negate); 1628 assert(!src0.abs); 1629 assert(!src1.negate); 1630 assert(!src1.abs); 1631 } 1632 1633 /* Math is the same ISA format as other opcodes, except that CondModifier 1634 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. 1635 */ 1636 insn->header.destreg__conditionalmod = function; 1637 1638 brw_set_dest(p, insn, dest); 1639 brw_set_src0(p, insn, src0); 1640 brw_set_src1(p, insn, src1); 1641} 1642 1643/** 1644 * Extended math function, float[16]. 1645 * Use 2 send instructions. 
1646 */ 1647void brw_math_16( struct brw_compile *p, 1648 struct brw_reg dest, 1649 GLuint function, 1650 GLuint saturate, 1651 GLuint msg_reg_nr, 1652 struct brw_reg src, 1653 GLuint precision ) 1654{ 1655 struct intel_context *intel = &p->brw->intel; 1656 struct brw_instruction *insn; 1657 1658 if (intel->gen >= 6) { 1659 insn = next_insn(p, BRW_OPCODE_MATH); 1660 1661 /* Math is the same ISA format as other opcodes, except that CondModifier 1662 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. 1663 */ 1664 insn->header.destreg__conditionalmod = function; 1665 insn->header.saturate = saturate; 1666 1667 /* Source modifiers are ignored for extended math instructions. */ 1668 assert(!src.negate); 1669 assert(!src.abs); 1670 1671 brw_set_dest(p, insn, dest); 1672 brw_set_src0(p, insn, src); 1673 brw_set_src1(p, insn, brw_null_reg()); 1674 return; 1675 } 1676 1677 /* First instruction: 1678 */ 1679 brw_push_insn_state(p); 1680 brw_set_predicate_control_flag_value(p, 0xff); 1681 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1682 1683 insn = next_insn(p, BRW_OPCODE_SEND); 1684 insn->header.destreg__conditionalmod = msg_reg_nr; 1685 1686 brw_set_dest(p, insn, dest); 1687 brw_set_src0(p, insn, src); 1688 brw_set_math_message(p, 1689 insn, 1690 function, 1691 BRW_MATH_INTEGER_UNSIGNED, 1692 precision, 1693 saturate, 1694 BRW_MATH_DATA_VECTOR); 1695 1696 /* Second instruction: 1697 */ 1698 insn = next_insn(p, BRW_OPCODE_SEND); 1699 insn->header.compression_control = BRW_COMPRESSION_2NDHALF; 1700 insn->header.destreg__conditionalmod = msg_reg_nr+1; 1701 1702 brw_set_dest(p, insn, offset(dest,1)); 1703 brw_set_src0(p, insn, src); 1704 brw_set_math_message(p, 1705 insn, 1706 function, 1707 BRW_MATH_INTEGER_UNSIGNED, 1708 precision, 1709 saturate, 1710 BRW_MATH_DATA_VECTOR); 1711 1712 brw_pop_insn_state(p); 1713} 1714 1715 1716/** 1717 * Write a block of OWORDs (half a GRF each) from the scratch buffer, 1718 * using a constant offset per channel. 
 *
 * The offset must be aligned to oword size (16 bytes).  Used for
 * register spilling.
 */
void brw_oword_block_write_scratch(struct brw_compile *p,
				   struct brw_reg mrf,
				   int num_regs,
				   GLuint offset)
{
   struct intel_context *intel = &p->brw->intel;
   uint32_t msg_control, msg_type;
   int mlen;

   /* On gen6+ the message offset field is in units of owords, not bytes. */
   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* One GRF is two owords; message length includes the header register. */
   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      mlen = 2;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      mlen = 3;
   }

   /* Set up the message header.  This is g0, with g0.2 filled with
    * the offset.  We don't want to leave our offset around in g0 or
    * it'll screw up texture samples, so set it up inside the message
    * reg.
    */
   {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
				  mrf.nr,
				  2), BRW_REGISTER_TYPE_UD),
	      brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_reg dest;
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      int send_commit_msg;
      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
					 BRW_REGISTER_TYPE_UW);

      if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
	 insn->header.compression_control = BRW_COMPRESSION_NONE;
	 src_header = vec16(src_header);
      }
      assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
      insn->header.destreg__conditionalmod = mrf.nr;

      /* Until gen6, writes followed by reads from the same location
       * are not guaranteed to be ordered unless write_commit is set.
       * If set, then a no-op write is issued to the destination
       * register to set a dependency, and a read from the destination
       * can be used to ensure the ordering.
       *
       * For gen6, only writes between different threads need ordering
       * protection.  Our use of DP writes is all about register
       * spilling within a thread.
       */
      if (intel->gen >= 6) {
	 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
	 send_commit_msg = 0;
      } else {
	 dest = src_header;
	 send_commit_msg = 1;
      }

      brw_set_dest(p, insn, dest);
      /* Gen6+ takes the payload from the MRF-mapped GRF via src0;
       * earlier gens use the explicit message register (destreg field).
       */
      if (intel->gen >= 6) {
	 brw_set_src0(p, insn, mrf);
      } else {
	 brw_set_src0(p, insn, brw_null_reg());
      }

      if (intel->gen >= 6)
	 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
      else
	 msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;

      brw_set_dp_write_message(p,
			       insn,
			       255, /* binding table index (255=stateless) */
			       msg_control,
			       msg_type,
			       mlen,
			       true, /* header_present */
			       0, /* not a render target */
			       send_commit_msg, /* response_length */
			       0, /* eot */
			       send_commit_msg);
   }
}


/**
 * Read a block of owords (half a GRF each) from the scratch buffer
 * using a constant index per channel.
 *
 * Offset must be aligned to oword size (16 bytes).  Used for register
 * spilling.
1832 */ 1833void 1834brw_oword_block_read_scratch(struct brw_compile *p, 1835 struct brw_reg dest, 1836 struct brw_reg mrf, 1837 int num_regs, 1838 GLuint offset) 1839{ 1840 struct intel_context *intel = &p->brw->intel; 1841 uint32_t msg_control; 1842 int rlen; 1843 1844 if (intel->gen >= 6) 1845 offset /= 16; 1846 1847 mrf = retype(mrf, BRW_REGISTER_TYPE_UD); 1848 dest = retype(dest, BRW_REGISTER_TYPE_UW); 1849 1850 if (num_regs == 1) { 1851 msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS; 1852 rlen = 1; 1853 } else { 1854 msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS; 1855 rlen = 2; 1856 } 1857 1858 { 1859 brw_push_insn_state(p); 1860 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1861 brw_set_mask_control(p, BRW_MASK_DISABLE); 1862 1863 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); 1864 1865 /* set message header global offset field (reg 0, element 2) */ 1866 brw_MOV(p, 1867 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 1868 mrf.nr, 1869 2), BRW_REGISTER_TYPE_UD), 1870 brw_imm_ud(offset)); 1871 1872 brw_pop_insn_state(p); 1873 } 1874 1875 { 1876 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1877 1878 assert(insn->header.predicate_control == 0); 1879 insn->header.compression_control = BRW_COMPRESSION_NONE; 1880 insn->header.destreg__conditionalmod = mrf.nr; 1881 1882 brw_set_dest(p, insn, dest); /* UW? */ 1883 if (intel->gen >= 6) { 1884 brw_set_src0(p, insn, mrf); 1885 } else { 1886 brw_set_src0(p, insn, brw_null_reg()); 1887 } 1888 1889 brw_set_dp_read_message(p, 1890 insn, 1891 255, /* binding table index (255=stateless) */ 1892 msg_control, 1893 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ 1894 BRW_DATAPORT_READ_TARGET_RENDER_CACHE, 1895 1, /* msg_length */ 1896 rlen); 1897 } 1898} 1899 1900/** 1901 * Read a float[4] vector from the data port Data Cache (const buffer). 1902 * Location (in buffer) should be a multiple of 16. 1903 * Used for fetching shader constants. 
1904 */ 1905void brw_oword_block_read(struct brw_compile *p, 1906 struct brw_reg dest, 1907 struct brw_reg mrf, 1908 uint32_t offset, 1909 uint32_t bind_table_index) 1910{ 1911 struct intel_context *intel = &p->brw->intel; 1912 1913 /* On newer hardware, offset is in units of owords. */ 1914 if (intel->gen >= 6) 1915 offset /= 16; 1916 1917 mrf = retype(mrf, BRW_REGISTER_TYPE_UD); 1918 1919 brw_push_insn_state(p); 1920 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1921 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1922 brw_set_mask_control(p, BRW_MASK_DISABLE); 1923 1924 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); 1925 1926 /* set message header global offset field (reg 0, element 2) */ 1927 brw_MOV(p, 1928 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 1929 mrf.nr, 1930 2), BRW_REGISTER_TYPE_UD), 1931 brw_imm_ud(offset)); 1932 1933 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1934 insn->header.destreg__conditionalmod = mrf.nr; 1935 1936 /* cast dest to a uword[8] vector */ 1937 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); 1938 1939 brw_set_dest(p, insn, dest); 1940 if (intel->gen >= 6) { 1941 brw_set_src0(p, insn, mrf); 1942 } else { 1943 brw_set_src0(p, insn, brw_null_reg()); 1944 } 1945 1946 brw_set_dp_read_message(p, 1947 insn, 1948 bind_table_index, 1949 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW, 1950 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, 1951 BRW_DATAPORT_READ_TARGET_DATA_CACHE, 1952 1, /* msg_length */ 1953 1); /* response_length (1 reg, 2 owords!) */ 1954 1955 brw_pop_insn_state(p); 1956} 1957 1958/** 1959 * Read a set of dwords from the data port Data Cache (const buffer). 1960 * 1961 * Location (in buffer) appears as UD offsets in the register after 1962 * the provided mrf header reg. 
1963 */ 1964void brw_dword_scattered_read(struct brw_compile *p, 1965 struct brw_reg dest, 1966 struct brw_reg mrf, 1967 uint32_t bind_table_index) 1968{ 1969 mrf = retype(mrf, BRW_REGISTER_TYPE_UD); 1970 1971 brw_push_insn_state(p); 1972 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1973 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1974 brw_set_mask_control(p, BRW_MASK_DISABLE); 1975 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); 1976 brw_pop_insn_state(p); 1977 1978 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1979 insn->header.destreg__conditionalmod = mrf.nr; 1980 1981 /* cast dest to a uword[8] vector */ 1982 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); 1983 1984 brw_set_dest(p, insn, dest); 1985 brw_set_src0(p, insn, brw_null_reg()); 1986 1987 brw_set_dp_read_message(p, 1988 insn, 1989 bind_table_index, 1990 BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS, 1991 BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ, 1992 BRW_DATAPORT_READ_TARGET_DATA_CACHE, 1993 2, /* msg_length */ 1994 1); /* response_length */ 1995} 1996 1997 1998 1999/** 2000 * Read float[4] constant(s) from VS constant buffer. 2001 * For relative addressing, two float[4] constants will be read into 'dest'. 2002 * Otherwise, one float[4] constant will be read into the lower half of 'dest'. 
 */
void brw_dp_READ_4_vs(struct brw_compile *p,
                      struct brw_reg dest,
                      GLuint location,
                      GLuint bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint msg_reg_nr = 1;

   /* On Gen6+ the location is given in owords (16 bytes), not bytes. */
   if (intel->gen >= 6)
      location /= 16;

   /* Setup MRF[1] with location/offset into const buffer */
   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   /* Write the buffer offset into the header's global offset field
    * (element 2 of the message register).
    */
   brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2),
		     BRW_REGISTER_TYPE_UD),
	   brw_imm_ud(location));
   brw_pop_insn_state(p);

   insn = next_insn(p, BRW_OPCODE_SEND);

   insn->header.predicate_control = BRW_PREDICATE_NONE;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.destreg__conditionalmod = msg_reg_nr;
   insn->header.mask_control = BRW_MASK_DISABLE;

   brw_set_dest(p, insn, dest);
   /* Gen6+ names the MRF payload as src0; earlier gens pass null and
    * take the message register from destreg__conditionalmod above.
    */
   if (intel->gen >= 6) {
      brw_set_src0(p, insn, brw_message_reg(msg_reg_nr));
   } else {
      brw_set_src0(p, insn, brw_null_reg());
   }

   brw_set_dp_read_message(p,
			   insn,
			   bind_table_index,
			   0,
			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
			   1, /* msg_length */
			   1); /* response_length (1 Oword) */
}

/**
 * Read a float[4] constant per vertex from VS constant buffer, with
 * relative addressing.
 */
void brw_dp_READ_4_vs_relative(struct brw_compile *p,
			       struct brw_reg dest,
			       struct brw_reg addr_reg,
			       GLuint offset,
			       GLuint bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_reg src = brw_vec8_grf(0, 0);
   int msg_type;

   /* Setup MRF[1] with offset into const buffer */
   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);

   /* M1.0 is block offset 0, M1.4 is block offset 1, all other
    * fields ignored.
    */
   brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D),
	   addr_reg, brw_imm_d(offset));
   brw_pop_insn_state(p);

   /* On Gen6 the g0 header must first be copied into an MRF explicitly. */
   gen6_resolve_implied_move(p, &src, 0);
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

   insn->header.predicate_control = BRW_PREDICATE_NONE;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.destreg__conditionalmod = 0;
   insn->header.mask_control = BRW_MASK_DISABLE;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);

   /* The OWORD_DUAL_BLOCK_READ message type encoding changed with G4x/Gen5
    * and again with Gen6; pick the right opcode for this hardware.
    */
   if (intel->gen >= 6)
      msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else if (intel->gen == 5 || intel->is_g4x)
      msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else
      msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;

   brw_set_dp_read_message(p,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
			   msg_type,
			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
			   2, /* msg_length */
			   1); /* response_length */
}



/**
 * Emit a render target write message.
 *
 * \param dispatch_width  8 or 16; selects the SIMD8/SIMD16 message variant.
 * \param msg_reg_nr      first message register of the color payload.
 * \param src0            payload source (replaced by the MRF on Gen6+).
 * \param eot             true if this send also ends the thread.
 */
void brw_fb_WRITE(struct brw_compile *p,
		  int dispatch_width,
		  GLuint msg_reg_nr,
		  struct brw_reg src0,
		  GLuint binding_table_index,
		  GLuint msg_length,
		  GLuint response_length,
		  bool eot,
		  bool header_present)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint msg_control, msg_type;
   struct brw_reg dest;

   /* Destination is the null register; the vec8/vec16 width carries the
    * execution size for the send.
    */
   if (dispatch_width == 16)
      dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
   else
      dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);

   /* On Gen6+, writes to binding table index 0 use SENDC (send with
    * dependency check) instead of plain SEND.
    */
   if (intel->gen >= 6 && binding_table_index == 0) {
      insn = next_insn(p, BRW_OPCODE_SENDC);
   } else {
      insn = next_insn(p, BRW_OPCODE_SEND);
   }
   /* The execution mask is ignored for render target writes. */
   insn->header.predicate_control = 0;
   insn->header.compression_control = BRW_COMPRESSION_NONE;

   if (intel->gen >= 6) {
      /* headerless version, just submit color payload */
      src0 = brw_message_reg(msg_reg_nr);

      msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   } else {
      insn->header.destreg__conditionalmod = msg_reg_nr;

      msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   }

   if (dispatch_width == 16)
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
   else
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_dp_write_message(p,
			    insn,
			    binding_table_index,
			    msg_control,
			    msg_type,
			    msg_length,
			    header_present,
			    1, /* last render target write */
			    response_length,
			    eot,
			    0 /* send_commit_msg */);
}


/**
 * Texture sample instruction.
 * Note: the msg_type plus msg_length values determine exactly what kind
 * of sampling operation is performed.  See volume 4, page 161 of docs.
 */
void brw_SAMPLE(struct brw_compile *p,
		struct brw_reg dest,
		GLuint msg_reg_nr,
		struct brw_reg src0,
		GLuint binding_table_index,
		GLuint sampler,
		GLuint writemask,
		GLuint msg_type,
		GLuint response_length,
		GLuint msg_length,
		GLuint header_present,
		GLuint simd_mode,
		GLuint return_format)
{
   struct intel_context *intel = &p->brw->intel;
   bool need_stall = 0;

   /* Nothing to write: emit no instruction at all. */
   if (writemask == 0) {
      /*printf("%s: zero writemask??\n", __FUNCTION__); */
      return;
   }

   /* Hardware doesn't do destination dependency checking on send
    * instructions properly.  Add a workaround which generates the
    * dependency by other means.  In practice it seems like this bug
    * only crops up for texture samples, and only where registers are
    * written by the send and then written again later without being
    * read in between.  Luckily for us, we already track that
    * information and use it to modify the writemask for the
    * instruction, so that is a guide for whether a workaround is
    * needed.
    */
   if (writemask != WRITEMASK_XYZW) {
      GLuint dst_offset = 0;
      GLuint i, newmask = 0, len = 0;

      /* Skip over leading disabled channels; each one shifts the
       * destination down by two registers.
       */
      for (i = 0; i < 4; i++) {
	 if (writemask & (1<<i))
	    break;
	 dst_offset += 2;
      }
      /* Collect the contiguous run of enabled channels that follows. */
      for (; i < 4; i++) {
	 if (!(writemask & (1<<i)))
	    break;
	 newmask |= 1<<i;
	 len++;
      }

      /* A non-contiguous writemask cannot be expressed with the message
       * channel mask; fall back to a read-back stall after the sample.
       */
      if (newmask != writemask) {
	 need_stall = 1;
         /* printf("need stall %x %x\n", newmask , writemask); */
      }
      else {
	 bool dispatch_16 = false;

	 struct brw_reg m1 = brw_message_reg(msg_reg_nr);

	 guess_execution_size(p, p->current, dest);
	 if (p->current->header.execution_size == BRW_EXECUTE_16)
	    dispatch_16 = true;

	 /* Invert: the header field holds the channels to DISABLE. */
	 newmask = ~newmask & WRITEMASK_XYZW;

	 brw_push_insn_state(p);

	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
	 brw_set_mask_control(p, BRW_MASK_DISABLE);

	 /* Build a header from g0 and write the channel-disable mask at
	  * bit 12 of element 2.
	  */
	 brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD),
		 retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD));
  	 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));

	 brw_pop_insn_state(p);

  	 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
	 dest = offset(dest, dst_offset);

	 /* For 16-wide dispatch, masked channels are skipped in the
	  * response. For 8-wide, masked channels still take up slots,
	  * and are just not written to.
	  */
	 if (dispatch_16)
	    response_length = len * 2;
      }
   }

   {
      struct brw_instruction *insn;

      /* On Gen6 the payload must be moved into an MRF explicitly. */
      gen6_resolve_implied_move(p, &src0, msg_reg_nr);

      insn = next_insn(p, BRW_OPCODE_SEND);
      insn->header.predicate_control = 0; /* XXX */
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      if (intel->gen < 6)
	  insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src0);
      brw_set_sampler_message(p, insn,
			      binding_table_index,
			      sampler,
			      msg_type,
			      response_length,
			      msg_length,
			      header_present,
			      simd_mode,
			      return_format);
   }

   if (need_stall) {
      struct brw_reg reg = vec8(offset(dest, response_length-1));

      /* Touch the last register written by the sample so that any later
       * write to the destination must wait for the send to complete:
       *
       *    mov (8) r9.0<1>:f    r9.0<8;8,1>:f    { Align1 }
       */
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, retype(reg, BRW_REGISTER_TYPE_UD),
	      retype(reg, BRW_REGISTER_TYPE_UD));
      brw_pop_insn_state(p);
   }

}

/* All these variables are pretty confusing - we might be better off
 * using bitmasks and macros for this, in the old style.  Or perhaps
 * just having the caller instantiate the fields in dword3 itself.
2302 */ 2303void brw_urb_WRITE(struct brw_compile *p, 2304 struct brw_reg dest, 2305 GLuint msg_reg_nr, 2306 struct brw_reg src0, 2307 bool allocate, 2308 bool used, 2309 GLuint msg_length, 2310 GLuint response_length, 2311 bool eot, 2312 bool writes_complete, 2313 GLuint offset, 2314 GLuint swizzle) 2315{ 2316 struct intel_context *intel = &p->brw->intel; 2317 struct brw_instruction *insn; 2318 2319 gen6_resolve_implied_move(p, &src0, msg_reg_nr); 2320 2321 if (intel->gen == 7) { 2322 /* Enable Channel Masks in the URB_WRITE_HWORD message header */ 2323 brw_push_insn_state(p); 2324 brw_set_access_mode(p, BRW_ALIGN_1); 2325 brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5), 2326 BRW_REGISTER_TYPE_UD), 2327 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD), 2328 brw_imm_ud(0xff00)); 2329 brw_pop_insn_state(p); 2330 } 2331 2332 insn = next_insn(p, BRW_OPCODE_SEND); 2333 2334 assert(msg_length < BRW_MAX_MRF); 2335 2336 brw_set_dest(p, insn, dest); 2337 brw_set_src0(p, insn, src0); 2338 brw_set_src1(p, insn, brw_imm_d(0)); 2339 2340 if (intel->gen < 6) 2341 insn->header.destreg__conditionalmod = msg_reg_nr; 2342 2343 brw_set_urb_message(p, 2344 insn, 2345 allocate, 2346 used, 2347 msg_length, 2348 response_length, 2349 eot, 2350 writes_complete, 2351 offset, 2352 swizzle); 2353} 2354 2355static int 2356brw_find_next_block_end(struct brw_compile *p, int start) 2357{ 2358 int ip; 2359 2360 for (ip = start + 1; ip < p->nr_insn; ip++) { 2361 struct brw_instruction *insn = &p->store[ip]; 2362 2363 switch (insn->header.opcode) { 2364 case BRW_OPCODE_ENDIF: 2365 case BRW_OPCODE_ELSE: 2366 case BRW_OPCODE_WHILE: 2367 return ip; 2368 } 2369 } 2370 assert(!"not reached"); 2371 return start + 1; 2372} 2373 2374/* There is no DO instruction on gen6, so to find the end of the loop 2375 * we have to see if the loop is jumping back before our start 2376 * instruction. 
 */
static int
brw_find_loop_end(struct brw_compile *p, int start)
{
   struct intel_context *intel = &p->brw->intel;
   int ip;
   /* Jump fields count in units of br (2) per instruction; same scale is
    * used when patching them in brw_set_uip_jip() below.
    */
   int br = 2;

   for (ip = start + 1; ip < p->nr_insn; ip++) {
      struct brw_instruction *insn = &p->store[ip];

      if (insn->header.opcode == BRW_OPCODE_WHILE) {
	 /* Gen6 stores the WHILE jump in jump_count; Gen7 uses JIP. */
	 int jip = intel->gen == 6 ? insn->bits1.branch_gen6.jump_count
				   : insn->bits3.break_cont.jip;
	 /* A backward jump landing at or before our start instruction
	  * means this WHILE closes the loop containing it.
	  */
	 if (ip + jip / br <= start)
	    return ip;
      }
   }
   assert(!"not reached");
   return start + 1;
}

/* After program generation, go back and update the UIP and JIP of
 * BREAK and CONT instructions to their correct locations.
 */
void
brw_set_uip_jip(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   int ip;
   /* Offsets are encoded in units of br (2) per instruction. */
   int br = 2;

   /* Pre-Gen6 branches are patched elsewhere -- NOTE(review): inferred
    * only from this early return; confirm against the rest of the file.
    */
   if (intel->gen < 6)
      return;

   for (ip = 0; ip < p->nr_insn; ip++) {
      struct brw_instruction *insn = &p->store[ip];

      switch (insn->header.opcode) {
      case BRW_OPCODE_BREAK:
	 /* JIP: jump to the end of the enclosing block. */
	 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
	 /* Gen7 UIP points to WHILE; Gen6 points just after it */
	 insn->bits3.break_cont.uip =
	    br * (brw_find_loop_end(p, ip) - ip + (intel->gen == 6 ? 1 : 0));
	 break;
      case BRW_OPCODE_CONTINUE:
	 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
	 insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip);

	 /* Both targets lie after the CONT, so the offsets are nonzero. */
	 assert(insn->bits3.break_cont.uip != 0);
	 assert(insn->bits3.break_cont.jip != 0);
	 break;
      }
   }
}

/**
 * Emit an FF_SYNC URB message (message encoding is filled in by
 * brw_set_ff_sync_message).
 */
void brw_ff_sync(struct brw_compile *p,
		 struct brw_reg dest,
		 GLuint msg_reg_nr,
		 struct brw_reg src0,
		 bool allocate,
		 GLuint response_length,
		 bool eot)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   /* On Gen6 the payload must be moved into an MRF explicitly. */
   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   insn = next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));

   /* Pre-Gen6, the message register number rides in the header field. */
   if (intel->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_ff_sync_message(p,
			   insn,
			   allocate,
			   response_length,
			   eot);
}

/**
 * Emit the SEND instruction necessary to generate stream output data on Gen6
 * (for transform feedback).
 *
 * If send_commit_msg is true, this is the last piece of stream output data
 * from this thread, so send the data as a committed write.  According to the
 * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
 *
 *   "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
 *    writes are complete by sending the final write as a committed write."
2471 */ 2472void 2473brw_svb_write(struct brw_compile *p, 2474 struct brw_reg dest, 2475 GLuint msg_reg_nr, 2476 struct brw_reg src0, 2477 GLuint binding_table_index, 2478 bool send_commit_msg) 2479{ 2480 struct brw_instruction *insn; 2481 2482 gen6_resolve_implied_move(p, &src0, msg_reg_nr); 2483 2484 insn = next_insn(p, BRW_OPCODE_SEND); 2485 brw_set_dest(p, insn, dest); 2486 brw_set_src0(p, insn, src0); 2487 brw_set_src1(p, insn, brw_imm_d(0)); 2488 brw_set_dp_write_message(p, insn, 2489 binding_table_index, 2490 0, /* msg_control: ignored */ 2491 GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE, 2492 1, /* msg_length */ 2493 true, /* header_present */ 2494 0, /* last_render_target: ignored */ 2495 send_commit_msg, /* response_length */ 2496 0, /* end_of_thread */ 2497 send_commit_msg); /* send_commit_msg */ 2498} 2499