brw_eu_emit.c revision 172bb92db1a3c317867d9cfec6f15c09c37a0f6c
1/* 2 Copyright (C) Intel Corp. 2006. All Rights Reserved. 3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to 4 develop this 3D driver. 5 6 Permission is hereby granted, free of charge, to any person obtaining 7 a copy of this software and associated documentation files (the 8 "Software"), to deal in the Software without restriction, including 9 without limitation the rights to use, copy, modify, merge, publish, 10 distribute, sublicense, and/or sell copies of the Software, and to 11 permit persons to whom the Software is furnished to do so, subject to 12 the following conditions: 13 14 The above copyright notice and this permission notice (including the 15 next paragraph) shall be included in all copies or substantial 16 portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
25 26 **********************************************************************/ 27 /* 28 * Authors: 29 * Keith Whitwell <keith@tungstengraphics.com> 30 */ 31 32 33#include "brw_context.h" 34#include "brw_defines.h" 35#include "brw_eu.h" 36 37#include "glsl/ralloc.h" 38 39/*********************************************************************** 40 * Internal helper for constructing instructions 41 */ 42 43static void guess_execution_size(struct brw_compile *p, 44 struct brw_instruction *insn, 45 struct brw_reg reg) 46{ 47 if (reg.width == BRW_WIDTH_8 && p->compressed) 48 insn->header.execution_size = BRW_EXECUTE_16; 49 else 50 insn->header.execution_size = reg.width; /* note - definitions are compatible */ 51} 52 53 54/** 55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source 56 * registers, implicitly moving the operand to a message register. 57 * 58 * On Sandybridge, this is no longer the case. This function performs the 59 * explicit move; it should be called before emitting a SEND instruction. 60 */ 61void 62gen6_resolve_implied_move(struct brw_compile *p, 63 struct brw_reg *src, 64 GLuint msg_reg_nr) 65{ 66 struct intel_context *intel = &p->brw->intel; 67 if (intel->gen < 6) 68 return; 69 70 if (src->file == BRW_MESSAGE_REGISTER_FILE) 71 return; 72 73 if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) { 74 brw_push_insn_state(p); 75 brw_set_mask_control(p, BRW_MASK_DISABLE); 76 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 77 brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD), 78 retype(*src, BRW_REGISTER_TYPE_UD)); 79 brw_pop_insn_state(p); 80 } 81 *src = brw_message_reg(msg_reg_nr); 82} 83 84static void 85gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg) 86{ 87 /* From the BSpec / ISA Reference / send - [DevIVB+]: 88 * "The send with EOT should use register space R112-R127 for <src>. 
This is 89 * to enable loading of a new thread into the same slot while the message 90 * with EOT for current thread is pending dispatch." 91 * 92 * Since we're pretending to have 16 MRFs anyway, we may as well use the 93 * registers required for messages with EOT. 94 */ 95 struct intel_context *intel = &p->brw->intel; 96 if (intel->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) { 97 reg->file = BRW_GENERAL_REGISTER_FILE; 98 reg->nr += GEN7_MRF_HACK_START; 99 } 100} 101 102 103void 104brw_set_dest(struct brw_compile *p, struct brw_instruction *insn, 105 struct brw_reg dest) 106{ 107 if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && 108 dest.file != BRW_MESSAGE_REGISTER_FILE) 109 assert(dest.nr < 128); 110 111 gen7_convert_mrf_to_grf(p, &dest); 112 113 insn->bits1.da1.dest_reg_file = dest.file; 114 insn->bits1.da1.dest_reg_type = dest.type; 115 insn->bits1.da1.dest_address_mode = dest.address_mode; 116 117 if (dest.address_mode == BRW_ADDRESS_DIRECT) { 118 insn->bits1.da1.dest_reg_nr = dest.nr; 119 120 if (insn->header.access_mode == BRW_ALIGN_1) { 121 insn->bits1.da1.dest_subreg_nr = dest.subnr; 122 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) 123 dest.hstride = BRW_HORIZONTAL_STRIDE_1; 124 insn->bits1.da1.dest_horiz_stride = dest.hstride; 125 } 126 else { 127 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; 128 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; 129 /* even ignored in da16, still need to set as '01' */ 130 insn->bits1.da16.dest_horiz_stride = 1; 131 } 132 } 133 else { 134 insn->bits1.ia1.dest_subreg_nr = dest.subnr; 135 136 /* These are different sizes in align1 vs align16: 137 */ 138 if (insn->header.access_mode == BRW_ALIGN_1) { 139 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; 140 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) 141 dest.hstride = BRW_HORIZONTAL_STRIDE_1; 142 insn->bits1.ia1.dest_horiz_stride = dest.hstride; 143 } 144 else { 145 insn->bits1.ia16.dest_indirect_offset = 
dest.dw1.bits.indirect_offset; 146 /* even ignored in da16, still need to set as '01' */ 147 insn->bits1.ia16.dest_horiz_stride = 1; 148 } 149 } 150 151 /* NEW: Set the execution size based on dest.width and 152 * insn->compression_control: 153 */ 154 guess_execution_size(p, insn, dest); 155} 156 157extern int reg_type_size[]; 158 159static void 160validate_reg(struct brw_instruction *insn, struct brw_reg reg) 161{ 162 int hstride_for_reg[] = {0, 1, 2, 4}; 163 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256}; 164 int width_for_reg[] = {1, 2, 4, 8, 16}; 165 int execsize_for_reg[] = {1, 2, 4, 8, 16}; 166 int width, hstride, vstride, execsize; 167 168 if (reg.file == BRW_IMMEDIATE_VALUE) { 169 /* 3.3.6: Region Parameters. Restriction: Immediate vectors 170 * mean the destination has to be 128-bit aligned and the 171 * destination horiz stride has to be a word. 172 */ 173 if (reg.type == BRW_REGISTER_TYPE_V) { 174 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] * 175 reg_type_size[insn->bits1.da1.dest_reg_type] == 2); 176 } 177 178 return; 179 } 180 181 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE && 182 reg.file == BRW_ARF_NULL) 183 return; 184 185 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg)); 186 hstride = hstride_for_reg[reg.hstride]; 187 188 if (reg.vstride == 0xf) { 189 vstride = -1; 190 } else { 191 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg)); 192 vstride = vstride_for_reg[reg.vstride]; 193 } 194 195 assert(reg.width >= 0 && reg.width < Elements(width_for_reg)); 196 width = width_for_reg[reg.width]; 197 198 assert(insn->header.execution_size >= 0 && 199 insn->header.execution_size < Elements(execsize_for_reg)); 200 execsize = execsize_for_reg[insn->header.execution_size]; 201 202 /* Restrictions from 3.3.10: Register Region Restrictions. */ 203 /* 3. */ 204 assert(execsize >= width); 205 206 /* 4. 
*/ 207 if (execsize == width && hstride != 0) { 208 assert(vstride == -1 || vstride == width * hstride); 209 } 210 211 /* 5. */ 212 if (execsize == width && hstride == 0) { 213 /* no restriction on vstride. */ 214 } 215 216 /* 6. */ 217 if (width == 1) { 218 assert(hstride == 0); 219 } 220 221 /* 7. */ 222 if (execsize == 1 && width == 1) { 223 assert(hstride == 0); 224 assert(vstride == 0); 225 } 226 227 /* 8. */ 228 if (vstride == 0 && hstride == 0) { 229 assert(width == 1); 230 } 231 232 /* 10. Check destination issues. */ 233} 234 235void 236brw_set_src0(struct brw_compile *p, struct brw_instruction *insn, 237 struct brw_reg reg) 238{ 239 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) 240 assert(reg.nr < 128); 241 242 gen7_convert_mrf_to_grf(p, ®); 243 244 validate_reg(insn, reg); 245 246 insn->bits1.da1.src0_reg_file = reg.file; 247 insn->bits1.da1.src0_reg_type = reg.type; 248 insn->bits2.da1.src0_abs = reg.abs; 249 insn->bits2.da1.src0_negate = reg.negate; 250 insn->bits2.da1.src0_address_mode = reg.address_mode; 251 252 if (reg.file == BRW_IMMEDIATE_VALUE) { 253 insn->bits3.ud = reg.dw1.ud; 254 255 /* Required to set some fields in src1 as well: 256 */ 257 insn->bits1.da1.src1_reg_file = 0; /* arf */ 258 insn->bits1.da1.src1_reg_type = reg.type; 259 } 260 else 261 { 262 if (reg.address_mode == BRW_ADDRESS_DIRECT) { 263 if (insn->header.access_mode == BRW_ALIGN_1) { 264 insn->bits2.da1.src0_subreg_nr = reg.subnr; 265 insn->bits2.da1.src0_reg_nr = reg.nr; 266 } 267 else { 268 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16; 269 insn->bits2.da16.src0_reg_nr = reg.nr; 270 } 271 } 272 else { 273 insn->bits2.ia1.src0_subreg_nr = reg.subnr; 274 275 if (insn->header.access_mode == BRW_ALIGN_1) { 276 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; 277 } 278 else { 279 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; 280 } 281 } 282 283 if (insn->header.access_mode == BRW_ALIGN_1) { 284 if (reg.width == BRW_WIDTH_1 && 285 
insn->header.execution_size == BRW_EXECUTE_1) { 286 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0; 287 insn->bits2.da1.src0_width = BRW_WIDTH_1; 288 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0; 289 } 290 else { 291 insn->bits2.da1.src0_horiz_stride = reg.hstride; 292 insn->bits2.da1.src0_width = reg.width; 293 insn->bits2.da1.src0_vert_stride = reg.vstride; 294 } 295 } 296 else { 297 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); 298 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); 299 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); 300 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); 301 302 /* This is an oddity of the fact we're using the same 303 * descriptions for registers in align_16 as align_1: 304 */ 305 if (reg.vstride == BRW_VERTICAL_STRIDE_8) 306 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4; 307 else 308 insn->bits2.da16.src0_vert_stride = reg.vstride; 309 } 310 } 311} 312 313 314void brw_set_src1(struct brw_compile *p, 315 struct brw_instruction *insn, 316 struct brw_reg reg) 317{ 318 assert(reg.file != BRW_MESSAGE_REGISTER_FILE); 319 320 assert(reg.nr < 128); 321 322 gen7_convert_mrf_to_grf(p, ®); 323 324 validate_reg(insn, reg); 325 326 insn->bits1.da1.src1_reg_file = reg.file; 327 insn->bits1.da1.src1_reg_type = reg.type; 328 insn->bits3.da1.src1_abs = reg.abs; 329 insn->bits3.da1.src1_negate = reg.negate; 330 331 /* Only src1 can be immediate in two-argument instructions. 
332 */ 333 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE); 334 335 if (reg.file == BRW_IMMEDIATE_VALUE) { 336 insn->bits3.ud = reg.dw1.ud; 337 } 338 else { 339 /* This is a hardware restriction, which may or may not be lifted 340 * in the future: 341 */ 342 assert (reg.address_mode == BRW_ADDRESS_DIRECT); 343 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */ 344 345 if (insn->header.access_mode == BRW_ALIGN_1) { 346 insn->bits3.da1.src1_subreg_nr = reg.subnr; 347 insn->bits3.da1.src1_reg_nr = reg.nr; 348 } 349 else { 350 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16; 351 insn->bits3.da16.src1_reg_nr = reg.nr; 352 } 353 354 if (insn->header.access_mode == BRW_ALIGN_1) { 355 if (reg.width == BRW_WIDTH_1 && 356 insn->header.execution_size == BRW_EXECUTE_1) { 357 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0; 358 insn->bits3.da1.src1_width = BRW_WIDTH_1; 359 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0; 360 } 361 else { 362 insn->bits3.da1.src1_horiz_stride = reg.hstride; 363 insn->bits3.da1.src1_width = reg.width; 364 insn->bits3.da1.src1_vert_stride = reg.vstride; 365 } 366 } 367 else { 368 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); 369 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); 370 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); 371 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); 372 373 /* This is an oddity of the fact we're using the same 374 * descriptions for registers in align_16 as align_1: 375 */ 376 if (reg.vstride == BRW_VERTICAL_STRIDE_8) 377 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4; 378 else 379 insn->bits3.da16.src1_vert_stride = reg.vstride; 380 } 381 } 382} 383 384/** 385 * Set the Message Descriptor and Extended Message Descriptor fields 386 * for SEND messages. 
387 * 388 * \note This zeroes out the Function Control bits, so it must be called 389 * \b before filling out any message-specific data. Callers can 390 * choose not to fill in irrelevant bits; they will be zero. 391 */ 392static void 393brw_set_message_descriptor(struct brw_compile *p, 394 struct brw_instruction *inst, 395 enum brw_message_target sfid, 396 unsigned msg_length, 397 unsigned response_length, 398 bool header_present, 399 bool end_of_thread) 400{ 401 struct intel_context *intel = &p->brw->intel; 402 403 brw_set_src1(p, inst, brw_imm_d(0)); 404 405 if (intel->gen >= 5) { 406 inst->bits3.generic_gen5.header_present = header_present; 407 inst->bits3.generic_gen5.response_length = response_length; 408 inst->bits3.generic_gen5.msg_length = msg_length; 409 inst->bits3.generic_gen5.end_of_thread = end_of_thread; 410 411 if (intel->gen >= 6) { 412 /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */ 413 inst->header.destreg__conditionalmod = sfid; 414 } else { 415 /* Set Extended Message Descriptor (ex_desc) */ 416 inst->bits2.send_gen5.sfid = sfid; 417 inst->bits2.send_gen5.end_of_thread = end_of_thread; 418 } 419 } else { 420 inst->bits3.generic.response_length = response_length; 421 inst->bits3.generic.msg_length = msg_length; 422 inst->bits3.generic.msg_target = sfid; 423 inst->bits3.generic.end_of_thread = end_of_thread; 424 } 425} 426 427static void brw_set_math_message( struct brw_compile *p, 428 struct brw_instruction *insn, 429 GLuint function, 430 GLuint integer_type, 431 bool low_precision, 432 bool saturate, 433 GLuint dataType ) 434{ 435 struct brw_context *brw = p->brw; 436 struct intel_context *intel = &brw->intel; 437 unsigned msg_length; 438 unsigned response_length; 439 440 /* Infer message length from the function */ 441 switch (function) { 442 case BRW_MATH_FUNCTION_POW: 443 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT: 444 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER: 445 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER: 446 
msg_length = 2; 447 break; 448 default: 449 msg_length = 1; 450 break; 451 } 452 453 /* Infer response length from the function */ 454 switch (function) { 455 case BRW_MATH_FUNCTION_SINCOS: 456 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER: 457 response_length = 2; 458 break; 459 default: 460 response_length = 1; 461 break; 462 } 463 464 brw_set_message_descriptor(p, insn, BRW_SFID_MATH, 465 msg_length, response_length, false, false); 466 if (intel->gen == 5) { 467 insn->bits3.math_gen5.function = function; 468 insn->bits3.math_gen5.int_type = integer_type; 469 insn->bits3.math_gen5.precision = low_precision; 470 insn->bits3.math_gen5.saturate = saturate; 471 insn->bits3.math_gen5.data_type = dataType; 472 insn->bits3.math_gen5.snapshot = 0; 473 } else { 474 insn->bits3.math.function = function; 475 insn->bits3.math.int_type = integer_type; 476 insn->bits3.math.precision = low_precision; 477 insn->bits3.math.saturate = saturate; 478 insn->bits3.math.data_type = dataType; 479 } 480} 481 482 483static void brw_set_ff_sync_message(struct brw_compile *p, 484 struct brw_instruction *insn, 485 bool allocate, 486 GLuint response_length, 487 bool end_of_thread) 488{ 489 brw_set_message_descriptor(p, insn, BRW_SFID_URB, 490 1, response_length, true, end_of_thread); 491 insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */ 492 insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */ 493 insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */ 494 insn->bits3.urb_gen5.allocate = allocate; 495 insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */ 496 insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */ 497} 498 499static void brw_set_urb_message( struct brw_compile *p, 500 struct brw_instruction *insn, 501 bool allocate, 502 bool used, 503 GLuint msg_length, 504 GLuint response_length, 505 bool end_of_thread, 506 bool complete, 507 GLuint offset, 508 GLuint swizzle_control ) 509{ 510 struct brw_context *brw = p->brw; 511 struct intel_context *intel = 
&brw->intel; 512 513 brw_set_message_descriptor(p, insn, BRW_SFID_URB, 514 msg_length, response_length, true, end_of_thread); 515 if (intel->gen == 7) { 516 insn->bits3.urb_gen7.opcode = 0; /* URB_WRITE_HWORD */ 517 insn->bits3.urb_gen7.offset = offset; 518 assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE); 519 insn->bits3.urb_gen7.swizzle_control = swizzle_control; 520 /* per_slot_offset = 0 makes it ignore offsets in message header */ 521 insn->bits3.urb_gen7.per_slot_offset = 0; 522 insn->bits3.urb_gen7.complete = complete; 523 } else if (intel->gen >= 5) { 524 insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */ 525 insn->bits3.urb_gen5.offset = offset; 526 insn->bits3.urb_gen5.swizzle_control = swizzle_control; 527 insn->bits3.urb_gen5.allocate = allocate; 528 insn->bits3.urb_gen5.used = used; /* ? */ 529 insn->bits3.urb_gen5.complete = complete; 530 } else { 531 insn->bits3.urb.opcode = 0; /* ? */ 532 insn->bits3.urb.offset = offset; 533 insn->bits3.urb.swizzle_control = swizzle_control; 534 insn->bits3.urb.allocate = allocate; 535 insn->bits3.urb.used = used; /* ? */ 536 insn->bits3.urb.complete = complete; 537 } 538} 539 540void 541brw_set_dp_write_message(struct brw_compile *p, 542 struct brw_instruction *insn, 543 GLuint binding_table_index, 544 GLuint msg_control, 545 GLuint msg_type, 546 GLuint msg_length, 547 bool header_present, 548 GLuint last_render_target, 549 GLuint response_length, 550 GLuint end_of_thread, 551 GLuint send_commit_msg) 552{ 553 struct brw_context *brw = p->brw; 554 struct intel_context *intel = &brw->intel; 555 unsigned sfid; 556 557 if (intel->gen >= 7) { 558 /* Use the Render Cache for RT writes; otherwise use the Data Cache */ 559 if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE) 560 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; 561 else 562 sfid = GEN7_SFID_DATAPORT_DATA_CACHE; 563 } else if (intel->gen == 6) { 564 /* Use the render cache for all write messages. 
*/ 565 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; 566 } else { 567 sfid = BRW_SFID_DATAPORT_WRITE; 568 } 569 570 brw_set_message_descriptor(p, insn, sfid, msg_length, response_length, 571 header_present, end_of_thread); 572 573 if (intel->gen >= 7) { 574 insn->bits3.gen7_dp.binding_table_index = binding_table_index; 575 insn->bits3.gen7_dp.msg_control = msg_control; 576 insn->bits3.gen7_dp.last_render_target = last_render_target; 577 insn->bits3.gen7_dp.msg_type = msg_type; 578 } else if (intel->gen == 6) { 579 insn->bits3.gen6_dp.binding_table_index = binding_table_index; 580 insn->bits3.gen6_dp.msg_control = msg_control; 581 insn->bits3.gen6_dp.last_render_target = last_render_target; 582 insn->bits3.gen6_dp.msg_type = msg_type; 583 insn->bits3.gen6_dp.send_commit_msg = send_commit_msg; 584 } else if (intel->gen == 5) { 585 insn->bits3.dp_write_gen5.binding_table_index = binding_table_index; 586 insn->bits3.dp_write_gen5.msg_control = msg_control; 587 insn->bits3.dp_write_gen5.last_render_target = last_render_target; 588 insn->bits3.dp_write_gen5.msg_type = msg_type; 589 insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg; 590 } else { 591 insn->bits3.dp_write.binding_table_index = binding_table_index; 592 insn->bits3.dp_write.msg_control = msg_control; 593 insn->bits3.dp_write.last_render_target = last_render_target; 594 insn->bits3.dp_write.msg_type = msg_type; 595 insn->bits3.dp_write.send_commit_msg = send_commit_msg; 596 } 597} 598 599void 600brw_set_dp_read_message(struct brw_compile *p, 601 struct brw_instruction *insn, 602 GLuint binding_table_index, 603 GLuint msg_control, 604 GLuint msg_type, 605 GLuint target_cache, 606 GLuint msg_length, 607 GLuint response_length) 608{ 609 struct brw_context *brw = p->brw; 610 struct intel_context *intel = &brw->intel; 611 unsigned sfid; 612 613 if (intel->gen >= 7) { 614 sfid = GEN7_SFID_DATAPORT_DATA_CACHE; 615 } else if (intel->gen == 6) { 616 if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE) 617 
sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; 618 else 619 sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE; 620 } else { 621 sfid = BRW_SFID_DATAPORT_READ; 622 } 623 624 brw_set_message_descriptor(p, insn, sfid, msg_length, response_length, 625 true, false); 626 627 if (intel->gen >= 7) { 628 insn->bits3.gen7_dp.binding_table_index = binding_table_index; 629 insn->bits3.gen7_dp.msg_control = msg_control; 630 insn->bits3.gen7_dp.last_render_target = 0; 631 insn->bits3.gen7_dp.msg_type = msg_type; 632 } else if (intel->gen == 6) { 633 insn->bits3.gen6_dp.binding_table_index = binding_table_index; 634 insn->bits3.gen6_dp.msg_control = msg_control; 635 insn->bits3.gen6_dp.last_render_target = 0; 636 insn->bits3.gen6_dp.msg_type = msg_type; 637 insn->bits3.gen6_dp.send_commit_msg = 0; 638 } else if (intel->gen == 5) { 639 insn->bits3.dp_read_gen5.binding_table_index = binding_table_index; 640 insn->bits3.dp_read_gen5.msg_control = msg_control; 641 insn->bits3.dp_read_gen5.msg_type = msg_type; 642 insn->bits3.dp_read_gen5.target_cache = target_cache; 643 } else if (intel->is_g4x) { 644 insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/ 645 insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/ 646 insn->bits3.dp_read_g4x.msg_type = msg_type; /*11:13*/ 647 insn->bits3.dp_read_g4x.target_cache = target_cache; /*14:15*/ 648 } else { 649 insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/ 650 insn->bits3.dp_read.msg_control = msg_control; /*8:11*/ 651 insn->bits3.dp_read.msg_type = msg_type; /*12:13*/ 652 insn->bits3.dp_read.target_cache = target_cache; /*14:15*/ 653 } 654} 655 656void 657brw_set_sampler_message(struct brw_compile *p, 658 struct brw_instruction *insn, 659 GLuint binding_table_index, 660 GLuint sampler, 661 GLuint msg_type, 662 GLuint response_length, 663 GLuint msg_length, 664 GLuint header_present, 665 GLuint simd_mode, 666 GLuint return_format) 667{ 668 struct brw_context *brw = p->brw; 669 struct intel_context *intel = 
&brw->intel; 670 671 brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, msg_length, 672 response_length, header_present, false); 673 674 if (intel->gen >= 7) { 675 insn->bits3.sampler_gen7.binding_table_index = binding_table_index; 676 insn->bits3.sampler_gen7.sampler = sampler; 677 insn->bits3.sampler_gen7.msg_type = msg_type; 678 insn->bits3.sampler_gen7.simd_mode = simd_mode; 679 } else if (intel->gen >= 5) { 680 insn->bits3.sampler_gen5.binding_table_index = binding_table_index; 681 insn->bits3.sampler_gen5.sampler = sampler; 682 insn->bits3.sampler_gen5.msg_type = msg_type; 683 insn->bits3.sampler_gen5.simd_mode = simd_mode; 684 } else if (intel->is_g4x) { 685 insn->bits3.sampler_g4x.binding_table_index = binding_table_index; 686 insn->bits3.sampler_g4x.sampler = sampler; 687 insn->bits3.sampler_g4x.msg_type = msg_type; 688 } else { 689 insn->bits3.sampler.binding_table_index = binding_table_index; 690 insn->bits3.sampler.sampler = sampler; 691 insn->bits3.sampler.msg_type = msg_type; 692 insn->bits3.sampler.return_format = return_format; 693 } 694} 695 696 697#define next_insn brw_next_insn 698struct brw_instruction * 699brw_next_insn(struct brw_compile *p, GLuint opcode) 700{ 701 struct brw_instruction *insn; 702 703 if (p->nr_insn + 1 > p->store_size) { 704 if (0) 705 printf("incresing the store size to %d\n", p->store_size << 1); 706 p->store_size <<= 1; 707 p->store = reralloc(p->mem_ctx, p->store, 708 struct brw_instruction, p->store_size); 709 if (!p->store) 710 assert(!"realloc eu store memeory failed"); 711 } 712 713 insn = &p->store[p->nr_insn++]; 714 memcpy(insn, p->current, sizeof(*insn)); 715 716 /* Reset this one-shot flag: 717 */ 718 719 if (p->current->header.destreg__conditionalmod) { 720 p->current->header.destreg__conditionalmod = 0; 721 p->current->header.predicate_control = BRW_PREDICATE_NORMAL; 722 } 723 724 insn->header.opcode = opcode; 725 return insn; 726} 727 728static struct brw_instruction *brw_alu1( struct brw_compile *p, 729 
GLuint opcode, 730 struct brw_reg dest, 731 struct brw_reg src ) 732{ 733 struct brw_instruction *insn = next_insn(p, opcode); 734 brw_set_dest(p, insn, dest); 735 brw_set_src0(p, insn, src); 736 return insn; 737} 738 739static struct brw_instruction *brw_alu2(struct brw_compile *p, 740 GLuint opcode, 741 struct brw_reg dest, 742 struct brw_reg src0, 743 struct brw_reg src1 ) 744{ 745 struct brw_instruction *insn = next_insn(p, opcode); 746 brw_set_dest(p, insn, dest); 747 brw_set_src0(p, insn, src0); 748 brw_set_src1(p, insn, src1); 749 return insn; 750} 751 752static int 753get_3src_subreg_nr(struct brw_reg reg) 754{ 755 if (reg.vstride == BRW_VERTICAL_STRIDE_0) { 756 assert(brw_is_single_value_swizzle(reg.dw1.bits.swizzle)); 757 return reg.subnr / 4 + BRW_GET_SWZ(reg.dw1.bits.swizzle, 0); 758 } else { 759 return reg.subnr / 4; 760 } 761} 762 763static struct brw_instruction *brw_alu3(struct brw_compile *p, 764 GLuint opcode, 765 struct brw_reg dest, 766 struct brw_reg src0, 767 struct brw_reg src1, 768 struct brw_reg src2) 769{ 770 struct brw_instruction *insn = next_insn(p, opcode); 771 772 gen7_convert_mrf_to_grf(p, &dest); 773 774 assert(insn->header.access_mode == BRW_ALIGN_16); 775 776 assert(dest.file == BRW_GENERAL_REGISTER_FILE || 777 dest.file == BRW_MESSAGE_REGISTER_FILE); 778 assert(dest.nr < 128); 779 assert(dest.address_mode == BRW_ADDRESS_DIRECT); 780 assert(dest.type = BRW_REGISTER_TYPE_F); 781 insn->bits1.da3src.dest_reg_file = (dest.file == BRW_MESSAGE_REGISTER_FILE); 782 insn->bits1.da3src.dest_reg_nr = dest.nr; 783 insn->bits1.da3src.dest_subreg_nr = dest.subnr / 16; 784 insn->bits1.da3src.dest_writemask = dest.dw1.bits.writemask; 785 guess_execution_size(p, insn, dest); 786 787 assert(src0.file == BRW_GENERAL_REGISTER_FILE); 788 assert(src0.address_mode == BRW_ADDRESS_DIRECT); 789 assert(src0.nr < 128); 790 assert(src0.type == BRW_REGISTER_TYPE_F); 791 insn->bits2.da3src.src0_swizzle = src0.dw1.bits.swizzle; 792 
insn->bits2.da3src.src0_subreg_nr = get_3src_subreg_nr(src0); 793 insn->bits2.da3src.src0_reg_nr = src0.nr; 794 insn->bits1.da3src.src0_abs = src0.abs; 795 insn->bits1.da3src.src0_negate = src0.negate; 796 insn->bits2.da3src.src0_rep_ctrl = src0.vstride == BRW_VERTICAL_STRIDE_0; 797 798 assert(src1.file == BRW_GENERAL_REGISTER_FILE); 799 assert(src1.address_mode == BRW_ADDRESS_DIRECT); 800 assert(src1.nr < 128); 801 assert(src1.type == BRW_REGISTER_TYPE_F); 802 insn->bits2.da3src.src1_swizzle = src1.dw1.bits.swizzle; 803 insn->bits2.da3src.src1_subreg_nr_low = get_3src_subreg_nr(src1) & 0x3; 804 insn->bits3.da3src.src1_subreg_nr_high = get_3src_subreg_nr(src1) >> 2; 805 insn->bits2.da3src.src1_rep_ctrl = src1.vstride == BRW_VERTICAL_STRIDE_0; 806 insn->bits3.da3src.src1_reg_nr = src1.nr; 807 insn->bits1.da3src.src1_abs = src1.abs; 808 insn->bits1.da3src.src1_negate = src1.negate; 809 810 assert(src2.file == BRW_GENERAL_REGISTER_FILE); 811 assert(src2.address_mode == BRW_ADDRESS_DIRECT); 812 assert(src2.nr < 128); 813 assert(src2.type == BRW_REGISTER_TYPE_F); 814 insn->bits3.da3src.src2_swizzle = src2.dw1.bits.swizzle; 815 insn->bits3.da3src.src2_subreg_nr = get_3src_subreg_nr(src2); 816 insn->bits3.da3src.src2_rep_ctrl = src2.vstride == BRW_VERTICAL_STRIDE_0; 817 insn->bits3.da3src.src2_reg_nr = src2.nr; 818 insn->bits1.da3src.src2_abs = src2.abs; 819 insn->bits1.da3src.src2_negate = src2.negate; 820 821 return insn; 822} 823 824 825/*********************************************************************** 826 * Convenience routines. 
 */

/* Generate the public wrapper for a one-source ALU opcode. */
#define ALU1(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0)			\
{							\
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);	\
}

/* Generate the public wrapper for a two-source ALU opcode. */
#define ALU2(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1)			\
{							\
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
}

/* Generate the public wrapper for a three-source ALU opcode. */
#define ALU3(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1,			\
	      struct brw_reg src2)			\
{							\
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);	\
}

/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 *
 * Sandybridge and later appear to round correctly without an ADD.
 */
#define ROUND(OP)							      \
void brw_##OP(struct brw_compile *p,					      \
	      struct brw_reg dest,					      \
	      struct brw_reg src)					      \
{									      \
   struct brw_instruction *rnd, *add;					      \
   rnd = next_insn(p, BRW_OPCODE_##OP);					      \
   brw_set_dest(p, rnd, dest);						      \
   brw_set_src0(p, rnd, src);						      \
									      \
   if (p->brw->intel.gen < 6) {						      \
      /* turn on round-increments */					      \
      rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R;		      \
      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));			      \
      add->header.predicate_control = BRW_PREDICATE_NORMAL;		      \
   }									      \
}


ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(RSR)
ALU2(RSL)
ALU2(ASR)
ALU1(FRC)
ALU1(RNDD)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
ALU2(PLN)
ALU3(MAD)

ROUND(RNDZ)
ROUND(RNDE)


/* Emit an ADD, asserting the PRM's restrictions on mixing float and
 * dword-integer source types.
 */
struct brw_instruction *brw_ADD(struct brw_compile *p,
                                struct brw_reg dest,
                                struct brw_reg src0,
                                struct brw_reg src1)
{
   /* 6.2.2: add */
   if (src0.type == BRW_REGISTER_TYPE_F ||
       (src0.file == BRW_IMMEDIATE_VALUE &&
	src0.type == BRW_REGISTER_TYPE_VF)) {
      assert(src1.type != BRW_REGISTER_TYPE_UD);
      assert(src1.type != BRW_REGISTER_TYPE_D);
   }

   if (src1.type == BRW_REGISTER_TYPE_F ||
       (src1.file == BRW_IMMEDIATE_VALUE &&
	src1.type == BRW_REGISTER_TYPE_VF)) {
      assert(src0.type != BRW_REGISTER_TYPE_UD);
      assert(src0.type != BRW_REGISTER_TYPE_D);
   }

   return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
}

/* Emit a MUL, asserting the PRM's type-mixing restrictions and the ban
 * on accumulator sources.
 */
struct brw_instruction *brw_MUL(struct brw_compile *p,
                                struct brw_reg dest,
                                struct brw_reg src0,
                                struct brw_reg src1)
{
   /* 6.32.38: mul */
   if (src0.type == BRW_REGISTER_TYPE_D ||
       src0.type == BRW_REGISTER_TYPE_UD ||
       src1.type == BRW_REGISTER_TYPE_D ||
       src1.type == BRW_REGISTER_TYPE_UD) {
      assert(dest.type != BRW_REGISTER_TYPE_F);
   }

   if (src0.type == BRW_REGISTER_TYPE_F ||
       (src0.file == BRW_IMMEDIATE_VALUE &&
	src0.type == BRW_REGISTER_TYPE_VF)) {
      assert(src1.type != BRW_REGISTER_TYPE_UD);
      assert(src1.type != BRW_REGISTER_TYPE_D);
   }

   if (src1.type == BRW_REGISTER_TYPE_F ||
       (src1.file == BRW_IMMEDIATE_VALUE &&
	src1.type == BRW_REGISTER_TYPE_VF)) {
      assert(src0.type != BRW_REGISTER_TYPE_UD);
      assert(src0.type != BRW_REGISTER_TYPE_D);
   }

   assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
	  src0.nr != BRW_ARF_ACCUMULATOR);
   assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
	  src1.nr != BRW_ARF_ACCUMULATOR);

   return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
}


/* Emit a NOP.  The operands are dummies; only the opcode matters. */
void brw_NOP(struct brw_compile *p)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
   brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
   brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
   brw_set_src1(p, insn, brw_imm_ud(0x0));
}





/***********************************************************************
 * Comparisons, if/else/endif
 */

/* Emit a JMPI (scalar jump).  Execution state is forced to scalar,
 * uncompressed, WE_all, and the pending one-shot predicate on the
 * default instruction state is cleared.
 */
struct brw_instruction *brw_JMPI(struct brw_compile *p,
                                 struct brw_reg dest,
                                 struct brw_reg src0,
                                 struct brw_reg src1)
{
   struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);

   insn->header.execution_size = 1;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_DISABLE;

   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}

/* Record the store-relative index of an IF/ELSE instruction so the
 * matching ELSE/ENDIF can patch it later.  The stack is grown *after*
 * the write; callers rely on the invariant array_size > depth on entry.
 */
static void
push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
{
   p->if_stack[p->if_stack_depth] = inst - p->store;

   p->if_stack_depth++;
   if (p->if_stack_array_size <= p->if_stack_depth) {
      p->if_stack_array_size *= 2;
      p->if_stack = reralloc(p->mem_ctx, p->if_stack, int,
			     p->if_stack_array_size);
   }
}
1013static struct brw_instruction * 1014pop_if_stack(struct brw_compile *p) 1015{ 1016 p->if_stack_depth--; 1017 return &p->store[p->if_stack[p->if_stack_depth]]; 1018} 1019 1020static void 1021push_loop_stack(struct brw_compile *p, struct brw_instruction *inst) 1022{ 1023 if (p->loop_stack_array_size < p->loop_stack_depth) { 1024 p->loop_stack_array_size *= 2; 1025 p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int, 1026 p->loop_stack_array_size); 1027 p->if_depth_in_loop = reralloc(p->mem_ctx, p->if_depth_in_loop, int, 1028 p->loop_stack_array_size); 1029 } 1030 1031 p->loop_stack[p->loop_stack_depth] = inst - p->store; 1032 p->loop_stack_depth++; 1033 p->if_depth_in_loop[p->loop_stack_depth] = 0; 1034} 1035 1036static struct brw_instruction * 1037get_inner_do_insn(struct brw_compile *p) 1038{ 1039 return &p->store[p->loop_stack[p->loop_stack_depth - 1]]; 1040} 1041 1042/* EU takes the value from the flag register and pushes it onto some 1043 * sort of a stack (presumably merging with any flag value already on 1044 * the stack). Within an if block, the flags at the top of the stack 1045 * control execution on each channel of the unit, eg. on each of the 1046 * 16 pixel values in our wm programs. 1047 * 1048 * When the matching 'else' instruction is reached (presumably by 1049 * countdown of the instruction count patched in by our ELSE/ENDIF 1050 * functions), the relevent flags are inverted. 1051 * 1052 * When the matching 'endif' instruction is reached, the flags are 1053 * popped off. If the stack is now empty, normal execution resumes. 
1054 */ 1055struct brw_instruction * 1056brw_IF(struct brw_compile *p, GLuint execute_size) 1057{ 1058 struct intel_context *intel = &p->brw->intel; 1059 struct brw_instruction *insn; 1060 1061 insn = next_insn(p, BRW_OPCODE_IF); 1062 1063 /* Override the defaults for this instruction: 1064 */ 1065 if (intel->gen < 6) { 1066 brw_set_dest(p, insn, brw_ip_reg()); 1067 brw_set_src0(p, insn, brw_ip_reg()); 1068 brw_set_src1(p, insn, brw_imm_d(0x0)); 1069 } else if (intel->gen == 6) { 1070 brw_set_dest(p, insn, brw_imm_w(0)); 1071 insn->bits1.branch_gen6.jump_count = 0; 1072 brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D))); 1073 brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D))); 1074 } else { 1075 brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D))); 1076 brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D))); 1077 brw_set_src1(p, insn, brw_imm_ud(0)); 1078 insn->bits3.break_cont.jip = 0; 1079 insn->bits3.break_cont.uip = 0; 1080 } 1081 1082 insn->header.execution_size = execute_size; 1083 insn->header.compression_control = BRW_COMPRESSION_NONE; 1084 insn->header.predicate_control = BRW_PREDICATE_NORMAL; 1085 insn->header.mask_control = BRW_MASK_ENABLE; 1086 if (!p->single_program_flow) 1087 insn->header.thread_control = BRW_THREAD_SWITCH; 1088 1089 p->current->header.predicate_control = BRW_PREDICATE_NONE; 1090 1091 push_if_stack(p, insn); 1092 p->if_depth_in_loop[p->loop_stack_depth]++; 1093 return insn; 1094} 1095 1096/* This function is only used for gen6-style IF instructions with an 1097 * embedded comparison (conditional modifier). It is not used on gen7. 
 */
struct brw_instruction *
gen6_IF(struct brw_compile *p, uint32_t conditional,
        struct brw_reg src0, struct brw_reg src1)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* The jump count is left zero here and filled in later by
    * patch_IF_ELSE() once the matching ELSE/ENDIF has been emitted.
    */
   brw_set_dest(p, insn, brw_imm_w(0));
   if (p->compressed) {
      insn->header.execution_size = BRW_EXECUTE_16;
   } else {
      insn->header.execution_size = BRW_EXECUTE_8;
   }
   insn->bits1.branch_gen6.jump_count = 0;
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

   assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
   assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
   /* The embedded comparison goes in the field shared between the message
    * destination register and the conditional modifier.
    */
   insn->header.destreg__conditionalmod = conditional;

   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
   return insn;
}

/**
 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
 */
static void
convert_IF_ELSE_to_ADD(struct brw_compile *p,
                       struct brw_instruction *if_inst,
                       struct brw_instruction *else_inst)
{
   /* The next instruction (where the ENDIF would be, if it existed) */
   struct brw_instruction *next_inst = &p->store[p->nr_insn];

   assert(p->single_program_flow);
   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
   assert(if_inst->header.execution_size == BRW_EXECUTE_1);

   /* Convert IF to an ADD instruction that moves the instruction pointer
    * to the first instruction of the ELSE block.  If there is no ELSE
    * block, point to where ENDIF would be.  Reverse the predicate.
    *
    * There's no need to execute an ENDIF since we don't need to do any
    * stack operations, and if we're currently executing, we just want to
    * continue normally.
 */
   if_inst->header.opcode = BRW_OPCODE_ADD;
   if_inst->header.predicate_inverse = 1;

   if (else_inst != NULL) {
      /* Convert ELSE to an ADD instruction that points where the ENDIF
       * would be.
       */
      else_inst->header.opcode = BRW_OPCODE_ADD;

      /* ADDs to IP are in bytes; * 16 converts an instruction count to
       * bytes (one 128-bit instruction is 16 bytes).
       */
      if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
      else_inst->bits3.ud = (next_inst - else_inst) * 16;
   } else {
      if_inst->bits3.ud = (next_inst - if_inst) * 16;
   }
}

/**
 * Patch IF and ELSE instructions with appropriate jump targets.
 */
static void
patch_IF_ELSE(struct brw_compile *p,
              struct brw_instruction *if_inst,
              struct brw_instruction *else_inst,
              struct brw_instruction *endif_inst)
{
   struct intel_context *intel = &p->brw->intel;

   /* We shouldn't be patching IF and ELSE instructions in single program flow
    * mode when gen < 6, because in single program flow mode on those
    * platforms, we convert flow control instructions to conditional ADDs that
    * operate on IP (see brw_ENDIF).
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions.").  And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs.  So we do patch IF and ELSE
    * instructions in single program flow mode on those platforms.
    */
   if (intel->gen < 6)
      assert(!p->single_program_flow);

   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(endif_inst != NULL);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);

   unsigned br = 1;
   /* Jump count is for 64bit data chunk each, so one 128bit instruction
    * requires 2 chunks.
    */
   if (intel->gen >= 5)
      br = 2;

   assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
   endif_inst->header.execution_size = if_inst->header.execution_size;

   if (else_inst == NULL) {
      /* Patch IF -> ENDIF */
      if (intel->gen < 6) {
         /* Turn it into an IFF, which means no mask stack operations for
          * all-false and jumping past the ENDIF.
          */
         if_inst->header.opcode = BRW_OPCODE_IFF;
         if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
         if_inst->bits3.if_else.pop_count = 0;
         if_inst->bits3.if_else.pad0 = 0;
      } else if (intel->gen == 6) {
         /* As of gen6, there is no IFF and IF must point to the ENDIF. */
         if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
      } else {
         /* gen7+: with no ELSE, both branch targets point at the ENDIF. */
         if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
         if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
      }
   } else {
      else_inst->header.execution_size = if_inst->header.execution_size;

      /* Patch IF -> ELSE */
      if (intel->gen < 6) {
         if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
         if_inst->bits3.if_else.pop_count = 0;
         if_inst->bits3.if_else.pad0 = 0;
      } else if (intel->gen == 6) {
         if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
      }

      /* Patch ELSE -> ENDIF */
      if (intel->gen < 6) {
         /* BRW_OPCODE_ELSE pre-gen6 should point just past the
          * matching ENDIF.
          */
         else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
         else_inst->bits3.if_else.pop_count = 1;
         else_inst->bits3.if_else.pad0 = 0;
      } else if (intel->gen == 6) {
         /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF.
          */
         else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
      } else {
         /* The IF instruction's JIP should point just past the ELSE */
         if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
         /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
         if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
         else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
      }
   }
}

/**
 * Emit an ELSE instruction for the currently-open IF block and push it on
 * the IF stack so that brw_ENDIF() can patch its jump target.
 */
void
brw_ELSE(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_ELSE);

   /* Jump fields are left zero here and patched by patch_IF_ELSE(). */
   if (intel->gen < 6) {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
}

/**
 * Close the innermost IF block: pop the IF (and optional ELSE) off the IF
 * stack, emit an ENDIF (unless in SPF mode on gen4/5, where IF/ELSE are
 * rewritten as predicated ADDs to IP instead), and patch all jump targets.
 */
void
brw_ENDIF(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn = NULL;
   struct brw_instruction *else_inst = NULL;
   struct brw_instruction *if_inst = NULL;
   struct brw_instruction *tmp;
   bool emit_endif = true;

   /* In single program flow mode, we can express IF and ELSE instructions
    * equivalently as ADD instructions that operate on IP.  On platforms prior
    * to Gen6, flow control instructions cause an implied thread switch, so
    * this is a significant savings.
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions.").  And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs.  So we only do this trick on Gen4 and
    * Gen5.
    */
   if (intel->gen < 6 && p->single_program_flow)
      emit_endif = false;

   /*
    * A single next_insn() may change the base address of instruction store
    * memory (p->store), so call it first before referencing the instruction
    * store pointer from an index
    */
   if (emit_endif)
      insn = next_insn(p, BRW_OPCODE_ENDIF);

   /* Pop the IF and (optional) ELSE instructions from the stack */
   p->if_depth_in_loop[p->loop_stack_depth]--;
   tmp = pop_if_stack(p);
   if (tmp->header.opcode == BRW_OPCODE_ELSE) {
      else_inst = tmp;
      tmp = pop_if_stack(p);
   }
   if_inst = tmp;

   if (!emit_endif) {
      /* ENDIF is useless; don't bother emitting it.
       */
      convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
      return;
   }

   if (intel->gen < 6) {
      brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Also pop item off the stack in the endif instruction: */
   if (intel->gen < 6) {
      insn->bits3.if_else.jump_count = 0;
      insn->bits3.if_else.pop_count = 1;
      insn->bits3.if_else.pad0 = 0;
   } else if (intel->gen == 6) {
      insn->bits1.branch_gen6.jump_count = 2;
   } else {
      insn->bits3.break_cont.jip = 2;
   }
   patch_IF_ELSE(p, if_inst, else_inst, insn);
}

/**
 * Emit a BREAK.  On pre-gen6 the jump_count is left zero here and patched
 * later by brw_patch_break_cont(); pop_count pops the IF-stack entries
 * opened inside the current loop.
 */
struct brw_instruction *brw_BREAK(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_BREAK);
   if (intel->gen >= 6) {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
      insn->bits3.if_else.pad0 = 0;
      insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
1386 } 1387 insn->header.compression_control = BRW_COMPRESSION_NONE; 1388 insn->header.execution_size = BRW_EXECUTE_8; 1389 1390 return insn; 1391} 1392 1393struct brw_instruction *gen6_CONT(struct brw_compile *p) 1394{ 1395 struct brw_instruction *insn; 1396 1397 insn = next_insn(p, BRW_OPCODE_CONTINUE); 1398 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1399 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1400 brw_set_dest(p, insn, brw_ip_reg()); 1401 brw_set_src0(p, insn, brw_ip_reg()); 1402 brw_set_src1(p, insn, brw_imm_d(0x0)); 1403 1404 insn->header.compression_control = BRW_COMPRESSION_NONE; 1405 insn->header.execution_size = BRW_EXECUTE_8; 1406 return insn; 1407} 1408 1409struct brw_instruction *brw_CONT(struct brw_compile *p) 1410{ 1411 struct brw_instruction *insn; 1412 insn = next_insn(p, BRW_OPCODE_CONTINUE); 1413 brw_set_dest(p, insn, brw_ip_reg()); 1414 brw_set_src0(p, insn, brw_ip_reg()); 1415 brw_set_src1(p, insn, brw_imm_d(0x0)); 1416 insn->header.compression_control = BRW_COMPRESSION_NONE; 1417 insn->header.execution_size = BRW_EXECUTE_8; 1418 /* insn->header.mask_control = BRW_MASK_DISABLE; */ 1419 insn->bits3.if_else.pad0 = 0; 1420 insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth]; 1421 return insn; 1422} 1423 1424/* DO/WHILE loop: 1425 * 1426 * The DO/WHILE is just an unterminated loop -- break or continue are 1427 * used for control within the loop. We have a few ways they can be 1428 * done. 1429 * 1430 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip, 1431 * jip and no DO instruction. 1432 * 1433 * For non-uniform control flow pre-gen6, there's a DO instruction to 1434 * push the mask, and a WHILE to jump back, and BREAK to get out and 1435 * pop the mask. 1436 * 1437 * For gen6, there's no more mask stack, so no need for DO. WHILE 1438 * just points back to the first instruction of the loop. 
 */
struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
{
   struct intel_context *intel = &p->brw->intel;

   if (intel->gen >= 6 || p->single_program_flow) {
      /* No DO instruction is emitted; just record where the loop body
       * starts so WHILE/BREAK/CONT can be patched relative to it.
       */
      push_loop_stack(p, &p->store[p->nr_insn]);
      return &p->store[p->nr_insn];
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);

      push_loop_stack(p, insn);

      /* Override the defaults for this instruction:
       */
      brw_set_dest(p, insn, brw_null_reg());
      brw_set_src0(p, insn, brw_null_reg());
      brw_set_src1(p, insn, brw_null_reg());

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = execute_size;
      insn->header.predicate_control = BRW_PREDICATE_NONE;
      /* insn->header.mask_control = BRW_MASK_ENABLE; */
      /* insn->header.mask_control = BRW_MASK_DISABLE; */

      return insn;
   }
}

/**
 * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
 * instruction here.
 *
 * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
 * nesting, since it can always just point to the end of the block/current loop.
 */
static void
brw_patch_break_cont(struct brw_compile *p, struct brw_instruction *while_inst)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *do_inst = get_inner_do_insn(p);
   struct brw_instruction *inst;
   /* gen5 counts jumps in 64-bit chunks (2 per instruction). */
   int br = (intel->gen == 5) ? 2 : 1;

   /* Walk backwards over the loop body, from just before the WHILE to just
    * after the DO.
    */
   for (inst = while_inst - 1; inst != do_inst; inst--) {
      /* If the jump count is != 0, that means that this instruction has already
       * been patched because it's part of a loop inside of the one we're
       * patching.
       */
      if (inst->header.opcode == BRW_OPCODE_BREAK &&
          inst->bits3.if_else.jump_count == 0) {
         inst->bits3.if_else.jump_count = br * ((while_inst - inst) + 1);
      } else if (inst->header.opcode == BRW_OPCODE_CONTINUE &&
                 inst->bits3.if_else.jump_count == 0) {
         inst->bits3.if_else.jump_count = br * (while_inst - inst);
      }
   }
}

/**
 * Emit the WHILE that closes the innermost DO block and pop the loop
 * stack.  The backwards jump target is the loop's DO/first instruction.
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn, *do_insn;
   GLuint br = 1;

   /* From gen5 on, jump counts are in 64-bit chunks (2 per 128-bit
    * instruction); see patch_IF_ELSE().
    */
   if (intel->gen >= 5)
      br = 2;

   if (intel->gen >= 7) {
      insn = next_insn(p, BRW_OPCODE_WHILE);
      do_insn = get_inner_do_insn(p);

      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = br * (do_insn - insn);

      insn->header.execution_size = BRW_EXECUTE_8;
   } else if (intel->gen == 6) {
      insn = next_insn(p, BRW_OPCODE_WHILE);
      do_insn = get_inner_do_insn(p);

      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));

      insn->header.execution_size = BRW_EXECUTE_8;
   } else {
      if (p->single_program_flow) {
         /* SPF mode: the loop close is just a backwards ADD to IP
          * (instruction count * 16 bytes per instruction).
          */
         insn = next_insn(p, BRW_OPCODE_ADD);
         do_insn = get_inner_do_insn(p);

         brw_set_dest(p, insn, brw_ip_reg());
         brw_set_src0(p, insn, brw_ip_reg());
         brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
         insn->header.execution_size = BRW_EXECUTE_1;
      } else {
         insn = next_insn(p, BRW_OPCODE_WHILE);
         do_insn = get_inner_do_insn(p);

         assert(do_insn->header.opcode == BRW_OPCODE_DO);

         brw_set_dest(p, insn, brw_ip_reg());
         brw_set_src0(p, insn, brw_ip_reg());
         brw_set_src1(p, insn, brw_imm_d(0));

         insn->header.execution_size = do_insn->header.execution_size;
         insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
         insn->bits3.if_else.pop_count = 0;
         insn->bits3.if_else.pad0 = 0;

         /* Now that the loop end is known, fix up the BREAK/CONT jumps
          * inside the body.
          */
         brw_patch_break_cont(p, insn);
      }
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   p->loop_stack_depth--;

   return insn;
}


/* FORWARD JUMPS:
 */
/**
 * Patch a previously emitted JMPI (at store index @jmp_insn_idx) so that it
 * lands just past the current end of the instruction stream.
 */
void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *jmp_insn = &p->store[jmp_insn_idx];
   GLuint jmpi = 1;

   /* gen5+ counts jumps in 64-bit chunks (2 per instruction). */
   if (intel->gen >= 5)
      jmpi = 2;

   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
   assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);

   jmp_insn->bits3.ud = jmpi * (p->nr_insn - jmp_insn_idx - 1);
}



/* To integrate with the above, it makes sense that the comparison
 * instruction should populate the flag register.  It might be simpler
 * just to use the flag reg for most WM tasks?
 */
void brw_CMP(struct brw_compile *p,
             struct brw_reg dest,
             GLuint conditional,
             struct brw_reg src0,
             struct brw_reg src1)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);

   insn->header.destreg__conditionalmod = conditional;
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

/*    guess_execution_size(insn, src0); */


   /* Make it so that future instructions will use the computed flag
    * value until brw_set_predicate_control_flag_value() is called
    * again.
    */
   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.nr == 0) {
      /* Destination appears to be the null register: enable default
       * predication on the freshly written flag.
       */
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
      p->flag_value = 0xff;
   }
}

/* Issue 'wait' instruction for n1, host could program MMIO
   to wake up thread. */
void brw_WAIT (struct brw_compile *p)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
   struct brw_reg src = brw_notification_1_reg();

   brw_set_dest(p, insn, src);
   brw_set_src0(p, insn, src);
   brw_set_src1(p, insn, brw_null_reg());
   insn->header.execution_size = 0; /* must */
   insn->header.predicate_control = 0;
   insn->header.compression_control = 0;
}


/***********************************************************************
 * Helpers for the various SEND message types:
 */

/** Extended math function, float[8].
 */
void brw_math( struct brw_compile *p,
               struct brw_reg dest,
               GLuint function,
               GLuint saturate,
               GLuint msg_reg_nr,
               struct brw_reg src,
               GLuint data_type,
               GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;

   if (intel->gen >= 6) {
      /* Gen6+: math is a regular MATH opcode rather than a SEND message. */
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

      assert(dest.file == BRW_GENERAL_REGISTER_FILE);
      assert(src.file == BRW_GENERAL_REGISTER_FILE);

      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
      if (intel->gen == 6)
         assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);

      /* Source modifiers are ignored for extended math instructions on Gen6. */
      if (intel->gen == 6) {
         assert(!src.negate);
         assert(!src.abs);
      }

      /* Integer divide variants take an integer source; everything else
       * expects float.
       */
      if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
          function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
          function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
         assert(src.type != BRW_REGISTER_TYPE_F);
      } else {
         assert(src.type == BRW_REGISTER_TYPE_F);
      }

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;
      insn->header.saturate = saturate;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
   } else {
      /* Pre-gen6: math is a SEND message (see brw_set_math_message). */
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      /* Example code doesn't set predicate_control for send
       * instructions.
       */
      insn->header.predicate_control = 0;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_math_message(p,
                           insn,
                           function,
                           src.type == BRW_REGISTER_TYPE_D,
                           precision,
                           saturate,
                           data_type);
   }
}

/** Extended math function, float[8].
 */
void brw_math2(struct brw_compile *p,
               struct brw_reg dest,
               GLuint function,
               struct brw_reg src0,
               struct brw_reg src1)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

   /* Two-source math is only emitted as a native MATH opcode on gen6+. */
   assert(intel->gen >= 6);
   (void) intel;


   assert(dest.file == BRW_GENERAL_REGISTER_FILE);
   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.file == BRW_GENERAL_REGISTER_FILE);

   assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
   if (intel->gen == 6) {
      assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
      assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
   }

   /* Integer divide variants take integer sources; everything else
    * expects float.
    */
   if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
       function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
       function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
      assert(src0.type != BRW_REGISTER_TYPE_F);
      assert(src1.type != BRW_REGISTER_TYPE_F);
   } else {
      assert(src0.type == BRW_REGISTER_TYPE_F);
      assert(src1.type == BRW_REGISTER_TYPE_F);
   }

   /* Source modifiers are ignored for extended math instructions on Gen6. */
   if (intel->gen == 6) {
      assert(!src0.negate);
      assert(!src0.abs);
      assert(!src1.negate);
      assert(!src1.abs);
   }

   /* Math is the same ISA format as other opcodes, except that CondModifier
    * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
    */
   insn->header.destreg__conditionalmod = function;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
}

/**
 * Extended math function, float[16].
 * Use 2 send instructions.
 */
void brw_math_16( struct brw_compile *p,
                  struct brw_reg dest,
                  GLuint function,
                  GLuint saturate,
                  GLuint msg_reg_nr,
                  struct brw_reg src,
                  GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   if (intel->gen >= 6) {
      /* Gen6+: a single MATH opcode covers the full width. */
      insn = next_insn(p, BRW_OPCODE_MATH);

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;
      insn->header.saturate = saturate;

      /* Source modifiers are ignored for extended math instructions. */
      assert(!src.negate);
      assert(!src.abs);

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
      return;
   }

   /* Pre-gen6: two SEND messages, one per 8-channel half.
    *
    * First instruction:
    */
   brw_push_insn_state(p);
   brw_set_predicate_control_flag_value(p, 0xff);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);
   brw_set_math_message(p,
                        insn,
                        function,
                        BRW_MATH_INTEGER_UNSIGNED,
                        precision,
                        saturate,
                        BRW_MATH_DATA_VECTOR);

   /* Second instruction:
    */
   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
   insn->header.destreg__conditionalmod = msg_reg_nr+1;

   brw_set_dest(p, insn, offset(dest,1));
   brw_set_src0(p, insn, src);
   brw_set_math_message(p,
                        insn,
                        function,
                        BRW_MATH_INTEGER_UNSIGNED,
                        precision,
                        saturate,
                        BRW_MATH_DATA_VECTOR);

   brw_pop_insn_state(p);
}


/**
 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
 * using a constant offset per channel.
 *
 * The offset must be aligned to oword size (16 bytes).  Used for
 * register spilling.
 */
void brw_oword_block_write_scratch(struct brw_compile *p,
                                   struct brw_reg mrf,
                                   int num_regs,
                                   GLuint offset)
{
   struct intel_context *intel = &p->brw->intel;
   uint32_t msg_control, msg_type;
   int mlen;

   /* On gen6+ the message offset is expressed in owords (16 bytes). */
   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* Message length: one header register plus the payload registers. */
   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      mlen = 2;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      mlen = 3;
   }

   /* Set up the message header.  This is g0, with g0.2 filled with
    * the offset.  We don't want to leave our offset around in g0 or
    * it'll screw up texture samples, so set it up inside the message
    * reg.
    */
   {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
              retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                                  mrf.nr,
                                  2), BRW_REGISTER_TYPE_UD),
              brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_reg dest;
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      int send_commit_msg;
      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
                                         BRW_REGISTER_TYPE_UW);

      if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
         insn->header.compression_control = BRW_COMPRESSION_NONE;
         src_header = vec16(src_header);
      }
      assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
      insn->header.destreg__conditionalmod = mrf.nr;

      /* Until gen6, writes followed by reads from the same location
       * are not guaranteed to be ordered unless write_commit is set.
       * If set, then a no-op write is issued to the destination
       * register to set a dependency, and a read from the destination
       * can be used to ensure the ordering.
       *
       * For gen6, only writes between different threads need ordering
       * protection.  Our use of DP writes is all about register
       * spilling within a thread.
       */
      if (intel->gen >= 6) {
         dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
         send_commit_msg = 0;
      } else {
         dest = src_header;
         send_commit_msg = 1;
      }

      brw_set_dest(p, insn, dest);
      if (intel->gen >= 6) {
         brw_set_src0(p, insn, mrf);
      } else {
         brw_set_src0(p, insn, brw_null_reg());
      }

      if (intel->gen >= 6)
         msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
      else
         msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;

      brw_set_dp_write_message(p,
                               insn,
                               255, /* binding table index (255=stateless) */
                               msg_control,
                               msg_type,
                               mlen,
                               true, /* header_present */
                               0, /* not a render target */
                               send_commit_msg, /* response_length */
                               0, /* eot */
                               send_commit_msg);
   }
}


/**
 * Read a block of owords (half a GRF each) from the scratch buffer
 * using a constant index per channel.
 *
 * Offset must be aligned to oword size (16 bytes).  Used for register
 * spilling.
 */
void
brw_oword_block_read_scratch(struct brw_compile *p,
                             struct brw_reg dest,
                             struct brw_reg mrf,
                             int num_regs,
                             GLuint offset)
{
   struct intel_context *intel = &p->brw->intel;
   uint32_t msg_control;
   int rlen;

   /* On gen6+ the message offset is expressed in owords (16 bytes). */
   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
   dest = retype(dest, BRW_REGISTER_TYPE_UW);

   /* Response length: number of registers read back. */
   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      rlen = 1;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      rlen = 2;
   }

   /* Build the message header in the MRF: copy g0 and put the scratch
    * offset into element 2 (see brw_oword_block_write_scratch).
    */
   {
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_mask_control(p, BRW_MASK_DISABLE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
              retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                                  mrf.nr,
                                  2), BRW_REGISTER_TYPE_UD),
              brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      assert(insn->header.predicate_control == 0);
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.destreg__conditionalmod = mrf.nr;

      brw_set_dest(p, insn, dest);      /* UW? */
      if (intel->gen >= 6) {
         brw_set_src0(p, insn, mrf);
      } else {
         brw_set_src0(p, insn, brw_null_reg());
      }

      brw_set_dp_read_message(p,
                              insn,
                              255, /* binding table index (255=stateless) */
                              msg_control,
                              BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
                              BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
                              1, /* msg_length */
                              rlen);
   }
}

/**
 * Read a float[4] vector from the data port Data Cache (const buffer).
 * Location (in buffer) should be a multiple of 16.
 * Used for fetching shader constants.
2014 */ 2015void brw_oword_block_read(struct brw_compile *p, 2016 struct brw_reg dest, 2017 struct brw_reg mrf, 2018 uint32_t offset, 2019 uint32_t bind_table_index) 2020{ 2021 struct intel_context *intel = &p->brw->intel; 2022 2023 /* On newer hardware, offset is in units of owords. */ 2024 if (intel->gen >= 6) 2025 offset /= 16; 2026 2027 mrf = retype(mrf, BRW_REGISTER_TYPE_UD); 2028 2029 brw_push_insn_state(p); 2030 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 2031 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 2032 brw_set_mask_control(p, BRW_MASK_DISABLE); 2033 2034 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); 2035 2036 /* set message header global offset field (reg 0, element 2) */ 2037 brw_MOV(p, 2038 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 2039 mrf.nr, 2040 2), BRW_REGISTER_TYPE_UD), 2041 brw_imm_ud(offset)); 2042 2043 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 2044 insn->header.destreg__conditionalmod = mrf.nr; 2045 2046 /* cast dest to a uword[8] vector */ 2047 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); 2048 2049 brw_set_dest(p, insn, dest); 2050 if (intel->gen >= 6) { 2051 brw_set_src0(p, insn, mrf); 2052 } else { 2053 brw_set_src0(p, insn, brw_null_reg()); 2054 } 2055 2056 brw_set_dp_read_message(p, 2057 insn, 2058 bind_table_index, 2059 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW, 2060 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, 2061 BRW_DATAPORT_READ_TARGET_DATA_CACHE, 2062 1, /* msg_length */ 2063 1); /* response_length (1 reg, 2 owords!) */ 2064 2065 brw_pop_insn_state(p); 2066} 2067 2068/** 2069 * Read a set of dwords from the data port Data Cache (const buffer). 2070 * 2071 * Location (in buffer) appears as UD offsets in the register after 2072 * the provided mrf header reg. 
2073 */ 2074void brw_dword_scattered_read(struct brw_compile *p, 2075 struct brw_reg dest, 2076 struct brw_reg mrf, 2077 uint32_t bind_table_index) 2078{ 2079 mrf = retype(mrf, BRW_REGISTER_TYPE_UD); 2080 2081 brw_push_insn_state(p); 2082 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 2083 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 2084 brw_set_mask_control(p, BRW_MASK_DISABLE); 2085 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); 2086 brw_pop_insn_state(p); 2087 2088 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 2089 insn->header.destreg__conditionalmod = mrf.nr; 2090 2091 /* cast dest to a uword[8] vector */ 2092 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); 2093 2094 brw_set_dest(p, insn, dest); 2095 brw_set_src0(p, insn, brw_null_reg()); 2096 2097 brw_set_dp_read_message(p, 2098 insn, 2099 bind_table_index, 2100 BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS, 2101 BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ, 2102 BRW_DATAPORT_READ_TARGET_DATA_CACHE, 2103 2, /* msg_length */ 2104 1); /* response_length */ 2105} 2106 2107 2108 2109/** 2110 * Read float[4] constant(s) from VS constant buffer. 2111 * For relative addressing, two float[4] constants will be read into 'dest'. 2112 * Otherwise, one float[4] constant will be read into the lower half of 'dest'. 
 */
void brw_dp_READ_4_vs(struct brw_compile *p,
                      struct brw_reg dest,
                      GLuint location,
                      GLuint bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint msg_reg_nr = 1;   /* the message header always goes in MRF 1 */

   /* Gen6+ takes the location in oword (16-byte) units. */
   if (intel->gen >= 6)
      location /= 16;

   /* Setup MRF[1] with location/offset into const buffer */
   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   /* Write the buffer offset into element 2 of the header register. */
   brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2),
		     BRW_REGISTER_TYPE_UD),
	   brw_imm_ud(location));
   brw_pop_insn_state(p);

   insn = next_insn(p, BRW_OPCODE_SEND);

   /* The SEND itself must be unconditional and uncompressed. */
   insn->header.predicate_control = BRW_PREDICATE_NONE;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.destreg__conditionalmod = msg_reg_nr;
   insn->header.mask_control = BRW_MASK_DISABLE;

   brw_set_dest(p, insn, dest);
   /* Gen6+ passes the message payload as src0; earlier gens use the
    * implicit MRF and a null src0.
    */
   if (intel->gen >= 6) {
      brw_set_src0(p, insn, brw_message_reg(msg_reg_nr));
   } else {
      brw_set_src0(p, insn, brw_null_reg());
   }

   brw_set_dp_read_message(p,
			   insn,
			   bind_table_index,
			   0,
			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
			   1, /* msg_length */
			   1); /* response_length (1 Oword) */
}

/**
 * Read a float[4] constant per vertex from VS constant buffer, with
 * relative addressing.
 */
void brw_dp_READ_4_vs_relative(struct brw_compile *p,
			       struct brw_reg dest,
			       struct brw_reg addr_reg,
			       GLuint offset,
			       GLuint bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_reg src = brw_vec8_grf(0, 0);
   int msg_type;

   /* Setup MRF[1] with offset into const buffer */
   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);

   /* M1.0 is block offset 0, M1.4 is block offset 1, all other
    * fields ignored.
    */
   brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D),
	   addr_reg, brw_imm_d(offset));
   brw_pop_insn_state(p);

   /* On Gen6+ the g0 header must be explicitly copied into an MRF. */
   gen6_resolve_implied_move(p, &src, 0);
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

   /* The SEND must execute unconditionally and uncompressed. */
   insn->header.predicate_control = BRW_PREDICATE_NONE;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.destreg__conditionalmod = 0;
   insn->header.mask_control = BRW_MASK_DISABLE;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);

   /* The dual-block-read message type encoding changed on G4x/Gen5 and
    * again on Gen6.
    */
   if (intel->gen >= 6)
      msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else if (intel->gen == 5 || intel->is_g4x)
      msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else
      msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;

   brw_set_dp_read_message(p,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
			   msg_type,
			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
			   2, /* msg_length */
			   1); /* response_length */
}



/**
 * Emit a render target write SEND message.
 *
 * \param dispatch_width   8 or 16; selects the null-destination width and
 *                         the SIMD8/SIMD16 message control.
 * \param msg_reg_nr       first MRF of the color payload.
 * \param src0             payload source (replaced by the MRF on Gen6+).
 * \param eot              true for the final (end-of-thread) write.
 * \param header_present   whether the payload starts with a message header.
 */
void brw_fb_WRITE(struct brw_compile *p,
		  int dispatch_width,
		  GLuint msg_reg_nr,
		  struct brw_reg src0,
		  GLuint binding_table_index,
		  GLuint msg_length,
		  GLuint response_length,
		  bool eot,
		  bool header_present)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint msg_control, msg_type;
   struct brw_reg dest;

   /* Render target writes produce no return data; the null destination
    * only encodes the execution width.
    */
   if (dispatch_width == 16)
      dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
   else
      dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);

   /* NOTE(review): SENDC is used on Gen6+ for binding table index 0 —
    * presumably to enforce ordering on the first render target; confirm
    * against the PRM.
    */
   if (intel->gen >= 6 && binding_table_index == 0) {
      insn = next_insn(p, BRW_OPCODE_SENDC);
   } else {
      insn = next_insn(p, BRW_OPCODE_SEND);
   }
   /* The execution mask is ignored for render target writes. */
   insn->header.predicate_control = 0;
   insn->header.compression_control = BRW_COMPRESSION_NONE;

   if (intel->gen >= 6) {
      /* headerless version, just submit color payload */
      src0 = brw_message_reg(msg_reg_nr);

      msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   } else {
      insn->header.destreg__conditionalmod = msg_reg_nr;

      msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   }

   if (dispatch_width == 16)
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
   else
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_dp_write_message(p,
			    insn,
			    binding_table_index,
			    msg_control,
			    msg_type,
			    msg_length,
			    header_present,
			    eot, /* last render target write */
			    response_length,
			    eot,
			    0 /* send_commit_msg */);
}


/**
 * Texture sample instruction.
 * Note: the msg_type plus msg_length values determine exactly what kind
 * of sampling operation is performed.  See volume 4, page 161 of docs.
 */
void brw_SAMPLE(struct brw_compile *p,
		struct brw_reg dest,
		GLuint msg_reg_nr,
		struct brw_reg src0,
		GLuint binding_table_index,
		GLuint sampler,
		GLuint writemask,
		GLuint msg_type,
		GLuint response_length,
		GLuint msg_length,
		GLuint header_present,
		GLuint simd_mode,
		GLuint return_format)
{
   struct intel_context *intel = &p->brw->intel;
   bool need_stall = 0;

   /* Nothing is written: skip the sample entirely. */
   if (writemask == 0) {
      /*printf("%s: zero writemask??\n", __FUNCTION__); */
      return;
   }

   /* Hardware doesn't do destination dependency checking on send
    * instructions properly.  Add a workaround which generates the
    * dependency by other means.  In practice it seems like this bug
    * only crops up for texture samples, and only where registers are
    * written by the send and then written again later without being
    * read in between.  Luckily for us, we already track that
    * information and use it to modify the writemask for the
    * instruction, so that is a guide for whether a workaround is
    * needed.
    */
   if (writemask != WRITEMASK_XYZW) {
      GLuint dst_offset = 0;
      GLuint i, newmask = 0, len = 0;

      /* Skip leading disabled channels; each skipped channel advances
       * the destination by two registers (one per SIMD8 half).
       */
      for (i = 0; i < 4; i++) {
	 if (writemask & (1<<i))
	    break;
	 dst_offset += 2;
      }
      /* Collect the contiguous run of enabled channels that follows. */
      for (; i < 4; i++) {
	 if (!(writemask & (1<<i)))
	    break;
	 newmask |= 1<<i;
	 len++;
      }

      /* A non-contiguous writemask can't be expressed by offset+length,
       * so fall back to the dependency stall emitted at the end.
       */
      if (newmask != writemask) {
	 need_stall = 1;
         /* printf("need stall %x %x\n", newmask , writemask); */
      }
      else {
	 bool dispatch_16 = false;

	 struct brw_reg m1 = brw_message_reg(msg_reg_nr);

	 guess_execution_size(p, p->current, dest);
	 if (p->current->header.execution_size == BRW_EXECUTE_16)
	    dispatch_16 = true;

	 /* The header takes a channel *disable* mask, so invert. */
	 newmask = ~newmask & WRITEMASK_XYZW;

	 brw_push_insn_state(p);

	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
	 brw_set_mask_control(p, BRW_MASK_DISABLE);

	 /* Build a message header (g0 copy) with the channel mask in
	  * bits 15:12 of element 2.
	  */
	 brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD),
		 retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD));
  	 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));

	 brw_pop_insn_state(p);

  	 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
	 dest = offset(dest, dst_offset);

	 /* For 16-wide dispatch, masked channels are skipped in the
	  * response.  For 8-wide, masked channels still take up slots,
	  * and are just not written to.
	  */
	 if (dispatch_16)
	    response_length = len * 2;
      }
   }

   {
      struct brw_instruction *insn;

      /* On Gen6+, move a GRF source into the MRF explicitly. */
      gen6_resolve_implied_move(p, &src0, msg_reg_nr);

      insn = next_insn(p, BRW_OPCODE_SEND);
      insn->header.predicate_control = 0; /* XXX */
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      if (intel->gen < 6)
	  insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src0);
      brw_set_sampler_message(p, insn,
			      binding_table_index,
			      sampler,
			      msg_type,
			      response_length,
			      msg_length,
			      header_present,
			      simd_mode,
			      return_format);
   }

   /* Dependency workaround (see above): touch the last destination
    * register with a self-MOV so later writers wait on the sample.
    */
   if (need_stall) {
      struct brw_reg reg = vec8(offset(dest, response_length-1));

      /*  mov (8) r9.0<1>:f    r9.0<8;8,1>:f    { Align1 }
       */
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, retype(reg, BRW_REGISTER_TYPE_UD),
	      retype(reg, BRW_REGISTER_TYPE_UD));
      brw_pop_insn_state(p);
   }

}

/* All these variables are pretty confusing - we might be better off
 * using bitmasks and macros for this, in the old style.  Or perhaps
 * just having the caller instantiate the fields in dword3 itself.
2412 */ 2413void brw_urb_WRITE(struct brw_compile *p, 2414 struct brw_reg dest, 2415 GLuint msg_reg_nr, 2416 struct brw_reg src0, 2417 bool allocate, 2418 bool used, 2419 GLuint msg_length, 2420 GLuint response_length, 2421 bool eot, 2422 bool writes_complete, 2423 GLuint offset, 2424 GLuint swizzle) 2425{ 2426 struct intel_context *intel = &p->brw->intel; 2427 struct brw_instruction *insn; 2428 2429 gen6_resolve_implied_move(p, &src0, msg_reg_nr); 2430 2431 if (intel->gen == 7) { 2432 /* Enable Channel Masks in the URB_WRITE_HWORD message header */ 2433 brw_push_insn_state(p); 2434 brw_set_access_mode(p, BRW_ALIGN_1); 2435 brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5), 2436 BRW_REGISTER_TYPE_UD), 2437 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD), 2438 brw_imm_ud(0xff00)); 2439 brw_pop_insn_state(p); 2440 } 2441 2442 insn = next_insn(p, BRW_OPCODE_SEND); 2443 2444 assert(msg_length < BRW_MAX_MRF); 2445 2446 brw_set_dest(p, insn, dest); 2447 brw_set_src0(p, insn, src0); 2448 brw_set_src1(p, insn, brw_imm_d(0)); 2449 2450 if (intel->gen < 6) 2451 insn->header.destreg__conditionalmod = msg_reg_nr; 2452 2453 brw_set_urb_message(p, 2454 insn, 2455 allocate, 2456 used, 2457 msg_length, 2458 response_length, 2459 eot, 2460 writes_complete, 2461 offset, 2462 swizzle); 2463} 2464 2465static int 2466brw_find_next_block_end(struct brw_compile *p, int start) 2467{ 2468 int ip; 2469 2470 for (ip = start + 1; ip < p->nr_insn; ip++) { 2471 struct brw_instruction *insn = &p->store[ip]; 2472 2473 switch (insn->header.opcode) { 2474 case BRW_OPCODE_ENDIF: 2475 case BRW_OPCODE_ELSE: 2476 case BRW_OPCODE_WHILE: 2477 return ip; 2478 } 2479 } 2480 assert(!"not reached"); 2481 return start + 1; 2482} 2483 2484/* There is no DO instruction on gen6, so to find the end of the loop 2485 * we have to see if the loop is jumping back before our start 2486 * instruction. 
 */
static int
brw_find_loop_end(struct brw_compile *p, int start)
{
   struct intel_context *intel = &p->brw->intel;
   int ip;
   int br = 2;   /* divisor converting the encoded jump distance to instructions */

   for (ip = start + 1; ip < p->nr_insn; ip++) {
      struct brw_instruction *insn = &p->store[ip];

      if (insn->header.opcode == BRW_OPCODE_WHILE) {
	 /* Gen6 stores the backward branch in jump_count; Gen7 in JIP. */
	 int jip = intel->gen == 6 ? insn->bits1.branch_gen6.jump_count
				   : insn->bits3.break_cont.jip;
	 /* A WHILE jumping back to (or before) 'start' is the one that
	  * closes the loop containing 'start'.
	  */
	 if (ip + jip / br <= start)
	    return ip;
      }
   }
   assert(!"not reached");
   return start + 1;
}

/* After program generation, go back and update the UIP and JIP of
 * BREAK and CONT instructions to their correct locations.
 */
void
brw_set_uip_jip(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   int ip;
   int br = 2;   /* encoded jump distances are br units per instruction */

   /* Pre-Gen6 hardware patches jumps by a different mechanism. */
   if (intel->gen < 6)
      return;

   for (ip = 0; ip < p->nr_insn; ip++) {
      struct brw_instruction *insn = &p->store[ip];

      switch (insn->header.opcode) {
      case BRW_OPCODE_BREAK:
	 /* JIP: distance to the end of the enclosing block. */
	 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
	 /* Gen7 UIP points to WHILE; Gen6 points just after it */
	 insn->bits3.break_cont.uip =
	    br * (brw_find_loop_end(p, ip) - ip + (intel->gen == 6 ? 1 : 0));
	 break;
      case BRW_OPCODE_CONTINUE:
	 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
	 insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip);

	 assert(insn->bits3.break_cont.uip != 0);
	 assert(insn->bits3.break_cont.jip != 0);
	 break;
      }
   }
}

/* Emit the FF_SYNC message used to synchronize thread allocation in the
 * fixed-function pipeline before URB writes.
 */
void brw_ff_sync(struct brw_compile *p,
		   struct brw_reg dest,
		   GLuint msg_reg_nr,
		   struct brw_reg src0,
		   bool allocate,
		   GLuint response_length,
		   bool eot)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   /* On Gen6+, move a GRF source into the MRF explicitly. */
   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   insn = next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));

   if (intel->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_ff_sync_message(p,
			   insn,
			   allocate,
			   response_length,
			   eot);
}

/**
 * Emit the SEND instruction necessary to generate stream output data on Gen6
 * (for transform feedback).
 *
 * If send_commit_msg is true, this is the last piece of stream output data
 * from this thread, so send the data as a committed write.  According to the
 * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
 *
 *   "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
 *    writes are complete by sending the final write as a committed write."
2581 */ 2582void 2583brw_svb_write(struct brw_compile *p, 2584 struct brw_reg dest, 2585 GLuint msg_reg_nr, 2586 struct brw_reg src0, 2587 GLuint binding_table_index, 2588 bool send_commit_msg) 2589{ 2590 struct brw_instruction *insn; 2591 2592 gen6_resolve_implied_move(p, &src0, msg_reg_nr); 2593 2594 insn = next_insn(p, BRW_OPCODE_SEND); 2595 brw_set_dest(p, insn, dest); 2596 brw_set_src0(p, insn, src0); 2597 brw_set_src1(p, insn, brw_imm_d(0)); 2598 brw_set_dp_write_message(p, insn, 2599 binding_table_index, 2600 0, /* msg_control: ignored */ 2601 GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE, 2602 1, /* msg_length */ 2603 true, /* header_present */ 2604 0, /* last_render_target: ignored */ 2605 send_commit_msg, /* response_length */ 2606 0, /* end_of_thread */ 2607 send_commit_msg); /* send_commit_msg */ 2608} 2609