brw_eu_emit.c revision a73c65c5342bf41fa0dfefe7daa9197ce6a11db4
1/* 2 Copyright (C) Intel Corp. 2006. All Rights Reserved. 3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to 4 develop this 3D driver. 5 6 Permission is hereby granted, free of charge, to any person obtaining 7 a copy of this software and associated documentation files (the 8 "Software"), to deal in the Software without restriction, including 9 without limitation the rights to use, copy, modify, merge, publish, 10 distribute, sublicense, and/or sell copies of the Software, and to 11 permit persons to whom the Software is furnished to do so, subject to 12 the following conditions: 13 14 The above copyright notice and this permission notice (including the 15 next paragraph) shall be included in all copies or substantial 16 portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
25 26 **********************************************************************/ 27 /* 28 * Authors: 29 * Keith Whitwell <keith@tungstengraphics.com> 30 */ 31 32 33#include "brw_context.h" 34#include "brw_defines.h" 35#include "brw_eu.h" 36 37#include "glsl/ralloc.h" 38 39/*********************************************************************** 40 * Internal helper for constructing instructions 41 */ 42 43static void guess_execution_size(struct brw_compile *p, 44 struct brw_instruction *insn, 45 struct brw_reg reg) 46{ 47 if (reg.width == BRW_WIDTH_8 && p->compressed) 48 insn->header.execution_size = BRW_EXECUTE_16; 49 else 50 insn->header.execution_size = reg.width; /* note - definitions are compatible */ 51} 52 53 54/** 55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source 56 * registers, implicitly moving the operand to a message register. 57 * 58 * On Sandybridge, this is no longer the case. This function performs the 59 * explicit move; it should be called before emitting a SEND instruction. 
60 */ 61void 62gen6_resolve_implied_move(struct brw_compile *p, 63 struct brw_reg *src, 64 GLuint msg_reg_nr) 65{ 66 struct intel_context *intel = &p->brw->intel; 67 if (intel->gen < 6) 68 return; 69 70 if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) { 71 brw_push_insn_state(p); 72 brw_set_mask_control(p, BRW_MASK_DISABLE); 73 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 74 brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD), 75 retype(*src, BRW_REGISTER_TYPE_UD)); 76 brw_pop_insn_state(p); 77 } 78 *src = brw_message_reg(msg_reg_nr); 79} 80 81static void 82gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg) 83{ 84 struct intel_context *intel = &p->brw->intel; 85 if (intel->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) { 86 reg->file = BRW_GENERAL_REGISTER_FILE; 87 reg->nr += 111; 88 } 89} 90 91 92void 93brw_set_dest(struct brw_compile *p, struct brw_instruction *insn, 94 struct brw_reg dest) 95{ 96 if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && 97 dest.file != BRW_MESSAGE_REGISTER_FILE) 98 assert(dest.nr < 128); 99 100 gen7_convert_mrf_to_grf(p, &dest); 101 102 insn->bits1.da1.dest_reg_file = dest.file; 103 insn->bits1.da1.dest_reg_type = dest.type; 104 insn->bits1.da1.dest_address_mode = dest.address_mode; 105 106 if (dest.address_mode == BRW_ADDRESS_DIRECT) { 107 insn->bits1.da1.dest_reg_nr = dest.nr; 108 109 if (insn->header.access_mode == BRW_ALIGN_1) { 110 insn->bits1.da1.dest_subreg_nr = dest.subnr; 111 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) 112 dest.hstride = BRW_HORIZONTAL_STRIDE_1; 113 insn->bits1.da1.dest_horiz_stride = dest.hstride; 114 } 115 else { 116 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; 117 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; 118 /* even ignored in da16, still need to set as '01' */ 119 insn->bits1.da16.dest_horiz_stride = 1; 120 } 121 } 122 else { 123 insn->bits1.ia1.dest_subreg_nr = dest.subnr; 124 125 /* These are different 
sizes in align1 vs align16: 126 */ 127 if (insn->header.access_mode == BRW_ALIGN_1) { 128 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; 129 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) 130 dest.hstride = BRW_HORIZONTAL_STRIDE_1; 131 insn->bits1.ia1.dest_horiz_stride = dest.hstride; 132 } 133 else { 134 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; 135 /* even ignored in da16, still need to set as '01' */ 136 insn->bits1.ia16.dest_horiz_stride = 1; 137 } 138 } 139 140 /* NEW: Set the execution size based on dest.width and 141 * insn->compression_control: 142 */ 143 guess_execution_size(p, insn, dest); 144} 145 146extern int reg_type_size[]; 147 148static void 149validate_reg(struct brw_instruction *insn, struct brw_reg reg) 150{ 151 int hstride_for_reg[] = {0, 1, 2, 4}; 152 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256}; 153 int width_for_reg[] = {1, 2, 4, 8, 16}; 154 int execsize_for_reg[] = {1, 2, 4, 8, 16}; 155 int width, hstride, vstride, execsize; 156 157 if (reg.file == BRW_IMMEDIATE_VALUE) { 158 /* 3.3.6: Region Parameters. Restriction: Immediate vectors 159 * mean the destination has to be 128-bit aligned and the 160 * destination horiz stride has to be a word. 
161 */ 162 if (reg.type == BRW_REGISTER_TYPE_V) { 163 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] * 164 reg_type_size[insn->bits1.da1.dest_reg_type] == 2); 165 } 166 167 return; 168 } 169 170 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE && 171 reg.file == BRW_ARF_NULL) 172 return; 173 174 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg)); 175 hstride = hstride_for_reg[reg.hstride]; 176 177 if (reg.vstride == 0xf) { 178 vstride = -1; 179 } else { 180 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg)); 181 vstride = vstride_for_reg[reg.vstride]; 182 } 183 184 assert(reg.width >= 0 && reg.width < Elements(width_for_reg)); 185 width = width_for_reg[reg.width]; 186 187 assert(insn->header.execution_size >= 0 && 188 insn->header.execution_size < Elements(execsize_for_reg)); 189 execsize = execsize_for_reg[insn->header.execution_size]; 190 191 /* Restrictions from 3.3.10: Register Region Restrictions. */ 192 /* 3. */ 193 assert(execsize >= width); 194 195 /* 4. */ 196 if (execsize == width && hstride != 0) { 197 assert(vstride == -1 || vstride == width * hstride); 198 } 199 200 /* 5. */ 201 if (execsize == width && hstride == 0) { 202 /* no restriction on vstride. */ 203 } 204 205 /* 6. */ 206 if (width == 1) { 207 assert(hstride == 0); 208 } 209 210 /* 7. */ 211 if (execsize == 1 && width == 1) { 212 assert(hstride == 0); 213 assert(vstride == 0); 214 } 215 216 /* 8. */ 217 if (vstride == 0 && hstride == 0) { 218 assert(width == 1); 219 } 220 221 /* 10. Check destination issues. 
*/ 222} 223 224void 225brw_set_src0(struct brw_compile *p, struct brw_instruction *insn, 226 struct brw_reg reg) 227{ 228 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) 229 assert(reg.nr < 128); 230 231 gen7_convert_mrf_to_grf(p, ®); 232 233 validate_reg(insn, reg); 234 235 insn->bits1.da1.src0_reg_file = reg.file; 236 insn->bits1.da1.src0_reg_type = reg.type; 237 insn->bits2.da1.src0_abs = reg.abs; 238 insn->bits2.da1.src0_negate = reg.negate; 239 insn->bits2.da1.src0_address_mode = reg.address_mode; 240 241 if (reg.file == BRW_IMMEDIATE_VALUE) { 242 insn->bits3.ud = reg.dw1.ud; 243 244 /* Required to set some fields in src1 as well: 245 */ 246 insn->bits1.da1.src1_reg_file = 0; /* arf */ 247 insn->bits1.da1.src1_reg_type = reg.type; 248 } 249 else 250 { 251 if (reg.address_mode == BRW_ADDRESS_DIRECT) { 252 if (insn->header.access_mode == BRW_ALIGN_1) { 253 insn->bits2.da1.src0_subreg_nr = reg.subnr; 254 insn->bits2.da1.src0_reg_nr = reg.nr; 255 } 256 else { 257 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16; 258 insn->bits2.da16.src0_reg_nr = reg.nr; 259 } 260 } 261 else { 262 insn->bits2.ia1.src0_subreg_nr = reg.subnr; 263 264 if (insn->header.access_mode == BRW_ALIGN_1) { 265 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; 266 } 267 else { 268 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; 269 } 270 } 271 272 if (insn->header.access_mode == BRW_ALIGN_1) { 273 if (reg.width == BRW_WIDTH_1 && 274 insn->header.execution_size == BRW_EXECUTE_1) { 275 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0; 276 insn->bits2.da1.src0_width = BRW_WIDTH_1; 277 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0; 278 } 279 else { 280 insn->bits2.da1.src0_horiz_stride = reg.hstride; 281 insn->bits2.da1.src0_width = reg.width; 282 insn->bits2.da1.src0_vert_stride = reg.vstride; 283 } 284 } 285 else { 286 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); 287 insn->bits2.da16.src0_swz_y = 
BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); 288 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); 289 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); 290 291 /* This is an oddity of the fact we're using the same 292 * descriptions for registers in align_16 as align_1: 293 */ 294 if (reg.vstride == BRW_VERTICAL_STRIDE_8) 295 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4; 296 else 297 insn->bits2.da16.src0_vert_stride = reg.vstride; 298 } 299 } 300} 301 302 303void brw_set_src1(struct brw_compile *p, 304 struct brw_instruction *insn, 305 struct brw_reg reg) 306{ 307 assert(reg.file != BRW_MESSAGE_REGISTER_FILE); 308 309 assert(reg.nr < 128); 310 311 gen7_convert_mrf_to_grf(p, ®); 312 313 validate_reg(insn, reg); 314 315 insn->bits1.da1.src1_reg_file = reg.file; 316 insn->bits1.da1.src1_reg_type = reg.type; 317 insn->bits3.da1.src1_abs = reg.abs; 318 insn->bits3.da1.src1_negate = reg.negate; 319 320 /* Only src1 can be immediate in two-argument instructions. 
321 */ 322 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE); 323 324 if (reg.file == BRW_IMMEDIATE_VALUE) { 325 insn->bits3.ud = reg.dw1.ud; 326 } 327 else { 328 /* This is a hardware restriction, which may or may not be lifted 329 * in the future: 330 */ 331 assert (reg.address_mode == BRW_ADDRESS_DIRECT); 332 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */ 333 334 if (insn->header.access_mode == BRW_ALIGN_1) { 335 insn->bits3.da1.src1_subreg_nr = reg.subnr; 336 insn->bits3.da1.src1_reg_nr = reg.nr; 337 } 338 else { 339 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16; 340 insn->bits3.da16.src1_reg_nr = reg.nr; 341 } 342 343 if (insn->header.access_mode == BRW_ALIGN_1) { 344 if (reg.width == BRW_WIDTH_1 && 345 insn->header.execution_size == BRW_EXECUTE_1) { 346 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0; 347 insn->bits3.da1.src1_width = BRW_WIDTH_1; 348 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0; 349 } 350 else { 351 insn->bits3.da1.src1_horiz_stride = reg.hstride; 352 insn->bits3.da1.src1_width = reg.width; 353 insn->bits3.da1.src1_vert_stride = reg.vstride; 354 } 355 } 356 else { 357 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); 358 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); 359 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); 360 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); 361 362 /* This is an oddity of the fact we're using the same 363 * descriptions for registers in align_16 as align_1: 364 */ 365 if (reg.vstride == BRW_VERTICAL_STRIDE_8) 366 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4; 367 else 368 insn->bits3.da16.src1_vert_stride = reg.vstride; 369 } 370 } 371} 372 373/** 374 * Set the Message Descriptor and Extended Message Descriptor fields 375 * for SEND messages. 
376 * 377 * \note This zeroes out the Function Control bits, so it must be called 378 * \b before filling out any message-specific data. Callers can 379 * choose not to fill in irrelevant bits; they will be zero. 380 */ 381static void 382brw_set_message_descriptor(struct brw_compile *p, 383 struct brw_instruction *inst, 384 enum brw_message_target sfid, 385 unsigned msg_length, 386 unsigned response_length, 387 bool header_present, 388 bool end_of_thread) 389{ 390 struct intel_context *intel = &p->brw->intel; 391 392 brw_set_src1(p, inst, brw_imm_d(0)); 393 394 if (intel->gen >= 5) { 395 inst->bits3.generic_gen5.header_present = header_present; 396 inst->bits3.generic_gen5.response_length = response_length; 397 inst->bits3.generic_gen5.msg_length = msg_length; 398 inst->bits3.generic_gen5.end_of_thread = end_of_thread; 399 400 if (intel->gen >= 6) { 401 /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */ 402 inst->header.destreg__conditionalmod = sfid; 403 } else { 404 /* Set Extended Message Descriptor (ex_desc) */ 405 inst->bits2.send_gen5.sfid = sfid; 406 inst->bits2.send_gen5.end_of_thread = end_of_thread; 407 } 408 } else { 409 inst->bits3.generic.response_length = response_length; 410 inst->bits3.generic.msg_length = msg_length; 411 inst->bits3.generic.msg_target = sfid; 412 inst->bits3.generic.end_of_thread = end_of_thread; 413 } 414} 415 416static void brw_set_math_message( struct brw_compile *p, 417 struct brw_instruction *insn, 418 GLuint function, 419 GLuint integer_type, 420 bool low_precision, 421 bool saturate, 422 GLuint dataType ) 423{ 424 struct brw_context *brw = p->brw; 425 struct intel_context *intel = &brw->intel; 426 unsigned msg_length; 427 unsigned response_length; 428 429 /* Infer message length from the function */ 430 switch (function) { 431 case BRW_MATH_FUNCTION_POW: 432 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT: 433 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER: 434 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER: 435 
msg_length = 2; 436 break; 437 default: 438 msg_length = 1; 439 break; 440 } 441 442 /* Infer response length from the function */ 443 switch (function) { 444 case BRW_MATH_FUNCTION_SINCOS: 445 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER: 446 response_length = 2; 447 break; 448 default: 449 response_length = 1; 450 break; 451 } 452 453 brw_set_message_descriptor(p, insn, BRW_SFID_MATH, 454 msg_length, response_length, false, false); 455 if (intel->gen == 5) { 456 insn->bits3.math_gen5.function = function; 457 insn->bits3.math_gen5.int_type = integer_type; 458 insn->bits3.math_gen5.precision = low_precision; 459 insn->bits3.math_gen5.saturate = saturate; 460 insn->bits3.math_gen5.data_type = dataType; 461 insn->bits3.math_gen5.snapshot = 0; 462 } else { 463 insn->bits3.math.function = function; 464 insn->bits3.math.int_type = integer_type; 465 insn->bits3.math.precision = low_precision; 466 insn->bits3.math.saturate = saturate; 467 insn->bits3.math.data_type = dataType; 468 } 469} 470 471 472static void brw_set_ff_sync_message(struct brw_compile *p, 473 struct brw_instruction *insn, 474 bool allocate, 475 GLuint response_length, 476 bool end_of_thread) 477{ 478 brw_set_message_descriptor(p, insn, BRW_SFID_URB, 479 1, response_length, true, end_of_thread); 480 insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */ 481 insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */ 482 insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */ 483 insn->bits3.urb_gen5.allocate = allocate; 484 insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */ 485 insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */ 486} 487 488static void brw_set_urb_message( struct brw_compile *p, 489 struct brw_instruction *insn, 490 bool allocate, 491 bool used, 492 GLuint msg_length, 493 GLuint response_length, 494 bool end_of_thread, 495 bool complete, 496 GLuint offset, 497 GLuint swizzle_control ) 498{ 499 struct brw_context *brw = p->brw; 500 struct intel_context *intel = 
&brw->intel; 501 502 brw_set_message_descriptor(p, insn, BRW_SFID_URB, 503 msg_length, response_length, true, end_of_thread); 504 if (intel->gen == 7) { 505 insn->bits3.urb_gen7.opcode = 0; /* URB_WRITE_HWORD */ 506 insn->bits3.urb_gen7.offset = offset; 507 assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE); 508 insn->bits3.urb_gen7.swizzle_control = swizzle_control; 509 /* per_slot_offset = 0 makes it ignore offsets in message header */ 510 insn->bits3.urb_gen7.per_slot_offset = 0; 511 insn->bits3.urb_gen7.complete = complete; 512 } else if (intel->gen >= 5) { 513 insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */ 514 insn->bits3.urb_gen5.offset = offset; 515 insn->bits3.urb_gen5.swizzle_control = swizzle_control; 516 insn->bits3.urb_gen5.allocate = allocate; 517 insn->bits3.urb_gen5.used = used; /* ? */ 518 insn->bits3.urb_gen5.complete = complete; 519 } else { 520 insn->bits3.urb.opcode = 0; /* ? */ 521 insn->bits3.urb.offset = offset; 522 insn->bits3.urb.swizzle_control = swizzle_control; 523 insn->bits3.urb.allocate = allocate; 524 insn->bits3.urb.used = used; /* ? */ 525 insn->bits3.urb.complete = complete; 526 } 527} 528 529void 530brw_set_dp_write_message(struct brw_compile *p, 531 struct brw_instruction *insn, 532 GLuint binding_table_index, 533 GLuint msg_control, 534 GLuint msg_type, 535 GLuint msg_length, 536 bool header_present, 537 GLuint last_render_target, 538 GLuint response_length, 539 GLuint end_of_thread, 540 GLuint send_commit_msg) 541{ 542 struct brw_context *brw = p->brw; 543 struct intel_context *intel = &brw->intel; 544 unsigned sfid; 545 546 if (intel->gen >= 7) { 547 /* Use the Render Cache for RT writes; otherwise use the Data Cache */ 548 if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE) 549 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; 550 else 551 sfid = GEN7_SFID_DATAPORT_DATA_CACHE; 552 } else if (intel->gen == 6) { 553 /* Use the render cache for all write messages. 
*/ 554 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; 555 } else { 556 sfid = BRW_SFID_DATAPORT_WRITE; 557 } 558 559 brw_set_message_descriptor(p, insn, sfid, msg_length, response_length, 560 header_present, end_of_thread); 561 562 if (intel->gen >= 7) { 563 insn->bits3.gen7_dp.binding_table_index = binding_table_index; 564 insn->bits3.gen7_dp.msg_control = msg_control; 565 insn->bits3.gen7_dp.last_render_target = last_render_target; 566 insn->bits3.gen7_dp.msg_type = msg_type; 567 } else if (intel->gen == 6) { 568 insn->bits3.gen6_dp.binding_table_index = binding_table_index; 569 insn->bits3.gen6_dp.msg_control = msg_control; 570 insn->bits3.gen6_dp.last_render_target = last_render_target; 571 insn->bits3.gen6_dp.msg_type = msg_type; 572 insn->bits3.gen6_dp.send_commit_msg = send_commit_msg; 573 } else if (intel->gen == 5) { 574 insn->bits3.dp_write_gen5.binding_table_index = binding_table_index; 575 insn->bits3.dp_write_gen5.msg_control = msg_control; 576 insn->bits3.dp_write_gen5.last_render_target = last_render_target; 577 insn->bits3.dp_write_gen5.msg_type = msg_type; 578 insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg; 579 } else { 580 insn->bits3.dp_write.binding_table_index = binding_table_index; 581 insn->bits3.dp_write.msg_control = msg_control; 582 insn->bits3.dp_write.last_render_target = last_render_target; 583 insn->bits3.dp_write.msg_type = msg_type; 584 insn->bits3.dp_write.send_commit_msg = send_commit_msg; 585 } 586} 587 588void 589brw_set_dp_read_message(struct brw_compile *p, 590 struct brw_instruction *insn, 591 GLuint binding_table_index, 592 GLuint msg_control, 593 GLuint msg_type, 594 GLuint target_cache, 595 GLuint msg_length, 596 GLuint response_length) 597{ 598 struct brw_context *brw = p->brw; 599 struct intel_context *intel = &brw->intel; 600 unsigned sfid; 601 602 if (intel->gen >= 7) { 603 sfid = GEN7_SFID_DATAPORT_DATA_CACHE; 604 } else if (intel->gen == 6) { 605 if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE) 606 
sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; 607 else 608 sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE; 609 } else { 610 sfid = BRW_SFID_DATAPORT_READ; 611 } 612 613 brw_set_message_descriptor(p, insn, sfid, msg_length, response_length, 614 true, false); 615 616 if (intel->gen >= 7) { 617 insn->bits3.gen7_dp.binding_table_index = binding_table_index; 618 insn->bits3.gen7_dp.msg_control = msg_control; 619 insn->bits3.gen7_dp.last_render_target = 0; 620 insn->bits3.gen7_dp.msg_type = msg_type; 621 } else if (intel->gen == 6) { 622 insn->bits3.gen6_dp.binding_table_index = binding_table_index; 623 insn->bits3.gen6_dp.msg_control = msg_control; 624 insn->bits3.gen6_dp.last_render_target = 0; 625 insn->bits3.gen6_dp.msg_type = msg_type; 626 insn->bits3.gen6_dp.send_commit_msg = 0; 627 } else if (intel->gen == 5) { 628 insn->bits3.dp_read_gen5.binding_table_index = binding_table_index; 629 insn->bits3.dp_read_gen5.msg_control = msg_control; 630 insn->bits3.dp_read_gen5.msg_type = msg_type; 631 insn->bits3.dp_read_gen5.target_cache = target_cache; 632 } else if (intel->is_g4x) { 633 insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/ 634 insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/ 635 insn->bits3.dp_read_g4x.msg_type = msg_type; /*11:13*/ 636 insn->bits3.dp_read_g4x.target_cache = target_cache; /*14:15*/ 637 } else { 638 insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/ 639 insn->bits3.dp_read.msg_control = msg_control; /*8:11*/ 640 insn->bits3.dp_read.msg_type = msg_type; /*12:13*/ 641 insn->bits3.dp_read.target_cache = target_cache; /*14:15*/ 642 } 643} 644 645static void brw_set_sampler_message(struct brw_compile *p, 646 struct brw_instruction *insn, 647 GLuint binding_table_index, 648 GLuint sampler, 649 GLuint msg_type, 650 GLuint response_length, 651 GLuint msg_length, 652 GLuint header_present, 653 GLuint simd_mode) 654{ 655 struct brw_context *brw = p->brw; 656 struct intel_context *intel = &brw->intel; 657 658 
brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, msg_length, 659 response_length, header_present, false); 660 661 if (intel->gen >= 7) { 662 insn->bits3.sampler_gen7.binding_table_index = binding_table_index; 663 insn->bits3.sampler_gen7.sampler = sampler; 664 insn->bits3.sampler_gen7.msg_type = msg_type; 665 insn->bits3.sampler_gen7.simd_mode = simd_mode; 666 } else if (intel->gen >= 5) { 667 insn->bits3.sampler_gen5.binding_table_index = binding_table_index; 668 insn->bits3.sampler_gen5.sampler = sampler; 669 insn->bits3.sampler_gen5.msg_type = msg_type; 670 insn->bits3.sampler_gen5.simd_mode = simd_mode; 671 } else if (intel->is_g4x) { 672 insn->bits3.sampler_g4x.binding_table_index = binding_table_index; 673 insn->bits3.sampler_g4x.sampler = sampler; 674 insn->bits3.sampler_g4x.msg_type = msg_type; 675 } else { 676 insn->bits3.sampler.binding_table_index = binding_table_index; 677 insn->bits3.sampler.sampler = sampler; 678 insn->bits3.sampler.msg_type = msg_type; 679 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; 680 } 681} 682 683 684#define next_insn brw_next_insn 685struct brw_instruction * 686brw_next_insn(struct brw_compile *p, GLuint opcode) 687{ 688 struct brw_instruction *insn; 689 690 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN); 691 692 insn = &p->store[p->nr_insn++]; 693 memcpy(insn, p->current, sizeof(*insn)); 694 695 /* Reset this one-shot flag: 696 */ 697 698 if (p->current->header.destreg__conditionalmod) { 699 p->current->header.destreg__conditionalmod = 0; 700 p->current->header.predicate_control = BRW_PREDICATE_NORMAL; 701 } 702 703 insn->header.opcode = opcode; 704 return insn; 705} 706 707static struct brw_instruction *brw_alu1( struct brw_compile *p, 708 GLuint opcode, 709 struct brw_reg dest, 710 struct brw_reg src ) 711{ 712 struct brw_instruction *insn = next_insn(p, opcode); 713 brw_set_dest(p, insn, dest); 714 brw_set_src0(p, insn, src); 715 return insn; 716} 717 718static struct brw_instruction 
*brw_alu2(struct brw_compile *p, 719 GLuint opcode, 720 struct brw_reg dest, 721 struct brw_reg src0, 722 struct brw_reg src1 ) 723{ 724 struct brw_instruction *insn = next_insn(p, opcode); 725 brw_set_dest(p, insn, dest); 726 brw_set_src0(p, insn, src0); 727 brw_set_src1(p, insn, src1); 728 return insn; 729} 730 731 732/*********************************************************************** 733 * Convenience routines. 734 */ 735#define ALU1(OP) \ 736struct brw_instruction *brw_##OP(struct brw_compile *p, \ 737 struct brw_reg dest, \ 738 struct brw_reg src0) \ 739{ \ 740 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \ 741} 742 743#define ALU2(OP) \ 744struct brw_instruction *brw_##OP(struct brw_compile *p, \ 745 struct brw_reg dest, \ 746 struct brw_reg src0, \ 747 struct brw_reg src1) \ 748{ \ 749 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \ 750} 751 752/* Rounding operations (other than RNDD) require two instructions - the first 753 * stores a rounded value (possibly the wrong way) in the dest register, but 754 * also sets a per-channel "increment bit" in the flag register. A predicated 755 * add of 1.0 fixes dest to contain the desired result. 756 * 757 * Sandybridge and later appear to round correctly without an ADD. 
758 */ 759#define ROUND(OP) \ 760void brw_##OP(struct brw_compile *p, \ 761 struct brw_reg dest, \ 762 struct brw_reg src) \ 763{ \ 764 struct brw_instruction *rnd, *add; \ 765 rnd = next_insn(p, BRW_OPCODE_##OP); \ 766 brw_set_dest(p, rnd, dest); \ 767 brw_set_src0(p, rnd, src); \ 768 \ 769 if (p->brw->intel.gen < 6) { \ 770 /* turn on round-increments */ \ 771 rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \ 772 add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \ 773 add->header.predicate_control = BRW_PREDICATE_NORMAL; \ 774 } \ 775} 776 777 778ALU1(MOV) 779ALU2(SEL) 780ALU1(NOT) 781ALU2(AND) 782ALU2(OR) 783ALU2(XOR) 784ALU2(SHR) 785ALU2(SHL) 786ALU2(RSR) 787ALU2(RSL) 788ALU2(ASR) 789ALU1(FRC) 790ALU1(RNDD) 791ALU2(MAC) 792ALU2(MACH) 793ALU1(LZD) 794ALU2(DP4) 795ALU2(DPH) 796ALU2(DP3) 797ALU2(DP2) 798ALU2(LINE) 799ALU2(PLN) 800 801 802ROUND(RNDZ) 803ROUND(RNDE) 804 805 806struct brw_instruction *brw_ADD(struct brw_compile *p, 807 struct brw_reg dest, 808 struct brw_reg src0, 809 struct brw_reg src1) 810{ 811 /* 6.2.2: add */ 812 if (src0.type == BRW_REGISTER_TYPE_F || 813 (src0.file == BRW_IMMEDIATE_VALUE && 814 src0.type == BRW_REGISTER_TYPE_VF)) { 815 assert(src1.type != BRW_REGISTER_TYPE_UD); 816 assert(src1.type != BRW_REGISTER_TYPE_D); 817 } 818 819 if (src1.type == BRW_REGISTER_TYPE_F || 820 (src1.file == BRW_IMMEDIATE_VALUE && 821 src1.type == BRW_REGISTER_TYPE_VF)) { 822 assert(src0.type != BRW_REGISTER_TYPE_UD); 823 assert(src0.type != BRW_REGISTER_TYPE_D); 824 } 825 826 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1); 827} 828 829struct brw_instruction *brw_MUL(struct brw_compile *p, 830 struct brw_reg dest, 831 struct brw_reg src0, 832 struct brw_reg src1) 833{ 834 /* 6.32.38: mul */ 835 if (src0.type == BRW_REGISTER_TYPE_D || 836 src0.type == BRW_REGISTER_TYPE_UD || 837 src1.type == BRW_REGISTER_TYPE_D || 838 src1.type == BRW_REGISTER_TYPE_UD) { 839 assert(dest.type != BRW_REGISTER_TYPE_F); 840 } 841 842 if (src0.type == 
BRW_REGISTER_TYPE_F || 843 (src0.file == BRW_IMMEDIATE_VALUE && 844 src0.type == BRW_REGISTER_TYPE_VF)) { 845 assert(src1.type != BRW_REGISTER_TYPE_UD); 846 assert(src1.type != BRW_REGISTER_TYPE_D); 847 } 848 849 if (src1.type == BRW_REGISTER_TYPE_F || 850 (src1.file == BRW_IMMEDIATE_VALUE && 851 src1.type == BRW_REGISTER_TYPE_VF)) { 852 assert(src0.type != BRW_REGISTER_TYPE_UD); 853 assert(src0.type != BRW_REGISTER_TYPE_D); 854 } 855 856 assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE || 857 src0.nr != BRW_ARF_ACCUMULATOR); 858 assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE || 859 src1.nr != BRW_ARF_ACCUMULATOR); 860 861 return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1); 862} 863 864 865void brw_NOP(struct brw_compile *p) 866{ 867 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP); 868 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 869 brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 870 brw_set_src1(p, insn, brw_imm_ud(0x0)); 871} 872 873 874 875 876 877/*********************************************************************** 878 * Comparisons, if/else/endif 879 */ 880 881struct brw_instruction *brw_JMPI(struct brw_compile *p, 882 struct brw_reg dest, 883 struct brw_reg src0, 884 struct brw_reg src1) 885{ 886 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); 887 888 insn->header.execution_size = 1; 889 insn->header.compression_control = BRW_COMPRESSION_NONE; 890 insn->header.mask_control = BRW_MASK_DISABLE; 891 892 p->current->header.predicate_control = BRW_PREDICATE_NONE; 893 894 return insn; 895} 896 897static void 898push_if_stack(struct brw_compile *p, struct brw_instruction *inst) 899{ 900 p->if_stack[p->if_stack_depth] = inst; 901 902 p->if_stack_depth++; 903 if (p->if_stack_array_size <= p->if_stack_depth) { 904 p->if_stack_array_size *= 2; 905 p->if_stack = reralloc(p->mem_ctx, p->if_stack, struct brw_instruction *, 906 p->if_stack_array_size); 907 } 908} 909 
910/* EU takes the value from the flag register and pushes it onto some 911 * sort of a stack (presumably merging with any flag value already on 912 * the stack). Within an if block, the flags at the top of the stack 913 * control execution on each channel of the unit, eg. on each of the 914 * 16 pixel values in our wm programs. 915 * 916 * When the matching 'else' instruction is reached (presumably by 917 * countdown of the instruction count patched in by our ELSE/ENDIF 918 * functions), the relevent flags are inverted. 919 * 920 * When the matching 'endif' instruction is reached, the flags are 921 * popped off. If the stack is now empty, normal execution resumes. 922 */ 923struct brw_instruction * 924brw_IF(struct brw_compile *p, GLuint execute_size) 925{ 926 struct intel_context *intel = &p->brw->intel; 927 struct brw_instruction *insn; 928 929 insn = next_insn(p, BRW_OPCODE_IF); 930 931 /* Override the defaults for this instruction: 932 */ 933 if (intel->gen < 6) { 934 brw_set_dest(p, insn, brw_ip_reg()); 935 brw_set_src0(p, insn, brw_ip_reg()); 936 brw_set_src1(p, insn, brw_imm_d(0x0)); 937 } else if (intel->gen == 6) { 938 brw_set_dest(p, insn, brw_imm_w(0)); 939 insn->bits1.branch_gen6.jump_count = 0; 940 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 941 brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 942 } else { 943 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 944 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 945 brw_set_src1(p, insn, brw_imm_ud(0)); 946 insn->bits3.break_cont.jip = 0; 947 insn->bits3.break_cont.uip = 0; 948 } 949 950 insn->header.execution_size = execute_size; 951 insn->header.compression_control = BRW_COMPRESSION_NONE; 952 insn->header.predicate_control = BRW_PREDICATE_NORMAL; 953 insn->header.mask_control = BRW_MASK_ENABLE; 954 if (!p->single_program_flow) 955 insn->header.thread_control = BRW_THREAD_SWITCH; 956 957 
p->current->header.predicate_control = BRW_PREDICATE_NONE; 958 959 push_if_stack(p, insn); 960 return insn; 961} 962 963/* This function is only used for gen6-style IF instructions with an 964 * embedded comparison (conditional modifier). It is not used on gen7. 965 */ 966struct brw_instruction * 967gen6_IF(struct brw_compile *p, uint32_t conditional, 968 struct brw_reg src0, struct brw_reg src1) 969{ 970 struct brw_instruction *insn; 971 972 insn = next_insn(p, BRW_OPCODE_IF); 973 974 brw_set_dest(p, insn, brw_imm_w(0)); 975 if (p->compressed) { 976 insn->header.execution_size = BRW_EXECUTE_16; 977 } else { 978 insn->header.execution_size = BRW_EXECUTE_8; 979 } 980 insn->bits1.branch_gen6.jump_count = 0; 981 brw_set_src0(p, insn, src0); 982 brw_set_src1(p, insn, src1); 983 984 assert(insn->header.compression_control == BRW_COMPRESSION_NONE); 985 assert(insn->header.predicate_control == BRW_PREDICATE_NONE); 986 insn->header.destreg__conditionalmod = conditional; 987 988 if (!p->single_program_flow) 989 insn->header.thread_control = BRW_THREAD_SWITCH; 990 991 push_if_stack(p, insn); 992 return insn; 993} 994 995/** 996 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs. 997 */ 998static void 999convert_IF_ELSE_to_ADD(struct brw_compile *p, 1000 struct brw_instruction *if_inst, 1001 struct brw_instruction *else_inst) 1002{ 1003 /* The next instruction (where the ENDIF would be, if it existed) */ 1004 struct brw_instruction *next_inst = &p->store[p->nr_insn]; 1005 1006 assert(p->single_program_flow); 1007 assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF); 1008 assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE); 1009 assert(if_inst->header.execution_size == BRW_EXECUTE_1); 1010 1011 /* Convert IF to an ADD instruction that moves the instruction pointer 1012 * to the first instruction of the ELSE block. If there is no ELSE 1013 * block, point to where ENDIF would be. Reverse the predicate. 
1014 * 1015 * There's no need to execute an ENDIF since we don't need to do any 1016 * stack operations, and if we're currently executing, we just want to 1017 * continue normally. 1018 */ 1019 if_inst->header.opcode = BRW_OPCODE_ADD; 1020 if_inst->header.predicate_inverse = 1; 1021 1022 if (else_inst != NULL) { 1023 /* Convert ELSE to an ADD instruction that points where the ENDIF 1024 * would be. 1025 */ 1026 else_inst->header.opcode = BRW_OPCODE_ADD; 1027 1028 if_inst->bits3.ud = (else_inst - if_inst + 1) * 16; 1029 else_inst->bits3.ud = (next_inst - else_inst) * 16; 1030 } else { 1031 if_inst->bits3.ud = (next_inst - if_inst) * 16; 1032 } 1033} 1034 1035/** 1036 * Patch IF and ELSE instructions with appropriate jump targets. 1037 */ 1038static void 1039patch_IF_ELSE(struct brw_compile *p, 1040 struct brw_instruction *if_inst, 1041 struct brw_instruction *else_inst, 1042 struct brw_instruction *endif_inst) 1043{ 1044 struct intel_context *intel = &p->brw->intel; 1045 1046 assert(!p->single_program_flow); 1047 assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF); 1048 assert(endif_inst != NULL); 1049 assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE); 1050 1051 unsigned br = 1; 1052 /* Jump count is for 64bit data chunk each, so one 128bit instruction 1053 * requires 2 chunks. 1054 */ 1055 if (intel->gen >= 5) 1056 br = 2; 1057 1058 assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF); 1059 endif_inst->header.execution_size = if_inst->header.execution_size; 1060 1061 if (else_inst == NULL) { 1062 /* Patch IF -> ENDIF */ 1063 if (intel->gen < 6) { 1064 /* Turn it into an IFF, which means no mask stack operations for 1065 * all-false and jumping past the ENDIF. 
1066 */ 1067 if_inst->header.opcode = BRW_OPCODE_IFF; 1068 if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1); 1069 if_inst->bits3.if_else.pop_count = 0; 1070 if_inst->bits3.if_else.pad0 = 0; 1071 } else if (intel->gen == 6) { 1072 /* As of gen6, there is no IFF and IF must point to the ENDIF. */ 1073 if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst); 1074 } else { 1075 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst); 1076 if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst); 1077 } 1078 } else { 1079 else_inst->header.execution_size = if_inst->header.execution_size; 1080 1081 /* Patch IF -> ELSE */ 1082 if (intel->gen < 6) { 1083 if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst); 1084 if_inst->bits3.if_else.pop_count = 0; 1085 if_inst->bits3.if_else.pad0 = 0; 1086 } else if (intel->gen == 6) { 1087 if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1); 1088 } 1089 1090 /* Patch ELSE -> ENDIF */ 1091 if (intel->gen < 6) { 1092 /* BRW_OPCODE_ELSE pre-gen6 should point just past the 1093 * matching ENDIF. 1094 */ 1095 else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1); 1096 else_inst->bits3.if_else.pop_count = 1; 1097 else_inst->bits3.if_else.pad0 = 0; 1098 } else if (intel->gen == 6) { 1099 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. 
*/ 1100 else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst); 1101 } else { 1102 /* The IF instruction's JIP should point just past the ELSE */ 1103 if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1); 1104 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */ 1105 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst); 1106 else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst); 1107 } 1108 } 1109} 1110 1111void 1112brw_ELSE(struct brw_compile *p) 1113{ 1114 struct intel_context *intel = &p->brw->intel; 1115 struct brw_instruction *insn; 1116 1117 insn = next_insn(p, BRW_OPCODE_ELSE); 1118 1119 if (intel->gen < 6) { 1120 brw_set_dest(p, insn, brw_ip_reg()); 1121 brw_set_src0(p, insn, brw_ip_reg()); 1122 brw_set_src1(p, insn, brw_imm_d(0x0)); 1123 } else if (intel->gen == 6) { 1124 brw_set_dest(p, insn, brw_imm_w(0)); 1125 insn->bits1.branch_gen6.jump_count = 0; 1126 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1127 brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1128 } else { 1129 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1130 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1131 brw_set_src1(p, insn, brw_imm_ud(0)); 1132 insn->bits3.break_cont.jip = 0; 1133 insn->bits3.break_cont.uip = 0; 1134 } 1135 1136 insn->header.compression_control = BRW_COMPRESSION_NONE; 1137 insn->header.mask_control = BRW_MASK_ENABLE; 1138 if (!p->single_program_flow) 1139 insn->header.thread_control = BRW_THREAD_SWITCH; 1140 1141 push_if_stack(p, insn); 1142} 1143 1144void 1145brw_ENDIF(struct brw_compile *p) 1146{ 1147 struct intel_context *intel = &p->brw->intel; 1148 struct brw_instruction *insn; 1149 struct brw_instruction *else_inst = NULL; 1150 struct brw_instruction *if_inst = NULL; 1151 1152 /* Pop the IF and (optional) ELSE instructions from the stack */ 1153 p->if_stack_depth--; 1154 if (p->if_stack[p->if_stack_depth]->header.opcode == 
BRW_OPCODE_ELSE) { 1155 else_inst = p->if_stack[p->if_stack_depth]; 1156 p->if_stack_depth--; 1157 } 1158 if_inst = p->if_stack[p->if_stack_depth]; 1159 1160 if (p->single_program_flow) { 1161 /* ENDIF is useless; don't bother emitting it. */ 1162 convert_IF_ELSE_to_ADD(p, if_inst, else_inst); 1163 return; 1164 } 1165 1166 insn = next_insn(p, BRW_OPCODE_ENDIF); 1167 1168 if (intel->gen < 6) { 1169 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 1170 brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 1171 brw_set_src1(p, insn, brw_imm_d(0x0)); 1172 } else if (intel->gen == 6) { 1173 brw_set_dest(p, insn, brw_imm_w(0)); 1174 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1175 brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1176 } else { 1177 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1178 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1179 brw_set_src1(p, insn, brw_imm_ud(0)); 1180 } 1181 1182 insn->header.compression_control = BRW_COMPRESSION_NONE; 1183 insn->header.mask_control = BRW_MASK_ENABLE; 1184 insn->header.thread_control = BRW_THREAD_SWITCH; 1185 1186 /* Also pop item off the stack in the endif instruction: */ 1187 if (intel->gen < 6) { 1188 insn->bits3.if_else.jump_count = 0; 1189 insn->bits3.if_else.pop_count = 1; 1190 insn->bits3.if_else.pad0 = 0; 1191 } else if (intel->gen == 6) { 1192 insn->bits1.branch_gen6.jump_count = 2; 1193 } else { 1194 insn->bits3.break_cont.jip = 2; 1195 } 1196 patch_IF_ELSE(p, if_inst, else_inst, insn); 1197} 1198 1199struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count) 1200{ 1201 struct intel_context *intel = &p->brw->intel; 1202 struct brw_instruction *insn; 1203 1204 insn = next_insn(p, BRW_OPCODE_BREAK); 1205 if (intel->gen >= 6) { 1206 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1207 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 
1208 brw_set_src1(p, insn, brw_imm_d(0x0)); 1209 } else { 1210 brw_set_dest(p, insn, brw_ip_reg()); 1211 brw_set_src0(p, insn, brw_ip_reg()); 1212 brw_set_src1(p, insn, brw_imm_d(0x0)); 1213 insn->bits3.if_else.pad0 = 0; 1214 insn->bits3.if_else.pop_count = pop_count; 1215 } 1216 insn->header.compression_control = BRW_COMPRESSION_NONE; 1217 insn->header.execution_size = BRW_EXECUTE_8; 1218 1219 return insn; 1220} 1221 1222struct brw_instruction *gen6_CONT(struct brw_compile *p, 1223 struct brw_instruction *do_insn) 1224{ 1225 struct brw_instruction *insn; 1226 1227 insn = next_insn(p, BRW_OPCODE_CONTINUE); 1228 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1229 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1230 brw_set_dest(p, insn, brw_ip_reg()); 1231 brw_set_src0(p, insn, brw_ip_reg()); 1232 brw_set_src1(p, insn, brw_imm_d(0x0)); 1233 1234 insn->header.compression_control = BRW_COMPRESSION_NONE; 1235 insn->header.execution_size = BRW_EXECUTE_8; 1236 return insn; 1237} 1238 1239struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count) 1240{ 1241 struct brw_instruction *insn; 1242 insn = next_insn(p, BRW_OPCODE_CONTINUE); 1243 brw_set_dest(p, insn, brw_ip_reg()); 1244 brw_set_src0(p, insn, brw_ip_reg()); 1245 brw_set_src1(p, insn, brw_imm_d(0x0)); 1246 insn->header.compression_control = BRW_COMPRESSION_NONE; 1247 insn->header.execution_size = BRW_EXECUTE_8; 1248 /* insn->header.mask_control = BRW_MASK_DISABLE; */ 1249 insn->bits3.if_else.pad0 = 0; 1250 insn->bits3.if_else.pop_count = pop_count; 1251 return insn; 1252} 1253 1254/* DO/WHILE loop: 1255 * 1256 * The DO/WHILE is just an unterminated loop -- break or continue are 1257 * used for control within the loop. We have a few ways they can be 1258 * done. 1259 * 1260 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip, 1261 * jip and no DO instruction. 
 *
 * For non-uniform control flow pre-gen6, there's a DO instruction to
 * push the mask, and a WHILE to jump back, and BREAK to get out and
 * pop the mask.
 *
 * For gen6, there's no more mask stack, so no need for DO.  WHILE
 * just points back to the first instruction of the loop.
 */
/**
 * Emit (or elide) the DO marking the top of a loop.
 *
 * On gen6+ and in single-program-flow mode no DO instruction is
 * emitted; the location of the next instruction (the loop top) is
 * returned instead, for brw_WHILE() to compute its backward jump.
 */
struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
{
   struct intel_context *intel = &p->brw->intel;

   if (intel->gen >= 6 || p->single_program_flow) {
      /* No DO needed; hand back the address of the loop top. */
      return &p->store[p->nr_insn];
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);

      /* Override the defaults for this instruction:
       */
      brw_set_dest(p, insn, brw_null_reg());
      brw_set_src0(p, insn, brw_null_reg());
      brw_set_src1(p, insn, brw_null_reg());

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = execute_size;
      insn->header.predicate_control = BRW_PREDICATE_NONE;
      /* insn->header.mask_control = BRW_MASK_ENABLE; */
      /* insn->header.mask_control = BRW_MASK_DISABLE; */

      return insn;
   }
}



/**
 * Emit the WHILE closing a loop opened by brw_DO().
 *
 * \param do_insn the instruction returned by the matching brw_DO();
 *                used to compute the backward jump distance.
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p,
				  struct brw_instruction *do_insn)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint br = 1;

   /* From gen5 on, jump distances count 64-bit chunks: one 128-bit
    * instruction is two chunks.
    */
   if (intel->gen >= 5)
      br = 2;

   if (intel->gen >= 7) {
      insn = next_insn(p, BRW_OPCODE_WHILE);

      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      /* Backward jump to the top of the loop (negative distance). */
      insn->bits3.break_cont.jip = br * (do_insn - insn);

      insn->header.execution_size = BRW_EXECUTE_8;
   } else if (intel->gen == 6) {
      insn = next_insn(p, BRW_OPCODE_WHILE);

      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));

      insn->header.execution_size = BRW_EXECUTE_8;
   } else {
      if (p->single_program_flow) {
	 /* Uniform control flow: the WHILE degenerates to a plain
	  * ip-relative ADD (distances in bytes, 16 per instruction).
	  */
	 insn = next_insn(p, BRW_OPCODE_ADD);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
	 insn->header.execution_size = BRW_EXECUTE_1;
      } else {
	 insn = next_insn(p, BRW_OPCODE_WHILE);

	 assert(do_insn->header.opcode == BRW_OPCODE_DO);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d(0));

	 insn->header.execution_size = do_insn->header.execution_size;
	 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
	 insn->bits3.if_else.pop_count = 0;
	 insn->bits3.if_else.pad0 = 0;
      }
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}


/* FORWARD JUMPS:
 */
/**
 * Patch a previously emitted JMPI so it lands on the next instruction
 * to be emitted.
 */
void brw_land_fwd_jump(struct brw_compile *p,
		       struct brw_instruction *jmp_insn)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *landing = &p->store[p->nr_insn];
   GLuint jmpi = 1;

   /* Gen5+ counts jumps in 64-bit chunks (2 per instruction). */
   if (intel->gen >= 5)
      jmpi = 2;

   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
   assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);

   /* The -1: distance is measured from the instruction following the
    * JMPI (presumably per JMPI's ip-relative semantics).
    */
   jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
}



/* To integrate with the above, it makes sense that the comparison
 * instruction should populate the flag register.  It might be simpler
 * just to use the flag reg for most WM tasks?
 */
/**
 * Emit a CMP with conditional modifier \p conditional, writing the
 * result to \p dest.  When dest is ARF register 0 (the null register),
 * the current predicate state is armed so following instructions are
 * predicated on the comparison result.
 */
void brw_CMP(struct brw_compile *p,
	     struct brw_reg dest,
	     GLuint conditional,
	     struct brw_reg src0,
	     struct brw_reg src1)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);

   insn->header.destreg__conditionalmod = conditional;
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

/*    guess_execution_size(insn, src0); */


   /* Make it so that future instructions will use the computed flag
    * value until brw_set_predicate_control_flag_value() is called
    * again.
    */
   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.nr == 0) {
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
      p->flag_value = 0xff;
   }
}

/* Issue 'wait' instruction for n1, host could program MMIO
   to wake up thread. */
void brw_WAIT (struct brw_compile *p)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
   struct brw_reg src = brw_notification_1_reg();

   /* WAIT uses the notification register as both dest and src0. */
   brw_set_dest(p, insn, src);
   brw_set_src0(p, insn, src);
   brw_set_src1(p, insn, brw_null_reg());
   insn->header.execution_size = 0; /* must */
   insn->header.predicate_control = 0;
   insn->header.compression_control = 0;
}


/***********************************************************************
 * Helpers for the various SEND message types:
 */

/** Extended math function, float[8].
 */
/**
 * \param function    one of BRW_MATH_FUNCTION_*
 * \param saturate    whether to saturate the result
 * \param msg_reg_nr  message register number (pre-gen6 SEND path only)
 * \param data_type   message data type (pre-gen6 SEND path only)
 * \param precision   message precision (pre-gen6 SEND path only)
 *
 * On gen6+ extended math is a regular MATH instruction; before gen6 it
 * is a SEND to the math shared function.
 */
void brw_math( struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       GLuint saturate,
	       GLuint msg_reg_nr,
	       struct brw_reg src,
	       GLuint data_type,
	       GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;

   if (intel->gen >= 6) {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

      assert(dest.file == BRW_GENERAL_REGISTER_FILE);
      assert(src.file == BRW_GENERAL_REGISTER_FILE);

      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
      if (intel->gen == 6)
	 assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);

      /* Source modifiers are ignored for extended math instructions on Gen6. */
      if (intel->gen == 6) {
	 assert(!src.negate);
	 assert(!src.abs);
      }

      /* Integer divide takes integer sources; the other functions take
       * floats.
       */
      if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
	  function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
	  function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
	 assert(src.type != BRW_REGISTER_TYPE_F);
      } else {
	 assert(src.type == BRW_REGISTER_TYPE_F);
      }

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;
      insn->header.saturate = saturate;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      /* Example code doesn't set predicate_control for send
       * instructions.
       */
      insn->header.predicate_control = 0;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_math_message(p,
			   insn,
			   function,
			   src.type == BRW_REGISTER_TYPE_D,
			   precision,
			   saturate,
			   data_type);
   }
}

/** Extended math function, float[8].
1494 */ 1495void brw_math2(struct brw_compile *p, 1496 struct brw_reg dest, 1497 GLuint function, 1498 struct brw_reg src0, 1499 struct brw_reg src1) 1500{ 1501 struct intel_context *intel = &p->brw->intel; 1502 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH); 1503 1504 assert(intel->gen >= 6); 1505 (void) intel; 1506 1507 1508 assert(dest.file == BRW_GENERAL_REGISTER_FILE); 1509 assert(src0.file == BRW_GENERAL_REGISTER_FILE); 1510 assert(src1.file == BRW_GENERAL_REGISTER_FILE); 1511 1512 assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1); 1513 if (intel->gen == 6) { 1514 assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1); 1515 assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1); 1516 } 1517 1518 if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT || 1519 function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER || 1520 function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) { 1521 assert(src0.type != BRW_REGISTER_TYPE_F); 1522 assert(src1.type != BRW_REGISTER_TYPE_F); 1523 } else { 1524 assert(src0.type == BRW_REGISTER_TYPE_F); 1525 assert(src1.type == BRW_REGISTER_TYPE_F); 1526 } 1527 1528 /* Source modifiers are ignored for extended math instructions on Gen6. */ 1529 if (intel->gen == 6) { 1530 assert(!src0.negate); 1531 assert(!src0.abs); 1532 assert(!src1.negate); 1533 assert(!src1.abs); 1534 } 1535 1536 /* Math is the same ISA format as other opcodes, except that CondModifier 1537 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. 1538 */ 1539 insn->header.destreg__conditionalmod = function; 1540 1541 brw_set_dest(p, insn, dest); 1542 brw_set_src0(p, insn, src0); 1543 brw_set_src1(p, insn, src1); 1544} 1545 1546/** 1547 * Extended math function, float[16]. 1548 * Use 2 send instructions. 
 */
void brw_math_16( struct brw_compile *p,
		  struct brw_reg dest,
		  GLuint function,
		  GLuint saturate,
		  GLuint msg_reg_nr,
		  struct brw_reg src,
		  GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   if (intel->gen >= 6) {
      /* On gen6+ a single MATH instruction suffices. */
      insn = next_insn(p, BRW_OPCODE_MATH);

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;
      insn->header.saturate = saturate;

      /* Source modifiers are ignored for extended math instructions. */
      assert(!src.negate);
      assert(!src.abs);

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
      return;
   }

   /* First instruction:
    */
   brw_push_insn_state(p);
   brw_set_predicate_control_flag_value(p, 0xff);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);
   brw_set_math_message(p,
			insn,
			function,
			BRW_MATH_INTEGER_UNSIGNED,
			precision,
			saturate,
			BRW_MATH_DATA_VECTOR);

   /* Second instruction: handles the second half of the register,
    * using message register msg_reg_nr+1.
    */
   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
   insn->header.destreg__conditionalmod = msg_reg_nr+1;

   brw_set_dest(p, insn, offset(dest,1));
   brw_set_src0(p, insn, src);
   brw_set_math_message(p,
			insn,
			function,
			BRW_MATH_INTEGER_UNSIGNED,
			precision,
			saturate,
			BRW_MATH_DATA_VECTOR);

   brw_pop_insn_state(p);
}


/**
 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
 * using a constant offset per channel.
 *
 * The offset must be aligned to oword size (16 bytes).  Used for
 * register spilling.
 */
void brw_oword_block_write_scratch(struct brw_compile *p,
				   struct brw_reg mrf,
				   int num_regs,
				   GLuint offset)
{
   struct intel_context *intel = &p->brw->intel;
   uint32_t msg_control, msg_type;
   int mlen;

   /* On gen6+ the message takes the offset in owords rather than bytes. */
   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* mlen = 1 header reg + num_regs payload; 1 GRF = 2 owords. */
   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      mlen = 2;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      mlen = 3;
   }

   /* Set up the message header.  This is g0, with g0.2 filled with
    * the offset.  We don't want to leave our offset around in g0 or
    * it'll screw up texture samples, so set it up inside the message
    * reg.
    */
   {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
				  mrf.nr,
				  2), BRW_REGISTER_TYPE_UD),
	      brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_reg dest;
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      int send_commit_msg;
      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
					 BRW_REGISTER_TYPE_UW);

      /* SENDs are never compressed; widen the header if we inherited a
       * compressed state from the current instruction defaults.
       */
      if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
	 insn->header.compression_control = BRW_COMPRESSION_NONE;
	 src_header = vec16(src_header);
      }
      assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
      insn->header.destreg__conditionalmod = mrf.nr;

      /* Until gen6, writes followed by reads from the same location
       * are not guaranteed to be ordered unless write_commit is set.
       * If set, then a no-op write is issued to the destination
       * register to set a dependency, and a read from the destination
       * can be used to ensure the ordering.
       *
       * For gen6, only writes between different threads need ordering
       * protection.  Our use of DP writes is all about register
       * spilling within a thread.
       */
      if (intel->gen >= 6) {
	 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
	 send_commit_msg = 0;
      } else {
	 dest = src_header;
	 send_commit_msg = 1;
      }

      brw_set_dest(p, insn, dest);
      if (intel->gen >= 6) {
	 brw_set_src0(p, insn, mrf);
      } else {
	 brw_set_src0(p, insn, brw_null_reg());
      }

      if (intel->gen >= 6)
	 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
      else
	 msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;

      brw_set_dp_write_message(p,
			       insn,
			       255, /* binding table index (255=stateless) */
			       msg_control,
			       msg_type,
			       mlen,
			       true, /* header_present */
			       0, /* not a render target */
			       send_commit_msg, /* response_length */
			       0, /* eot */
			       send_commit_msg);
   }
}


/**
 * Read a block of owords (half a GRF each) from the scratch buffer
 * using a constant index per channel.
 *
 * Offset must be aligned to oword size (16 bytes).  Used for register
 * spilling.
 */
void
brw_oword_block_read_scratch(struct brw_compile *p,
			     struct brw_reg dest,
			     struct brw_reg mrf,
			     int num_regs,
			     GLuint offset)
{
   struct intel_context *intel = &p->brw->intel;
   uint32_t msg_control;
   int rlen;

   /* On gen6+ the message takes the offset in owords rather than bytes. */
   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
   dest = retype(dest, BRW_REGISTER_TYPE_UW);

   /* rlen = number of registers read back; 1 GRF = 2 owords. */
   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      rlen = 1;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      rlen = 2;
   }

   /* Build the message header in the mrf: a copy of g0 with g0.2
    * replaced by the scratch offset.
    */
   {
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_mask_control(p, BRW_MASK_DISABLE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
				  mrf.nr,
				  2), BRW_REGISTER_TYPE_UD),
	      brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      assert(insn->header.predicate_control == 0);
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.destreg__conditionalmod = mrf.nr;

      brw_set_dest(p, insn, dest);	/* UW? */
      if (intel->gen >= 6) {
	 brw_set_src0(p, insn, mrf);
      } else {
	 brw_set_src0(p, insn, brw_null_reg());
      }

      brw_set_dp_read_message(p,
			      insn,
			      255, /* binding table index (255=stateless) */
			      msg_control,
			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			      BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
			      1, /* msg_length */
			      rlen);
   }
}

/**
 * Read a float[4] vector from the data port Data Cache (const buffer).
 * Location (in buffer) should be a multiple of 16.
 * Used for fetching shader constants.
 */
void brw_oword_block_read(struct brw_compile *p,
			  struct brw_reg dest,
			  struct brw_reg mrf,
			  uint32_t offset,
			  uint32_t bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;

   /* On newer hardware, offset is in units of owords. */
   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);

   /* Message header: a copy of g0 with the buffer offset in g0.2. */
   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

   /* set message header global offset field (reg 0, element 2) */
   brw_MOV(p,
	   retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
			       mrf.nr,
			       2), BRW_REGISTER_TYPE_UD),
	   brw_imm_ud(offset));

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = mrf.nr;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(p, insn, dest);
   if (intel->gen >= 6) {
      brw_set_src0(p, insn, mrf);
   } else {
      brw_set_src0(p, insn, brw_null_reg());
   }

   brw_set_dp_read_message(p,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
			   1, /* msg_length */
			   1); /* response_length (1 reg, 2 owords!) */

   brw_pop_insn_state(p);
}

/**
 * Read a set of dwords from the data port Data Cache (const buffer).
 *
 * Location (in buffer) appears as UD offsets in the register after
 * the provided mrf header reg.
 */
void brw_dword_scattered_read(struct brw_compile *p,
			      struct brw_reg dest,
			      struct brw_reg mrf,
			      uint32_t bind_table_index)
{
   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* Copy g0 into the message header register. */
   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
   brw_pop_insn_state(p);

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = mrf.nr;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, brw_null_reg());

   brw_set_dp_read_message(p,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS,
			   BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ,
			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
			   2, /* msg_length */
			   1); /* response_length */
}



/**
 * Read float[4] constant(s) from VS constant buffer.
 * For relative addressing, two float[4] constants will be read into 'dest'.
 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
 */
void brw_dp_READ_4_vs(struct brw_compile *p,
		      struct brw_reg dest,
		      GLuint location,
		      GLuint bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint msg_reg_nr = 1;

   /* On gen6+ the message takes the location in owords rather than bytes. */
   if (intel->gen >= 6)
      location /= 16;

   /* Setup MRF[1] with location/offset into const buffer */
   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2),
		     BRW_REGISTER_TYPE_UD),
	   brw_imm_ud(location));
   brw_pop_insn_state(p);

   insn = next_insn(p, BRW_OPCODE_SEND);

   insn->header.predicate_control = BRW_PREDICATE_NONE;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.destreg__conditionalmod = msg_reg_nr;
   insn->header.mask_control = BRW_MASK_DISABLE;

   brw_set_dest(p, insn, dest);
   if (intel->gen >= 6) {
      brw_set_src0(p, insn, brw_message_reg(msg_reg_nr));
   } else {
      brw_set_src0(p, insn, brw_null_reg());
   }

   brw_set_dp_read_message(p,
			   insn,
			   bind_table_index,
			   0,
			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
			   1, /* msg_length */
			   1); /* response_length (1 Oword) */
}

/**
 * Read a float[4] constant per vertex from VS constant buffer, with
 * relative addressing.
 */
void brw_dp_READ_4_vs_relative(struct brw_compile *p,
			       struct brw_reg dest,
			       struct brw_reg addr_reg,
			       GLuint offset,
			       GLuint bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_reg src = brw_vec8_grf(0, 0);
   int msg_type;

   /* Setup MRF[1] with offset into const buffer */
   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);

   /* M1.0 is block offset 0, M1.4 is block offset 1, all other
    * fields ignored.
    */
   brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D),
	   addr_reg, brw_imm_d(offset));
   brw_pop_insn_state(p);

   gen6_resolve_implied_move(p, &src, 0);
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

   insn->header.predicate_control = BRW_PREDICATE_NONE;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.destreg__conditionalmod = 0;
   insn->header.mask_control = BRW_MASK_DISABLE;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);

   /* The dual-block-read message type encoding changed over the
    * generations.
    */
   if (intel->gen >= 6)
      msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else if (intel->gen == 5 || intel->is_g4x)
      msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else
      msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;

   brw_set_dp_read_message(p,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
			   msg_type,
			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
			   2, /* msg_length */
			   1); /* response_length */
}



/**
 * Emit a render-target write message.
 *
 * Uses SENDC instead of SEND on gen6+ when writing to binding table
 * index 0 (NOTE(review): presumably for render-target dependency
 * ordering -- confirm against the PRM).
 */
void brw_fb_WRITE(struct brw_compile *p,
		  int dispatch_width,
		  GLuint msg_reg_nr,
		  struct brw_reg src0,
		  GLuint binding_table_index,
		  GLuint msg_length,
		  GLuint response_length,
		  bool eot,
		  bool header_present)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint msg_control, msg_type;
   struct brw_reg dest;

   if (dispatch_width == 16)
      dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
   else
      dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);

   if (intel->gen >= 6 && binding_table_index == 0) {
      insn = next_insn(p, BRW_OPCODE_SENDC);
   } else {
      insn = next_insn(p, BRW_OPCODE_SEND);
   }
   /* The execution mask is ignored for render target writes. */
   insn->header.predicate_control = 0;
   insn->header.compression_control = BRW_COMPRESSION_NONE;

   if (intel->gen >= 6) {
      /* headerless version, just submit color payload */
      src0 = brw_message_reg(msg_reg_nr);

      msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   } else {
      insn->header.destreg__conditionalmod = msg_reg_nr;

      msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   }

   if (dispatch_width == 16)
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
   else
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_dp_write_message(p,
			    insn,
			    binding_table_index,
			    msg_control,
			    msg_type,
			    msg_length,
			    header_present,
			    1, /* last render target write */
			    response_length,
			    eot,
			    0 /* send_commit_msg */);
}


/**
 * Texture sample instruction.
 * Note: the msg_type plus msg_length values determine exactly what kind
 * of sampling operation is performed.  See volume 4, page 161 of docs.
 */
void brw_SAMPLE(struct brw_compile *p,
		struct brw_reg dest,
		GLuint msg_reg_nr,
		struct brw_reg src0,
		GLuint binding_table_index,
		GLuint sampler,
		GLuint writemask,
		GLuint msg_type,
		GLuint response_length,
		GLuint msg_length,
		GLuint header_present,
		GLuint simd_mode)
{
   struct intel_context *intel = &p->brw->intel;
   bool need_stall = 0;

   /* Nothing to write: emit no instruction at all. */
   if (writemask == 0) {
      /*printf("%s: zero writemask??\n", __FUNCTION__); */
      return;
   }

   /* Hardware doesn't do destination dependency checking on send
    * instructions properly.  Add a workaround which generates the
    * dependency by other means.  In practice it seems like this bug
    * only crops up for texture samples, and only where registers are
    * written by the send and then written again later without being
    * read in between.  Luckily for us, we already track that
    * information and use it to modify the writemask for the
    * instruction, so that is a guide for whether a workaround is
    * needed.
    */
   if (writemask != WRITEMASK_XYZW) {
      GLuint dst_offset = 0;
      GLuint i, newmask = 0, len = 0;

      /* Skip leading disabled channels; each skipped channel advances the
       * destination by 2 registers (presumably one pair per channel for
       * compressed dispatch -- NOTE(review): confirm for SIMD8).
       */
      for (i = 0; i < 4; i++) {
	 if (writemask & (1<<i))
	    break;
	 dst_offset += 2;
      }
      /* Collect the contiguous run of enabled channels that follows. */
      for (; i < 4; i++) {
	 if (!(writemask & (1<<i)))
	    break;
	 newmask |= 1<<i;
	 len++;
      }

      if (newmask != writemask) {
	 /* The enabled channels are not one contiguous run; fall back to
	  * the dependency-stall workaround after the send (see below).
	  */
	 need_stall = 1;
         /* printf("need stall %x %x\n", newmask , writemask); */
      }
      else {
	 /* Contiguous mask: use the sampler's channel-mask header field
	  * instead, shrinking the response to just the enabled channels.
	  */
	 bool dispatch_16 = false;

	 struct brw_reg m1 = brw_message_reg(msg_reg_nr);

	 guess_execution_size(p, p->current, dest);
	 if (p->current->header.execution_size == BRW_EXECUTE_16)
	    dispatch_16 = true;

	 /* The header wants the mask of channels to *disable*. */
	 newmask = ~newmask & WRITEMASK_XYZW;

	 brw_push_insn_state(p);

	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
	 brw_set_mask_control(p, BRW_MASK_DISABLE);

	 /* Build a header in m1: copy r0, then set the channel-disable
	  * mask in m1.2 bits 15:12.
	  */
	 brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD),
		 retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD));
  	 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));

	 brw_pop_insn_state(p);

  	 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
	 dest = offset(dest, dst_offset);

	 /* For 16-wide dispatch, masked channels are skipped in the
	  * response.  For 8-wide, masked channels still take up slots,
	  * and are just not written to.
	  */
	 if (dispatch_16)
	    response_length = len * 2;
      }
   }

   {
      struct brw_instruction *insn;

      /* On gen6+ move the payload into the MRF explicitly first. */
      gen6_resolve_implied_move(p, &src0, msg_reg_nr);

      insn = next_insn(p, BRW_OPCODE_SEND);
      insn->header.predicate_control = 0; /* XXX */
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      if (intel->gen < 6)
	  insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src0);
      brw_set_sampler_message(p, insn,
			      binding_table_index,
			      sampler,
			      msg_type,
			      response_length,
			      msg_length,
			      header_present,
			      simd_mode);
   }

   if (need_stall) {
      /* Force the dependency: read-and-rewrite the last register of the
       * response so later writers of dest wait for the sample to land.
       */
      struct brw_reg reg = vec8(offset(dest, response_length-1));

      /*  mov (8) r9.0<1>:f    r9.0<8;8,1>:f { Align1 }
       */
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, retype(reg, BRW_REGISTER_TYPE_UD),
	      retype(reg, BRW_REGISTER_TYPE_UD));
      brw_pop_insn_state(p);
   }

}

/* All these variables are pretty confusing - we might be better off
 * using bitmasks and macros for this, in the old style.  Or perhaps
 * just having the caller instantiate the fields in dword3 itself.
 */
void brw_urb_WRITE(struct brw_compile *p,
		   struct brw_reg dest,
		   GLuint msg_reg_nr,
		   struct brw_reg src0,
		   bool allocate,
		   bool used,
		   GLuint msg_length,
		   GLuint response_length,
		   bool eot,
		   bool writes_complete,
		   GLuint offset,
		   GLuint swizzle)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   /* On gen6+ move the payload into the MRF explicitly before the SEND. */
   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   if (intel->gen == 7) {
      /* Enable Channel Masks in the URB_WRITE_HWORD message header:
       * OR bits 15:8 into dword 5 of the header (based on r0.5).
       */
      brw_push_insn_state(p);
      brw_set_access_mode(p, BRW_ALIGN_1);
      brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
		       BRW_REGISTER_TYPE_UD),
	        retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
		brw_imm_ud(0xff00));
      brw_pop_insn_state(p);
   }

   insn = next_insn(p, BRW_OPCODE_SEND);

   /* The payload must fit in the message register file. */
   assert(msg_length < BRW_MAX_MRF);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));

   if (intel->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_urb_message(p,
		       insn,
		       allocate,
		       used,
		       msg_length,
		       response_length,
		       eot,
		       writes_complete,
		       offset,
		       swizzle);
}

/* Scan forward from instruction \p start for the end of the current
 * control-flow block (the next ENDIF, ELSE or WHILE) and return its
 * instruction index.  Used below to compute BREAK/CONT jump targets.
 */
static int
brw_find_next_block_end(struct brw_compile *p, int start)
{
   int ip;

   for (ip = start + 1; ip < p->nr_insn; ip++) {
      struct brw_instruction *insn = &p->store[ip];

      switch (insn->header.opcode) {
      case BRW_OPCODE_ENDIF:
      case BRW_OPCODE_ELSE:
      case BRW_OPCODE_WHILE:
	 return ip;
      }
   }
   /* Every BREAK/CONT must be inside a block, so this is unreachable. */
   assert(!"not reached");
   return start + 1;
}

/* There is no DO instruction on gen6, so to find the end of the loop
 * we have to see if the loop is jumping back before our start
 * instruction.
 */
static int
brw_find_loop_end(struct brw_compile *p, int start)
{
   struct intel_context *intel = &p->brw->intel;
   int ip;
   int br = 2;   /* branch offsets are in units of half-instructions */

   for (ip = start + 1; ip < p->nr_insn; ip++) {
      struct brw_instruction *insn = &p->store[ip];

      /* A WHILE whose backward jump lands at or before `start` is the
       * WHILE closing the loop that contains `start`.  Gen6 encodes the
       * offset in bits1 (jump_count); gen7 in bits3 (jip).
       */
      if (insn->header.opcode == BRW_OPCODE_WHILE) {
	 int jip = intel->gen == 6 ? insn->bits1.branch_gen6.jump_count
				   : insn->bits3.break_cont.jip;
	 if (ip + jip / br <= start)
	    return ip;
      }
   }
   assert(!"not reached");
   return start + 1;
}

/* After program generation, go back and update the UIP and JIP of
 * BREAK and CONT instructions to their correct locations.
 */
void
brw_set_uip_jip(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   int ip;
   int br = 2;   /* offsets are encoded in half-instruction units */

   /* Pre-gen6 uses DO-based loops with different patching (not here). */
   if (intel->gen < 6)
      return;

   for (ip = 0; ip < p->nr_insn; ip++) {
      struct brw_instruction *insn = &p->store[ip];

      switch (insn->header.opcode) {
      case BRW_OPCODE_BREAK:
	 /* JIP: end of the enclosing block; UIP: end of the loop. */
	 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
	 /* Gen7 UIP points to WHILE; Gen6 points just after it */
	 insn->bits3.break_cont.uip =
	    br * (brw_find_loop_end(p, ip) - ip + (intel->gen == 6 ? 1 : 0));
	 break;
      case BRW_OPCODE_CONTINUE:
	 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
	 insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip);

	 /* Both targets lie forward of the CONT, so neither can be 0. */
	 assert(insn->bits3.break_cont.uip != 0);
	 assert(insn->bits3.break_cont.jip != 0);
	 break;
      }
   }
}

/* Emit an FF_SYNC message (geometry pipeline URB handshake/allocation).
 * \param allocate         request URB entry allocation in the message
 * \param response_length  length of the reply written to \p dest
 * \param eot              true to end the thread with this message
 */
void brw_ff_sync(struct brw_compile *p,
		   struct brw_reg dest,
		   GLuint msg_reg_nr,
		   struct brw_reg src0,
		   bool allocate,
		   GLuint response_length,
		   bool eot)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   /* On gen6+ move the payload into the MRF explicitly before the SEND. */
   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   insn = next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));

   if (intel->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_ff_sync_message(p,
			   insn,
			   allocate,
			   response_length,
			   eot);
}