brw_eu_emit.c revision fb977c90d1ef29f47b686c27500005025543cf11
1/* 2 Copyright (C) Intel Corp. 2006. All Rights Reserved. 3 Intel funded Tungsten Graphics to 4 develop this 3D driver. 5 6 Permission is hereby granted, free of charge, to any person obtaining 7 a copy of this software and associated documentation files (the 8 "Software"), to deal in the Software without restriction, including 9 without limitation the rights to use, copy, modify, merge, publish, 10 distribute, sublicense, and/or sell copies of the Software, and to 11 permit persons to whom the Software is furnished to do so, subject to 12 the following conditions: 13 14 The above copyright notice and this permission notice (including the 15 next paragraph) shall be included in all copies or substantial 16 portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
25 26 **********************************************************************/ 27 /* 28 * Authors: 29 * Keith Whitwell <keithw@vmware.com> 30 */ 31 32 33#include "brw_context.h" 34#include "brw_defines.h" 35#include "brw_eu.h" 36 37#include "glsl/ralloc.h" 38 39/*********************************************************************** 40 * Internal helper for constructing instructions 41 */ 42 43static void guess_execution_size(struct brw_compile *p, 44 struct brw_instruction *insn, 45 struct brw_reg reg) 46{ 47 if (reg.width == BRW_WIDTH_8 && p->compressed) 48 insn->header.execution_size = BRW_EXECUTE_16; 49 else 50 insn->header.execution_size = reg.width; /* note - definitions are compatible */ 51} 52 53 54/** 55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source 56 * registers, implicitly moving the operand to a message register. 57 * 58 * On Sandybridge, this is no longer the case. This function performs the 59 * explicit move; it should be called before emitting a SEND instruction. 60 */ 61void 62gen6_resolve_implied_move(struct brw_compile *p, 63 struct brw_reg *src, 64 unsigned msg_reg_nr) 65{ 66 struct brw_context *brw = p->brw; 67 if (brw->gen < 6) 68 return; 69 70 if (src->file == BRW_MESSAGE_REGISTER_FILE) 71 return; 72 73 if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) { 74 brw_push_insn_state(p); 75 brw_set_mask_control(p, BRW_MASK_DISABLE); 76 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 77 brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD), 78 retype(*src, BRW_REGISTER_TYPE_UD)); 79 brw_pop_insn_state(p); 80 } 81 *src = brw_message_reg(msg_reg_nr); 82} 83 84static void 85gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg) 86{ 87 /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"): 88 * "The send with EOT should use register space R112-R127 for <src>. 
This is 89 * to enable loading of a new thread into the same slot while the message 90 * with EOT for current thread is pending dispatch." 91 * 92 * Since we're pretending to have 16 MRFs anyway, we may as well use the 93 * registers required for messages with EOT. 94 */ 95 struct brw_context *brw = p->brw; 96 if (brw->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) { 97 reg->file = BRW_GENERAL_REGISTER_FILE; 98 reg->nr += GEN7_MRF_HACK_START; 99 } 100} 101 102/** 103 * Convert a brw_reg_type enumeration value into the hardware representation. 104 * 105 * The hardware encoding may depend on whether the value is an immediate. 106 */ 107unsigned 108brw_reg_type_to_hw_type(const struct brw_context *brw, 109 enum brw_reg_type type, unsigned file) 110{ 111 if (file == BRW_IMMEDIATE_VALUE) { 112 const static int imm_hw_types[] = { 113 [BRW_REGISTER_TYPE_UD] = BRW_HW_REG_TYPE_UD, 114 [BRW_REGISTER_TYPE_D] = BRW_HW_REG_TYPE_D, 115 [BRW_REGISTER_TYPE_UW] = BRW_HW_REG_TYPE_UW, 116 [BRW_REGISTER_TYPE_W] = BRW_HW_REG_TYPE_W, 117 [BRW_REGISTER_TYPE_F] = BRW_HW_REG_TYPE_F, 118 [BRW_REGISTER_TYPE_UB] = -1, 119 [BRW_REGISTER_TYPE_B] = -1, 120 [BRW_REGISTER_TYPE_UV] = BRW_HW_REG_IMM_TYPE_UV, 121 [BRW_REGISTER_TYPE_VF] = BRW_HW_REG_IMM_TYPE_VF, 122 [BRW_REGISTER_TYPE_V] = BRW_HW_REG_IMM_TYPE_V, 123 [BRW_REGISTER_TYPE_DF] = GEN8_HW_REG_IMM_TYPE_DF, 124 [BRW_REGISTER_TYPE_HF] = GEN8_HW_REG_IMM_TYPE_HF, 125 [BRW_REGISTER_TYPE_UQ] = GEN8_HW_REG_TYPE_UQ, 126 [BRW_REGISTER_TYPE_Q] = GEN8_HW_REG_TYPE_Q, 127 }; 128 assert(type < ARRAY_SIZE(imm_hw_types)); 129 assert(imm_hw_types[type] != -1); 130 assert(brw->gen >= 8 || type < BRW_REGISTER_TYPE_DF); 131 return imm_hw_types[type]; 132 } else { 133 /* Non-immediate registers */ 134 const static int hw_types[] = { 135 [BRW_REGISTER_TYPE_UD] = BRW_HW_REG_TYPE_UD, 136 [BRW_REGISTER_TYPE_D] = BRW_HW_REG_TYPE_D, 137 [BRW_REGISTER_TYPE_UW] = BRW_HW_REG_TYPE_UW, 138 [BRW_REGISTER_TYPE_W] = BRW_HW_REG_TYPE_W, 139 [BRW_REGISTER_TYPE_UB] = 
BRW_HW_REG_NON_IMM_TYPE_UB, 140 [BRW_REGISTER_TYPE_B] = BRW_HW_REG_NON_IMM_TYPE_B, 141 [BRW_REGISTER_TYPE_F] = BRW_HW_REG_TYPE_F, 142 [BRW_REGISTER_TYPE_UV] = -1, 143 [BRW_REGISTER_TYPE_VF] = -1, 144 [BRW_REGISTER_TYPE_V] = -1, 145 [BRW_REGISTER_TYPE_DF] = GEN7_HW_REG_NON_IMM_TYPE_DF, 146 [BRW_REGISTER_TYPE_HF] = GEN8_HW_REG_NON_IMM_TYPE_HF, 147 [BRW_REGISTER_TYPE_UQ] = GEN8_HW_REG_TYPE_UQ, 148 [BRW_REGISTER_TYPE_Q] = GEN8_HW_REG_TYPE_Q, 149 }; 150 assert(type < ARRAY_SIZE(hw_types)); 151 assert(hw_types[type] != -1); 152 assert(brw->gen >= 7 || type < BRW_REGISTER_TYPE_DF); 153 assert(brw->gen >= 8 || type < BRW_REGISTER_TYPE_HF); 154 return hw_types[type]; 155 } 156} 157 158void 159brw_set_dest(struct brw_compile *p, struct brw_instruction *insn, 160 struct brw_reg dest) 161{ 162 if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && 163 dest.file != BRW_MESSAGE_REGISTER_FILE) 164 assert(dest.nr < 128); 165 166 gen7_convert_mrf_to_grf(p, &dest); 167 168 insn->bits1.da1.dest_reg_file = dest.file; 169 insn->bits1.da1.dest_reg_type = 170 brw_reg_type_to_hw_type(p->brw, dest.type, dest.file); 171 insn->bits1.da1.dest_address_mode = dest.address_mode; 172 173 if (dest.address_mode == BRW_ADDRESS_DIRECT) { 174 insn->bits1.da1.dest_reg_nr = dest.nr; 175 176 if (insn->header.access_mode == BRW_ALIGN_1) { 177 insn->bits1.da1.dest_subreg_nr = dest.subnr; 178 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) 179 dest.hstride = BRW_HORIZONTAL_STRIDE_1; 180 insn->bits1.da1.dest_horiz_stride = dest.hstride; 181 } 182 else { 183 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; 184 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; 185 if (dest.file == BRW_GENERAL_REGISTER_FILE || 186 dest.file == BRW_MESSAGE_REGISTER_FILE) { 187 assert(dest.dw1.bits.writemask != 0); 188 } 189 /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1: 190 * Although Dst.HorzStride is a don't care for Align16, HW needs 191 * this to be programmed as "01". 
192 */ 193 insn->bits1.da16.dest_horiz_stride = 1; 194 } 195 } 196 else { 197 insn->bits1.ia1.dest_subreg_nr = dest.subnr; 198 199 /* These are different sizes in align1 vs align16: 200 */ 201 if (insn->header.access_mode == BRW_ALIGN_1) { 202 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; 203 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) 204 dest.hstride = BRW_HORIZONTAL_STRIDE_1; 205 insn->bits1.ia1.dest_horiz_stride = dest.hstride; 206 } 207 else { 208 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; 209 /* even ignored in da16, still need to set as '01' */ 210 insn->bits1.ia16.dest_horiz_stride = 1; 211 } 212 } 213 214 /* NEW: Set the execution size based on dest.width and 215 * insn->compression_control: 216 */ 217 guess_execution_size(p, insn, dest); 218} 219 220extern int reg_type_size[]; 221 222static void 223validate_reg(struct brw_instruction *insn, struct brw_reg reg) 224{ 225 int hstride_for_reg[] = {0, 1, 2, 4}; 226 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256}; 227 int width_for_reg[] = {1, 2, 4, 8, 16}; 228 int execsize_for_reg[] = {1, 2, 4, 8, 16}; 229 int width, hstride, vstride, execsize; 230 231 if (reg.file == BRW_IMMEDIATE_VALUE) { 232 /* 3.3.6: Region Parameters. Restriction: Immediate vectors 233 * mean the destination has to be 128-bit aligned and the 234 * destination horiz stride has to be a word. 
235 */ 236 if (reg.type == BRW_REGISTER_TYPE_V) { 237 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] * 238 reg_type_size[insn->bits1.da1.dest_reg_type] == 2); 239 } 240 241 return; 242 } 243 244 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE && 245 reg.file == BRW_ARF_NULL) 246 return; 247 248 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg)); 249 hstride = hstride_for_reg[reg.hstride]; 250 251 if (reg.vstride == 0xf) { 252 vstride = -1; 253 } else { 254 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg)); 255 vstride = vstride_for_reg[reg.vstride]; 256 } 257 258 assert(reg.width >= 0 && reg.width < Elements(width_for_reg)); 259 width = width_for_reg[reg.width]; 260 261 assert(insn->header.execution_size >= 0 && 262 insn->header.execution_size < Elements(execsize_for_reg)); 263 execsize = execsize_for_reg[insn->header.execution_size]; 264 265 /* Restrictions from 3.3.10: Register Region Restrictions. */ 266 /* 3. */ 267 assert(execsize >= width); 268 269 /* 4. */ 270 if (execsize == width && hstride != 0) { 271 assert(vstride == -1 || vstride == width * hstride); 272 } 273 274 /* 5. */ 275 if (execsize == width && hstride == 0) { 276 /* no restriction on vstride. */ 277 } 278 279 /* 6. */ 280 if (width == 1) { 281 assert(hstride == 0); 282 } 283 284 /* 7. */ 285 if (execsize == 1 && width == 1) { 286 assert(hstride == 0); 287 assert(vstride == 0); 288 } 289 290 /* 8. */ 291 if (vstride == 0 && hstride == 0) { 292 assert(width == 1); 293 } 294 295 /* 10. Check destination issues. 
*/ 296} 297 298void 299brw_set_src0(struct brw_compile *p, struct brw_instruction *insn, 300 struct brw_reg reg) 301{ 302 struct brw_context *brw = p->brw; 303 304 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) 305 assert(reg.nr < 128); 306 307 gen7_convert_mrf_to_grf(p, ®); 308 309 if (brw->gen >= 6 && (insn->header.opcode == BRW_OPCODE_SEND || 310 insn->header.opcode == BRW_OPCODE_SENDC)) { 311 /* Any source modifiers or regions will be ignored, since this just 312 * identifies the MRF/GRF to start reading the message contents from. 313 * Check for some likely failures. 314 */ 315 assert(!reg.negate); 316 assert(!reg.abs); 317 assert(reg.address_mode == BRW_ADDRESS_DIRECT); 318 } 319 320 validate_reg(insn, reg); 321 322 insn->bits1.da1.src0_reg_file = reg.file; 323 insn->bits1.da1.src0_reg_type = 324 brw_reg_type_to_hw_type(brw, reg.type, reg.file); 325 insn->bits2.da1.src0_abs = reg.abs; 326 insn->bits2.da1.src0_negate = reg.negate; 327 insn->bits2.da1.src0_address_mode = reg.address_mode; 328 329 if (reg.file == BRW_IMMEDIATE_VALUE) { 330 insn->bits3.ud = reg.dw1.ud; 331 332 /* The Bspec's section titled "Non-present Operands" claims that if src0 333 * is an immediate that src1's type must be the same as that of src0. 334 * 335 * The SNB+ DataTypeIndex instruction compaction tables contain mappings 336 * that do not follow this rule. E.g., from the IVB/HSW table: 337 * 338 * DataTypeIndex 18-Bit Mapping Mapped Meaning 339 * 3 001000001011111101 r:f | i:vf | a:ud | <1> | dir | 340 * 341 * And from the SNB table: 342 * 343 * DataTypeIndex 18-Bit Mapping Mapped Meaning 344 * 8 001000000111101100 a:w | i:w | a:ud | <1> | dir | 345 * 346 * Neither of these cause warnings from the simulator when used, 347 * compacted or otherwise. In fact, all compaction mappings that have an 348 * immediate in src0 use a:ud for src1. 349 * 350 * The GM45 instruction compaction tables do not contain mapped meanings 351 * so it's not clear whether it has the restriction. 
We'll assume it was 352 * lifted on SNB. (FINISHME: decode the GM45 tables and check.) 353 */ 354 insn->bits1.da1.src1_reg_file = 0; /* arf */ 355 if (brw->gen < 6) { 356 insn->bits1.da1.src1_reg_type = insn->bits1.da1.src0_reg_type; 357 } else { 358 insn->bits1.da1.src1_reg_type = BRW_HW_REG_TYPE_UD; 359 } 360 } 361 else 362 { 363 if (reg.address_mode == BRW_ADDRESS_DIRECT) { 364 if (insn->header.access_mode == BRW_ALIGN_1) { 365 insn->bits2.da1.src0_subreg_nr = reg.subnr; 366 insn->bits2.da1.src0_reg_nr = reg.nr; 367 } 368 else { 369 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16; 370 insn->bits2.da16.src0_reg_nr = reg.nr; 371 } 372 } 373 else { 374 insn->bits2.ia1.src0_subreg_nr = reg.subnr; 375 376 if (insn->header.access_mode == BRW_ALIGN_1) { 377 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; 378 } 379 else { 380 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; 381 } 382 } 383 384 if (insn->header.access_mode == BRW_ALIGN_1) { 385 if (reg.width == BRW_WIDTH_1 && 386 insn->header.execution_size == BRW_EXECUTE_1) { 387 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0; 388 insn->bits2.da1.src0_width = BRW_WIDTH_1; 389 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0; 390 } 391 else { 392 insn->bits2.da1.src0_horiz_stride = reg.hstride; 393 insn->bits2.da1.src0_width = reg.width; 394 insn->bits2.da1.src0_vert_stride = reg.vstride; 395 } 396 } 397 else { 398 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); 399 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); 400 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); 401 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); 402 403 /* This is an oddity of the fact we're using the same 404 * descriptions for registers in align_16 as align_1: 405 */ 406 if (reg.vstride == BRW_VERTICAL_STRIDE_8) 407 insn->bits2.da16.src0_vert_stride = 
BRW_VERTICAL_STRIDE_4; 408 else 409 insn->bits2.da16.src0_vert_stride = reg.vstride; 410 } 411 } 412} 413 414 415void 416brw_set_src1(struct brw_compile *p, 417 struct brw_instruction *insn, 418 struct brw_reg reg) 419{ 420 assert(reg.file != BRW_MESSAGE_REGISTER_FILE); 421 422 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) 423 assert(reg.nr < 128); 424 425 gen7_convert_mrf_to_grf(p, ®); 426 427 validate_reg(insn, reg); 428 429 insn->bits1.da1.src1_reg_file = reg.file; 430 insn->bits1.da1.src1_reg_type = 431 brw_reg_type_to_hw_type(p->brw, reg.type, reg.file); 432 insn->bits3.da1.src1_abs = reg.abs; 433 insn->bits3.da1.src1_negate = reg.negate; 434 435 /* Only src1 can be immediate in two-argument instructions. 436 */ 437 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE); 438 439 if (reg.file == BRW_IMMEDIATE_VALUE) { 440 insn->bits3.ud = reg.dw1.ud; 441 } 442 else { 443 /* This is a hardware restriction, which may or may not be lifted 444 * in the future: 445 */ 446 assert (reg.address_mode == BRW_ADDRESS_DIRECT); 447 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */ 448 449 if (insn->header.access_mode == BRW_ALIGN_1) { 450 insn->bits3.da1.src1_subreg_nr = reg.subnr; 451 insn->bits3.da1.src1_reg_nr = reg.nr; 452 } 453 else { 454 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16; 455 insn->bits3.da16.src1_reg_nr = reg.nr; 456 } 457 458 if (insn->header.access_mode == BRW_ALIGN_1) { 459 if (reg.width == BRW_WIDTH_1 && 460 insn->header.execution_size == BRW_EXECUTE_1) { 461 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0; 462 insn->bits3.da1.src1_width = BRW_WIDTH_1; 463 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0; 464 } 465 else { 466 insn->bits3.da1.src1_horiz_stride = reg.hstride; 467 insn->bits3.da1.src1_width = reg.width; 468 insn->bits3.da1.src1_vert_stride = reg.vstride; 469 } 470 } 471 else { 472 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); 473 insn->bits3.da16.src1_swz_y = 
BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); 474 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); 475 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); 476 477 /* This is an oddity of the fact we're using the same 478 * descriptions for registers in align_16 as align_1: 479 */ 480 if (reg.vstride == BRW_VERTICAL_STRIDE_8) 481 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4; 482 else 483 insn->bits3.da16.src1_vert_stride = reg.vstride; 484 } 485 } 486} 487 488/** 489 * Set the Message Descriptor and Extended Message Descriptor fields 490 * for SEND messages. 491 * 492 * \note This zeroes out the Function Control bits, so it must be called 493 * \b before filling out any message-specific data. Callers can 494 * choose not to fill in irrelevant bits; they will be zero. 495 */ 496static void 497brw_set_message_descriptor(struct brw_compile *p, 498 struct brw_instruction *inst, 499 enum brw_message_target sfid, 500 unsigned msg_length, 501 unsigned response_length, 502 bool header_present, 503 bool end_of_thread) 504{ 505 struct brw_context *brw = p->brw; 506 507 brw_set_src1(p, inst, brw_imm_d(0)); 508 509 if (brw->gen >= 5) { 510 inst->bits3.generic_gen5.header_present = header_present; 511 inst->bits3.generic_gen5.response_length = response_length; 512 inst->bits3.generic_gen5.msg_length = msg_length; 513 inst->bits3.generic_gen5.end_of_thread = end_of_thread; 514 515 if (brw->gen >= 6) { 516 /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */ 517 inst->header.destreg__conditionalmod = sfid; 518 } else { 519 /* Set Extended Message Descriptor (ex_desc) */ 520 inst->bits2.send_gen5.sfid = sfid; 521 inst->bits2.send_gen5.end_of_thread = end_of_thread; 522 } 523 } else { 524 inst->bits3.generic.response_length = response_length; 525 inst->bits3.generic.msg_length = msg_length; 526 inst->bits3.generic.msg_target = sfid; 527 inst->bits3.generic.end_of_thread = end_of_thread; 528 } 
529} 530 531static void brw_set_math_message( struct brw_compile *p, 532 struct brw_instruction *insn, 533 unsigned function, 534 unsigned integer_type, 535 bool low_precision, 536 unsigned dataType ) 537{ 538 struct brw_context *brw = p->brw; 539 unsigned msg_length; 540 unsigned response_length; 541 542 /* Infer message length from the function */ 543 switch (function) { 544 case BRW_MATH_FUNCTION_POW: 545 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT: 546 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER: 547 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER: 548 msg_length = 2; 549 break; 550 default: 551 msg_length = 1; 552 break; 553 } 554 555 /* Infer response length from the function */ 556 switch (function) { 557 case BRW_MATH_FUNCTION_SINCOS: 558 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER: 559 response_length = 2; 560 break; 561 default: 562 response_length = 1; 563 break; 564 } 565 566 567 brw_set_message_descriptor(p, insn, BRW_SFID_MATH, 568 msg_length, response_length, false, false); 569 if (brw->gen == 5) { 570 insn->bits3.math_gen5.function = function; 571 insn->bits3.math_gen5.int_type = integer_type; 572 insn->bits3.math_gen5.precision = low_precision; 573 insn->bits3.math_gen5.saturate = insn->header.saturate; 574 insn->bits3.math_gen5.data_type = dataType; 575 insn->bits3.math_gen5.snapshot = 0; 576 } else { 577 insn->bits3.math.function = function; 578 insn->bits3.math.int_type = integer_type; 579 insn->bits3.math.precision = low_precision; 580 insn->bits3.math.saturate = insn->header.saturate; 581 insn->bits3.math.data_type = dataType; 582 } 583 insn->header.saturate = 0; 584} 585 586 587static void brw_set_ff_sync_message(struct brw_compile *p, 588 struct brw_instruction *insn, 589 bool allocate, 590 unsigned response_length, 591 bool end_of_thread) 592{ 593 brw_set_message_descriptor(p, insn, BRW_SFID_URB, 594 1, response_length, true, end_of_thread); 595 insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */ 596 insn->bits3.urb_gen5.offset = 0; /* 
Not used by FF_SYNC */ 597 insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */ 598 insn->bits3.urb_gen5.allocate = allocate; 599 insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */ 600 insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */ 601} 602 603static void brw_set_urb_message( struct brw_compile *p, 604 struct brw_instruction *insn, 605 enum brw_urb_write_flags flags, 606 unsigned msg_length, 607 unsigned response_length, 608 unsigned offset, 609 unsigned swizzle_control ) 610{ 611 struct brw_context *brw = p->brw; 612 613 brw_set_message_descriptor(p, insn, BRW_SFID_URB, 614 msg_length, response_length, true, 615 flags & BRW_URB_WRITE_EOT); 616 if (brw->gen == 7) { 617 if (flags & BRW_URB_WRITE_OWORD) { 618 assert(msg_length == 2); /* header + one OWORD of data */ 619 insn->bits3.urb_gen7.opcode = BRW_URB_OPCODE_WRITE_OWORD; 620 } else { 621 insn->bits3.urb_gen7.opcode = BRW_URB_OPCODE_WRITE_HWORD; 622 } 623 insn->bits3.urb_gen7.offset = offset; 624 assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE); 625 insn->bits3.urb_gen7.swizzle_control = swizzle_control; 626 insn->bits3.urb_gen7.per_slot_offset = 627 flags & BRW_URB_WRITE_PER_SLOT_OFFSET ? 1 : 0; 628 insn->bits3.urb_gen7.complete = flags & BRW_URB_WRITE_COMPLETE ? 1 : 0; 629 } else if (brw->gen >= 5) { 630 insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */ 631 insn->bits3.urb_gen5.offset = offset; 632 insn->bits3.urb_gen5.swizzle_control = swizzle_control; 633 insn->bits3.urb_gen5.allocate = flags & BRW_URB_WRITE_ALLOCATE ? 1 : 0; 634 insn->bits3.urb_gen5.used = flags & BRW_URB_WRITE_UNUSED ? 0 : 1; 635 insn->bits3.urb_gen5.complete = flags & BRW_URB_WRITE_COMPLETE ? 1 : 0; 636 } else { 637 insn->bits3.urb.opcode = 0; /* ? */ 638 insn->bits3.urb.offset = offset; 639 insn->bits3.urb.swizzle_control = swizzle_control; 640 insn->bits3.urb.allocate = flags & BRW_URB_WRITE_ALLOCATE ? 1 : 0; 641 insn->bits3.urb.used = flags & BRW_URB_WRITE_UNUSED ? 
0 : 1; 642 insn->bits3.urb.complete = flags & BRW_URB_WRITE_COMPLETE ? 1 : 0; 643 } 644} 645 646void 647brw_set_dp_write_message(struct brw_compile *p, 648 struct brw_instruction *insn, 649 unsigned binding_table_index, 650 unsigned msg_control, 651 unsigned msg_type, 652 unsigned msg_length, 653 bool header_present, 654 unsigned last_render_target, 655 unsigned response_length, 656 unsigned end_of_thread, 657 unsigned send_commit_msg) 658{ 659 struct brw_context *brw = p->brw; 660 unsigned sfid; 661 662 if (brw->gen >= 7) { 663 /* Use the Render Cache for RT writes; otherwise use the Data Cache */ 664 if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE) 665 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; 666 else 667 sfid = GEN7_SFID_DATAPORT_DATA_CACHE; 668 } else if (brw->gen == 6) { 669 /* Use the render cache for all write messages. */ 670 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; 671 } else { 672 sfid = BRW_SFID_DATAPORT_WRITE; 673 } 674 675 brw_set_message_descriptor(p, insn, sfid, msg_length, response_length, 676 header_present, end_of_thread); 677 678 if (brw->gen >= 7) { 679 insn->bits3.gen7_dp.binding_table_index = binding_table_index; 680 insn->bits3.gen7_dp.msg_control = msg_control; 681 insn->bits3.gen7_dp.last_render_target = last_render_target; 682 insn->bits3.gen7_dp.msg_type = msg_type; 683 } else if (brw->gen == 6) { 684 insn->bits3.gen6_dp.binding_table_index = binding_table_index; 685 insn->bits3.gen6_dp.msg_control = msg_control; 686 insn->bits3.gen6_dp.last_render_target = last_render_target; 687 insn->bits3.gen6_dp.msg_type = msg_type; 688 insn->bits3.gen6_dp.send_commit_msg = send_commit_msg; 689 } else if (brw->gen == 5) { 690 insn->bits3.dp_write_gen5.binding_table_index = binding_table_index; 691 insn->bits3.dp_write_gen5.msg_control = msg_control; 692 insn->bits3.dp_write_gen5.last_render_target = last_render_target; 693 insn->bits3.dp_write_gen5.msg_type = msg_type; 694 insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg; 695 
} else { 696 insn->bits3.dp_write.binding_table_index = binding_table_index; 697 insn->bits3.dp_write.msg_control = msg_control; 698 insn->bits3.dp_write.last_render_target = last_render_target; 699 insn->bits3.dp_write.msg_type = msg_type; 700 insn->bits3.dp_write.send_commit_msg = send_commit_msg; 701 } 702} 703 704void 705brw_set_dp_read_message(struct brw_compile *p, 706 struct brw_instruction *insn, 707 unsigned binding_table_index, 708 unsigned msg_control, 709 unsigned msg_type, 710 unsigned target_cache, 711 unsigned msg_length, 712 bool header_present, 713 unsigned response_length) 714{ 715 struct brw_context *brw = p->brw; 716 unsigned sfid; 717 718 if (brw->gen >= 7) { 719 sfid = GEN7_SFID_DATAPORT_DATA_CACHE; 720 } else if (brw->gen == 6) { 721 if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE) 722 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; 723 else 724 sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE; 725 } else { 726 sfid = BRW_SFID_DATAPORT_READ; 727 } 728 729 brw_set_message_descriptor(p, insn, sfid, msg_length, response_length, 730 header_present, false); 731 732 if (brw->gen >= 7) { 733 insn->bits3.gen7_dp.binding_table_index = binding_table_index; 734 insn->bits3.gen7_dp.msg_control = msg_control; 735 insn->bits3.gen7_dp.last_render_target = 0; 736 insn->bits3.gen7_dp.msg_type = msg_type; 737 } else if (brw->gen == 6) { 738 insn->bits3.gen6_dp.binding_table_index = binding_table_index; 739 insn->bits3.gen6_dp.msg_control = msg_control; 740 insn->bits3.gen6_dp.last_render_target = 0; 741 insn->bits3.gen6_dp.msg_type = msg_type; 742 insn->bits3.gen6_dp.send_commit_msg = 0; 743 } else if (brw->gen == 5) { 744 insn->bits3.dp_read_gen5.binding_table_index = binding_table_index; 745 insn->bits3.dp_read_gen5.msg_control = msg_control; 746 insn->bits3.dp_read_gen5.msg_type = msg_type; 747 insn->bits3.dp_read_gen5.target_cache = target_cache; 748 } else if (brw->is_g4x) { 749 insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/ 750 
insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/ 751 insn->bits3.dp_read_g4x.msg_type = msg_type; /*11:13*/ 752 insn->bits3.dp_read_g4x.target_cache = target_cache; /*14:15*/ 753 } else { 754 insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/ 755 insn->bits3.dp_read.msg_control = msg_control; /*8:11*/ 756 insn->bits3.dp_read.msg_type = msg_type; /*12:13*/ 757 insn->bits3.dp_read.target_cache = target_cache; /*14:15*/ 758 } 759} 760 761void 762brw_set_sampler_message(struct brw_compile *p, 763 struct brw_instruction *insn, 764 unsigned binding_table_index, 765 unsigned sampler, 766 unsigned msg_type, 767 unsigned response_length, 768 unsigned msg_length, 769 unsigned header_present, 770 unsigned simd_mode, 771 unsigned return_format) 772{ 773 struct brw_context *brw = p->brw; 774 775 brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, msg_length, 776 response_length, header_present, false); 777 778 if (brw->gen >= 7) { 779 insn->bits3.sampler_gen7.binding_table_index = binding_table_index; 780 insn->bits3.sampler_gen7.sampler = sampler; 781 insn->bits3.sampler_gen7.msg_type = msg_type; 782 insn->bits3.sampler_gen7.simd_mode = simd_mode; 783 } else if (brw->gen >= 5) { 784 insn->bits3.sampler_gen5.binding_table_index = binding_table_index; 785 insn->bits3.sampler_gen5.sampler = sampler; 786 insn->bits3.sampler_gen5.msg_type = msg_type; 787 insn->bits3.sampler_gen5.simd_mode = simd_mode; 788 } else if (brw->is_g4x) { 789 insn->bits3.sampler_g4x.binding_table_index = binding_table_index; 790 insn->bits3.sampler_g4x.sampler = sampler; 791 insn->bits3.sampler_g4x.msg_type = msg_type; 792 } else { 793 insn->bits3.sampler.binding_table_index = binding_table_index; 794 insn->bits3.sampler.sampler = sampler; 795 insn->bits3.sampler.msg_type = msg_type; 796 insn->bits3.sampler.return_format = return_format; 797 } 798} 799 800 801#define next_insn brw_next_insn 802struct brw_instruction * 803brw_next_insn(struct brw_compile *p, unsigned opcode) 
804{ 805 struct brw_instruction *insn; 806 807 if (p->nr_insn + 1 > p->store_size) { 808 if (0) { 809 fprintf(stderr, "incresing the store size to %d\n", 810 p->store_size << 1); 811 } 812 p->store_size <<= 1; 813 p->store = reralloc(p->mem_ctx, p->store, 814 struct brw_instruction, p->store_size); 815 if (!p->store) 816 assert(!"realloc eu store memeory failed"); 817 } 818 819 p->next_insn_offset += 16; 820 insn = &p->store[p->nr_insn++]; 821 memcpy(insn, p->current, sizeof(*insn)); 822 823 /* Reset this one-shot flag: 824 */ 825 826 if (p->current->header.destreg__conditionalmod) { 827 p->current->header.destreg__conditionalmod = 0; 828 p->current->header.predicate_control = BRW_PREDICATE_NORMAL; 829 } 830 831 insn->header.opcode = opcode; 832 return insn; 833} 834 835static struct brw_instruction *brw_alu1( struct brw_compile *p, 836 unsigned opcode, 837 struct brw_reg dest, 838 struct brw_reg src ) 839{ 840 struct brw_instruction *insn = next_insn(p, opcode); 841 brw_set_dest(p, insn, dest); 842 brw_set_src0(p, insn, src); 843 return insn; 844} 845 846static struct brw_instruction *brw_alu2(struct brw_compile *p, 847 unsigned opcode, 848 struct brw_reg dest, 849 struct brw_reg src0, 850 struct brw_reg src1 ) 851{ 852 struct brw_instruction *insn = next_insn(p, opcode); 853 brw_set_dest(p, insn, dest); 854 brw_set_src0(p, insn, src0); 855 brw_set_src1(p, insn, src1); 856 return insn; 857} 858 859static int 860get_3src_subreg_nr(struct brw_reg reg) 861{ 862 if (reg.vstride == BRW_VERTICAL_STRIDE_0) { 863 assert(brw_is_single_value_swizzle(reg.dw1.bits.swizzle)); 864 return reg.subnr / 4 + BRW_GET_SWZ(reg.dw1.bits.swizzle, 0); 865 } else { 866 return reg.subnr / 4; 867 } 868} 869 870static struct brw_instruction *brw_alu3(struct brw_compile *p, 871 unsigned opcode, 872 struct brw_reg dest, 873 struct brw_reg src0, 874 struct brw_reg src1, 875 struct brw_reg src2) 876{ 877 struct brw_context *brw = p->brw; 878 struct brw_instruction *insn = next_insn(p, opcode); 
879 880 gen7_convert_mrf_to_grf(p, &dest); 881 882 assert(insn->header.access_mode == BRW_ALIGN_16); 883 884 assert(dest.file == BRW_GENERAL_REGISTER_FILE || 885 dest.file == BRW_MESSAGE_REGISTER_FILE); 886 assert(dest.nr < 128); 887 assert(dest.address_mode == BRW_ADDRESS_DIRECT); 888 assert(dest.type == BRW_REGISTER_TYPE_F || 889 dest.type == BRW_REGISTER_TYPE_D || 890 dest.type == BRW_REGISTER_TYPE_UD); 891 insn->bits1.da3src.dest_reg_file = (dest.file == BRW_MESSAGE_REGISTER_FILE); 892 insn->bits1.da3src.dest_reg_nr = dest.nr; 893 insn->bits1.da3src.dest_subreg_nr = dest.subnr / 16; 894 insn->bits1.da3src.dest_writemask = dest.dw1.bits.writemask; 895 guess_execution_size(p, insn, dest); 896 897 assert(src0.file == BRW_GENERAL_REGISTER_FILE); 898 assert(src0.address_mode == BRW_ADDRESS_DIRECT); 899 assert(src0.nr < 128); 900 insn->bits2.da3src.src0_swizzle = src0.dw1.bits.swizzle; 901 insn->bits2.da3src.src0_subreg_nr = get_3src_subreg_nr(src0); 902 insn->bits2.da3src.src0_reg_nr = src0.nr; 903 insn->bits1.da3src.src0_abs = src0.abs; 904 insn->bits1.da3src.src0_negate = src0.negate; 905 insn->bits2.da3src.src0_rep_ctrl = src0.vstride == BRW_VERTICAL_STRIDE_0; 906 907 assert(src1.file == BRW_GENERAL_REGISTER_FILE); 908 assert(src1.address_mode == BRW_ADDRESS_DIRECT); 909 assert(src1.nr < 128); 910 insn->bits2.da3src.src1_swizzle = src1.dw1.bits.swizzle; 911 insn->bits2.da3src.src1_subreg_nr_low = get_3src_subreg_nr(src1) & 0x3; 912 insn->bits3.da3src.src1_subreg_nr_high = get_3src_subreg_nr(src1) >> 2; 913 insn->bits2.da3src.src1_rep_ctrl = src1.vstride == BRW_VERTICAL_STRIDE_0; 914 insn->bits3.da3src.src1_reg_nr = src1.nr; 915 insn->bits1.da3src.src1_abs = src1.abs; 916 insn->bits1.da3src.src1_negate = src1.negate; 917 918 assert(src2.file == BRW_GENERAL_REGISTER_FILE); 919 assert(src2.address_mode == BRW_ADDRESS_DIRECT); 920 assert(src2.nr < 128); 921 insn->bits3.da3src.src2_swizzle = src2.dw1.bits.swizzle; 922 insn->bits3.da3src.src2_subreg_nr = 
get_3src_subreg_nr(src2); 923 insn->bits3.da3src.src2_rep_ctrl = src2.vstride == BRW_VERTICAL_STRIDE_0; 924 insn->bits3.da3src.src2_reg_nr = src2.nr; 925 insn->bits1.da3src.src2_abs = src2.abs; 926 insn->bits1.da3src.src2_negate = src2.negate; 927 928 if (brw->gen >= 7) { 929 /* Set both the source and destination types based on dest.type, 930 * ignoring the source register types. The MAD and LRP emitters ensure 931 * that all four types are float. The BFE and BFI2 emitters, however, 932 * may send us mixed D and UD types and want us to ignore that and use 933 * the destination type. 934 */ 935 switch (dest.type) { 936 case BRW_REGISTER_TYPE_F: 937 insn->bits1.da3src.src_type = BRW_3SRC_TYPE_F; 938 insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_F; 939 break; 940 case BRW_REGISTER_TYPE_D: 941 insn->bits1.da3src.src_type = BRW_3SRC_TYPE_D; 942 insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_D; 943 break; 944 case BRW_REGISTER_TYPE_UD: 945 insn->bits1.da3src.src_type = BRW_3SRC_TYPE_UD; 946 insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_UD; 947 break; 948 } 949 } 950 951 return insn; 952} 953 954 955/*********************************************************************** 956 * Convenience routines. 
 */
/* ALU1: instantiate the public emitter brw_<OP>() for a one-source opcode,
 * forwarding to brw_alu1().
 */
#define ALU1(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0)			\
{							\
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);	\
}

/* ALU2: instantiate the public emitter brw_<OP>() for a two-source opcode,
 * forwarding to brw_alu2().
 */
#define ALU2(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1)			\
{							\
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
}

/* ALU3: instantiate the public emitter brw_<OP>() for a three-source opcode,
 * forwarding to brw_alu3().
 */
#define ALU3(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1,			\
	      struct brw_reg src2)			\
{							\
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);	\
}

/* ALU3F: like ALU3, but asserts that all four operands are float-typed
 * (used for MAD and LRP, whose gen7 type encoding in brw_alu3() relies
 * on this).
 */
#define ALU3F(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
                                 struct brw_reg dest,	\
                                 struct brw_reg src0,	\
                                 struct brw_reg src1,	\
                                 struct brw_reg src2)	\
{							\
   assert(dest.type == BRW_REGISTER_TYPE_F);		\
   assert(src0.type == BRW_REGISTER_TYPE_F);		\
   assert(src1.type == BRW_REGISTER_TYPE_F);		\
   assert(src2.type == BRW_REGISTER_TYPE_F);		\
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);	\
}

/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 *
 * Sandybridge and later appear to round correctly without an ADD.
 */
#define ROUND(OP)						\
void brw_##OP(struct brw_compile *p,				\
	      struct brw_reg dest,				\
	      struct brw_reg src)				\
{								\
   struct brw_instruction *rnd, *add;				\
   rnd = next_insn(p, BRW_OPCODE_##OP);				\
   brw_set_dest(p, rnd, dest);					\
   brw_set_src0(p, rnd, src);					\
								\
   if (p->brw->gen < 6) {					\
      /* turn on round-increments */				\
      rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R;	\
      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));		\
      add->header.predicate_control = BRW_PREDICATE_NORMAL;	\
   }								\
}


ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(ASR)
ALU1(F32TO16)
ALU1(F16TO32)
ALU1(FRC)
ALU1(RNDD)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
ALU2(PLN)
ALU3F(MAD)
ALU3F(LRP)
ALU1(BFREV)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)
ALU2(ADDC)
ALU2(SUBB)

ROUND(RNDZ)
ROUND(RNDE)


/* Emit an ADD, asserting the PRM-mandated restriction that a float (or
 * vector-float immediate) source may not be paired with a D/UD source.
 */
struct brw_instruction *brw_ADD(struct brw_compile *p,
				struct brw_reg dest,
				struct brw_reg src0,
				struct brw_reg src1)
{
   /* 6.2.2: add */
   if (src0.type == BRW_REGISTER_TYPE_F ||
       (src0.file == BRW_IMMEDIATE_VALUE &&
	src0.type == BRW_REGISTER_TYPE_VF)) {
      assert(src1.type != BRW_REGISTER_TYPE_UD);
      assert(src1.type != BRW_REGISTER_TYPE_D);
   }

   if (src1.type == BRW_REGISTER_TYPE_F ||
       (src1.file == BRW_IMMEDIATE_VALUE &&
	src1.type == BRW_REGISTER_TYPE_VF)) {
      assert(src0.type != BRW_REGISTER_TYPE_UD);
      assert(src0.type != BRW_REGISTER_TYPE_D);
   }

   return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
}

/* Emit an AVG; only integer types are accepted, and dest/src0/src1 must all
 * share one type.
 */
struct brw_instruction *brw_AVG(struct brw_compile *p,
                                struct brw_reg dest,
                                struct brw_reg src0,
                                struct brw_reg src1)
{
   assert(dest.type == src0.type);
   assert(src0.type == src1.type);
   switch (src0.type) {
   case BRW_REGISTER_TYPE_B:
   case BRW_REGISTER_TYPE_UB:
   case BRW_REGISTER_TYPE_W:
   case BRW_REGISTER_TYPE_UW:
   case BRW_REGISTER_TYPE_D:
   case BRW_REGISTER_TYPE_UD:
      break;
   default:
      assert(!"Bad type for brw_AVG");
   }

   return brw_alu2(p, BRW_OPCODE_AVG, dest, src0, src1);
}

/* Emit a MUL, asserting the PRM restrictions: no float dest with integer
 * sources, no float/integer source mixing, and neither source may be the
 * accumulator.
 */
struct brw_instruction *brw_MUL(struct brw_compile *p,
				struct brw_reg dest,
				struct brw_reg src0,
				struct brw_reg src1)
{
   /* 6.32.38: mul */
   if (src0.type == BRW_REGISTER_TYPE_D ||
       src0.type == BRW_REGISTER_TYPE_UD ||
       src1.type == BRW_REGISTER_TYPE_D ||
       src1.type == BRW_REGISTER_TYPE_UD) {
      assert(dest.type != BRW_REGISTER_TYPE_F);
   }

   if (src0.type == BRW_REGISTER_TYPE_F ||
       (src0.file == BRW_IMMEDIATE_VALUE &&
	src0.type == BRW_REGISTER_TYPE_VF)) {
      assert(src1.type != BRW_REGISTER_TYPE_UD);
      assert(src1.type != BRW_REGISTER_TYPE_D);
   }

   if (src1.type == BRW_REGISTER_TYPE_F ||
       (src1.file == BRW_IMMEDIATE_VALUE &&
	src1.type == BRW_REGISTER_TYPE_VF)) {
      assert(src0.type != BRW_REGISTER_TYPE_UD);
      assert(src0.type != BRW_REGISTER_TYPE_D);
   }

   assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
	  src0.nr != BRW_ARF_ACCUMULATOR);
   assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
	  src1.nr != BRW_ARF_ACCUMULATOR);

   return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
}

/* Emit a NOP.  Operands are encoded as g0/g0/imm 0. */
void brw_NOP(struct brw_compile *p)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
   brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
   brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
   brw_set_src1(p, insn, brw_imm_ud(0x0));
}





/***********************************************************************
 * Comparisons, if/else/endif
 */
1159 1160struct brw_instruction *brw_JMPI(struct brw_compile *p, 1161 struct brw_reg dest, 1162 struct brw_reg src0, 1163 struct brw_reg src1) 1164{ 1165 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); 1166 1167 insn->header.execution_size = 1; 1168 insn->header.compression_control = BRW_COMPRESSION_NONE; 1169 insn->header.mask_control = BRW_MASK_DISABLE; 1170 1171 p->current->header.predicate_control = BRW_PREDICATE_NONE; 1172 1173 return insn; 1174} 1175 1176static void 1177push_if_stack(struct brw_compile *p, struct brw_instruction *inst) 1178{ 1179 p->if_stack[p->if_stack_depth] = inst - p->store; 1180 1181 p->if_stack_depth++; 1182 if (p->if_stack_array_size <= p->if_stack_depth) { 1183 p->if_stack_array_size *= 2; 1184 p->if_stack = reralloc(p->mem_ctx, p->if_stack, int, 1185 p->if_stack_array_size); 1186 } 1187} 1188 1189static struct brw_instruction * 1190pop_if_stack(struct brw_compile *p) 1191{ 1192 p->if_stack_depth--; 1193 return &p->store[p->if_stack[p->if_stack_depth]]; 1194} 1195 1196static void 1197push_loop_stack(struct brw_compile *p, struct brw_instruction *inst) 1198{ 1199 if (p->loop_stack_array_size < p->loop_stack_depth) { 1200 p->loop_stack_array_size *= 2; 1201 p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int, 1202 p->loop_stack_array_size); 1203 p->if_depth_in_loop = reralloc(p->mem_ctx, p->if_depth_in_loop, int, 1204 p->loop_stack_array_size); 1205 } 1206 1207 p->loop_stack[p->loop_stack_depth] = inst - p->store; 1208 p->loop_stack_depth++; 1209 p->if_depth_in_loop[p->loop_stack_depth] = 0; 1210} 1211 1212static struct brw_instruction * 1213get_inner_do_insn(struct brw_compile *p) 1214{ 1215 return &p->store[p->loop_stack[p->loop_stack_depth - 1]]; 1216} 1217 1218/* EU takes the value from the flag register and pushes it onto some 1219 * sort of a stack (presumably merging with any flag value already on 1220 * the stack). 
 * Within an if block, the flags at the top of the stack
 * control execution on each channel of the unit, eg. on each of the
 * 16 pixel values in our wm programs.
 *
 * When the matching 'else' instruction is reached (presumably by
 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
 *
 * When the matching 'endif' instruction is reached, the flags are
 * popped off.  If the stack is now empty, normal execution resumes.
 */
/* Emit an IF with zeroed jump targets (patched later by patch_IF_ELSE())
 * and push it onto the if-stack.  Operand encoding differs per generation:
 * IP-relative pre-gen6, imm jump count on gen6, JIP/UIP fields on gen7+.
 */
struct brw_instruction *
brw_IF(struct brw_compile *p, unsigned execute_size)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* Override the defaults for this instruction:
    */
   if (brw->gen < 6) {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (brw->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
   } else {
      brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.execution_size = execute_size;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* The predicate was consumed by this IF; reset the default state. */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   push_if_stack(p, insn);
   p->if_depth_in_loop[p->loop_stack_depth]++;
   return insn;
}

/* This function is only used for gen6-style IF instructions with an
 * embedded comparison (conditional modifier).  It is not used on gen7.
 */
struct brw_instruction *
gen6_IF(struct brw_compile *p, uint32_t conditional,
	struct brw_reg src0, struct brw_reg src1)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   brw_set_dest(p, insn, brw_imm_w(0));
   if (p->compressed) {
      insn->header.execution_size = BRW_EXECUTE_16;
   } else {
      insn->header.execution_size = BRW_EXECUTE_8;
   }
   /* Jump count is patched later by patch_IF_ELSE(). */
   insn->bits1.branch_gen6.jump_count = 0;
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

   assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
   assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
   insn->header.destreg__conditionalmod = conditional;

   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
   return insn;
}

/**
 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
 */
static void
convert_IF_ELSE_to_ADD(struct brw_compile *p,
                       struct brw_instruction *if_inst,
                       struct brw_instruction *else_inst)
{
   /* The next instruction (where the ENDIF would be, if it existed) */
   struct brw_instruction *next_inst = &p->store[p->nr_insn];

   assert(p->single_program_flow);
   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
   assert(if_inst->header.execution_size == BRW_EXECUTE_1);

   /* Convert IF to an ADD instruction that moves the instruction pointer
    * to the first instruction of the ELSE block.  If there is no ELSE
    * block, point to where ENDIF would be.  Reverse the predicate.
    *
    * There's no need to execute an ENDIF since we don't need to do any
    * stack operations, and if we're currently executing, we just want to
    * continue normally.
    */
   if_inst->header.opcode = BRW_OPCODE_ADD;
   if_inst->header.predicate_inverse = 1;

   if (else_inst != NULL) {
      /* Convert ELSE to an ADD instruction that points where the ENDIF
       * would be.
       */
      else_inst->header.opcode = BRW_OPCODE_ADD;

      /* Distances are in bytes: 16 bytes per instruction. */
      if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
      else_inst->bits3.ud = (next_inst - else_inst) * 16;
   } else {
      if_inst->bits3.ud = (next_inst - if_inst) * 16;
   }
}

/**
 * Patch IF and ELSE instructions with appropriate jump targets.
 */
static void
patch_IF_ELSE(struct brw_compile *p,
              struct brw_instruction *if_inst,
              struct brw_instruction *else_inst,
              struct brw_instruction *endif_inst)
{
   struct brw_context *brw = p->brw;

   /* We shouldn't be patching IF and ELSE instructions in single program flow
    * mode when gen < 6, because in single program flow mode on those
    * platforms, we convert flow control instructions to conditional ADDs that
    * operate on IP (see brw_ENDIF).
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions.").  And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs.  So we do patch IF and ELSE
    * instructions in single program flow mode on those platforms.
    */
   if (brw->gen < 6)
      assert(!p->single_program_flow);

   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(endif_inst != NULL);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);

   unsigned br = 1;
   /* Jump count is for 64bit data chunk each, so one 128bit instruction
    * requires 2 chunks.
    */
   if (brw->gen >= 5)
      br = 2;

   assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
   endif_inst->header.execution_size = if_inst->header.execution_size;

   if (else_inst == NULL) {
      /* Patch IF -> ENDIF */
      if (brw->gen < 6) {
	 /* Turn it into an IFF, which means no mask stack operations for
	  * all-false and jumping past the ENDIF.
	  */
	 if_inst->header.opcode = BRW_OPCODE_IFF;
	 if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
	 if_inst->bits3.if_else.pop_count = 0;
	 if_inst->bits3.if_else.pad0 = 0;
      } else if (brw->gen == 6) {
	 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
	 if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
      } else {
	 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
	 if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
      }
   } else {
      else_inst->header.execution_size = if_inst->header.execution_size;

      /* Patch IF -> ELSE */
      if (brw->gen < 6) {
	 if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
	 if_inst->bits3.if_else.pop_count = 0;
	 if_inst->bits3.if_else.pad0 = 0;
      } else if (brw->gen == 6) {
	 if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
      }

      /* Patch ELSE -> ENDIF */
      if (brw->gen < 6) {
	 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
	  * matching ENDIF.
	  */
	 else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
	 else_inst->bits3.if_else.pop_count = 1;
	 else_inst->bits3.if_else.pad0 = 0;
      } else if (brw->gen == 6) {
	 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
	 else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
      } else {
	 /* The IF instruction's JIP should point just past the ELSE */
	 if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
	 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
	 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
	 else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
      }
   }
}

/* Emit an ELSE with zeroed jump targets (patched by patch_IF_ELSE() when
 * the matching ENDIF is emitted) and push it onto the if-stack.
 */
void
brw_ELSE(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_ELSE);

   if (brw->gen < 6) {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (brw->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
}

/* Emit an ENDIF (or, pre-gen6 in SPF mode, rewrite the pending IF/ELSE as
 * ADDs instead), then patch the whole IF/ELSE/ENDIF group's jump targets.
 */
void
brw_ENDIF(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = NULL;
   struct brw_instruction *else_inst = NULL;
   struct brw_instruction *if_inst = NULL;
   struct brw_instruction *tmp;
   bool emit_endif = true;

   /* In single program flow mode, we can express IF and ELSE instructions
    * equivalently as ADD instructions that operate on IP.  On platforms prior
    * to Gen6, flow control instructions cause an implied thread switch, so
    * this is a significant savings.
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions.").  And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs.  So we only do this trick on Gen4 and
    * Gen5.
    */
   if (brw->gen < 6 && p->single_program_flow)
      emit_endif = false;

   /*
    * A single next_insn() may change the base address of instruction store
    * memory (p->store), so call it first before referencing the instruction
    * store pointer from an index
    */
   if (emit_endif)
      insn = next_insn(p, BRW_OPCODE_ENDIF);

   /* Pop the IF and (optional) ELSE instructions from the stack */
   p->if_depth_in_loop[p->loop_stack_depth]--;
   tmp = pop_if_stack(p);
   if (tmp->header.opcode == BRW_OPCODE_ELSE) {
      else_inst = tmp;
      tmp = pop_if_stack(p);
   }
   if_inst = tmp;

   if (!emit_endif) {
      /* ENDIF is useless; don't bother emitting it. */
      convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
      return;
   }

   /* Per-generation operand encoding, mirroring brw_IF()/brw_ELSE(). */
   if (brw->gen < 6) {
      brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (brw->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Also pop item off the stack in the endif instruction: */
   if (brw->gen < 6) {
      insn->bits3.if_else.jump_count = 0;
      insn->bits3.if_else.pop_count = 1;
      insn->bits3.if_else.pad0 = 0;
   } else if (brw->gen == 6) {
      insn->bits1.branch_gen6.jump_count = 2;
   } else {
      insn->bits3.break_cont.jip = 2;
   }
   patch_IF_ELSE(p, if_inst, else_inst, insn);
}

/* Emit a BREAK.  Pre-gen6 it is IP-relative with a pop count equal to the
 * number of if-blocks open inside the current loop; jump counts are patched
 * later by brw_patch_break_cont() (pre-gen6) or brw_set_uip_jip() (gen6+).
 */
struct brw_instruction *brw_BREAK(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_BREAK);
   if (brw->gen >= 6) {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
      insn->bits3.if_else.pad0 = 0;
      insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;

   return insn;
}

/* Emit a gen6 CONTINUE.  Jump targets are filled in later. */
struct brw_instruction *gen6_CONT(struct brw_compile *p)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_CONTINUE);
   /* NOTE(review): the two null-reg set calls below are immediately
    * overwritten by the ip_reg calls that follow -- they look like dead
    * stores left over from an edit; confirm which operand encoding gen6
    * CONTINUE actually wants.
    */
   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   brw_set_dest(p, insn, brw_ip_reg());
   brw_set_src0(p, insn, brw_ip_reg());
   brw_set_src1(p, insn, brw_imm_d(0x0));

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;
   return insn;
}

/* Emit a pre-gen6 CONTINUE: IP-relative, with a pop count for the if-blocks
 * open inside the current loop; jump count patched by brw_patch_break_cont().
 */
struct brw_instruction *brw_CONT(struct brw_compile *p)
{
   struct brw_instruction *insn;
   insn = next_insn(p, BRW_OPCODE_CONTINUE);
   brw_set_dest(p, insn, brw_ip_reg());
   brw_set_src0(p, insn, brw_ip_reg());
   brw_set_src1(p, insn, brw_imm_d(0x0));
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;
   /* insn->header.mask_control = BRW_MASK_DISABLE; */
   insn->bits3.if_else.pad0 = 0;
   insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
   return insn;
}

/* Emit a gen6+ HALT with placeholder jump targets. */
struct brw_instruction *gen6_HALT(struct brw_compile *p)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_HALT);
   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */

   if (p->compressed) {
      insn->header.execution_size = BRW_EXECUTE_16;
   } else {
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = BRW_EXECUTE_8;
   }
   return insn;
}

/* DO/WHILE loop:
 *
 * The DO/WHILE is just an unterminated loop -- break or continue are
 * used for control within the loop.  We have a few ways they can be
 * done.
 *
 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
 * jip and no DO instruction.
 *
 * For non-uniform control flow pre-gen6, there's a DO instruction to
 * push the mask, and a WHILE to jump back, and BREAK to get out and
 * pop the mask.
 *
 * For gen6, there's no more mask stack, so no need for DO.  WHILE
 * just points back to the first instruction of the loop.
 */
struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size)
{
   struct brw_context *brw = p->brw;

   if (brw->gen >= 6 || p->single_program_flow) {
      /* No DO instruction is emitted; the loop head is simply the next
       * instruction slot, recorded on the loop stack.
       */
      push_loop_stack(p, &p->store[p->nr_insn]);
      return &p->store[p->nr_insn];
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);

      push_loop_stack(p, insn);

      /* Override the defaults for this instruction:
       */
      brw_set_dest(p, insn, brw_null_reg());
      brw_set_src0(p, insn, brw_null_reg());
      brw_set_src1(p, insn, brw_null_reg());

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = execute_size;
      insn->header.predicate_control = BRW_PREDICATE_NONE;
      /* insn->header.mask_control = BRW_MASK_ENABLE; */
      /* insn->header.mask_control = BRW_MASK_DISABLE; */

      return insn;
   }
}

/**
 * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
 * instruction here.
 *
 * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
 * nesting, since it can always just point to the end of the block/current loop.
 */
static void
brw_patch_break_cont(struct brw_compile *p, struct brw_instruction *while_inst)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *do_inst = get_inner_do_insn(p);
   struct brw_instruction *inst;
   /* Jump counts are in 64-bit chunks: 2 per instruction on gen5, 1 before. */
   int br = (brw->gen == 5) ? 2 : 1;

   /* Walk backwards from the WHILE to the matching DO, patching BREAK and
    * CONTINUE targets that are still unset.
    */
   for (inst = while_inst - 1; inst != do_inst; inst--) {
      /* If the jump count is != 0, that means that this instruction has already
       * been patched because it's part of a loop inside of the one we're
       * patching.
       */
      if (inst->header.opcode == BRW_OPCODE_BREAK &&
	  inst->bits3.if_else.jump_count == 0) {
	 inst->bits3.if_else.jump_count = br * ((while_inst - inst) + 1);
      } else if (inst->header.opcode == BRW_OPCODE_CONTINUE &&
		 inst->bits3.if_else.jump_count == 0) {
	 inst->bits3.if_else.jump_count = br * (while_inst - inst);
      }
   }
}

/* Emit the WHILE that closes the innermost DO block and pop the loop stack.
 * Encoding differs per generation: JIP on gen7+, imm jump count on gen6,
 * IP-relative (or a plain ADD to IP in SPF mode) before that.
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn, *do_insn;
   unsigned br = 1;

   if (brw->gen >= 5)
      br = 2;

   if (brw->gen >= 7) {
      insn = next_insn(p, BRW_OPCODE_WHILE);
      do_insn = get_inner_do_insn(p);

      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      /* Backwards branch: negative chunk count back to the loop head. */
      insn->bits3.break_cont.jip = br * (do_insn - insn);

      insn->header.execution_size = BRW_EXECUTE_8;
   } else if (brw->gen == 6) {
      insn = next_insn(p, BRW_OPCODE_WHILE);
      do_insn = get_inner_do_insn(p);

      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));

      insn->header.execution_size = BRW_EXECUTE_8;
   } else {
      if (p->single_program_flow) {
	 /* SPF: the loop back-edge is just an ADD to IP (16 bytes/insn). */
	 insn = next_insn(p, BRW_OPCODE_ADD);
	 do_insn = get_inner_do_insn(p);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
	 insn->header.execution_size = BRW_EXECUTE_1;
      } else {
	 insn = next_insn(p, BRW_OPCODE_WHILE);
	 do_insn = get_inner_do_insn(p);

	 assert(do_insn->header.opcode == BRW_OPCODE_DO);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d(0));

	 insn->header.execution_size = do_insn->header.execution_size;
	 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
	 insn->bits3.if_else.pop_count = 0;
	 insn->bits3.if_else.pad0 = 0;

	 brw_patch_break_cont(p, insn);
      }
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   p->loop_stack_depth--;

   return insn;
}


/* FORWARD JUMPS:
 */
/* Patch the immediate of a previously emitted JMPI (at store index
 * jmp_insn_idx) so it lands on the current end of the program.
 */
void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *jmp_insn = &p->store[jmp_insn_idx];
   /* Jump distances are in 64-bit chunks: 2 per instruction on gen5+. */
   unsigned jmpi = 1;

   if (brw->gen >= 5)
      jmpi = 2;

   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
   assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);

   jmp_insn->bits3.ud = jmpi * (p->nr_insn - jmp_insn_idx - 1);
}



/* To integrate with the above, it makes sense that the comparison
 * instruction should populate the flag register.  It might be simpler
 * just to use the flag reg for most WM tasks
1779 */ 1780void brw_CMP(struct brw_compile *p, 1781 struct brw_reg dest, 1782 unsigned conditional, 1783 struct brw_reg src0, 1784 struct brw_reg src1) 1785{ 1786 struct brw_context *brw = p->brw; 1787 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP); 1788 1789 insn->header.destreg__conditionalmod = conditional; 1790 brw_set_dest(p, insn, dest); 1791 brw_set_src0(p, insn, src0); 1792 brw_set_src1(p, insn, src1); 1793 1794/* guess_execution_size(insn, src0); */ 1795 1796 1797 /* Make it so that future instructions will use the computed flag 1798 * value until brw_set_predicate_control_flag_value() is called 1799 * again. 1800 */ 1801 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE && 1802 dest.nr == 0) { 1803 p->current->header.predicate_control = BRW_PREDICATE_NORMAL; 1804 p->flag_value = 0xff; 1805 } 1806 1807 /* Item WaCMPInstNullDstForcesThreadSwitch in the Haswell Bspec workarounds 1808 * page says: 1809 * "Any CMP instruction with a null destination must use a {switch}." 1810 * 1811 * It also applies to other Gen7 platforms (IVB, BYT) even though it isn't 1812 * mentioned on their work-arounds pages. 1813 */ 1814 if (brw->gen == 7) { 1815 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE && 1816 dest.nr == BRW_ARF_NULL) { 1817 insn->header.thread_control = BRW_THREAD_SWITCH; 1818 } 1819 } 1820} 1821 1822/* Issue 'wait' instruction for n1, host could program MMIO 1823 to wake up thread. 
 */
void brw_WAIT (struct brw_compile *p)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
   struct brw_reg src = brw_notification_1_reg();

   /* WAIT uses the notification register as both source and destination. */
   brw_set_dest(p, insn, src);
   brw_set_src0(p, insn, src);
   brw_set_src1(p, insn, brw_null_reg());
   /* Force scalar, unpredicated, uncompressed execution for the wait. */
   insn->header.execution_size = 0; /* must */
   insn->header.predicate_control = 0;
   insn->header.compression_control = 0;
}


/***********************************************************************
 * Helpers for the various SEND message types:
 */

/** Extended math function, float[8].
 *
 * On Gen6+ this emits the native MATH opcode; on earlier generations the
 * extended-math shared function is reached via a SEND message, using
 * msg_reg_nr / data_type / precision (which the Gen6+ path ignores).
 */
void brw_math( struct brw_compile *p,
               struct brw_reg dest,
               unsigned function,
               unsigned msg_reg_nr,
               struct brw_reg src,
               unsigned data_type,
               unsigned precision )
{
   struct brw_context *brw = p->brw;

   if (brw->gen >= 6) {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

      assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
             (brw->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
      assert(src.file == BRW_GENERAL_REGISTER_FILE);

      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
      if (brw->gen == 6)
         assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);

      /* Source modifiers are ignored for extended math instructions on Gen6. */
      if (brw->gen == 6) {
         assert(!src.negate);
         assert(!src.abs);
      }

      /* Integer-divide variants require integer sources; all other math
       * functions operate on floats.
       */
      if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
          function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
          function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
         assert(src.type != BRW_REGISTER_TYPE_F);
      } else {
         assert(src.type == BRW_REGISTER_TYPE_F);
      }

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      /* Example code doesn't set predicate_control for send
       * instructions.
       */
      insn->header.predicate_control = 0;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_math_message(p,
                           insn,
                           function,
                           src.type == BRW_REGISTER_TYPE_D,
                           precision,
                           data_type);
   }
}

/** Extended math function, float[8].
 *
 * Two-source (Gen6+ only) variant: emits the native MATH opcode directly.
 */
void brw_math2(struct brw_compile *p,
               struct brw_reg dest,
               unsigned function,
               struct brw_reg src0,
               struct brw_reg src1)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

   assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
          (brw->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.file == BRW_GENERAL_REGISTER_FILE);

   assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
   if (brw->gen == 6) {
      assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
      assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
   }

   /* Integer-divide variants require integer sources; everything else
    * operates on floats.
    */
   if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
       function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
       function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
      assert(src0.type != BRW_REGISTER_TYPE_F);
      assert(src1.type != BRW_REGISTER_TYPE_F);
   } else {
      assert(src0.type == BRW_REGISTER_TYPE_F);
      assert(src1.type == BRW_REGISTER_TYPE_F);
   }

   /* Source modifiers are ignored for extended math instructions on Gen6.
    */
   if (brw->gen == 6) {
      assert(!src0.negate);
      assert(!src0.abs);
      assert(!src1.negate);
      assert(!src1.abs);
   }

   /* Math is the same ISA format as other opcodes, except that CondModifier
    * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
    */
   insn->header.destreg__conditionalmod = function;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
}


/**
 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
 * using a constant offset per channel.
 *
 * The offset must be aligned to oword size (16 bytes).  Used for
 * register spilling.
 */
void brw_oword_block_write_scratch(struct brw_compile *p,
                                   struct brw_reg mrf,
                                   int num_regs,
                                   unsigned offset)
{
   struct brw_context *brw = p->brw;
   uint32_t msg_control, msg_type;
   int mlen;

   /* On Gen6+ the message offset is in owords rather than bytes. */
   if (brw->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* mlen includes the header register in addition to the data payload. */
   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      mlen = 2;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      mlen = 3;
   }

   /* Set up the message header.  This is g0, with g0.2 filled with
    * the offset.  We don't want to leave our offset around in g0 or
    * it'll screw up texture samples, so set it up inside the message
    * reg.
    */
   {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
              retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                                  mrf.nr,
                                  2), BRW_REGISTER_TYPE_UD),
              brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_reg dest;
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      int send_commit_msg;
      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
                                         BRW_REGISTER_TYPE_UW);

      if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
         insn->header.compression_control = BRW_COMPRESSION_NONE;
         src_header = vec16(src_header);
      }
      assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
      insn->header.destreg__conditionalmod = mrf.nr;

      /* Until gen6, writes followed by reads from the same location
       * are not guaranteed to be ordered unless write_commit is set.
       * If set, then a no-op write is issued to the destination
       * register to set a dependency, and a read from the destination
       * can be used to ensure the ordering.
       *
       * For gen6, only writes between different threads need ordering
       * protection.  Our use of DP writes is all about register
       * spilling within a thread.
       */
      if (brw->gen >= 6) {
         dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
         send_commit_msg = 0;
      } else {
         dest = src_header;
         send_commit_msg = 1;
      }

      brw_set_dest(p, insn, dest);
      if (brw->gen >= 6) {
         brw_set_src0(p, insn, mrf);
      } else {
         brw_set_src0(p, insn, brw_null_reg());
      }

      if (brw->gen >= 6)
         msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
      else
         msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;

      brw_set_dp_write_message(p,
                               insn,
                               255, /* binding table index (255=stateless) */
                               msg_control,
                               msg_type,
                               mlen,
                               true, /* header_present */
                               0, /* not a render target */
                               send_commit_msg, /* response_length */
                               0, /* eot */
                               send_commit_msg);
   }
}


/**
 * Read a block of owords (half a GRF each) from the scratch buffer
 * using a constant index per channel.
 *
 * Offset must be aligned to oword size (16 bytes).  Used for register
 * spilling.
 */
void
brw_oword_block_read_scratch(struct brw_compile *p,
                             struct brw_reg dest,
                             struct brw_reg mrf,
                             int num_regs,
                             unsigned offset)
{
   struct brw_context *brw = p->brw;
   uint32_t msg_control;
   int rlen;

   /* On Gen6+ the message offset is in owords rather than bytes. */
   if (brw->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
   dest = retype(dest, BRW_REGISTER_TYPE_UW);

   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      rlen = 1;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      rlen = 2;
   }

   /* Build the message header in the MRF (g0 copy + scratch offset in
    * element 2), mirroring brw_oword_block_write_scratch().
    */
   {
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_mask_control(p, BRW_MASK_DISABLE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
              retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                                  mrf.nr,
                                  2), BRW_REGISTER_TYPE_UD),
              brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      assert(insn->header.predicate_control == 0);
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.destreg__conditionalmod = mrf.nr;

      brw_set_dest(p, insn, dest);	/* UW? */
      if (brw->gen >= 6) {
         brw_set_src0(p, insn, mrf);
      } else {
         brw_set_src0(p, insn, brw_null_reg());
      }

      brw_set_dp_read_message(p,
                              insn,
                              255, /* binding table index (255=stateless) */
                              msg_control,
                              BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
                              BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
                              1, /* msg_length */
                              true, /* header_present */
                              rlen);
   }
}

void
gen7_block_read_scratch(struct brw_compile *p,
                        struct brw_reg dest,
                        int num_regs,
                        unsigned offset)
{
   dest = retype(dest, BRW_REGISTER_TYPE_UW);

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

   assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
   insn->header.compression_control = BRW_COMPRESSION_NONE;

   brw_set_dest(p, insn, dest);

   /* The HW requires that the header is present; this is to get the g0.5
    * scratch offset.
    */
   bool header_present = true;
   brw_set_src0(p, insn, brw_vec8_grf(0, 0));

   brw_set_message_descriptor(p, insn,
                              GEN7_SFID_DATAPORT_DATA_CACHE,
                              1, /* mlen: just g0 */
                              num_regs,
                              header_present,
                              false);

   insn->bits3.ud |= GEN7_DATAPORT_SCRATCH_READ;

   assert(num_regs == 1 || num_regs == 2 || num_regs == 4);
   insn->bits3.ud |= (num_regs - 1) << GEN7_DATAPORT_SCRATCH_NUM_REGS_SHIFT;

   /* According to the docs, offset is "A 12-bit HWord offset into the memory
    * Immediate Memory buffer as specified by binding table 0xFF."  An HWORD
    * is 32 bytes, which happens to be the size of a register.
    */
   offset /= REG_SIZE;
   assert(offset < (1 << 12));
   insn->bits3.ud |= offset;
}

/**
 * Read a float[4] vector from the data port Data Cache (const buffer).
 * Location (in buffer) should be a multiple of 16.
 * Used for fetching shader constants.
 */
void brw_oword_block_read(struct brw_compile *p,
                          struct brw_reg dest,
                          struct brw_reg mrf,
                          uint32_t offset,
                          uint32_t bind_table_index)
{
   struct brw_context *brw = p->brw;

   /* On newer hardware, offset is in units of owords. */
   if (brw->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);

   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

   /* set message header global offset field (reg 0, element 2) */
   brw_MOV(p,
           retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                               mrf.nr,
                               2), BRW_REGISTER_TYPE_UD),
           brw_imm_ud(offset));

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = mrf.nr;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(p, insn, dest);
   if (brw->gen >= 6) {
      brw_set_src0(p, insn, mrf);
   } else {
      brw_set_src0(p, insn, brw_null_reg());
   }

   brw_set_dp_read_message(p,
                           insn,
                           bind_table_index,
                           BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
                           BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
                           BRW_DATAPORT_READ_TARGET_DATA_CACHE,
                           1, /* msg_length */
                           true, /* header_present */
                           1); /* response_length (1 reg, 2 owords!) */

   brw_pop_insn_state(p);
}


void brw_fb_WRITE(struct brw_compile *p,
                  int dispatch_width,
                  unsigned msg_reg_nr,
                  struct brw_reg src0,
                  unsigned msg_control,
                  unsigned binding_table_index,
                  unsigned msg_length,
                  unsigned response_length,
                  bool eot,
                  bool header_present)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;
   unsigned msg_type;
   struct brw_reg dest;

   if (dispatch_width == 16)
      dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
   else
      dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);

   /* Gen6+ uses SENDC (send with dependency check) for render target
    * writes.
    */
   if (brw->gen >= 6) {
      insn = next_insn(p, BRW_OPCODE_SENDC);
   } else {
      insn = next_insn(p, BRW_OPCODE_SEND);
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;

   if (brw->gen >= 6) {
      /* headerless version, just submit color payload */
      src0 = brw_message_reg(msg_reg_nr);

      msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   } else {
      insn->header.destreg__conditionalmod = msg_reg_nr;

      msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   }

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_dp_write_message(p,
                            insn,
                            binding_table_index,
                            msg_control,
                            msg_type,
                            msg_length,
                            header_present,
                            eot, /* last render target write */
                            response_length,
                            eot,
                            0 /* send_commit_msg */);
}


/**
 * Texture sample instruction.
 * Note: the msg_type plus msg_length values determine exactly what kind
 * of sampling operation is performed.  See volume 4, page 161 of docs.
 */
void brw_SAMPLE(struct brw_compile *p,
                struct brw_reg dest,
                unsigned msg_reg_nr,
                struct brw_reg src0,
                unsigned binding_table_index,
                unsigned sampler,
                unsigned msg_type,
                unsigned response_length,
                unsigned msg_length,
                unsigned header_present,
                unsigned simd_mode,
                unsigned return_format)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   /* NOTE(review): msg_reg_nr is unsigned, so this compares against
    * UINT_MAX; callers appear to pass -1 to skip the implied move —
    * confirm against call sites.
    */
   if (msg_reg_nr != -1)
      gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.predicate_control = 0; /* XXX */

   /* From the 965 PRM (volume 4, part 1, section 14.2.41):
    *
    *    "Instruction compression is not allowed for this instruction (that
    *     is, send). The hardware behavior is undefined if this instruction is
    *     set as compressed. However, compress control can be set to "SecHalf"
    *     to affect the EMask generation."
    *
    * No similar wording is found in later PRMs, but there are examples
    * utilizing send with SecHalf.  More importantly, SIMD8 sampler messages
    * are allowed in SIMD16 mode and they could not work without SecHalf.  For
    * these reasons, we allow BRW_COMPRESSION_2NDHALF here.
    */
   if (insn->header.compression_control != BRW_COMPRESSION_2NDHALF)
      insn->header.compression_control = BRW_COMPRESSION_NONE;

   if (brw->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_sampler_message(p, insn,
                           binding_table_index,
                           sampler,
                           msg_type,
                           response_length,
                           msg_length,
                           header_present,
                           simd_mode,
                           return_format);
}

/* All these variables are pretty confusing - we might be better off
 * using bitmasks and macros for this, in the old style.  Or perhaps
 * just having the caller instantiate the fields in dword3 itself.
2361 */ 2362void brw_urb_WRITE(struct brw_compile *p, 2363 struct brw_reg dest, 2364 unsigned msg_reg_nr, 2365 struct brw_reg src0, 2366 enum brw_urb_write_flags flags, 2367 unsigned msg_length, 2368 unsigned response_length, 2369 unsigned offset, 2370 unsigned swizzle) 2371{ 2372 struct brw_context *brw = p->brw; 2373 struct brw_instruction *insn; 2374 2375 gen6_resolve_implied_move(p, &src0, msg_reg_nr); 2376 2377 if (brw->gen == 7 && !(flags & BRW_URB_WRITE_USE_CHANNEL_MASKS)) { 2378 /* Enable Channel Masks in the URB_WRITE_HWORD message header */ 2379 brw_push_insn_state(p); 2380 brw_set_access_mode(p, BRW_ALIGN_1); 2381 brw_set_mask_control(p, BRW_MASK_DISABLE); 2382 brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5), 2383 BRW_REGISTER_TYPE_UD), 2384 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD), 2385 brw_imm_ud(0xff00)); 2386 brw_pop_insn_state(p); 2387 } 2388 2389 insn = next_insn(p, BRW_OPCODE_SEND); 2390 2391 assert(msg_length < BRW_MAX_MRF); 2392 2393 brw_set_dest(p, insn, dest); 2394 brw_set_src0(p, insn, src0); 2395 brw_set_src1(p, insn, brw_imm_d(0)); 2396 2397 if (brw->gen < 6) 2398 insn->header.destreg__conditionalmod = msg_reg_nr; 2399 2400 brw_set_urb_message(p, 2401 insn, 2402 flags, 2403 msg_length, 2404 response_length, 2405 offset, 2406 swizzle); 2407} 2408 2409static int 2410brw_find_next_block_end(struct brw_compile *p, int start_offset) 2411{ 2412 int offset; 2413 void *store = p->store; 2414 2415 for (offset = next_offset(store, start_offset); offset < p->next_insn_offset; 2416 offset = next_offset(store, offset)) { 2417 struct brw_instruction *insn = store + offset; 2418 2419 switch (insn->header.opcode) { 2420 case BRW_OPCODE_ENDIF: 2421 case BRW_OPCODE_ELSE: 2422 case BRW_OPCODE_WHILE: 2423 case BRW_OPCODE_HALT: 2424 return offset; 2425 } 2426 } 2427 2428 return 0; 2429} 2430 2431/* There is no DO instruction on gen6, so to find the end of the loop 2432 * we have to see if the loop is jumping back before our start 
 * instruction.
 */
static int
brw_find_loop_end(struct brw_compile *p, int start_offset)
{
   struct brw_context *brw = p->brw;
   int offset;
   int scale = 8;  /* bytes per (uncompacted) instruction */
   void *store = p->store;

   /* Always start after the instruction (such as a WHILE) we're trying to fix
    * up.
    */
   for (offset = next_offset(store, start_offset); offset < p->next_insn_offset;
        offset = next_offset(store, offset)) {
      struct brw_instruction *insn = store + offset;

      if (insn->header.opcode == BRW_OPCODE_WHILE) {
         /* Gen6 encodes the backward jump in jump_count; Gen7+ in JIP. */
         int jip = brw->gen == 6 ? insn->bits1.branch_gen6.jump_count
                                 : insn->bits3.break_cont.jip;
         /* A WHILE whose jump lands at or before our start belongs to the
          * loop enclosing start_offset.
          */
         if (offset + jip * scale <= start_offset)
            return offset;
      }
   }
   assert(!"not reached");
   return start_offset;
}

/* After program generation, go back and update the UIP and JIP of
 * BREAK, CONT, and HALT instructions to their correct locations.
 */
void
brw_set_uip_jip(struct brw_compile *p)
{
   struct brw_context *brw = p->brw;
   int offset;
   int scale = 8;  /* bytes per (uncompacted) instruction */
   void *store = p->store;

   /* Pre-Gen6 control flow uses jump counts patched elsewhere. */
   if (brw->gen < 6)
      return;

   for (offset = 0; offset < p->next_insn_offset;
        offset = next_offset(store, offset)) {
      struct brw_instruction *insn = store + offset;

      if (insn->header.cmpt_control) {
         /* Fixups for compacted BREAK/CONTINUE not supported yet. */
         assert(insn->header.opcode != BRW_OPCODE_BREAK &&
                insn->header.opcode != BRW_OPCODE_CONTINUE &&
                insn->header.opcode != BRW_OPCODE_HALT);
         continue;
      }

      int block_end_offset = brw_find_next_block_end(p, offset);
      switch (insn->header.opcode) {
      case BRW_OPCODE_BREAK:
         assert(block_end_offset != 0);
         insn->bits3.break_cont.jip = (block_end_offset - offset) / scale;
         /* Gen7 UIP points to WHILE; Gen6 points just after it */
         insn->bits3.break_cont.uip =
            (brw_find_loop_end(p, offset) - offset +
             (brw->gen == 6 ? 16 : 0)) / scale;
         break;
      case BRW_OPCODE_CONTINUE:
         assert(block_end_offset != 0);
         insn->bits3.break_cont.jip = (block_end_offset - offset) / scale;
         insn->bits3.break_cont.uip =
            (brw_find_loop_end(p, offset) - offset) / scale;

         assert(insn->bits3.break_cont.uip != 0);
         assert(insn->bits3.break_cont.jip != 0);
         break;

      case BRW_OPCODE_ENDIF:
         /* With no following block terminator, jump to the next
          * instruction (one instruction = 2 half-instruction units).
          */
         if (block_end_offset == 0)
            insn->bits3.break_cont.jip = 2;
         else
            insn->bits3.break_cont.jip = (block_end_offset - offset) / scale;
         break;

      case BRW_OPCODE_HALT:
         /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
          *
          *    "In case of the halt instruction not inside any conditional
          *     code block, the value of <JIP> and <UIP> should be the
          *     same. In case of the halt instruction inside conditional code
          *     block, the <UIP> should be the end of the program, and the
          *     <JIP> should be end of the most inner conditional code block."
          *
          * The uip will have already been set by whoever set up the
          * instruction.
          */
         if (block_end_offset == 0) {
            insn->bits3.break_cont.jip = insn->bits3.break_cont.uip;
         } else {
            insn->bits3.break_cont.jip = (block_end_offset - offset) / scale;
         }
         assert(insn->bits3.break_cont.uip != 0);
         assert(insn->bits3.break_cont.jip != 0);
         break;
      }
   }
}

void brw_ff_sync(struct brw_compile *p,
                 struct brw_reg dest,
                 unsigned msg_reg_nr,
                 struct brw_reg src0,
                 bool allocate,
                 unsigned response_length,
                 bool eot)
{
   struct brw_context *brw = p->brw;
   struct brw_instruction *insn;

   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   insn = next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));

   if (brw->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_ff_sync_message(p,
                           insn,
                           allocate,
                           response_length,
                           eot);
}

/**
 * Emit the SEND instruction necessary to generate stream output data on Gen6
 * (for transform feedback).
 *
 * If send_commit_msg is true, this is the last piece of stream output data
 * from this thread, so send the data as a committed write.  According to the
 * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
 *
 *   "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
 *    writes are complete by sending the final write as a committed write."
 */
void
brw_svb_write(struct brw_compile *p,
              struct brw_reg dest,
              unsigned msg_reg_nr,
              struct brw_reg src0,
              unsigned binding_table_index,
              bool send_commit_msg)
{
   struct brw_instruction *insn;

   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   insn = next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));
   brw_set_dp_write_message(p, insn,
                            binding_table_index,
                            0, /* msg_control: ignored */
                            GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE,
                            1, /* msg_length */
                            true, /* header_present */
                            0, /* last_render_target: ignored */
                            send_commit_msg, /* response_length */
                            0, /* end_of_thread */
                            send_commit_msg); /* send_commit_msg */
}

/**
 * Fill in the message descriptor for an untyped atomic operation.
 *
 * Haswell routes these through Data Cache data port 1; Ivybridge/Baytrail
 * use the legacy data cache SFID.  The SIMD-mode and atomic-op bits are
 * packed directly into bits3 of the instruction.
 */
static void
brw_set_dp_untyped_atomic_message(struct brw_compile *p,
                                  struct brw_instruction *insn,
                                  unsigned atomic_op,
                                  unsigned bind_table_index,
                                  unsigned msg_length,
                                  unsigned response_length,
                                  bool header_present)
{
   if (p->brw->is_haswell) {
      brw_set_message_descriptor(p, insn, HSW_SFID_DATAPORT_DATA_CACHE_1,
                                 msg_length, response_length,
                                 header_present, false);


      if (insn->header.access_mode == BRW_ALIGN_1) {
         if (insn->header.execution_size != BRW_EXECUTE_16)
            insn->bits3.ud |= 1 << 12; /* SIMD8 mode */

         insn->bits3.gen7_dp.msg_type =
            HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP;
      } else {
         insn->bits3.gen7_dp.msg_type =
            HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2;
      }

   } else {
      brw_set_message_descriptor(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
                                 msg_length, response_length,
                                 header_present, false);

      insn->bits3.gen7_dp.msg_type = GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP;

      if (insn->header.execution_size != BRW_EXECUTE_16)
         insn->bits3.ud |= 1 << 12; /* SIMD8 mode */
   }

   if (response_length)
      insn->bits3.ud |= 1 << 13; /* Return data expected */

   insn->bits3.gen7_dp.binding_table_index = bind_table_index;
   insn->bits3.ud |= atomic_op << 8;
}

void
brw_untyped_atomic(struct brw_compile *p,
                   struct brw_reg dest,
                   struct brw_reg mrf,
                   unsigned atomic_op,
                   unsigned bind_table_index,
                   unsigned msg_length,
                   unsigned response_length) {
   struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);

   brw_set_dest(p, insn, retype(dest, BRW_REGISTER_TYPE_UD));
   brw_set_src0(p, insn, retype(mrf, BRW_REGISTER_TYPE_UD));
   brw_set_src1(p, insn, brw_imm_d(0));
   /* Align1 messages need the header to provide the execution mask. */
   brw_set_dp_untyped_atomic_message(
      p, insn, atomic_op, bind_table_index, msg_length, response_length,
      insn->header.access_mode == BRW_ALIGN_1);
}

/**
 * Fill in the message descriptor for an untyped surface read, including
 * the SIMD-mode bits and the channel mask derived from response_length.
 */
static void
brw_set_dp_untyped_surface_read_message(struct brw_compile *p,
                                        struct brw_instruction *insn,
                                        unsigned bind_table_index,
                                        unsigned msg_length,
                                        unsigned response_length,
                                        bool header_present)
{
   const unsigned dispatch_width =
      (insn->header.execution_size == BRW_EXECUTE_16 ? 16 : 8);
   /* Each enabled 32-bit channel costs one register per 8 channels. */
   const unsigned num_channels = response_length / (dispatch_width / 8);

   if (p->brw->is_haswell) {
      brw_set_message_descriptor(p, insn, HSW_SFID_DATAPORT_DATA_CACHE_1,
                                 msg_length, response_length,
                                 header_present, false);

      insn->bits3.gen7_dp.msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ;
   } else {
      brw_set_message_descriptor(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
                                 msg_length, response_length,
                                 header_present, false);

      insn->bits3.gen7_dp.msg_type = GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ;
   }

   if (insn->header.access_mode == BRW_ALIGN_1) {
      if (dispatch_width == 16)
         insn->bits3.ud |= 1 << 12; /* SIMD16 mode */
      else
         insn->bits3.ud |= 2 << 12; /* SIMD8 mode */
   }

   insn->bits3.gen7_dp.binding_table_index = bind_table_index;

   /* Set mask of 32-bit channels to drop. */
   insn->bits3.ud |= (0xf & (0xf << num_channels)) << 8;
}

void
brw_untyped_surface_read(struct brw_compile *p,
                         struct brw_reg dest,
                         struct brw_reg mrf,
                         unsigned bind_table_index,
                         unsigned msg_length,
                         unsigned response_length)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

   brw_set_dest(p, insn, retype(dest, BRW_REGISTER_TYPE_UD));
   brw_set_src0(p, insn, retype(mrf, BRW_REGISTER_TYPE_UD));
   brw_set_dp_untyped_surface_read_message(
      p, insn, bind_table_index, msg_length, response_length,
      insn->header.access_mode == BRW_ALIGN_1);
}

/**
 * This instruction is generated as a single-channel align1 instruction by
 * both the VS and FS stages when using INTEL_DEBUG=shader_time.
 *
 * We can't use the typed atomic op in the FS because that has the execution
 * mask ANDed with the pixel mask, but we just want to write the one dword for
 * all the pixels.
 *
 * We don't use the SIMD4x2 atomic ops in the VS because want to just write
 * one u32.
So we use the same untyped atomic write message as the pixel
 * shader.
 *
 * The untyped atomic operation requires a BUFFER surface type with RAW
 * format, and is only accessible through the legacy DATA_CACHE dataport
 * messages.
 */
void brw_shader_time_add(struct brw_compile *p,
                         struct brw_reg payload,
                         uint32_t surf_index)
{
   struct brw_context *brw = p->brw;
   /* Untyped atomics are a Gen7+ data port feature. */
   assert(brw->gen >= 7);

   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
   brw_pop_insn_state(p);

   /* We use brw_vec1_reg and unmasked because we want to increment the given
    * offset only once.
    */
   brw_set_dest(p, send, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
                                      BRW_ARF_NULL, 0));
   brw_set_src0(p, send, brw_vec1_reg(payload.file,
                                      payload.nr, 0));
   brw_set_dp_untyped_atomic_message(p, send, BRW_AOP_ADD, surf_index,
                                     2 /* message length */,
                                     0 /* response length */,
                                     false /* header present */);
}