/* brw_eu_emit.c — revision 09d881bf7420c97a0f684283c24b8ec3e42404ff */
1/* 2 Copyright (C) Intel Corp. 2006. All Rights Reserved. 3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to 4 develop this 3D driver. 5 6 Permission is hereby granted, free of charge, to any person obtaining 7 a copy of this software and associated documentation files (the 8 "Software"), to deal in the Software without restriction, including 9 without limitation the rights to use, copy, modify, merge, publish, 10 distribute, sublicense, and/or sell copies of the Software, and to 11 permit persons to whom the Software is furnished to do so, subject to 12 the following conditions: 13 14 The above copyright notice and this permission notice (including the 15 next paragraph) shall be included in all copies or substantial 16 portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
25 26 **********************************************************************/ 27 /* 28 * Authors: 29 * Keith Whitwell <keith@tungstengraphics.com> 30 */ 31 32 33#include "brw_context.h" 34#include "brw_defines.h" 35#include "brw_eu.h" 36 37#include "../glsl/ralloc.h" 38 39/*********************************************************************** 40 * Internal helper for constructing instructions 41 */ 42 43static void guess_execution_size(struct brw_compile *p, 44 struct brw_instruction *insn, 45 struct brw_reg reg) 46{ 47 if (reg.width == BRW_WIDTH_8 && p->compressed) 48 insn->header.execution_size = BRW_EXECUTE_16; 49 else 50 insn->header.execution_size = reg.width; /* note - definitions are compatible */ 51} 52 53 54/** 55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source 56 * registers, implicitly moving the operand to a message register. 57 * 58 * On Sandybridge, this is no longer the case. This function performs the 59 * explicit move; it should be called before emitting a SEND instruction. 
60 */ 61static void 62gen6_resolve_implied_move(struct brw_compile *p, 63 struct brw_reg *src, 64 GLuint msg_reg_nr) 65{ 66 struct intel_context *intel = &p->brw->intel; 67 if (intel->gen < 6) 68 return; 69 70 if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) { 71 brw_push_insn_state(p); 72 brw_set_mask_control(p, BRW_MASK_DISABLE); 73 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 74 brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD), 75 retype(*src, BRW_REGISTER_TYPE_UD)); 76 brw_pop_insn_state(p); 77 } 78 *src = brw_message_reg(msg_reg_nr); 79} 80 81static void 82gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg) 83{ 84 struct intel_context *intel = &p->brw->intel; 85 if (intel->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) { 86 reg->file = BRW_GENERAL_REGISTER_FILE; 87 reg->nr += 111; 88 } 89} 90 91 92static void brw_set_dest(struct brw_compile *p, 93 struct brw_instruction *insn, 94 struct brw_reg dest) 95{ 96 struct intel_context *intel = &p->brw->intel; 97 98 if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && 99 dest.file != BRW_MESSAGE_REGISTER_FILE) 100 assert(dest.nr < 128); 101 102 gen7_convert_mrf_to_grf(p, &dest); 103 104 insn->bits1.da1.dest_reg_file = dest.file; 105 insn->bits1.da1.dest_reg_type = dest.type; 106 insn->bits1.da1.dest_address_mode = dest.address_mode; 107 108 if (dest.address_mode == BRW_ADDRESS_DIRECT) { 109 insn->bits1.da1.dest_reg_nr = dest.nr; 110 111 if (insn->header.access_mode == BRW_ALIGN_1) { 112 insn->bits1.da1.dest_subreg_nr = dest.subnr; 113 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) 114 dest.hstride = BRW_HORIZONTAL_STRIDE_1; 115 insn->bits1.da1.dest_horiz_stride = dest.hstride; 116 } 117 else { 118 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; 119 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; 120 /* even ignored in da16, still need to set as '01' */ 121 insn->bits1.da16.dest_horiz_stride = 1; 122 } 123 } 124 else { 125 
insn->bits1.ia1.dest_subreg_nr = dest.subnr; 126 127 /* These are different sizes in align1 vs align16: 128 */ 129 if (insn->header.access_mode == BRW_ALIGN_1) { 130 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; 131 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) 132 dest.hstride = BRW_HORIZONTAL_STRIDE_1; 133 insn->bits1.ia1.dest_horiz_stride = dest.hstride; 134 } 135 else { 136 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; 137 /* even ignored in da16, still need to set as '01' */ 138 insn->bits1.ia16.dest_horiz_stride = 1; 139 } 140 } 141 142 /* NEW: Set the execution size based on dest.width and 143 * insn->compression_control: 144 */ 145 guess_execution_size(p, insn, dest); 146} 147 148extern int reg_type_size[]; 149 150static void 151validate_reg(struct brw_instruction *insn, struct brw_reg reg) 152{ 153 int hstride_for_reg[] = {0, 1, 2, 4}; 154 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256}; 155 int width_for_reg[] = {1, 2, 4, 8, 16}; 156 int execsize_for_reg[] = {1, 2, 4, 8, 16}; 157 int width, hstride, vstride, execsize; 158 159 if (reg.file == BRW_IMMEDIATE_VALUE) { 160 /* 3.3.6: Region Parameters. Restriction: Immediate vectors 161 * mean the destination has to be 128-bit aligned and the 162 * destination horiz stride has to be a word. 
163 */ 164 if (reg.type == BRW_REGISTER_TYPE_V) { 165 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] * 166 reg_type_size[insn->bits1.da1.dest_reg_type] == 2); 167 } 168 169 return; 170 } 171 172 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE && 173 reg.file == BRW_ARF_NULL) 174 return; 175 176 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg)); 177 hstride = hstride_for_reg[reg.hstride]; 178 179 if (reg.vstride == 0xf) { 180 vstride = -1; 181 } else { 182 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg)); 183 vstride = vstride_for_reg[reg.vstride]; 184 } 185 186 assert(reg.width >= 0 && reg.width < Elements(width_for_reg)); 187 width = width_for_reg[reg.width]; 188 189 assert(insn->header.execution_size >= 0 && 190 insn->header.execution_size < Elements(execsize_for_reg)); 191 execsize = execsize_for_reg[insn->header.execution_size]; 192 193 /* Restrictions from 3.3.10: Register Region Restrictions. */ 194 /* 3. */ 195 assert(execsize >= width); 196 197 /* 4. */ 198 if (execsize == width && hstride != 0) { 199 assert(vstride == -1 || vstride == width * hstride); 200 } 201 202 /* 5. */ 203 if (execsize == width && hstride == 0) { 204 /* no restriction on vstride. */ 205 } 206 207 /* 6. */ 208 if (width == 1) { 209 assert(hstride == 0); 210 } 211 212 /* 7. */ 213 if (execsize == 1 && width == 1) { 214 assert(hstride == 0); 215 assert(vstride == 0); 216 } 217 218 /* 8. */ 219 if (vstride == 0 && hstride == 0) { 220 assert(width == 1); 221 } 222 223 /* 10. Check destination issues. 
*/ 224} 225 226static void brw_set_src0(struct brw_compile *p, 227 struct brw_instruction *insn, 228 struct brw_reg reg) 229{ 230 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) 231 assert(reg.nr < 128); 232 233 gen7_convert_mrf_to_grf(p, ®); 234 235 validate_reg(insn, reg); 236 237 insn->bits1.da1.src0_reg_file = reg.file; 238 insn->bits1.da1.src0_reg_type = reg.type; 239 insn->bits2.da1.src0_abs = reg.abs; 240 insn->bits2.da1.src0_negate = reg.negate; 241 insn->bits2.da1.src0_address_mode = reg.address_mode; 242 243 if (reg.file == BRW_IMMEDIATE_VALUE) { 244 insn->bits3.ud = reg.dw1.ud; 245 246 /* Required to set some fields in src1 as well: 247 */ 248 insn->bits1.da1.src1_reg_file = 0; /* arf */ 249 insn->bits1.da1.src1_reg_type = reg.type; 250 } 251 else 252 { 253 if (reg.address_mode == BRW_ADDRESS_DIRECT) { 254 if (insn->header.access_mode == BRW_ALIGN_1) { 255 insn->bits2.da1.src0_subreg_nr = reg.subnr; 256 insn->bits2.da1.src0_reg_nr = reg.nr; 257 } 258 else { 259 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16; 260 insn->bits2.da16.src0_reg_nr = reg.nr; 261 } 262 } 263 else { 264 insn->bits2.ia1.src0_subreg_nr = reg.subnr; 265 266 if (insn->header.access_mode == BRW_ALIGN_1) { 267 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; 268 } 269 else { 270 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; 271 } 272 } 273 274 if (insn->header.access_mode == BRW_ALIGN_1) { 275 if (reg.width == BRW_WIDTH_1 && 276 insn->header.execution_size == BRW_EXECUTE_1) { 277 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0; 278 insn->bits2.da1.src0_width = BRW_WIDTH_1; 279 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0; 280 } 281 else { 282 insn->bits2.da1.src0_horiz_stride = reg.hstride; 283 insn->bits2.da1.src0_width = reg.width; 284 insn->bits2.da1.src0_vert_stride = reg.vstride; 285 } 286 } 287 else { 288 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); 289 insn->bits2.da16.src0_swz_y = 
BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); 290 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); 291 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); 292 293 /* This is an oddity of the fact we're using the same 294 * descriptions for registers in align_16 as align_1: 295 */ 296 if (reg.vstride == BRW_VERTICAL_STRIDE_8) 297 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4; 298 else 299 insn->bits2.da16.src0_vert_stride = reg.vstride; 300 } 301 } 302} 303 304 305void brw_set_src1(struct brw_compile *p, 306 struct brw_instruction *insn, 307 struct brw_reg reg) 308{ 309 assert(reg.file != BRW_MESSAGE_REGISTER_FILE); 310 311 assert(reg.nr < 128); 312 313 gen7_convert_mrf_to_grf(p, ®); 314 315 validate_reg(insn, reg); 316 317 insn->bits1.da1.src1_reg_file = reg.file; 318 insn->bits1.da1.src1_reg_type = reg.type; 319 insn->bits3.da1.src1_abs = reg.abs; 320 insn->bits3.da1.src1_negate = reg.negate; 321 322 /* Only src1 can be immediate in two-argument instructions. 
323 */ 324 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE); 325 326 if (reg.file == BRW_IMMEDIATE_VALUE) { 327 insn->bits3.ud = reg.dw1.ud; 328 } 329 else { 330 /* This is a hardware restriction, which may or may not be lifted 331 * in the future: 332 */ 333 assert (reg.address_mode == BRW_ADDRESS_DIRECT); 334 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */ 335 336 if (insn->header.access_mode == BRW_ALIGN_1) { 337 insn->bits3.da1.src1_subreg_nr = reg.subnr; 338 insn->bits3.da1.src1_reg_nr = reg.nr; 339 } 340 else { 341 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16; 342 insn->bits3.da16.src1_reg_nr = reg.nr; 343 } 344 345 if (insn->header.access_mode == BRW_ALIGN_1) { 346 if (reg.width == BRW_WIDTH_1 && 347 insn->header.execution_size == BRW_EXECUTE_1) { 348 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0; 349 insn->bits3.da1.src1_width = BRW_WIDTH_1; 350 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0; 351 } 352 else { 353 insn->bits3.da1.src1_horiz_stride = reg.hstride; 354 insn->bits3.da1.src1_width = reg.width; 355 insn->bits3.da1.src1_vert_stride = reg.vstride; 356 } 357 } 358 else { 359 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); 360 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); 361 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); 362 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); 363 364 /* This is an oddity of the fact we're using the same 365 * descriptions for registers in align_16 as align_1: 366 */ 367 if (reg.vstride == BRW_VERTICAL_STRIDE_8) 368 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4; 369 else 370 insn->bits3.da16.src1_vert_stride = reg.vstride; 371 } 372 } 373} 374 375 376 377static void brw_set_math_message( struct brw_compile *p, 378 struct brw_instruction *insn, 379 GLuint msg_length, 380 GLuint response_length, 381 GLuint function, 382 GLuint 
integer_type, 383 GLboolean low_precision, 384 GLboolean saturate, 385 GLuint dataType ) 386{ 387 struct brw_context *brw = p->brw; 388 struct intel_context *intel = &brw->intel; 389 brw_set_src1(p, insn, brw_imm_d(0)); 390 391 if (intel->gen == 5) { 392 insn->bits3.math_gen5.function = function; 393 insn->bits3.math_gen5.int_type = integer_type; 394 insn->bits3.math_gen5.precision = low_precision; 395 insn->bits3.math_gen5.saturate = saturate; 396 insn->bits3.math_gen5.data_type = dataType; 397 insn->bits3.math_gen5.snapshot = 0; 398 insn->bits3.math_gen5.header_present = 0; 399 insn->bits3.math_gen5.response_length = response_length; 400 insn->bits3.math_gen5.msg_length = msg_length; 401 insn->bits3.math_gen5.end_of_thread = 0; 402 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH; 403 insn->bits2.send_gen5.end_of_thread = 0; 404 } else { 405 insn->bits3.math.function = function; 406 insn->bits3.math.int_type = integer_type; 407 insn->bits3.math.precision = low_precision; 408 insn->bits3.math.saturate = saturate; 409 insn->bits3.math.data_type = dataType; 410 insn->bits3.math.response_length = response_length; 411 insn->bits3.math.msg_length = msg_length; 412 insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH; 413 insn->bits3.math.end_of_thread = 0; 414 } 415} 416 417 418static void brw_set_ff_sync_message(struct brw_compile *p, 419 struct brw_instruction *insn, 420 GLboolean allocate, 421 GLuint response_length, 422 GLboolean end_of_thread) 423{ 424 struct brw_context *brw = p->brw; 425 struct intel_context *intel = &brw->intel; 426 brw_set_src1(p, insn, brw_imm_d(0)); 427 428 insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */ 429 insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */ 430 insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */ 431 insn->bits3.urb_gen5.allocate = allocate; 432 insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */ 433 insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */ 434 
insn->bits3.urb_gen5.header_present = 1; 435 insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */ 436 insn->bits3.urb_gen5.msg_length = 1; 437 insn->bits3.urb_gen5.end_of_thread = end_of_thread; 438 if (intel->gen >= 6) { 439 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB; 440 } else { 441 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB; 442 insn->bits2.send_gen5.end_of_thread = end_of_thread; 443 } 444} 445 446static void brw_set_urb_message( struct brw_compile *p, 447 struct brw_instruction *insn, 448 GLboolean allocate, 449 GLboolean used, 450 GLuint msg_length, 451 GLuint response_length, 452 GLboolean end_of_thread, 453 GLboolean complete, 454 GLuint offset, 455 GLuint swizzle_control ) 456{ 457 struct brw_context *brw = p->brw; 458 struct intel_context *intel = &brw->intel; 459 brw_set_src1(p, insn, brw_imm_d(0)); 460 461 if (intel->gen == 7) { 462 insn->bits3.urb_gen7.opcode = 0; /* URB_WRITE_HWORD */ 463 insn->bits3.urb_gen7.offset = offset; 464 assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE); 465 insn->bits3.urb_gen7.swizzle_control = swizzle_control; 466 /* per_slot_offset = 0 makes it ignore offsets in message header */ 467 insn->bits3.urb_gen7.per_slot_offset = 0; 468 insn->bits3.urb_gen7.complete = complete; 469 insn->bits3.urb_gen7.header_present = 1; 470 insn->bits3.urb_gen7.response_length = response_length; 471 insn->bits3.urb_gen7.msg_length = msg_length; 472 insn->bits3.urb_gen7.end_of_thread = end_of_thread; 473 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB; 474 } else if (intel->gen >= 5) { 475 insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */ 476 insn->bits3.urb_gen5.offset = offset; 477 insn->bits3.urb_gen5.swizzle_control = swizzle_control; 478 insn->bits3.urb_gen5.allocate = allocate; 479 insn->bits3.urb_gen5.used = used; /* ? 
*/ 480 insn->bits3.urb_gen5.complete = complete; 481 insn->bits3.urb_gen5.header_present = 1; 482 insn->bits3.urb_gen5.response_length = response_length; 483 insn->bits3.urb_gen5.msg_length = msg_length; 484 insn->bits3.urb_gen5.end_of_thread = end_of_thread; 485 if (intel->gen >= 6) { 486 /* For SNB, the SFID bits moved to the condmod bits, and 487 * EOT stayed in bits3 above. Does the EOT bit setting 488 * below on Ironlake even do anything? 489 */ 490 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB; 491 } else { 492 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB; 493 insn->bits2.send_gen5.end_of_thread = end_of_thread; 494 } 495 } else { 496 insn->bits3.urb.opcode = 0; /* ? */ 497 insn->bits3.urb.offset = offset; 498 insn->bits3.urb.swizzle_control = swizzle_control; 499 insn->bits3.urb.allocate = allocate; 500 insn->bits3.urb.used = used; /* ? */ 501 insn->bits3.urb.complete = complete; 502 insn->bits3.urb.response_length = response_length; 503 insn->bits3.urb.msg_length = msg_length; 504 insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB; 505 insn->bits3.urb.end_of_thread = end_of_thread; 506 } 507} 508 509static void brw_set_dp_write_message( struct brw_compile *p, 510 struct brw_instruction *insn, 511 GLuint binding_table_index, 512 GLuint msg_control, 513 GLuint msg_type, 514 GLuint msg_length, 515 GLboolean header_present, 516 GLuint pixel_scoreboard_clear, 517 GLuint response_length, 518 GLuint end_of_thread, 519 GLuint send_commit_msg) 520{ 521 struct brw_context *brw = p->brw; 522 struct intel_context *intel = &brw->intel; 523 brw_set_src1(p, insn, brw_imm_ud(0)); 524 525 if (intel->gen >= 7) { 526 insn->bits3.gen7_dp.binding_table_index = binding_table_index; 527 insn->bits3.gen7_dp.msg_control = msg_control; 528 insn->bits3.gen7_dp.pixel_scoreboard_clear = pixel_scoreboard_clear; 529 insn->bits3.gen7_dp.msg_type = msg_type; 530 insn->bits3.gen7_dp.header_present = header_present; 531 insn->bits3.gen7_dp.response_length = 
response_length; 532 insn->bits3.gen7_dp.msg_length = msg_length; 533 insn->bits3.gen7_dp.end_of_thread = end_of_thread; 534 535 /* We always use the render cache for write messages */ 536 insn->header.destreg__conditionalmod = GEN6_MESSAGE_TARGET_DP_RENDER_CACHE; 537 } else if (intel->gen == 6) { 538 insn->bits3.gen6_dp.binding_table_index = binding_table_index; 539 insn->bits3.gen6_dp.msg_control = msg_control; 540 insn->bits3.gen6_dp.pixel_scoreboard_clear = pixel_scoreboard_clear; 541 insn->bits3.gen6_dp.msg_type = msg_type; 542 insn->bits3.gen6_dp.send_commit_msg = send_commit_msg; 543 insn->bits3.gen6_dp.header_present = header_present; 544 insn->bits3.gen6_dp.response_length = response_length; 545 insn->bits3.gen6_dp.msg_length = msg_length; 546 insn->bits3.gen6_dp.end_of_thread = end_of_thread; 547 548 /* We always use the render cache for write messages */ 549 insn->header.destreg__conditionalmod = GEN6_MESSAGE_TARGET_DP_RENDER_CACHE; 550 } else if (intel->gen == 5) { 551 insn->bits3.dp_write_gen5.binding_table_index = binding_table_index; 552 insn->bits3.dp_write_gen5.msg_control = msg_control; 553 insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear; 554 insn->bits3.dp_write_gen5.msg_type = msg_type; 555 insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg; 556 insn->bits3.dp_write_gen5.header_present = header_present; 557 insn->bits3.dp_write_gen5.response_length = response_length; 558 insn->bits3.dp_write_gen5.msg_length = msg_length; 559 insn->bits3.dp_write_gen5.end_of_thread = end_of_thread; 560 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE; 561 insn->bits2.send_gen5.end_of_thread = end_of_thread; 562 } else { 563 insn->bits3.dp_write.binding_table_index = binding_table_index; 564 insn->bits3.dp_write.msg_control = msg_control; 565 insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear; 566 insn->bits3.dp_write.msg_type = msg_type; 567 insn->bits3.dp_write.send_commit_msg = send_commit_msg; 
568 insn->bits3.dp_write.response_length = response_length; 569 insn->bits3.dp_write.msg_length = msg_length; 570 insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE; 571 insn->bits3.dp_write.end_of_thread = end_of_thread; 572 } 573} 574 575static void 576brw_set_dp_read_message(struct brw_compile *p, 577 struct brw_instruction *insn, 578 GLuint binding_table_index, 579 GLuint msg_control, 580 GLuint msg_type, 581 GLuint target_cache, 582 GLuint msg_length, 583 GLuint response_length) 584{ 585 struct brw_context *brw = p->brw; 586 struct intel_context *intel = &brw->intel; 587 brw_set_src1(p, insn, brw_imm_d(0)); 588 589 if (intel->gen >= 6) { 590 uint32_t target_function; 591 592 if (target_cache == BRW_DATAPORT_READ_TARGET_DATA_CACHE) 593 target_function = GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE; 594 else 595 target_function = GEN6_MESSAGE_TARGET_DP_RENDER_CACHE; 596 597 insn->bits3.gen6_dp.binding_table_index = binding_table_index; 598 insn->bits3.gen6_dp.msg_control = msg_control; 599 insn->bits3.gen6_dp.pixel_scoreboard_clear = 0; 600 insn->bits3.gen6_dp.msg_type = msg_type; 601 insn->bits3.gen6_dp.send_commit_msg = 0; 602 insn->bits3.gen6_dp.header_present = 1; 603 insn->bits3.gen6_dp.response_length = response_length; 604 insn->bits3.gen6_dp.msg_length = msg_length; 605 insn->bits3.gen6_dp.end_of_thread = 0; 606 insn->header.destreg__conditionalmod = target_function; 607 } else if (intel->gen == 5) { 608 insn->bits3.dp_read_gen5.binding_table_index = binding_table_index; 609 insn->bits3.dp_read_gen5.msg_control = msg_control; 610 insn->bits3.dp_read_gen5.msg_type = msg_type; 611 insn->bits3.dp_read_gen5.target_cache = target_cache; 612 insn->bits3.dp_read_gen5.header_present = 1; 613 insn->bits3.dp_read_gen5.response_length = response_length; 614 insn->bits3.dp_read_gen5.msg_length = msg_length; 615 insn->bits3.dp_read_gen5.pad1 = 0; 616 insn->bits3.dp_read_gen5.end_of_thread = 0; 617 insn->bits2.send_gen5.sfid = 
BRW_MESSAGE_TARGET_DATAPORT_READ; 618 insn->bits2.send_gen5.end_of_thread = 0; 619 } else if (intel->is_g4x) { 620 insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/ 621 insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/ 622 insn->bits3.dp_read_g4x.msg_type = msg_type; /*11:13*/ 623 insn->bits3.dp_read_g4x.target_cache = target_cache; /*14:15*/ 624 insn->bits3.dp_read_g4x.response_length = response_length; /*16:19*/ 625 insn->bits3.dp_read_g4x.msg_length = msg_length; /*20:23*/ 626 insn->bits3.dp_read_g4x.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/ 627 insn->bits3.dp_read_g4x.pad1 = 0; 628 insn->bits3.dp_read_g4x.end_of_thread = 0; 629 } else { 630 insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/ 631 insn->bits3.dp_read.msg_control = msg_control; /*8:11*/ 632 insn->bits3.dp_read.msg_type = msg_type; /*12:13*/ 633 insn->bits3.dp_read.target_cache = target_cache; /*14:15*/ 634 insn->bits3.dp_read.response_length = response_length; /*16:19*/ 635 insn->bits3.dp_read.msg_length = msg_length; /*20:23*/ 636 insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/ 637 insn->bits3.dp_read.pad1 = 0; /*28:30*/ 638 insn->bits3.dp_read.end_of_thread = 0; /*31*/ 639 } 640} 641 642static void brw_set_sampler_message(struct brw_compile *p, 643 struct brw_instruction *insn, 644 GLuint binding_table_index, 645 GLuint sampler, 646 GLuint msg_type, 647 GLuint response_length, 648 GLuint msg_length, 649 GLboolean eot, 650 GLuint header_present, 651 GLuint simd_mode) 652{ 653 struct brw_context *brw = p->brw; 654 struct intel_context *intel = &brw->intel; 655 assert(eot == 0); 656 brw_set_src1(p, insn, brw_imm_d(0)); 657 658 if (intel->gen >= 5) { 659 insn->bits3.sampler_gen5.binding_table_index = binding_table_index; 660 insn->bits3.sampler_gen5.sampler = sampler; 661 insn->bits3.sampler_gen5.msg_type = msg_type; 662 insn->bits3.sampler_gen5.simd_mode = simd_mode; 663 
insn->bits3.sampler_gen5.header_present = header_present; 664 insn->bits3.sampler_gen5.response_length = response_length; 665 insn->bits3.sampler_gen5.msg_length = msg_length; 666 insn->bits3.sampler_gen5.end_of_thread = eot; 667 if (intel->gen >= 6) 668 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_SAMPLER; 669 else { 670 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER; 671 insn->bits2.send_gen5.end_of_thread = eot; 672 } 673 } else if (intel->is_g4x) { 674 insn->bits3.sampler_g4x.binding_table_index = binding_table_index; 675 insn->bits3.sampler_g4x.sampler = sampler; 676 insn->bits3.sampler_g4x.msg_type = msg_type; 677 insn->bits3.sampler_g4x.response_length = response_length; 678 insn->bits3.sampler_g4x.msg_length = msg_length; 679 insn->bits3.sampler_g4x.end_of_thread = eot; 680 insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER; 681 } else { 682 insn->bits3.sampler.binding_table_index = binding_table_index; 683 insn->bits3.sampler.sampler = sampler; 684 insn->bits3.sampler.msg_type = msg_type; 685 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; 686 insn->bits3.sampler.response_length = response_length; 687 insn->bits3.sampler.msg_length = msg_length; 688 insn->bits3.sampler.end_of_thread = eot; 689 insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER; 690 } 691} 692 693 694 695static struct brw_instruction *next_insn( struct brw_compile *p, 696 GLuint opcode ) 697{ 698 struct brw_instruction *insn; 699 700 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN); 701 702 insn = &p->store[p->nr_insn++]; 703 memcpy(insn, p->current, sizeof(*insn)); 704 705 /* Reset this one-shot flag: 706 */ 707 708 if (p->current->header.destreg__conditionalmod) { 709 p->current->header.destreg__conditionalmod = 0; 710 p->current->header.predicate_control = BRW_PREDICATE_NORMAL; 711 } 712 713 insn->header.opcode = opcode; 714 return insn; 715} 716 717 718static struct brw_instruction *brw_alu1( struct brw_compile *p, 719 GLuint 
opcode, 720 struct brw_reg dest, 721 struct brw_reg src ) 722{ 723 struct brw_instruction *insn = next_insn(p, opcode); 724 brw_set_dest(p, insn, dest); 725 brw_set_src0(p, insn, src); 726 return insn; 727} 728 729static struct brw_instruction *brw_alu2(struct brw_compile *p, 730 GLuint opcode, 731 struct brw_reg dest, 732 struct brw_reg src0, 733 struct brw_reg src1 ) 734{ 735 struct brw_instruction *insn = next_insn(p, opcode); 736 brw_set_dest(p, insn, dest); 737 brw_set_src0(p, insn, src0); 738 brw_set_src1(p, insn, src1); 739 return insn; 740} 741 742 743/*********************************************************************** 744 * Convenience routines. 745 */ 746#define ALU1(OP) \ 747struct brw_instruction *brw_##OP(struct brw_compile *p, \ 748 struct brw_reg dest, \ 749 struct brw_reg src0) \ 750{ \ 751 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \ 752} 753 754#define ALU2(OP) \ 755struct brw_instruction *brw_##OP(struct brw_compile *p, \ 756 struct brw_reg dest, \ 757 struct brw_reg src0, \ 758 struct brw_reg src1) \ 759{ \ 760 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \ 761} 762 763/* Rounding operations (other than RNDD) require two instructions - the first 764 * stores a rounded value (possibly the wrong way) in the dest register, but 765 * also sets a per-channel "increment bit" in the flag register. A predicated 766 * add of 1.0 fixes dest to contain the desired result. 
767 */ 768#define ROUND(OP) \ 769void brw_##OP(struct brw_compile *p, \ 770 struct brw_reg dest, \ 771 struct brw_reg src) \ 772{ \ 773 struct brw_instruction *rnd, *add; \ 774 rnd = next_insn(p, BRW_OPCODE_##OP); \ 775 brw_set_dest(p, rnd, dest); \ 776 brw_set_src0(p, rnd, src); \ 777 rnd->header.destreg__conditionalmod = 0x7; /* turn on round-increments */ \ 778 \ 779 add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \ 780 add->header.predicate_control = BRW_PREDICATE_NORMAL; \ 781} 782 783 784ALU1(MOV) 785ALU2(SEL) 786ALU1(NOT) 787ALU2(AND) 788ALU2(OR) 789ALU2(XOR) 790ALU2(SHR) 791ALU2(SHL) 792ALU2(RSR) 793ALU2(RSL) 794ALU2(ASR) 795ALU1(FRC) 796ALU1(RNDD) 797ALU2(MAC) 798ALU2(MACH) 799ALU1(LZD) 800ALU2(DP4) 801ALU2(DPH) 802ALU2(DP3) 803ALU2(DP2) 804ALU2(LINE) 805ALU2(PLN) 806 807 808ROUND(RNDZ) 809ROUND(RNDE) 810 811 812struct brw_instruction *brw_ADD(struct brw_compile *p, 813 struct brw_reg dest, 814 struct brw_reg src0, 815 struct brw_reg src1) 816{ 817 /* 6.2.2: add */ 818 if (src0.type == BRW_REGISTER_TYPE_F || 819 (src0.file == BRW_IMMEDIATE_VALUE && 820 src0.type == BRW_REGISTER_TYPE_VF)) { 821 assert(src1.type != BRW_REGISTER_TYPE_UD); 822 assert(src1.type != BRW_REGISTER_TYPE_D); 823 } 824 825 if (src1.type == BRW_REGISTER_TYPE_F || 826 (src1.file == BRW_IMMEDIATE_VALUE && 827 src1.type == BRW_REGISTER_TYPE_VF)) { 828 assert(src0.type != BRW_REGISTER_TYPE_UD); 829 assert(src0.type != BRW_REGISTER_TYPE_D); 830 } 831 832 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1); 833} 834 835struct brw_instruction *brw_MUL(struct brw_compile *p, 836 struct brw_reg dest, 837 struct brw_reg src0, 838 struct brw_reg src1) 839{ 840 /* 6.32.38: mul */ 841 if (src0.type == BRW_REGISTER_TYPE_D || 842 src0.type == BRW_REGISTER_TYPE_UD || 843 src1.type == BRW_REGISTER_TYPE_D || 844 src1.type == BRW_REGISTER_TYPE_UD) { 845 assert(dest.type != BRW_REGISTER_TYPE_F); 846 } 847 848 if (src0.type == BRW_REGISTER_TYPE_F || 849 (src0.file == BRW_IMMEDIATE_VALUE && 850 src0.type == 
BRW_REGISTER_TYPE_VF)) { 851 assert(src1.type != BRW_REGISTER_TYPE_UD); 852 assert(src1.type != BRW_REGISTER_TYPE_D); 853 } 854 855 if (src1.type == BRW_REGISTER_TYPE_F || 856 (src1.file == BRW_IMMEDIATE_VALUE && 857 src1.type == BRW_REGISTER_TYPE_VF)) { 858 assert(src0.type != BRW_REGISTER_TYPE_UD); 859 assert(src0.type != BRW_REGISTER_TYPE_D); 860 } 861 862 assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE || 863 src0.nr != BRW_ARF_ACCUMULATOR); 864 assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE || 865 src1.nr != BRW_ARF_ACCUMULATOR); 866 867 return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1); 868} 869 870 871void brw_NOP(struct brw_compile *p) 872{ 873 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP); 874 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 875 brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 876 brw_set_src1(p, insn, brw_imm_ud(0x0)); 877} 878 879 880 881 882 883/*********************************************************************** 884 * Comparisons, if/else/endif 885 */ 886 887struct brw_instruction *brw_JMPI(struct brw_compile *p, 888 struct brw_reg dest, 889 struct brw_reg src0, 890 struct brw_reg src1) 891{ 892 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); 893 894 insn->header.execution_size = 1; 895 insn->header.compression_control = BRW_COMPRESSION_NONE; 896 insn->header.mask_control = BRW_MASK_DISABLE; 897 898 p->current->header.predicate_control = BRW_PREDICATE_NONE; 899 900 return insn; 901} 902 903static void 904push_if_stack(struct brw_compile *p, struct brw_instruction *inst) 905{ 906 p->if_stack[p->if_stack_depth] = inst; 907 908 p->if_stack_depth++; 909 if (p->if_stack_array_size <= p->if_stack_depth) { 910 p->if_stack_array_size *= 2; 911 p->if_stack = reralloc(p->mem_ctx, p->if_stack, struct brw_instruction *, 912 p->if_stack_array_size); 913 } 914} 915 916/* EU takes the value from the flag register and pushes it onto some 917 * sort 
of a stack (presumably merging with any flag value already on 918 * the stack). Within an if block, the flags at the top of the stack 919 * control execution on each channel of the unit, eg. on each of the 920 * 16 pixel values in our wm programs. 921 * 922 * When the matching 'else' instruction is reached (presumably by 923 * countdown of the instruction count patched in by our ELSE/ENDIF 924 * functions), the relevent flags are inverted. 925 * 926 * When the matching 'endif' instruction is reached, the flags are 927 * popped off. If the stack is now empty, normal execution resumes. 928 */ 929struct brw_instruction * 930brw_IF(struct brw_compile *p, GLuint execute_size) 931{ 932 struct intel_context *intel = &p->brw->intel; 933 struct brw_instruction *insn; 934 935 insn = next_insn(p, BRW_OPCODE_IF); 936 937 /* Override the defaults for this instruction: 938 */ 939 if (intel->gen < 6) { 940 brw_set_dest(p, insn, brw_ip_reg()); 941 brw_set_src0(p, insn, brw_ip_reg()); 942 brw_set_src1(p, insn, brw_imm_d(0x0)); 943 } else { 944 brw_set_dest(p, insn, brw_imm_w(0)); 945 insn->bits1.branch_gen6.jump_count = 0; 946 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 947 brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 948 } 949 950 insn->header.execution_size = execute_size; 951 insn->header.compression_control = BRW_COMPRESSION_NONE; 952 insn->header.predicate_control = BRW_PREDICATE_NORMAL; 953 insn->header.mask_control = BRW_MASK_ENABLE; 954 if (!p->single_program_flow) 955 insn->header.thread_control = BRW_THREAD_SWITCH; 956 957 p->current->header.predicate_control = BRW_PREDICATE_NONE; 958 959 push_if_stack(p, insn); 960 return insn; 961} 962 963struct brw_instruction * 964gen6_IF(struct brw_compile *p, uint32_t conditional, 965 struct brw_reg src0, struct brw_reg src1) 966{ 967 struct brw_instruction *insn; 968 969 insn = next_insn(p, BRW_OPCODE_IF); 970 971 brw_set_dest(p, insn, brw_imm_w(0)); 972 insn->header.execution_size = 
BRW_EXECUTE_8; 973 insn->bits1.branch_gen6.jump_count = 0; 974 brw_set_src0(p, insn, src0); 975 brw_set_src1(p, insn, src1); 976 977 assert(insn->header.compression_control == BRW_COMPRESSION_NONE); 978 assert(insn->header.predicate_control == BRW_PREDICATE_NONE); 979 insn->header.destreg__conditionalmod = conditional; 980 981 if (!p->single_program_flow) 982 insn->header.thread_control = BRW_THREAD_SWITCH; 983 984 push_if_stack(p, insn); 985 return insn; 986} 987 988/** 989 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs. 990 */ 991static void 992convert_IF_ELSE_to_ADD(struct brw_compile *p, 993 struct brw_instruction *if_inst, 994 struct brw_instruction *else_inst) 995{ 996 /* The next instruction (where the ENDIF would be, if it existed) */ 997 struct brw_instruction *next_inst = &p->store[p->nr_insn]; 998 999 assert(p->single_program_flow); 1000 assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF); 1001 assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE); 1002 assert(if_inst->header.execution_size == BRW_EXECUTE_1); 1003 1004 /* Convert IF to an ADD instruction that moves the instruction pointer 1005 * to the first instruction of the ELSE block. If there is no ELSE 1006 * block, point to where ENDIF would be. Reverse the predicate. 1007 * 1008 * There's no need to execute an ENDIF since we don't need to do any 1009 * stack operations, and if we're currently executing, we just want to 1010 * continue normally. 1011 */ 1012 if_inst->header.opcode = BRW_OPCODE_ADD; 1013 if_inst->header.predicate_inverse = 1; 1014 1015 if (else_inst != NULL) { 1016 /* Convert ELSE to an ADD instruction that points where the ENDIF 1017 * would be. 
1018 */ 1019 else_inst->header.opcode = BRW_OPCODE_ADD; 1020 1021 if_inst->bits3.ud = (else_inst - if_inst + 1) * 16; 1022 else_inst->bits3.ud = (next_inst - else_inst) * 16; 1023 } else { 1024 if_inst->bits3.ud = (next_inst - if_inst) * 16; 1025 } 1026} 1027 1028/** 1029 * Patch IF and ELSE instructions with appropriate jump targets. 1030 */ 1031static void 1032patch_IF_ELSE(struct brw_compile *p, 1033 struct brw_instruction *if_inst, 1034 struct brw_instruction *else_inst, 1035 struct brw_instruction *endif_inst) 1036{ 1037 struct intel_context *intel = &p->brw->intel; 1038 1039 assert(!p->single_program_flow); 1040 assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF); 1041 assert(endif_inst != NULL); 1042 assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE); 1043 1044 unsigned br = 1; 1045 /* Jump count is for 64bit data chunk each, so one 128bit instruction 1046 * requires 2 chunks. 1047 */ 1048 if (intel->gen >= 5) 1049 br = 2; 1050 1051 assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF); 1052 endif_inst->header.execution_size = if_inst->header.execution_size; 1053 1054 if (else_inst == NULL) { 1055 /* Patch IF -> ENDIF */ 1056 if (intel->gen < 6) { 1057 /* Turn it into an IFF, which means no mask stack operations for 1058 * all-false and jumping past the ENDIF. 1059 */ 1060 if_inst->header.opcode = BRW_OPCODE_IFF; 1061 if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1); 1062 if_inst->bits3.if_else.pop_count = 0; 1063 if_inst->bits3.if_else.pad0 = 0; 1064 } else { 1065 /* As of gen6, there is no IFF and IF must point to the ENDIF. 
*/ 1066 if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst); 1067 } 1068 } else { 1069 else_inst->header.execution_size = if_inst->header.execution_size; 1070 1071 /* Patch IF -> ELSE */ 1072 if (intel->gen < 6) { 1073 if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst); 1074 if_inst->bits3.if_else.pop_count = 0; 1075 if_inst->bits3.if_else.pad0 = 0; 1076 } else if (intel->gen == 6) { 1077 if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1); 1078 } 1079 1080 /* Patch ELSE -> ENDIF */ 1081 if (intel->gen < 6) { 1082 /* BRW_OPCODE_ELSE pre-gen6 should point just past the 1083 * matching ENDIF. 1084 */ 1085 else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1); 1086 else_inst->bits3.if_else.pop_count = 1; 1087 else_inst->bits3.if_else.pad0 = 0; 1088 } else { 1089 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */ 1090 else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst); 1091 } 1092 } 1093} 1094 1095void 1096brw_ELSE(struct brw_compile *p) 1097{ 1098 struct intel_context *intel = &p->brw->intel; 1099 struct brw_instruction *insn; 1100 1101 insn = next_insn(p, BRW_OPCODE_ELSE); 1102 1103 if (intel->gen < 6) { 1104 brw_set_dest(p, insn, brw_ip_reg()); 1105 brw_set_src0(p, insn, brw_ip_reg()); 1106 brw_set_src1(p, insn, brw_imm_d(0x0)); 1107 } else { 1108 brw_set_dest(p, insn, brw_imm_w(0)); 1109 insn->bits1.branch_gen6.jump_count = 0; 1110 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1111 brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1112 } 1113 1114 insn->header.compression_control = BRW_COMPRESSION_NONE; 1115 insn->header.mask_control = BRW_MASK_ENABLE; 1116 if (!p->single_program_flow) 1117 insn->header.thread_control = BRW_THREAD_SWITCH; 1118 1119 push_if_stack(p, insn); 1120} 1121 1122void 1123brw_ENDIF(struct brw_compile *p) 1124{ 1125 struct intel_context *intel = &p->brw->intel; 1126 struct brw_instruction *insn; 1127 struct 
brw_instruction *else_inst = NULL; 1128 struct brw_instruction *if_inst = NULL; 1129 1130 /* Pop the IF and (optional) ELSE instructions from the stack */ 1131 p->if_stack_depth--; 1132 if (p->if_stack[p->if_stack_depth]->header.opcode == BRW_OPCODE_ELSE) { 1133 else_inst = p->if_stack[p->if_stack_depth]; 1134 p->if_stack_depth--; 1135 } 1136 if_inst = p->if_stack[p->if_stack_depth]; 1137 1138 if (p->single_program_flow) { 1139 /* ENDIF is useless; don't bother emitting it. */ 1140 convert_IF_ELSE_to_ADD(p, if_inst, else_inst); 1141 return; 1142 } 1143 1144 insn = next_insn(p, BRW_OPCODE_ENDIF); 1145 1146 if (intel->gen < 6) { 1147 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 1148 brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 1149 brw_set_src1(p, insn, brw_imm_d(0x0)); 1150 } else { 1151 brw_set_dest(p, insn, brw_imm_w(0)); 1152 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1153 brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1154 } 1155 1156 insn->header.compression_control = BRW_COMPRESSION_NONE; 1157 insn->header.mask_control = BRW_MASK_ENABLE; 1158 insn->header.thread_control = BRW_THREAD_SWITCH; 1159 1160 /* Also pop item off the stack in the endif instruction: */ 1161 if (intel->gen < 6) { 1162 insn->bits3.if_else.jump_count = 0; 1163 insn->bits3.if_else.pop_count = 1; 1164 insn->bits3.if_else.pad0 = 0; 1165 } else { 1166 insn->bits1.branch_gen6.jump_count = 2; 1167 } 1168 patch_IF_ELSE(p, if_inst, else_inst, insn); 1169} 1170 1171struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count) 1172{ 1173 struct intel_context *intel = &p->brw->intel; 1174 struct brw_instruction *insn; 1175 1176 insn = next_insn(p, BRW_OPCODE_BREAK); 1177 if (intel->gen >= 6) { 1178 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1179 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1180 brw_set_src1(p, insn, brw_imm_d(0x0)); 1181 } else 
{ 1182 brw_set_dest(p, insn, brw_ip_reg()); 1183 brw_set_src0(p, insn, brw_ip_reg()); 1184 brw_set_src1(p, insn, brw_imm_d(0x0)); 1185 insn->bits3.if_else.pad0 = 0; 1186 insn->bits3.if_else.pop_count = pop_count; 1187 } 1188 insn->header.compression_control = BRW_COMPRESSION_NONE; 1189 insn->header.execution_size = BRW_EXECUTE_8; 1190 1191 return insn; 1192} 1193 1194struct brw_instruction *gen6_CONT(struct brw_compile *p, 1195 struct brw_instruction *do_insn) 1196{ 1197 struct brw_instruction *insn; 1198 int br = 2; 1199 1200 insn = next_insn(p, BRW_OPCODE_CONTINUE); 1201 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1202 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1203 brw_set_dest(p, insn, brw_ip_reg()); 1204 brw_set_src0(p, insn, brw_ip_reg()); 1205 brw_set_src1(p, insn, brw_imm_d(0x0)); 1206 1207 insn->bits3.break_cont.uip = br * (do_insn - insn); 1208 1209 insn->header.compression_control = BRW_COMPRESSION_NONE; 1210 insn->header.execution_size = BRW_EXECUTE_8; 1211 return insn; 1212} 1213 1214struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count) 1215{ 1216 struct brw_instruction *insn; 1217 insn = next_insn(p, BRW_OPCODE_CONTINUE); 1218 brw_set_dest(p, insn, brw_ip_reg()); 1219 brw_set_src0(p, insn, brw_ip_reg()); 1220 brw_set_src1(p, insn, brw_imm_d(0x0)); 1221 insn->header.compression_control = BRW_COMPRESSION_NONE; 1222 insn->header.execution_size = BRW_EXECUTE_8; 1223 /* insn->header.mask_control = BRW_MASK_DISABLE; */ 1224 insn->bits3.if_else.pad0 = 0; 1225 insn->bits3.if_else.pop_count = pop_count; 1226 return insn; 1227} 1228 1229/* DO/WHILE loop: 1230 * 1231 * The DO/WHILE is just an unterminated loop -- break or continue are 1232 * used for control within the loop. We have a few ways they can be 1233 * done. 1234 * 1235 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip, 1236 * jip and no DO instruction. 
 *
 * For non-uniform control flow pre-gen6, there's a DO instruction to
 * push the mask, and a WHILE to jump back, and BREAK to get out and
 * pop the mask.
 *
 * For gen6, there's no more mask stack, so no need for DO.  WHILE
 * just points back to the first instruction of the loop.
 */
/**
 * Open a loop.
 *
 * On gen6+ and in single-program-flow mode no instruction is emitted;
 * the returned pointer is simply the next instruction slot, which
 * brw_WHILE() later uses as its backward jump target.
 */
struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
{
   struct intel_context *intel = &p->brw->intel;

   if (intel->gen >= 6 || p->single_program_flow) {
      return &p->store[p->nr_insn];
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);

      /* Override the defaults for this instruction:
       */
      brw_set_dest(p, insn, brw_null_reg());
      brw_set_src0(p, insn, brw_null_reg());
      brw_set_src1(p, insn, brw_null_reg());

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = execute_size;
      insn->header.predicate_control = BRW_PREDICATE_NONE;
      /* insn->header.mask_control = BRW_MASK_ENABLE; */
      /* insn->header.mask_control = BRW_MASK_DISABLE; */

      return insn;
   }
}



/**
 * Close the loop opened by brw_DO(), jumping back to \p do_insn.
 *
 * Gen6+ encodes the backward distance as a gen6 jump count; the SPF path
 * emits a plain ADD to the IP register (offset in bytes, 16 per
 * instruction); otherwise a classic WHILE with an if_else-style jump
 * count is emitted.
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p,
                                  struct brw_instruction *do_insn)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint br = 1;

   /* Jump counts are in 64-bit chunks from gen5 on, so one 128-bit
    * instruction counts as 2 (see patch_IF_ELSE).
    */
   if (intel->gen >= 5)
      br = 2;

   if (intel->gen >= 6) {
      insn = next_insn(p, BRW_OPCODE_WHILE);

      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));

      insn->header.execution_size = do_insn->header.execution_size;
      assert(insn->header.execution_size == BRW_EXECUTE_8);
   } else {
      if (p->single_program_flow) {
         insn = next_insn(p, BRW_OPCODE_ADD);

         brw_set_dest(p, insn, brw_ip_reg());
         brw_set_src0(p, insn, brw_ip_reg());
         /* Backward IP offset in bytes: 16 bytes per instruction. */
         brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
         insn->header.execution_size = BRW_EXECUTE_1;
      } else {
         insn = next_insn(p, BRW_OPCODE_WHILE);

         assert(do_insn->header.opcode == BRW_OPCODE_DO);

         brw_set_dest(p, insn, brw_ip_reg());
         brw_set_src0(p, insn, brw_ip_reg());
         brw_set_src1(p, insn, brw_imm_d(0));

         insn->header.execution_size = do_insn->header.execution_size;
         insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
         insn->bits3.if_else.pop_count = 0;
         insn->bits3.if_else.pad0 = 0;
      }
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}


/* FORWARD JUMPS:
 */
/**
 * Patch a previously emitted JMPI (with an immediate src1) so that it
 * jumps to the current end of the instruction stream.
 */
void brw_land_fwd_jump(struct brw_compile *p,
                       struct brw_instruction *jmp_insn)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *landing = &p->store[p->nr_insn];
   GLuint jmpi = 1;

   /* Jump distances are in 64-bit chunks on gen5+. */
   if (intel->gen >= 5)
      jmpi = 2;

   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
   assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);

   jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
}



/* To integrate with the above, it makes sense that the comparison
 * instruction should populate the flag register.  It might be simpler
 * just to use the flag reg for most WM tasks?
 */
/**
 * Emit a CMP with the given conditional modifier, writing the per-channel
 * result flags.  If the destination is the null register (ARF, nr 0),
 * following instructions are switched to normal predication on the
 * freshly computed flag.
 */
void brw_CMP(struct brw_compile *p,
             struct brw_reg dest,
             GLuint conditional,
             struct brw_reg src0,
             struct brw_reg src1)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);

   insn->header.destreg__conditionalmod = conditional;
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

/*    guess_execution_size(insn, src0); */


   /* Make it so that future instructions will use the computed flag
    * value until brw_set_predicate_control_flag_value() is called
    * again.
    */
   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.nr == 0) {
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
      p->flag_value = 0xff;
   }
}

/* Issue 'wait' instruction for n1, host could program MMIO
   to wake up thread. */
void brw_WAIT (struct brw_compile *p)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
   struct brw_reg src = brw_notification_1_reg();

   /* WAIT uses the notification register as both dest and src0. */
   brw_set_dest(p, insn, src);
   brw_set_src0(p, insn, src);
   brw_set_src1(p, insn, brw_null_reg());
   insn->header.execution_size = 0; /* must */
   insn->header.predicate_control = 0;
   insn->header.compression_control = 0;
}


/***********************************************************************
 * Helpers for the various SEND message types:
 */

/** Extended math function, float[8].
 */
void brw_math( struct brw_compile *p,
               struct brw_reg dest,
               GLuint function,
               GLuint saturate,
               GLuint msg_reg_nr,
               struct brw_reg src,
               GLuint data_type,
               GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;

   if (intel->gen >= 6) {
      /* Gen6+: MATH is a regular opcode rather than a SEND to the
       * shared math unit.
       */
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

      assert(dest.file == BRW_GENERAL_REGISTER_FILE);
      assert(src.file == BRW_GENERAL_REGISTER_FILE);

      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
      assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);

      /* Source modifiers are ignored for extended math instructions. */
      assert(!src.negate);
      assert(!src.abs);

      if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
          function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
         assert(src.type == BRW_REGISTER_TYPE_F);
      }

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;
      insn->header.saturate = saturate;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      /* POW takes two operands (two message registers); SINCOS returns
       * two results.
       */
      GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
      GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
      /* Example code doesn't set predicate_control for send
       * instructions.
       */
      insn->header.predicate_control = 0;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_math_message(p,
                           insn,
                           msg_length, response_length,
                           function,
                           BRW_MATH_INTEGER_UNSIGNED,
                           precision,
                           saturate,
                           data_type);
   }
}

/** Extended math function, float[8].
 */
void brw_math2(struct brw_compile *p,
               struct brw_reg dest,
               GLuint function,
               struct brw_reg src0,
               struct brw_reg src1)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

   /* Two-operand MATH only exists as a native opcode (gen6+). */
   assert(intel->gen >= 6);
   (void) intel;


   assert(dest.file == BRW_GENERAL_REGISTER_FILE);
   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.file == BRW_GENERAL_REGISTER_FILE);

   assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
   assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
   assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);

   if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
       function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
      assert(src0.type == BRW_REGISTER_TYPE_F);
      assert(src1.type == BRW_REGISTER_TYPE_F);
   }

   /* Source modifiers are ignored for extended math instructions. */
   assert(!src0.negate);
   assert(!src0.abs);
   assert(!src1.negate);
   assert(!src1.abs);

   /* Math is the same ISA format as other opcodes, except that CondModifier
    * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
    */
   insn->header.destreg__conditionalmod = function;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
}

/**
 * Extended math function, float[16].
 * Use 2 send instructions.
 */
void brw_math_16( struct brw_compile *p,
                  struct brw_reg dest,
                  GLuint function,
                  GLuint saturate,
                  GLuint msg_reg_nr,
                  struct brw_reg src,
                  GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   /* POW takes two operands; SINCOS returns two results. */
   GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
   GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;

   if (intel->gen >= 6) {
      /* Gen6+ handles SIMD16 math in a single native instruction. */
      insn = next_insn(p, BRW_OPCODE_MATH);

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;
      insn->header.saturate = saturate;

      /* Source modifiers are ignored for extended math instructions. */
      assert(!src.negate);
      assert(!src.abs);

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
      return;
   }

   /* First instruction:
    */
   brw_push_insn_state(p);
   brw_set_predicate_control_flag_value(p, 0xff);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);
   brw_set_math_message(p,
                        insn,
                        msg_length, response_length,
                        function,
                        BRW_MATH_INTEGER_UNSIGNED,
                        precision,
                        saturate,
                        BRW_MATH_DATA_VECTOR);

   /* Second instruction:
    */
   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
   insn->header.destreg__conditionalmod = msg_reg_nr+1;

   brw_set_dest(p, insn, offset(dest,1));
   brw_set_src0(p, insn, src);
   brw_set_math_message(p,
                        insn,
                        msg_length, response_length,
                        function,
                        BRW_MATH_INTEGER_UNSIGNED,
                        precision,
                        saturate,
                        BRW_MATH_DATA_VECTOR);

   brw_pop_insn_state(p);
}


/**
 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
 * using a constant offset per channel.
 *
 * The offset must be aligned to oword size (16 bytes).  Used for
 * register spilling.
 */
void brw_oword_block_write_scratch(struct brw_compile *p,
                                   struct brw_reg mrf,
                                   int num_regs,
                                   GLuint offset)
{
   struct intel_context *intel = &p->brw->intel;
   uint32_t msg_control, msg_type;
   int mlen;

   /* On gen6+ the message header offset is in owords, not bytes. */
   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* One GRF = 2 owords; mlen includes the header register. */
   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      mlen = 2;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      mlen = 3;
   }

   /* Set up the message header.  This is g0, with g0.2 filled with
    * the offset.  We don't want to leave our offset around in g0 or
    * it'll screw up texture samples, so set it up inside the message
    * reg.
    */
   {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
              retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                                  mrf.nr,
                                  2), BRW_REGISTER_TYPE_UD),
              brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_reg dest;
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      int send_commit_msg;
      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
                                         BRW_REGISTER_TYPE_UW);

      if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
         insn->header.compression_control = BRW_COMPRESSION_NONE;
         src_header = vec16(src_header);
      }
      assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
      insn->header.destreg__conditionalmod = mrf.nr;

      /* Until gen6, writes followed by reads from the same location
       * are not guaranteed to be ordered unless write_commit is set.
       * If set, then a no-op write is issued to the destination
       * register to set a dependency, and a read from the destination
       * can be used to ensure the ordering.
       *
       * For gen6, only writes between different threads need ordering
       * protection.  Our use of DP writes is all about register
       * spilling within a thread.
       */
      if (intel->gen >= 6) {
         dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
         send_commit_msg = 0;
      } else {
         dest = src_header;
         send_commit_msg = 1;
      }

      brw_set_dest(p, insn, dest);
      if (intel->gen >= 6) {
         brw_set_src0(p, insn, mrf);
      } else {
         brw_set_src0(p, insn, brw_null_reg());
      }

      if (intel->gen >= 6)
         msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
      else
         msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;

      brw_set_dp_write_message(p,
                               insn,
                               255, /* binding table index (255=stateless) */
                               msg_control,
                               msg_type,
                               mlen,
                               GL_TRUE, /* header_present */
                               0, /* pixel scoreboard */
                               send_commit_msg, /* response_length */
                               0, /* eot */
                               send_commit_msg);
   }
}


/**
 * Read a block of owords (half a GRF each) from the scratch buffer
 * using a constant index per channel.
 *
 * Offset must be aligned to oword size (16 bytes).  Used for register
 * spilling.
 */
void
brw_oword_block_read_scratch(struct brw_compile *p,
                             struct brw_reg dest,
                             struct brw_reg mrf,
                             int num_regs,
                             GLuint offset)
{
   struct intel_context *intel = &p->brw->intel;
   uint32_t msg_control;
   int rlen;

   /* On gen6+ the message header offset is in owords, not bytes. */
   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
   dest = retype(dest, BRW_REGISTER_TYPE_UW);

   /* One GRF = 2 owords. */
   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      rlen = 1;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      rlen = 2;
   }

   /* Build the message header: a copy of g0 with the scratch offset in
    * element 2 (same scheme as brw_oword_block_write_scratch).
    */
   {
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_mask_control(p, BRW_MASK_DISABLE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
              retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                                  mrf.nr,
                                  2), BRW_REGISTER_TYPE_UD),
              brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      assert(insn->header.predicate_control == 0);
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.destreg__conditionalmod = mrf.nr;

      brw_set_dest(p, insn, dest);	/* UW? */
      if (intel->gen >= 6) {
         brw_set_src0(p, insn, mrf);
      } else {
         brw_set_src0(p, insn, brw_null_reg());
      }

      brw_set_dp_read_message(p,
                              insn,
                              255, /* binding table index (255=stateless) */
                              msg_control,
                              BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
                              BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
                              1, /* msg_length */
                              rlen);
   }
}

/**
 * Read a float[4] vector from the data port Data Cache (const buffer).
 * Location (in buffer) should be a multiple of 16.
 * Used for fetching shader constants.
 */
void brw_oword_block_read(struct brw_compile *p,
                          struct brw_reg dest,
                          struct brw_reg mrf,
                          uint32_t offset,
                          uint32_t bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;

   /* On newer hardware, offset is in units of owords. */
   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);

   /* Message header is a copy of g0 with the offset in element 2. */
   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

   /* set message header global offset field (reg 0, element 2) */
   brw_MOV(p,
           retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                               mrf.nr,
                               2), BRW_REGISTER_TYPE_UD),
           brw_imm_ud(offset));

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = mrf.nr;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(p, insn, dest);
   if (intel->gen >= 6) {
      brw_set_src0(p, insn, mrf);
   } else {
      brw_set_src0(p, insn, brw_null_reg());
   }

   brw_set_dp_read_message(p,
                           insn,
                           bind_table_index,
                           BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
                           BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
                           BRW_DATAPORT_READ_TARGET_DATA_CACHE,
                           1, /* msg_length */
                           1); /* response_length (1 reg, 2 owords!) */

   brw_pop_insn_state(p);
}

/**
 * Read a set of dwords from the data port Data Cache (const buffer).
 *
 * Location (in buffer) appears as UD offsets in the register after
 * the provided mrf header reg.
1825 */ 1826void brw_dword_scattered_read(struct brw_compile *p, 1827 struct brw_reg dest, 1828 struct brw_reg mrf, 1829 uint32_t bind_table_index) 1830{ 1831 mrf = retype(mrf, BRW_REGISTER_TYPE_UD); 1832 1833 brw_push_insn_state(p); 1834 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1835 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1836 brw_set_mask_control(p, BRW_MASK_DISABLE); 1837 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); 1838 brw_pop_insn_state(p); 1839 1840 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1841 insn->header.destreg__conditionalmod = mrf.nr; 1842 1843 /* cast dest to a uword[8] vector */ 1844 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); 1845 1846 brw_set_dest(p, insn, dest); 1847 brw_set_src0(p, insn, brw_null_reg()); 1848 1849 brw_set_dp_read_message(p, 1850 insn, 1851 bind_table_index, 1852 BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS, 1853 BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ, 1854 BRW_DATAPORT_READ_TARGET_DATA_CACHE, 1855 2, /* msg_length */ 1856 1); /* response_length */ 1857} 1858 1859 1860 1861/** 1862 * Read float[4] constant(s) from VS constant buffer. 1863 * For relative addressing, two float[4] constants will be read into 'dest'. 1864 * Otherwise, one float[4] constant will be read into the lower half of 'dest'. 
 */
void brw_dp_READ_4_vs(struct brw_compile *p,
                      struct brw_reg dest,
                      GLuint location,
                      GLuint bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint msg_reg_nr = 1;

   /* On gen6+ the header offset is in owords, not bytes. */
   if (intel->gen >= 6)
      location /= 16;

   /* Setup MRF[1] with location/offset into const buffer */
   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2),
                     BRW_REGISTER_TYPE_UD),
           brw_imm_ud(location));
   brw_pop_insn_state(p);

   insn = next_insn(p, BRW_OPCODE_SEND);

   insn->header.predicate_control = BRW_PREDICATE_NONE;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.destreg__conditionalmod = msg_reg_nr;
   insn->header.mask_control = BRW_MASK_DISABLE;

   brw_set_dest(p, insn, dest);
   if (intel->gen >= 6) {
      brw_set_src0(p, insn, brw_message_reg(msg_reg_nr));
   } else {
      brw_set_src0(p, insn, brw_null_reg());
   }

   brw_set_dp_read_message(p,
                           insn,
                           bind_table_index,
                           0,
                           BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
                           BRW_DATAPORT_READ_TARGET_DATA_CACHE,
                           1, /* msg_length */
                           1); /* response_length (1 Oword) */
}

/**
 * Read a float[4] constant per vertex from VS constant buffer, with
 * relative addressing.
 */
void brw_dp_READ_4_vs_relative(struct brw_compile *p,
                               struct brw_reg dest,
                               struct brw_reg addr_reg,
                               GLuint offset,
                               GLuint bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_reg src = brw_vec8_grf(0, 0);
   int msg_type;

   /* Setup MRF[1] with offset into const buffer */
   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);

   /* M1.0 is block offset 0, M1.4 is block offset 1, all other
    * fields ignored.
    */
   brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D),
           addr_reg, brw_imm_d(offset));
   brw_pop_insn_state(p);

   /* On gen6 the g0 header must be moved into an MRF explicitly. */
   gen6_resolve_implied_move(p, &src, 0);
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

   insn->header.predicate_control = BRW_PREDICATE_NONE;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.destreg__conditionalmod = 0;
   insn->header.mask_control = BRW_MASK_DISABLE;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);

   /* The OWord dual-block read message type moved between generations. */
   if (intel->gen == 6)
      msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else if (intel->gen == 5 || intel->is_g4x)
      msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else
      msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;

   brw_set_dp_read_message(p,
                           insn,
                           bind_table_index,
                           BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
                           msg_type,
                           BRW_DATAPORT_READ_TARGET_DATA_CACHE,
                           2, /* msg_length */
                           1); /* response_length */
}



/**
 * Emit a render-target write message.
 *
 * Gen6+ with binding table index 0 uses SENDC; gen6+ omits the
 * implicit header and sends the color payload directly from the
 * message registers.
 */
void brw_fb_WRITE(struct brw_compile *p,
                  int dispatch_width,
                  GLuint msg_reg_nr,
                  struct brw_reg src0,
                  GLuint binding_table_index,
                  GLuint msg_length,
                  GLuint response_length,
                  GLboolean eot,
                  GLboolean header_present)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint msg_control, msg_type;
   struct brw_reg dest;

   if (dispatch_width == 16)
      dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
   else
      dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);

   if (intel->gen >= 6 && binding_table_index == 0) {
      insn = next_insn(p, BRW_OPCODE_SENDC);
   } else {
      insn = next_insn(p, BRW_OPCODE_SEND);
   }
   /* The execution mask is ignored for render target writes. */
   insn->header.predicate_control = 0;
   insn->header.compression_control = BRW_COMPRESSION_NONE;

   if (intel->gen >= 6) {
      /* headerless version, just submit color payload */
      src0 = brw_message_reg(msg_reg_nr);

      msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   } else {
      insn->header.destreg__conditionalmod = msg_reg_nr;

      msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   }

   if (dispatch_width == 16)
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
   else
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_dp_write_message(p,
                            insn,
                            binding_table_index,
                            msg_control,
                            msg_type,
                            msg_length,
                            header_present,
                            1, /* pixel scoreboard */
                            response_length,
                            eot,
                            0 /* send_commit_msg */);
}


/**
 * Texture sample instruction.
 * Note: the msg_type plus msg_length values determine exactly what kind
 * of sampling operation is performed.  See volume 4, page 161 of docs.
 */
void brw_SAMPLE(struct brw_compile *p,
                struct brw_reg dest,
                GLuint msg_reg_nr,
                struct brw_reg src0,
                GLuint binding_table_index,
                GLuint sampler,
                GLuint writemask,
                GLuint msg_type,
                GLuint response_length,
                GLuint msg_length,
                GLboolean eot,
                GLuint header_present,
                GLuint simd_mode)
{
   struct intel_context *intel = &p->brw->intel;
   GLboolean need_stall = 0;

   /* No enabled channels: nothing to sample, emit nothing. */
   if (writemask == 0) {
      /*printf("%s: zero writemask??\n", __FUNCTION__); */
      return;
   }

   /* Hardware doesn't do destination dependency checking on send
    * instructions properly.  Add a workaround which generates the
    * dependency by other means.  In practice it seems like this bug
    * only crops up for texture samples, and only where registers are
    * written by the send and then written again later without being
    * read in between.  Luckily for us, we already track that
    * information and use it to modify the writemask for the
    * instruction, so that is a guide for whether a workaround is
    * needed.
    */
   if (writemask != WRITEMASK_XYZW) {
      GLuint dst_offset = 0;
      GLuint i, newmask = 0, len = 0;

      /* Skip leading disabled channels, advancing the destination past
       * them.  NOTE(review): 2 registers per skipped channel —
       * presumably to cover the SIMD16 two-register-per-channel layout;
       * confirm against the sampler response format.
       */
      for (i = 0; i < 4; i++) {
         if (writemask & (1<<i))
            break;
         dst_offset += 2;
      }
      /* Collect the contiguous run of enabled channels that follows. */
      for (; i < 4; i++) {
         if (!(writemask & (1<<i)))
            break;
         newmask |= 1<<i;
         len++;
      }

      /* If enabled channels are not one contiguous run, we can't express
       * the mask via the message header; fall back to a stalling read of
       * the destination after the sample instead.
       */
      if (newmask != writemask) {
         need_stall = 1;
         /* printf("need stall %x %x\n", newmask , writemask); */
      }
      else {
         GLboolean dispatch_16 = GL_FALSE;

         struct brw_reg m1 = brw_message_reg(msg_reg_nr);

         guess_execution_size(p, p->current, dest);
         if (p->current->header.execution_size == BRW_EXECUTE_16)
            dispatch_16 = GL_TRUE;

         /* Invert: the header field masks out *disabled* channels. */
         newmask = ~newmask & WRITEMASK_XYZW;

         brw_push_insn_state(p);

         brw_set_compression_control(p, BRW_COMPRESSION_NONE);
         brw_set_mask_control(p, BRW_MASK_DISABLE);

         /* Copy g0 into the header, then set the channel-disable bits
          * (bits 12..15 of M1.2).
          */
         brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD),
                 retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD));
         brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));

         brw_pop_insn_state(p);

         src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
         dest = offset(dest, dst_offset);

         /* For 16-wide dispatch, masked channels are skipped in the
          * response.  For 8-wide, masked channels still take up slots,
          * and are just not written to.
          */
         if (dispatch_16)
            response_length = len * 2;
      }
   }

   {
      struct brw_instruction *insn;

      /* Gen6+ requires the payload in the MRF; updates src0 in place. */
      gen6_resolve_implied_move(p, &src0, msg_reg_nr);

      insn = next_insn(p, BRW_OPCODE_SEND);
      insn->header.predicate_control = 0; /* XXX */
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      if (intel->gen < 6)
          insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src0);
      brw_set_sampler_message(p, insn,
                              binding_table_index,
                              sampler,
                              msg_type,
                              response_length,
                              msg_length,
                              eot,
                              header_present,
                              simd_mode);
   }

   /* Force the dependency workaround: read-and-rewrite the last response
    * register so a later writer must wait for the sample to land.
    */
   if (need_stall) {
      struct brw_reg reg = vec8(offset(dest, response_length-1));

      /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 }
       */
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, retype(reg, BRW_REGISTER_TYPE_UD),
              retype(reg, BRW_REGISTER_TYPE_UD));
      brw_pop_insn_state(p);
   }

}

/* All these variables are pretty confusing - we might be better off
 * using bitmasks and macros for this, in the old style.  Or perhaps
 * just having the caller instantiate the fields in dword3 itself.
2164 */ 2165void brw_urb_WRITE(struct brw_compile *p, 2166 struct brw_reg dest, 2167 GLuint msg_reg_nr, 2168 struct brw_reg src0, 2169 GLboolean allocate, 2170 GLboolean used, 2171 GLuint msg_length, 2172 GLuint response_length, 2173 GLboolean eot, 2174 GLboolean writes_complete, 2175 GLuint offset, 2176 GLuint swizzle) 2177{ 2178 struct intel_context *intel = &p->brw->intel; 2179 struct brw_instruction *insn; 2180 2181 gen6_resolve_implied_move(p, &src0, msg_reg_nr); 2182 2183 if (intel->gen == 7) { 2184 /* Enable Channel Masks in the URB_WRITE_HWORD message header */ 2185 brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5), 2186 BRW_REGISTER_TYPE_UD), 2187 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD), 2188 brw_imm_ud(0xff00)); 2189 } 2190 2191 insn = next_insn(p, BRW_OPCODE_SEND); 2192 2193 assert(msg_length < BRW_MAX_MRF); 2194 2195 brw_set_dest(p, insn, dest); 2196 brw_set_src0(p, insn, src0); 2197 brw_set_src1(p, insn, brw_imm_d(0)); 2198 2199 if (intel->gen < 6) 2200 insn->header.destreg__conditionalmod = msg_reg_nr; 2201 2202 brw_set_urb_message(p, 2203 insn, 2204 allocate, 2205 used, 2206 msg_length, 2207 response_length, 2208 eot, 2209 writes_complete, 2210 offset, 2211 swizzle); 2212} 2213 2214static int 2215brw_find_next_block_end(struct brw_compile *p, int start) 2216{ 2217 int ip; 2218 2219 for (ip = start + 1; ip < p->nr_insn; ip++) { 2220 struct brw_instruction *insn = &p->store[ip]; 2221 2222 switch (insn->header.opcode) { 2223 case BRW_OPCODE_ENDIF: 2224 case BRW_OPCODE_ELSE: 2225 case BRW_OPCODE_WHILE: 2226 return ip; 2227 } 2228 } 2229 assert(!"not reached"); 2230 return start + 1; 2231} 2232 2233/* There is no DO instruction on gen6, so to find the end of the loop 2234 * we have to see if the loop is jumping back before our start 2235 * instruction. 
2236 */ 2237static int 2238brw_find_loop_end(struct brw_compile *p, int start) 2239{ 2240 int ip; 2241 int br = 2; 2242 2243 for (ip = start + 1; ip < p->nr_insn; ip++) { 2244 struct brw_instruction *insn = &p->store[ip]; 2245 2246 if (insn->header.opcode == BRW_OPCODE_WHILE) { 2247 if (ip + insn->bits1.branch_gen6.jump_count / br < start) 2248 return ip; 2249 } 2250 } 2251 assert(!"not reached"); 2252 return start + 1; 2253} 2254 2255/* After program generation, go back and update the UIP and JIP of 2256 * BREAK and CONT instructions to their correct locations. 2257 */ 2258void 2259brw_set_uip_jip(struct brw_compile *p) 2260{ 2261 struct intel_context *intel = &p->brw->intel; 2262 int ip; 2263 int br = 2; 2264 2265 if (intel->gen < 6) 2266 return; 2267 2268 for (ip = 0; ip < p->nr_insn; ip++) { 2269 struct brw_instruction *insn = &p->store[ip]; 2270 2271 switch (insn->header.opcode) { 2272 case BRW_OPCODE_BREAK: 2273 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip); 2274 insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip + 1); 2275 break; 2276 case BRW_OPCODE_CONTINUE: 2277 /* JIP is set at CONTINUE emit time, since that's when we 2278 * know where the start of the loop is. 
2279 */ 2280 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip); 2281 assert(insn->bits3.break_cont.uip != 0); 2282 assert(insn->bits3.break_cont.jip != 0); 2283 break; 2284 } 2285 } 2286} 2287 2288void brw_ff_sync(struct brw_compile *p, 2289 struct brw_reg dest, 2290 GLuint msg_reg_nr, 2291 struct brw_reg src0, 2292 GLboolean allocate, 2293 GLuint response_length, 2294 GLboolean eot) 2295{ 2296 struct intel_context *intel = &p->brw->intel; 2297 struct brw_instruction *insn; 2298 2299 gen6_resolve_implied_move(p, &src0, msg_reg_nr); 2300 2301 insn = next_insn(p, BRW_OPCODE_SEND); 2302 brw_set_dest(p, insn, dest); 2303 brw_set_src0(p, insn, src0); 2304 brw_set_src1(p, insn, brw_imm_d(0)); 2305 2306 if (intel->gen < 6) 2307 insn->header.destreg__conditionalmod = msg_reg_nr; 2308 2309 brw_set_ff_sync_message(p, 2310 insn, 2311 allocate, 2312 response_length, 2313 eot); 2314} 2315