brw_eu_emit.c revision 77397ef96edbc17a698ae2a02ec4807b1059c036
1/* 2 Copyright (C) Intel Corp. 2006. All Rights Reserved. 3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to 4 develop this 3D driver. 5 6 Permission is hereby granted, free of charge, to any person obtaining 7 a copy of this software and associated documentation files (the 8 "Software"), to deal in the Software without restriction, including 9 without limitation the rights to use, copy, modify, merge, publish, 10 distribute, sublicense, and/or sell copies of the Software, and to 11 permit persons to whom the Software is furnished to do so, subject to 12 the following conditions: 13 14 The above copyright notice and this permission notice (including the 15 next paragraph) shall be included in all copies or substantial 16 portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
25 26 **********************************************************************/ 27 /* 28 * Authors: 29 * Keith Whitwell <keith@tungstengraphics.com> 30 */ 31 32 33#include "brw_context.h" 34#include "brw_defines.h" 35#include "brw_eu.h" 36 37#include "../glsl/ralloc.h" 38 39/*********************************************************************** 40 * Internal helper for constructing instructions 41 */ 42 43static void guess_execution_size(struct brw_compile *p, 44 struct brw_instruction *insn, 45 struct brw_reg reg) 46{ 47 if (reg.width == BRW_WIDTH_8 && p->compressed) 48 insn->header.execution_size = BRW_EXECUTE_16; 49 else 50 insn->header.execution_size = reg.width; /* note - definitions are compatible */ 51} 52 53 54/** 55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source 56 * registers, implicitly moving the operand to a message register. 57 * 58 * On Sandybridge, this is no longer the case. This function performs the 59 * explicit move; it should be called before emitting a SEND instruction. 
60 */ 61static void 62gen6_resolve_implied_move(struct brw_compile *p, 63 struct brw_reg *src, 64 GLuint msg_reg_nr) 65{ 66 struct intel_context *intel = &p->brw->intel; 67 if (intel->gen < 6) 68 return; 69 70 if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) { 71 brw_push_insn_state(p); 72 brw_set_mask_control(p, BRW_MASK_DISABLE); 73 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 74 brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD), 75 retype(*src, BRW_REGISTER_TYPE_UD)); 76 brw_pop_insn_state(p); 77 } 78 *src = brw_message_reg(msg_reg_nr); 79} 80 81static void 82gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg) 83{ 84 struct intel_context *intel = &p->brw->intel; 85 if (intel->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) { 86 reg->file = BRW_GENERAL_REGISTER_FILE; 87 reg->nr += 111; 88 } 89} 90 91 92static void brw_set_dest(struct brw_compile *p, 93 struct brw_instruction *insn, 94 struct brw_reg dest) 95{ 96 struct intel_context *intel = &p->brw->intel; 97 98 if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && 99 dest.file != BRW_MESSAGE_REGISTER_FILE) 100 assert(dest.nr < 128); 101 102 gen7_convert_mrf_to_grf(p, &dest); 103 104 insn->bits1.da1.dest_reg_file = dest.file; 105 insn->bits1.da1.dest_reg_type = dest.type; 106 insn->bits1.da1.dest_address_mode = dest.address_mode; 107 108 if (dest.address_mode == BRW_ADDRESS_DIRECT) { 109 insn->bits1.da1.dest_reg_nr = dest.nr; 110 111 if (insn->header.access_mode == BRW_ALIGN_1) { 112 insn->bits1.da1.dest_subreg_nr = dest.subnr; 113 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) 114 dest.hstride = BRW_HORIZONTAL_STRIDE_1; 115 insn->bits1.da1.dest_horiz_stride = dest.hstride; 116 } 117 else { 118 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; 119 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; 120 /* even ignored in da16, still need to set as '01' */ 121 insn->bits1.da16.dest_horiz_stride = 1; 122 } 123 } 124 else { 125 
insn->bits1.ia1.dest_subreg_nr = dest.subnr; 126 127 /* These are different sizes in align1 vs align16: 128 */ 129 if (insn->header.access_mode == BRW_ALIGN_1) { 130 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; 131 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) 132 dest.hstride = BRW_HORIZONTAL_STRIDE_1; 133 insn->bits1.ia1.dest_horiz_stride = dest.hstride; 134 } 135 else { 136 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; 137 /* even ignored in da16, still need to set as '01' */ 138 insn->bits1.ia16.dest_horiz_stride = 1; 139 } 140 } 141 142 /* NEW: Set the execution size based on dest.width and 143 * insn->compression_control: 144 */ 145 guess_execution_size(p, insn, dest); 146} 147 148extern int reg_type_size[]; 149 150static void 151validate_reg(struct brw_instruction *insn, struct brw_reg reg) 152{ 153 int hstride_for_reg[] = {0, 1, 2, 4}; 154 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256}; 155 int width_for_reg[] = {1, 2, 4, 8, 16}; 156 int execsize_for_reg[] = {1, 2, 4, 8, 16}; 157 int width, hstride, vstride, execsize; 158 159 if (reg.file == BRW_IMMEDIATE_VALUE) { 160 /* 3.3.6: Region Parameters. Restriction: Immediate vectors 161 * mean the destination has to be 128-bit aligned and the 162 * destination horiz stride has to be a word. 
163 */ 164 if (reg.type == BRW_REGISTER_TYPE_V) { 165 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] * 166 reg_type_size[insn->bits1.da1.dest_reg_type] == 2); 167 } 168 169 return; 170 } 171 172 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE && 173 reg.file == BRW_ARF_NULL) 174 return; 175 176 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg)); 177 hstride = hstride_for_reg[reg.hstride]; 178 179 if (reg.vstride == 0xf) { 180 vstride = -1; 181 } else { 182 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg)); 183 vstride = vstride_for_reg[reg.vstride]; 184 } 185 186 assert(reg.width >= 0 && reg.width < Elements(width_for_reg)); 187 width = width_for_reg[reg.width]; 188 189 assert(insn->header.execution_size >= 0 && 190 insn->header.execution_size < Elements(execsize_for_reg)); 191 execsize = execsize_for_reg[insn->header.execution_size]; 192 193 /* Restrictions from 3.3.10: Register Region Restrictions. */ 194 /* 3. */ 195 assert(execsize >= width); 196 197 /* 4. */ 198 if (execsize == width && hstride != 0) { 199 assert(vstride == -1 || vstride == width * hstride); 200 } 201 202 /* 5. */ 203 if (execsize == width && hstride == 0) { 204 /* no restriction on vstride. */ 205 } 206 207 /* 6. */ 208 if (width == 1) { 209 assert(hstride == 0); 210 } 211 212 /* 7. */ 213 if (execsize == 1 && width == 1) { 214 assert(hstride == 0); 215 assert(vstride == 0); 216 } 217 218 /* 8. */ 219 if (vstride == 0 && hstride == 0) { 220 assert(width == 1); 221 } 222 223 /* 10. Check destination issues. 
*/ 224} 225 226static void brw_set_src0(struct brw_compile *p, 227 struct brw_instruction *insn, 228 struct brw_reg reg) 229{ 230 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) 231 assert(reg.nr < 128); 232 233 gen7_convert_mrf_to_grf(p, ®); 234 235 validate_reg(insn, reg); 236 237 insn->bits1.da1.src0_reg_file = reg.file; 238 insn->bits1.da1.src0_reg_type = reg.type; 239 insn->bits2.da1.src0_abs = reg.abs; 240 insn->bits2.da1.src0_negate = reg.negate; 241 insn->bits2.da1.src0_address_mode = reg.address_mode; 242 243 if (reg.file == BRW_IMMEDIATE_VALUE) { 244 insn->bits3.ud = reg.dw1.ud; 245 246 /* Required to set some fields in src1 as well: 247 */ 248 insn->bits1.da1.src1_reg_file = 0; /* arf */ 249 insn->bits1.da1.src1_reg_type = reg.type; 250 } 251 else 252 { 253 if (reg.address_mode == BRW_ADDRESS_DIRECT) { 254 if (insn->header.access_mode == BRW_ALIGN_1) { 255 insn->bits2.da1.src0_subreg_nr = reg.subnr; 256 insn->bits2.da1.src0_reg_nr = reg.nr; 257 } 258 else { 259 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16; 260 insn->bits2.da16.src0_reg_nr = reg.nr; 261 } 262 } 263 else { 264 insn->bits2.ia1.src0_subreg_nr = reg.subnr; 265 266 if (insn->header.access_mode == BRW_ALIGN_1) { 267 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; 268 } 269 else { 270 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; 271 } 272 } 273 274 if (insn->header.access_mode == BRW_ALIGN_1) { 275 if (reg.width == BRW_WIDTH_1 && 276 insn->header.execution_size == BRW_EXECUTE_1) { 277 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0; 278 insn->bits2.da1.src0_width = BRW_WIDTH_1; 279 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0; 280 } 281 else { 282 insn->bits2.da1.src0_horiz_stride = reg.hstride; 283 insn->bits2.da1.src0_width = reg.width; 284 insn->bits2.da1.src0_vert_stride = reg.vstride; 285 } 286 } 287 else { 288 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); 289 insn->bits2.da16.src0_swz_y = 
BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); 290 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); 291 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); 292 293 /* This is an oddity of the fact we're using the same 294 * descriptions for registers in align_16 as align_1: 295 */ 296 if (reg.vstride == BRW_VERTICAL_STRIDE_8) 297 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4; 298 else 299 insn->bits2.da16.src0_vert_stride = reg.vstride; 300 } 301 } 302} 303 304 305void brw_set_src1(struct brw_compile *p, 306 struct brw_instruction *insn, 307 struct brw_reg reg) 308{ 309 assert(reg.file != BRW_MESSAGE_REGISTER_FILE); 310 311 assert(reg.nr < 128); 312 313 gen7_convert_mrf_to_grf(p, ®); 314 315 validate_reg(insn, reg); 316 317 insn->bits1.da1.src1_reg_file = reg.file; 318 insn->bits1.da1.src1_reg_type = reg.type; 319 insn->bits3.da1.src1_abs = reg.abs; 320 insn->bits3.da1.src1_negate = reg.negate; 321 322 /* Only src1 can be immediate in two-argument instructions. 
323 */ 324 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE); 325 326 if (reg.file == BRW_IMMEDIATE_VALUE) { 327 insn->bits3.ud = reg.dw1.ud; 328 } 329 else { 330 /* This is a hardware restriction, which may or may not be lifted 331 * in the future: 332 */ 333 assert (reg.address_mode == BRW_ADDRESS_DIRECT); 334 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */ 335 336 if (insn->header.access_mode == BRW_ALIGN_1) { 337 insn->bits3.da1.src1_subreg_nr = reg.subnr; 338 insn->bits3.da1.src1_reg_nr = reg.nr; 339 } 340 else { 341 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16; 342 insn->bits3.da16.src1_reg_nr = reg.nr; 343 } 344 345 if (insn->header.access_mode == BRW_ALIGN_1) { 346 if (reg.width == BRW_WIDTH_1 && 347 insn->header.execution_size == BRW_EXECUTE_1) { 348 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0; 349 insn->bits3.da1.src1_width = BRW_WIDTH_1; 350 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0; 351 } 352 else { 353 insn->bits3.da1.src1_horiz_stride = reg.hstride; 354 insn->bits3.da1.src1_width = reg.width; 355 insn->bits3.da1.src1_vert_stride = reg.vstride; 356 } 357 } 358 else { 359 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); 360 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); 361 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); 362 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); 363 364 /* This is an oddity of the fact we're using the same 365 * descriptions for registers in align_16 as align_1: 366 */ 367 if (reg.vstride == BRW_VERTICAL_STRIDE_8) 368 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4; 369 else 370 insn->bits3.da16.src1_vert_stride = reg.vstride; 371 } 372 } 373} 374 375 376 377static void brw_set_math_message( struct brw_compile *p, 378 struct brw_instruction *insn, 379 GLuint msg_length, 380 GLuint response_length, 381 GLuint function, 382 GLuint 
integer_type, 383 GLboolean low_precision, 384 GLboolean saturate, 385 GLuint dataType ) 386{ 387 struct brw_context *brw = p->brw; 388 struct intel_context *intel = &brw->intel; 389 brw_set_src1(p, insn, brw_imm_d(0)); 390 391 if (intel->gen == 5) { 392 insn->bits3.math_gen5.function = function; 393 insn->bits3.math_gen5.int_type = integer_type; 394 insn->bits3.math_gen5.precision = low_precision; 395 insn->bits3.math_gen5.saturate = saturate; 396 insn->bits3.math_gen5.data_type = dataType; 397 insn->bits3.math_gen5.snapshot = 0; 398 insn->bits3.math_gen5.header_present = 0; 399 insn->bits3.math_gen5.response_length = response_length; 400 insn->bits3.math_gen5.msg_length = msg_length; 401 insn->bits3.math_gen5.end_of_thread = 0; 402 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH; 403 insn->bits2.send_gen5.end_of_thread = 0; 404 } else { 405 insn->bits3.math.function = function; 406 insn->bits3.math.int_type = integer_type; 407 insn->bits3.math.precision = low_precision; 408 insn->bits3.math.saturate = saturate; 409 insn->bits3.math.data_type = dataType; 410 insn->bits3.math.response_length = response_length; 411 insn->bits3.math.msg_length = msg_length; 412 insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH; 413 insn->bits3.math.end_of_thread = 0; 414 } 415} 416 417 418static void brw_set_ff_sync_message(struct brw_compile *p, 419 struct brw_instruction *insn, 420 GLboolean allocate, 421 GLuint response_length, 422 GLboolean end_of_thread) 423{ 424 struct brw_context *brw = p->brw; 425 struct intel_context *intel = &brw->intel; 426 brw_set_src1(p, insn, brw_imm_d(0)); 427 428 insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */ 429 insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */ 430 insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */ 431 insn->bits3.urb_gen5.allocate = allocate; 432 insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */ 433 insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */ 434 
insn->bits3.urb_gen5.header_present = 1; 435 insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */ 436 insn->bits3.urb_gen5.msg_length = 1; 437 insn->bits3.urb_gen5.end_of_thread = end_of_thread; 438 if (intel->gen >= 6) { 439 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB; 440 } else { 441 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB; 442 insn->bits2.send_gen5.end_of_thread = end_of_thread; 443 } 444} 445 446static void brw_set_urb_message( struct brw_compile *p, 447 struct brw_instruction *insn, 448 GLboolean allocate, 449 GLboolean used, 450 GLuint msg_length, 451 GLuint response_length, 452 GLboolean end_of_thread, 453 GLboolean complete, 454 GLuint offset, 455 GLuint swizzle_control ) 456{ 457 struct brw_context *brw = p->brw; 458 struct intel_context *intel = &brw->intel; 459 brw_set_src1(p, insn, brw_imm_d(0)); 460 461 if (intel->gen == 7) { 462 insn->bits3.urb_gen7.opcode = 0; /* URB_WRITE_HWORD */ 463 insn->bits3.urb_gen7.offset = offset; 464 assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE); 465 insn->bits3.urb_gen7.swizzle_control = swizzle_control; 466 /* per_slot_offset = 0 makes it ignore offsets in message header */ 467 insn->bits3.urb_gen7.per_slot_offset = 0; 468 insn->bits3.urb_gen7.complete = complete; 469 insn->bits3.urb_gen7.header_present = 1; 470 insn->bits3.urb_gen7.response_length = response_length; 471 insn->bits3.urb_gen7.msg_length = msg_length; 472 insn->bits3.urb_gen7.end_of_thread = end_of_thread; 473 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB; 474 } else if (intel->gen >= 5) { 475 insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */ 476 insn->bits3.urb_gen5.offset = offset; 477 insn->bits3.urb_gen5.swizzle_control = swizzle_control; 478 insn->bits3.urb_gen5.allocate = allocate; 479 insn->bits3.urb_gen5.used = used; /* ? 
*/ 480 insn->bits3.urb_gen5.complete = complete; 481 insn->bits3.urb_gen5.header_present = 1; 482 insn->bits3.urb_gen5.response_length = response_length; 483 insn->bits3.urb_gen5.msg_length = msg_length; 484 insn->bits3.urb_gen5.end_of_thread = end_of_thread; 485 if (intel->gen >= 6) { 486 /* For SNB, the SFID bits moved to the condmod bits, and 487 * EOT stayed in bits3 above. Does the EOT bit setting 488 * below on Ironlake even do anything? 489 */ 490 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB; 491 } else { 492 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB; 493 insn->bits2.send_gen5.end_of_thread = end_of_thread; 494 } 495 } else { 496 insn->bits3.urb.opcode = 0; /* ? */ 497 insn->bits3.urb.offset = offset; 498 insn->bits3.urb.swizzle_control = swizzle_control; 499 insn->bits3.urb.allocate = allocate; 500 insn->bits3.urb.used = used; /* ? */ 501 insn->bits3.urb.complete = complete; 502 insn->bits3.urb.response_length = response_length; 503 insn->bits3.urb.msg_length = msg_length; 504 insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB; 505 insn->bits3.urb.end_of_thread = end_of_thread; 506 } 507} 508 509static void brw_set_dp_write_message( struct brw_compile *p, 510 struct brw_instruction *insn, 511 GLuint binding_table_index, 512 GLuint msg_control, 513 GLuint msg_type, 514 GLuint msg_length, 515 GLboolean header_present, 516 GLuint pixel_scoreboard_clear, 517 GLuint response_length, 518 GLuint end_of_thread, 519 GLuint send_commit_msg) 520{ 521 struct brw_context *brw = p->brw; 522 struct intel_context *intel = &brw->intel; 523 brw_set_src1(p, insn, brw_imm_ud(0)); 524 525 if (intel->gen >= 7) { 526 insn->bits3.gen7_dp.binding_table_index = binding_table_index; 527 insn->bits3.gen7_dp.msg_control = msg_control; 528 insn->bits3.gen7_dp.pixel_scoreboard_clear = pixel_scoreboard_clear; 529 insn->bits3.gen7_dp.msg_type = msg_type; 530 insn->bits3.gen7_dp.header_present = header_present; 531 insn->bits3.gen7_dp.response_length = 
response_length; 532 insn->bits3.gen7_dp.msg_length = msg_length; 533 insn->bits3.gen7_dp.end_of_thread = end_of_thread; 534 535 /* We always use the render cache for write messages */ 536 insn->header.destreg__conditionalmod = GEN6_MESSAGE_TARGET_DP_RENDER_CACHE; 537 } else if (intel->gen == 6) { 538 insn->bits3.gen6_dp.binding_table_index = binding_table_index; 539 insn->bits3.gen6_dp.msg_control = msg_control; 540 insn->bits3.gen6_dp.pixel_scoreboard_clear = pixel_scoreboard_clear; 541 insn->bits3.gen6_dp.msg_type = msg_type; 542 insn->bits3.gen6_dp.send_commit_msg = send_commit_msg; 543 insn->bits3.gen6_dp.header_present = header_present; 544 insn->bits3.gen6_dp.response_length = response_length; 545 insn->bits3.gen6_dp.msg_length = msg_length; 546 insn->bits3.gen6_dp.end_of_thread = end_of_thread; 547 548 /* We always use the render cache for write messages */ 549 insn->header.destreg__conditionalmod = GEN6_MESSAGE_TARGET_DP_RENDER_CACHE; 550 } else if (intel->gen == 5) { 551 insn->bits3.dp_write_gen5.binding_table_index = binding_table_index; 552 insn->bits3.dp_write_gen5.msg_control = msg_control; 553 insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear; 554 insn->bits3.dp_write_gen5.msg_type = msg_type; 555 insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg; 556 insn->bits3.dp_write_gen5.header_present = header_present; 557 insn->bits3.dp_write_gen5.response_length = response_length; 558 insn->bits3.dp_write_gen5.msg_length = msg_length; 559 insn->bits3.dp_write_gen5.end_of_thread = end_of_thread; 560 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE; 561 insn->bits2.send_gen5.end_of_thread = end_of_thread; 562 } else { 563 insn->bits3.dp_write.binding_table_index = binding_table_index; 564 insn->bits3.dp_write.msg_control = msg_control; 565 insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear; 566 insn->bits3.dp_write.msg_type = msg_type; 567 insn->bits3.dp_write.send_commit_msg = send_commit_msg; 
568 insn->bits3.dp_write.response_length = response_length; 569 insn->bits3.dp_write.msg_length = msg_length; 570 insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE; 571 insn->bits3.dp_write.end_of_thread = end_of_thread; 572 } 573} 574 575static void 576brw_set_dp_read_message(struct brw_compile *p, 577 struct brw_instruction *insn, 578 GLuint binding_table_index, 579 GLuint msg_control, 580 GLuint msg_type, 581 GLuint target_cache, 582 GLuint msg_length, 583 GLuint response_length) 584{ 585 struct brw_context *brw = p->brw; 586 struct intel_context *intel = &brw->intel; 587 brw_set_src1(p, insn, brw_imm_d(0)); 588 589 if (intel->gen >= 6) { 590 uint32_t target_function; 591 592 if (target_cache == BRW_DATAPORT_READ_TARGET_DATA_CACHE) 593 target_function = GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE; 594 else 595 target_function = GEN6_MESSAGE_TARGET_DP_RENDER_CACHE; 596 597 insn->bits3.gen6_dp.binding_table_index = binding_table_index; 598 insn->bits3.gen6_dp.msg_control = msg_control; 599 insn->bits3.gen6_dp.pixel_scoreboard_clear = 0; 600 insn->bits3.gen6_dp.msg_type = msg_type; 601 insn->bits3.gen6_dp.send_commit_msg = 0; 602 insn->bits3.gen6_dp.header_present = 1; 603 insn->bits3.gen6_dp.response_length = response_length; 604 insn->bits3.gen6_dp.msg_length = msg_length; 605 insn->bits3.gen6_dp.end_of_thread = 0; 606 insn->header.destreg__conditionalmod = target_function; 607 } else if (intel->gen == 5) { 608 insn->bits3.dp_read_gen5.binding_table_index = binding_table_index; 609 insn->bits3.dp_read_gen5.msg_control = msg_control; 610 insn->bits3.dp_read_gen5.msg_type = msg_type; 611 insn->bits3.dp_read_gen5.target_cache = target_cache; 612 insn->bits3.dp_read_gen5.header_present = 1; 613 insn->bits3.dp_read_gen5.response_length = response_length; 614 insn->bits3.dp_read_gen5.msg_length = msg_length; 615 insn->bits3.dp_read_gen5.pad1 = 0; 616 insn->bits3.dp_read_gen5.end_of_thread = 0; 617 insn->bits2.send_gen5.sfid = 
BRW_MESSAGE_TARGET_DATAPORT_READ; 618 insn->bits2.send_gen5.end_of_thread = 0; 619 } else if (intel->is_g4x) { 620 insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/ 621 insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/ 622 insn->bits3.dp_read_g4x.msg_type = msg_type; /*11:13*/ 623 insn->bits3.dp_read_g4x.target_cache = target_cache; /*14:15*/ 624 insn->bits3.dp_read_g4x.response_length = response_length; /*16:19*/ 625 insn->bits3.dp_read_g4x.msg_length = msg_length; /*20:23*/ 626 insn->bits3.dp_read_g4x.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/ 627 insn->bits3.dp_read_g4x.pad1 = 0; 628 insn->bits3.dp_read_g4x.end_of_thread = 0; 629 } else { 630 insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/ 631 insn->bits3.dp_read.msg_control = msg_control; /*8:11*/ 632 insn->bits3.dp_read.msg_type = msg_type; /*12:13*/ 633 insn->bits3.dp_read.target_cache = target_cache; /*14:15*/ 634 insn->bits3.dp_read.response_length = response_length; /*16:19*/ 635 insn->bits3.dp_read.msg_length = msg_length; /*20:23*/ 636 insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/ 637 insn->bits3.dp_read.pad1 = 0; /*28:30*/ 638 insn->bits3.dp_read.end_of_thread = 0; /*31*/ 639 } 640} 641 642static void brw_set_sampler_message(struct brw_compile *p, 643 struct brw_instruction *insn, 644 GLuint binding_table_index, 645 GLuint sampler, 646 GLuint msg_type, 647 GLuint response_length, 648 GLuint msg_length, 649 GLboolean eot, 650 GLuint header_present, 651 GLuint simd_mode) 652{ 653 struct brw_context *brw = p->brw; 654 struct intel_context *intel = &brw->intel; 655 assert(eot == 0); 656 brw_set_src1(p, insn, brw_imm_d(0)); 657 658 if (intel->gen >= 7) { 659 insn->bits3.sampler_gen7.binding_table_index = binding_table_index; 660 insn->bits3.sampler_gen7.sampler = sampler; 661 insn->bits3.sampler_gen7.msg_type = msg_type; 662 insn->bits3.sampler_gen7.simd_mode = simd_mode; 663 
insn->bits3.sampler_gen7.header_present = header_present; 664 insn->bits3.sampler_gen7.response_length = response_length; 665 insn->bits3.sampler_gen7.msg_length = msg_length; 666 insn->bits3.sampler_gen7.end_of_thread = eot; 667 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_SAMPLER; 668 } else if (intel->gen >= 5) { 669 insn->bits3.sampler_gen5.binding_table_index = binding_table_index; 670 insn->bits3.sampler_gen5.sampler = sampler; 671 insn->bits3.sampler_gen5.msg_type = msg_type; 672 insn->bits3.sampler_gen5.simd_mode = simd_mode; 673 insn->bits3.sampler_gen5.header_present = header_present; 674 insn->bits3.sampler_gen5.response_length = response_length; 675 insn->bits3.sampler_gen5.msg_length = msg_length; 676 insn->bits3.sampler_gen5.end_of_thread = eot; 677 if (intel->gen >= 6) 678 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_SAMPLER; 679 else { 680 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER; 681 insn->bits2.send_gen5.end_of_thread = eot; 682 } 683 } else if (intel->is_g4x) { 684 insn->bits3.sampler_g4x.binding_table_index = binding_table_index; 685 insn->bits3.sampler_g4x.sampler = sampler; 686 insn->bits3.sampler_g4x.msg_type = msg_type; 687 insn->bits3.sampler_g4x.response_length = response_length; 688 insn->bits3.sampler_g4x.msg_length = msg_length; 689 insn->bits3.sampler_g4x.end_of_thread = eot; 690 insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER; 691 } else { 692 insn->bits3.sampler.binding_table_index = binding_table_index; 693 insn->bits3.sampler.sampler = sampler; 694 insn->bits3.sampler.msg_type = msg_type; 695 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; 696 insn->bits3.sampler.response_length = response_length; 697 insn->bits3.sampler.msg_length = msg_length; 698 insn->bits3.sampler.end_of_thread = eot; 699 insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER; 700 } 701} 702 703 704 705static struct brw_instruction *next_insn( struct brw_compile *p, 706 GLuint 
opcode ) 707{ 708 struct brw_instruction *insn; 709 710 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN); 711 712 insn = &p->store[p->nr_insn++]; 713 memcpy(insn, p->current, sizeof(*insn)); 714 715 /* Reset this one-shot flag: 716 */ 717 718 if (p->current->header.destreg__conditionalmod) { 719 p->current->header.destreg__conditionalmod = 0; 720 p->current->header.predicate_control = BRW_PREDICATE_NORMAL; 721 } 722 723 insn->header.opcode = opcode; 724 return insn; 725} 726 727 728static struct brw_instruction *brw_alu1( struct brw_compile *p, 729 GLuint opcode, 730 struct brw_reg dest, 731 struct brw_reg src ) 732{ 733 struct brw_instruction *insn = next_insn(p, opcode); 734 brw_set_dest(p, insn, dest); 735 brw_set_src0(p, insn, src); 736 return insn; 737} 738 739static struct brw_instruction *brw_alu2(struct brw_compile *p, 740 GLuint opcode, 741 struct brw_reg dest, 742 struct brw_reg src0, 743 struct brw_reg src1 ) 744{ 745 struct brw_instruction *insn = next_insn(p, opcode); 746 brw_set_dest(p, insn, dest); 747 brw_set_src0(p, insn, src0); 748 brw_set_src1(p, insn, src1); 749 return insn; 750} 751 752 753/*********************************************************************** 754 * Convenience routines. 755 */ 756#define ALU1(OP) \ 757struct brw_instruction *brw_##OP(struct brw_compile *p, \ 758 struct brw_reg dest, \ 759 struct brw_reg src0) \ 760{ \ 761 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \ 762} 763 764#define ALU2(OP) \ 765struct brw_instruction *brw_##OP(struct brw_compile *p, \ 766 struct brw_reg dest, \ 767 struct brw_reg src0, \ 768 struct brw_reg src1) \ 769{ \ 770 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \ 771} 772 773/* Rounding operations (other than RNDD) require two instructions - the first 774 * stores a rounded value (possibly the wrong way) in the dest register, but 775 * also sets a per-channel "increment bit" in the flag register. A predicated 776 * add of 1.0 fixes dest to contain the desired result. 
777 */ 778#define ROUND(OP) \ 779void brw_##OP(struct brw_compile *p, \ 780 struct brw_reg dest, \ 781 struct brw_reg src) \ 782{ \ 783 struct brw_instruction *rnd, *add; \ 784 rnd = next_insn(p, BRW_OPCODE_##OP); \ 785 brw_set_dest(p, rnd, dest); \ 786 brw_set_src0(p, rnd, src); \ 787 rnd->header.destreg__conditionalmod = 0x7; /* turn on round-increments */ \ 788 \ 789 add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \ 790 add->header.predicate_control = BRW_PREDICATE_NORMAL; \ 791} 792 793 794ALU1(MOV) 795ALU2(SEL) 796ALU1(NOT) 797ALU2(AND) 798ALU2(OR) 799ALU2(XOR) 800ALU2(SHR) 801ALU2(SHL) 802ALU2(RSR) 803ALU2(RSL) 804ALU2(ASR) 805ALU1(FRC) 806ALU1(RNDD) 807ALU2(MAC) 808ALU2(MACH) 809ALU1(LZD) 810ALU2(DP4) 811ALU2(DPH) 812ALU2(DP3) 813ALU2(DP2) 814ALU2(LINE) 815ALU2(PLN) 816 817 818ROUND(RNDZ) 819ROUND(RNDE) 820 821 822struct brw_instruction *brw_ADD(struct brw_compile *p, 823 struct brw_reg dest, 824 struct brw_reg src0, 825 struct brw_reg src1) 826{ 827 /* 6.2.2: add */ 828 if (src0.type == BRW_REGISTER_TYPE_F || 829 (src0.file == BRW_IMMEDIATE_VALUE && 830 src0.type == BRW_REGISTER_TYPE_VF)) { 831 assert(src1.type != BRW_REGISTER_TYPE_UD); 832 assert(src1.type != BRW_REGISTER_TYPE_D); 833 } 834 835 if (src1.type == BRW_REGISTER_TYPE_F || 836 (src1.file == BRW_IMMEDIATE_VALUE && 837 src1.type == BRW_REGISTER_TYPE_VF)) { 838 assert(src0.type != BRW_REGISTER_TYPE_UD); 839 assert(src0.type != BRW_REGISTER_TYPE_D); 840 } 841 842 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1); 843} 844 845struct brw_instruction *brw_MUL(struct brw_compile *p, 846 struct brw_reg dest, 847 struct brw_reg src0, 848 struct brw_reg src1) 849{ 850 /* 6.32.38: mul */ 851 if (src0.type == BRW_REGISTER_TYPE_D || 852 src0.type == BRW_REGISTER_TYPE_UD || 853 src1.type == BRW_REGISTER_TYPE_D || 854 src1.type == BRW_REGISTER_TYPE_UD) { 855 assert(dest.type != BRW_REGISTER_TYPE_F); 856 } 857 858 if (src0.type == BRW_REGISTER_TYPE_F || 859 (src0.file == BRW_IMMEDIATE_VALUE && 860 src0.type == 
BRW_REGISTER_TYPE_VF)) { 861 assert(src1.type != BRW_REGISTER_TYPE_UD); 862 assert(src1.type != BRW_REGISTER_TYPE_D); 863 } 864 865 if (src1.type == BRW_REGISTER_TYPE_F || 866 (src1.file == BRW_IMMEDIATE_VALUE && 867 src1.type == BRW_REGISTER_TYPE_VF)) { 868 assert(src0.type != BRW_REGISTER_TYPE_UD); 869 assert(src0.type != BRW_REGISTER_TYPE_D); 870 } 871 872 assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE || 873 src0.nr != BRW_ARF_ACCUMULATOR); 874 assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE || 875 src1.nr != BRW_ARF_ACCUMULATOR); 876 877 return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1); 878} 879 880 881void brw_NOP(struct brw_compile *p) 882{ 883 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP); 884 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 885 brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 886 brw_set_src1(p, insn, brw_imm_ud(0x0)); 887} 888 889 890 891 892 893/*********************************************************************** 894 * Comparisons, if/else/endif 895 */ 896 897struct brw_instruction *brw_JMPI(struct brw_compile *p, 898 struct brw_reg dest, 899 struct brw_reg src0, 900 struct brw_reg src1) 901{ 902 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); 903 904 insn->header.execution_size = 1; 905 insn->header.compression_control = BRW_COMPRESSION_NONE; 906 insn->header.mask_control = BRW_MASK_DISABLE; 907 908 p->current->header.predicate_control = BRW_PREDICATE_NONE; 909 910 return insn; 911} 912 913static void 914push_if_stack(struct brw_compile *p, struct brw_instruction *inst) 915{ 916 p->if_stack[p->if_stack_depth] = inst; 917 918 p->if_stack_depth++; 919 if (p->if_stack_array_size <= p->if_stack_depth) { 920 p->if_stack_array_size *= 2; 921 p->if_stack = reralloc(p->mem_ctx, p->if_stack, struct brw_instruction *, 922 p->if_stack_array_size); 923 } 924} 925 926/* EU takes the value from the flag register and pushes it onto some 927 * sort 
of a stack (presumably merging with any flag value already on
 * the stack).  Within an if block, the flags at the top of the stack
 * control execution on each channel of the unit, eg. on each of the
 * 16 pixel values in our wm programs.
 *
 * When the matching 'else' instruction is reached (presumably by
 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
 *
 * When the matching 'endif' instruction is reached, the flags are
 * popped off.  If the stack is now empty, normal execution resumes.
 */
struct brw_instruction *
brw_IF(struct brw_compile *p, GLuint execute_size)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* Override the defaults for this instruction.  Jump targets are left
    * zero here and patched later by brw_ENDIF()/patch_IF_ELSE():
    * pre-gen6 encodes IP-relative operands, gen6 a jump_count field,
    * and gen7 JIP/UIP offsets.
    */
   if (intel->gen < 6) {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.execution_size = execute_size;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Predication applies to the IF itself; clear it for what follows. */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   push_if_stack(p, insn);
   return insn;
}

/* This function is only used for gen6-style IF instructions with an
 * embedded comparison (conditional modifier).  It is not used on gen7.
 */
struct brw_instruction *
gen6_IF(struct brw_compile *p, uint32_t conditional,
	struct brw_reg src0, struct brw_reg src1)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   brw_set_dest(p, insn, brw_imm_w(0));
   insn->header.execution_size = BRW_EXECUTE_8;
   /* jump_count is patched later by patch_IF_ELSE() when the matching
    * ENDIF is emitted.
    */
   insn->bits1.branch_gen6.jump_count = 0;
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

   assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
   assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
   /* The destreg field doubles as the conditional modifier for IF. */
   insn->header.destreg__conditionalmod = conditional;

   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
   return insn;
}

/**
 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
 */
static void
convert_IF_ELSE_to_ADD(struct brw_compile *p,
		       struct brw_instruction *if_inst,
		       struct brw_instruction *else_inst)
{
   /* The next instruction (where the ENDIF would be, if it existed) */
   struct brw_instruction *next_inst = &p->store[p->nr_insn];

   assert(p->single_program_flow);
   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
   assert(if_inst->header.execution_size == BRW_EXECUTE_1);

   /* Convert IF to an ADD instruction that moves the instruction pointer
    * to the first instruction of the ELSE block.  If there is no ELSE
    * block, point to where ENDIF would be.  Reverse the predicate.
    *
    * There's no need to execute an ENDIF since we don't need to do any
    * stack operations, and if we're currently executing, we just want to
    * continue normally.
    */
   if_inst->header.opcode = BRW_OPCODE_ADD;
   if_inst->header.predicate_inverse = 1;

   if (else_inst != NULL) {
      /* Convert ELSE to an ADD instruction that points where the ENDIF
       * would be.  Offsets are in bytes: 16 bytes per instruction.
       */
      else_inst->header.opcode = BRW_OPCODE_ADD;

      if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
      else_inst->bits3.ud = (next_inst - else_inst) * 16;
   } else {
      if_inst->bits3.ud = (next_inst - if_inst) * 16;
   }
}

/**
 * Patch IF and ELSE instructions with appropriate jump targets.
 */
static void
patch_IF_ELSE(struct brw_compile *p,
	      struct brw_instruction *if_inst,
	      struct brw_instruction *else_inst,
	      struct brw_instruction *endif_inst)
{
   struct intel_context *intel = &p->brw->intel;

   assert(!p->single_program_flow);
   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(endif_inst != NULL);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);

   unsigned br = 1;
   /* Jump count is for 64bit data chunk each, so one 128bit instruction
    * requires 2 chunks.
    */
   if (intel->gen >= 5)
      br = 2;

   assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
   endif_inst->header.execution_size = if_inst->header.execution_size;

   if (else_inst == NULL) {
      /* Patch IF -> ENDIF */
      if (intel->gen < 6) {
	 /* Turn it into an IFF, which means no mask stack operations for
	  * all-false and jumping past the ENDIF.
	  */
	 if_inst->header.opcode = BRW_OPCODE_IFF;
	 if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
	 if_inst->bits3.if_else.pop_count = 0;
	 if_inst->bits3.if_else.pad0 = 0;
      } else if (intel->gen == 6) {
	 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
	 if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
      } else {
	 /* gen7: both JIP and UIP point at the ENDIF. */
	 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
	 if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
      }
   } else {
      else_inst->header.execution_size = if_inst->header.execution_size;

      /* Patch IF -> ELSE */
      if (intel->gen < 6) {
	 if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
	 if_inst->bits3.if_else.pop_count = 0;
	 if_inst->bits3.if_else.pad0 = 0;
      } else if (intel->gen == 6) {
	 if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
      }

      /* Patch ELSE -> ENDIF */
      if (intel->gen < 6) {
	 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
	  * matching ENDIF.
	  */
	 else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
	 else_inst->bits3.if_else.pop_count = 1;
	 else_inst->bits3.if_else.pad0 = 0;
      } else if (intel->gen == 6) {
	 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
	 else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
      } else {
	 /* The IF instruction's JIP should point just past the ELSE */
	 if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
	 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
	 if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
	 else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
      }
   }
}

/* Emit an ELSE instruction.  Its jump fields are zeroed here and
 * patched later by brw_ENDIF().
 */
void
brw_ELSE(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_ELSE);

   if (intel->gen < 6) {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      insn->bits3.break_cont.jip = 0;
      insn->bits3.break_cont.uip = 0;
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
}

/* Emit an ENDIF and patch the jump targets of the matching IF (and
 * optional ELSE) popped from the if stack.
 */
void
brw_ENDIF(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   struct brw_instruction *else_inst = NULL;
   struct brw_instruction *if_inst = NULL;

   /* Pop the IF and (optional) ELSE instructions from the stack */
   p->if_stack_depth--;
   if (p->if_stack[p->if_stack_depth]->header.opcode ==
BRW_OPCODE_ELSE) {
      else_inst = p->if_stack[p->if_stack_depth];
      p->if_stack_depth--;
   }
   if_inst = p->if_stack[p->if_stack_depth];

   if (p->single_program_flow) {
      /* ENDIF is useless; don't bother emitting it. */
      convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
      return;
   }

   insn = next_insn(p, BRW_OPCODE_ENDIF);

   /* Per-generation operand encodings, mirroring brw_IF()/brw_ELSE(). */
   if (intel->gen < 6) {
      brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (intel->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Also pop item off the stack in the endif instruction: */
   if (intel->gen < 6) {
      insn->bits3.if_else.jump_count = 0;
      insn->bits3.if_else.pop_count = 1;
      insn->bits3.if_else.pad0 = 0;
   } else if (intel->gen == 6) {
      /* Jump to the next instruction (2 = one 128-bit instruction in
       * 64-bit chunks).
       */
      insn->bits1.branch_gen6.jump_count = 2;
   } else {
      insn->bits3.break_cont.jip = 2;
   }
   patch_IF_ELSE(p, if_inst, else_inst, insn);
}

/* Emit a BREAK.  pop_count (pre-gen6 only) is the number of mask-stack
 * entries popped when the break is taken; gen6+ has no mask stack.
 */
struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_BREAK);
   if (intel->gen >= 6) {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1220 brw_set_src1(p, insn, brw_imm_d(0x0)); 1221 } else { 1222 brw_set_dest(p, insn, brw_ip_reg()); 1223 brw_set_src0(p, insn, brw_ip_reg()); 1224 brw_set_src1(p, insn, brw_imm_d(0x0)); 1225 insn->bits3.if_else.pad0 = 0; 1226 insn->bits3.if_else.pop_count = pop_count; 1227 } 1228 insn->header.compression_control = BRW_COMPRESSION_NONE; 1229 insn->header.execution_size = BRW_EXECUTE_8; 1230 1231 return insn; 1232} 1233 1234struct brw_instruction *gen6_CONT(struct brw_compile *p, 1235 struct brw_instruction *do_insn) 1236{ 1237 struct brw_instruction *insn; 1238 int br = 2; 1239 1240 insn = next_insn(p, BRW_OPCODE_CONTINUE); 1241 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1242 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1243 brw_set_dest(p, insn, brw_ip_reg()); 1244 brw_set_src0(p, insn, brw_ip_reg()); 1245 brw_set_src1(p, insn, brw_imm_d(0x0)); 1246 1247 insn->bits3.break_cont.uip = br * (do_insn - insn); 1248 1249 insn->header.compression_control = BRW_COMPRESSION_NONE; 1250 insn->header.execution_size = BRW_EXECUTE_8; 1251 return insn; 1252} 1253 1254struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count) 1255{ 1256 struct brw_instruction *insn; 1257 insn = next_insn(p, BRW_OPCODE_CONTINUE); 1258 brw_set_dest(p, insn, brw_ip_reg()); 1259 brw_set_src0(p, insn, brw_ip_reg()); 1260 brw_set_src1(p, insn, brw_imm_d(0x0)); 1261 insn->header.compression_control = BRW_COMPRESSION_NONE; 1262 insn->header.execution_size = BRW_EXECUTE_8; 1263 /* insn->header.mask_control = BRW_MASK_DISABLE; */ 1264 insn->bits3.if_else.pad0 = 0; 1265 insn->bits3.if_else.pop_count = pop_count; 1266 return insn; 1267} 1268 1269/* DO/WHILE loop: 1270 * 1271 * The DO/WHILE is just an unterminated loop -- break or continue are 1272 * used for control within the loop. We have a few ways they can be 1273 * done. 1274 * 1275 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip, 1276 * jip and no DO instruction. 
 *
 * For non-uniform control flow pre-gen6, there's a DO instruction to
 * push the mask, and a WHILE to jump back, and BREAK to get out and
 * pop the mask.
 *
 * For gen6, there's no more mask stack, so no need for DO.  WHILE
 * just points back to the first instruction of the loop.
 */
struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
{
   struct intel_context *intel = &p->brw->intel;

   /* gen6+ and SPF mode emit no DO instruction at all; return a pointer
    * to the next instruction slot so brw_WHILE() knows the loop head.
    */
   if (intel->gen >= 6 || p->single_program_flow) {
      return &p->store[p->nr_insn];
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);

      /* Override the defaults for this instruction:
       */
      brw_set_dest(p, insn, brw_null_reg());
      brw_set_src0(p, insn, brw_null_reg());
      brw_set_src1(p, insn, brw_null_reg());

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = execute_size;
      insn->header.predicate_control = BRW_PREDICATE_NONE;
      /* insn->header.mask_control = BRW_MASK_ENABLE; */
      /* insn->header.mask_control = BRW_MASK_DISABLE; */

      return insn;
   }
}



/* Close a DO/WHILE loop; do_insn is the value returned by brw_DO().
 * From gen5 on, jump offsets are counted in 64-bit chunks (2 per
 * 128-bit instruction); before that, in whole instructions.
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p,
                                  struct brw_instruction *do_insn)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint br = 1;

   if (intel->gen >= 5)
      br = 2;

   if (intel->gen >= 7) {
      insn = next_insn(p, BRW_OPCODE_WHILE);

      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_ud(0));
      /* JIP points back at the loop head. */
      insn->bits3.break_cont.jip = br * (do_insn - insn);

      insn->header.execution_size = do_insn->header.execution_size;
      assert(insn->header.execution_size == BRW_EXECUTE_8);
   } else if (intel->gen == 6) {
      insn = next_insn(p, BRW_OPCODE_WHILE);

      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));

      insn->header.execution_size = do_insn->header.execution_size;
      assert(insn->header.execution_size == BRW_EXECUTE_8);
   } else {
      if (p->single_program_flow) {
	 /* SPF: uniform control flow, so a plain scalar IP add (16
	  * bytes per instruction) jumps back to the loop head.
	  */
	 insn = next_insn(p, BRW_OPCODE_ADD);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
	 insn->header.execution_size = BRW_EXECUTE_1;
      } else {
	 insn = next_insn(p, BRW_OPCODE_WHILE);

	 assert(do_insn->header.opcode == BRW_OPCODE_DO);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d(0));

	 insn->header.execution_size = do_insn->header.execution_size;
	 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
	 insn->bits3.if_else.pop_count = 0;
	 insn->bits3.if_else.pad0 = 0;
      }
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}


/* FORWARD JUMPS:
 */
void brw_land_fwd_jump(struct brw_compile *p,
		       struct brw_instruction *jmp_insn)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *landing = &p->store[p->nr_insn];
   GLuint jmpi = 1;

   if (intel->gen >= 5)
      jmpi = 2;

   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
   assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);

   /* Patch the JMPI immediate with the distance to the landing point,
    * minus one since the jump is relative to the next instruction.
    */
   jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
}



/* To integrate with the above, it makes sense that the comparison
 * instruction should populate the flag register.
It might be simpler
 * just to use the flag reg for most WM tasks?
 */
void brw_CMP(struct brw_compile *p,
	     struct brw_reg dest,
	     GLuint conditional,
	     struct brw_reg src0,
	     struct brw_reg src1)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);

   insn->header.destreg__conditionalmod = conditional;
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

/*    guess_execution_size(insn, src0); */


   /* Make it so that future instructions will use the computed flag
    * value until brw_set_predicate_control_flag_value() is called
    * again.
    *
    * NOTE(review): assumes an architecture-file register with nr 0 is
    * the null register (i.e. the CMP exists only for its flag write) --
    * confirm against brw_reg definitions.
    */
   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.nr == 0) {
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
      p->flag_value = 0xff;
   }
}

/* Issue 'wait' instruction for n1, host could program MMIO
   to wake up thread. */
void brw_WAIT (struct brw_compile *p)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
   struct brw_reg src = brw_notification_1_reg();

   brw_set_dest(p, insn, src);
   brw_set_src0(p, insn, src);
   brw_set_src1(p, insn, brw_null_reg());
   insn->header.execution_size = 0; /* must */
   insn->header.predicate_control = 0;
   insn->header.compression_control = 0;
}


/***********************************************************************
 * Helpers for the various SEND message types:
 */

/** Extended math function, float[8].
 */
void brw_math( struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       GLuint saturate,
	       GLuint msg_reg_nr,
	       struct brw_reg src,
	       GLuint data_type,
	       GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;

   if (intel->gen >= 6) {
      /* Gen6+: math is a regular MATH opcode rather than a SEND, so
       * dest/src must be GRFs with unit stride.
       */
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

      assert(dest.file == BRW_GENERAL_REGISTER_FILE);
      assert(src.file == BRW_GENERAL_REGISTER_FILE);

      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
      assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);

      /* Source modifiers are ignored for extended math instructions. */
      assert(!src.negate);
      assert(!src.abs);

      if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
	  function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
	 assert(src.type == BRW_REGISTER_TYPE_F);
      }

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;
      insn->header.saturate = saturate;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      /* POW sends two message registers; SINCOS returns two. */
      GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
      GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
      /* Example code doesn't set predicate_control for send
       * instructions.
       */
      insn->header.predicate_control = 0;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_math_message(p,
			   insn,
			   msg_length, response_length,
			   function,
			   BRW_MATH_INTEGER_UNSIGNED,
			   precision,
			   saturate,
			   data_type);
   }
}

/** Extended math function, float[8].
 */
void brw_math2(struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       struct brw_reg src0,
	       struct brw_reg src1)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

   /* Two-source math is only emitted as a native MATH opcode (gen6+). */
   assert(intel->gen >= 6);
   (void) intel;


   assert(dest.file == BRW_GENERAL_REGISTER_FILE);
   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.file == BRW_GENERAL_REGISTER_FILE);

   assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
   assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
   assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);

   if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
       function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
      assert(src0.type == BRW_REGISTER_TYPE_F);
      assert(src1.type == BRW_REGISTER_TYPE_F);
   }

   /* Source modifiers are ignored for extended math instructions. */
   assert(!src0.negate);
   assert(!src0.abs);
   assert(!src1.negate);
   assert(!src1.abs);

   /* Math is the same ISA format as other opcodes, except that CondModifier
    * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
    */
   insn->header.destreg__conditionalmod = function;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
}

/**
 * Extended math function, float[16].
 * Use 2 send instructions.
 */
void brw_math_16( struct brw_compile *p,
		  struct brw_reg dest,
		  GLuint function,
		  GLuint saturate,
		  GLuint msg_reg_nr,
		  struct brw_reg src,
		  GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   /* POW sends two message registers; SINCOS returns two. */
   GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
   GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;

   if (intel->gen >= 6) {
      /* Gen6+: a single native MATH instruction suffices. */
      insn = next_insn(p, BRW_OPCODE_MATH);

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;
      insn->header.saturate = saturate;

      /* Source modifiers are ignored for extended math instructions. */
      assert(!src.negate);
      assert(!src.abs);

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
      return;
   }

   /* Pre-gen6: issue the float[16] op as two sends -- the first for
    * the low half, the second for the high half using 2NDHALF
    * compression.
    */
   /* First instruction:
    */
   brw_push_insn_state(p);
   brw_set_predicate_control_flag_value(p, 0xff);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);
   brw_set_math_message(p,
			insn,
			msg_length, response_length,
			function,
			BRW_MATH_INTEGER_UNSIGNED,
			precision,
			saturate,
			BRW_MATH_DATA_VECTOR);

   /* Second instruction:
    */
   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
   insn->header.destreg__conditionalmod = msg_reg_nr+1;

   brw_set_dest(p, insn, offset(dest,1));
   brw_set_src0(p, insn, src);
   brw_set_math_message(p,
			insn,
			msg_length, response_length,
			function,
			BRW_MATH_INTEGER_UNSIGNED,
			precision,
			saturate,
			BRW_MATH_DATA_VECTOR);

   brw_pop_insn_state(p);
}


/**
 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
 * using a constant offset per channel.
 *
 * The offset must be aligned to oword size (16 bytes).  Used for
 * register spilling.
 */
void brw_oword_block_write_scratch(struct brw_compile *p,
				   struct brw_reg mrf,
				   int num_regs,
				   GLuint offset)
{
   struct intel_context *intel = &p->brw->intel;
   uint32_t msg_control, msg_type;
   int mlen;

   /* Gen6+ takes the scratch offset in owords rather than bytes. */
   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* One register = 2 owords (header + 1 data reg), else 4 owords. */
   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      mlen = 2;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      mlen = 3;
   }

   /* Set up the message header.  This is g0, with g0.2 filled with
    * the offset.  We don't want to leave our offset around in g0 or
    * it'll screw up texture samples, so set it up inside the message
    * reg.
    */
   {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
				  mrf.nr,
				  2), BRW_REGISTER_TYPE_UD),
	      brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_reg dest;
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      int send_commit_msg;
      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
					 BRW_REGISTER_TYPE_UW);

      if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
	 insn->header.compression_control = BRW_COMPRESSION_NONE;
	 src_header = vec16(src_header);
      }
      assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
      insn->header.destreg__conditionalmod = mrf.nr;

      /* Until gen6, writes followed by reads from the same location
       * are not guaranteed to be ordered unless write_commit is set.
       * If set, then a no-op write is issued to the destination
       * register to set a dependency, and a read from the destination
       * can be used to ensure the ordering.
       *
       * For gen6, only writes between different threads need ordering
       * protection.  Our use of DP writes is all about register
       * spilling within a thread.
       */
      if (intel->gen >= 6) {
	 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
	 send_commit_msg = 0;
      } else {
	 dest = src_header;
	 send_commit_msg = 1;
      }

      brw_set_dest(p, insn, dest);
      if (intel->gen >= 6) {
	 brw_set_src0(p, insn, mrf);
      } else {
	 brw_set_src0(p, insn, brw_null_reg());
      }

      if (intel->gen >= 6)
	 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
      else
	 msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;

      brw_set_dp_write_message(p,
			       insn,
			       255, /* binding table index (255=stateless) */
			       msg_control,
			       msg_type,
			       mlen,
			       GL_TRUE, /* header_present */
			       0, /* pixel scoreboard */
			       send_commit_msg, /* response_length */
			       0, /* eot */
			       send_commit_msg);
   }
}


/**
 * Read a block of owords (half a GRF each) from the scratch buffer
 * using a constant index per channel.
 *
 * Offset must be aligned to oword size (16 bytes).  Used for register
 * spilling.
1744 */ 1745void 1746brw_oword_block_read_scratch(struct brw_compile *p, 1747 struct brw_reg dest, 1748 struct brw_reg mrf, 1749 int num_regs, 1750 GLuint offset) 1751{ 1752 struct intel_context *intel = &p->brw->intel; 1753 uint32_t msg_control; 1754 int rlen; 1755 1756 if (intel->gen >= 6) 1757 offset /= 16; 1758 1759 mrf = retype(mrf, BRW_REGISTER_TYPE_UD); 1760 dest = retype(dest, BRW_REGISTER_TYPE_UW); 1761 1762 if (num_regs == 1) { 1763 msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS; 1764 rlen = 1; 1765 } else { 1766 msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS; 1767 rlen = 2; 1768 } 1769 1770 { 1771 brw_push_insn_state(p); 1772 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1773 brw_set_mask_control(p, BRW_MASK_DISABLE); 1774 1775 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); 1776 1777 /* set message header global offset field (reg 0, element 2) */ 1778 brw_MOV(p, 1779 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 1780 mrf.nr, 1781 2), BRW_REGISTER_TYPE_UD), 1782 brw_imm_ud(offset)); 1783 1784 brw_pop_insn_state(p); 1785 } 1786 1787 { 1788 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1789 1790 assert(insn->header.predicate_control == 0); 1791 insn->header.compression_control = BRW_COMPRESSION_NONE; 1792 insn->header.destreg__conditionalmod = mrf.nr; 1793 1794 brw_set_dest(p, insn, dest); /* UW? */ 1795 if (intel->gen >= 6) { 1796 brw_set_src0(p, insn, mrf); 1797 } else { 1798 brw_set_src0(p, insn, brw_null_reg()); 1799 } 1800 1801 brw_set_dp_read_message(p, 1802 insn, 1803 255, /* binding table index (255=stateless) */ 1804 msg_control, 1805 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ 1806 BRW_DATAPORT_READ_TARGET_RENDER_CACHE, 1807 1, /* msg_length */ 1808 rlen); 1809 } 1810} 1811 1812/** 1813 * Read a float[4] vector from the data port Data Cache (const buffer). 1814 * Location (in buffer) should be a multiple of 16. 1815 * Used for fetching shader constants. 
 */
void brw_oword_block_read(struct brw_compile *p,
			  struct brw_reg dest,
			  struct brw_reg mrf,
			  uint32_t offset,
			  uint32_t bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;

   /* On newer hardware, offset is in units of owords. */
   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);

   /* Message header: copy of g0 with the offset in element 2. */
   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

   /* set message header global offset field (reg 0, element 2) */
   brw_MOV(p,
	   retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
			       mrf.nr,
			       2), BRW_REGISTER_TYPE_UD),
	   brw_imm_ud(offset));

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = mrf.nr;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(p, insn, dest);
   if (intel->gen >= 6) {
      brw_set_src0(p, insn, mrf);
   } else {
      brw_set_src0(p, insn, brw_null_reg());
   }

   brw_set_dp_read_message(p,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
			   1, /* msg_length */
			   1); /* response_length (1 reg, 2 owords!) */

   brw_pop_insn_state(p);
}

/**
 * Read a set of dwords from the data port Data Cache (const buffer).
 *
 * Location (in buffer) appears as UD offsets in the register after
 * the provided mrf header reg.
1875 */ 1876void brw_dword_scattered_read(struct brw_compile *p, 1877 struct brw_reg dest, 1878 struct brw_reg mrf, 1879 uint32_t bind_table_index) 1880{ 1881 mrf = retype(mrf, BRW_REGISTER_TYPE_UD); 1882 1883 brw_push_insn_state(p); 1884 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1885 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1886 brw_set_mask_control(p, BRW_MASK_DISABLE); 1887 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); 1888 brw_pop_insn_state(p); 1889 1890 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1891 insn->header.destreg__conditionalmod = mrf.nr; 1892 1893 /* cast dest to a uword[8] vector */ 1894 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); 1895 1896 brw_set_dest(p, insn, dest); 1897 brw_set_src0(p, insn, brw_null_reg()); 1898 1899 brw_set_dp_read_message(p, 1900 insn, 1901 bind_table_index, 1902 BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS, 1903 BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ, 1904 BRW_DATAPORT_READ_TARGET_DATA_CACHE, 1905 2, /* msg_length */ 1906 1); /* response_length */ 1907} 1908 1909 1910 1911/** 1912 * Read float[4] constant(s) from VS constant buffer. 1913 * For relative addressing, two float[4] constants will be read into 'dest'. 1914 * Otherwise, one float[4] constant will be read into the lower half of 'dest'. 
 */
void brw_dp_READ_4_vs(struct brw_compile *p,
                      struct brw_reg dest,
                      GLuint location,
                      GLuint bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint msg_reg_nr = 1;   /* message header lives in MRF 1 */

   /* Gen6+ takes the location in oword (16-byte) units — same convention
    * as the gen >= 6 path in brw_oword_block_read.
    */
   if (intel->gen >= 6)
      location /= 16;

   /* Setup MRF[1] with location/offset into const buffer: copy g0, then
    * write the offset into element 2 of the header, with predication,
    * compression and execution masking disabled.
    */
   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2),
		     BRW_REGISTER_TYPE_UD),
	   brw_imm_ud(location));
   brw_pop_insn_state(p);

   insn = next_insn(p, BRW_OPCODE_SEND);

   insn->header.predicate_control = BRW_PREDICATE_NONE;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.destreg__conditionalmod = msg_reg_nr;
   insn->header.mask_control = BRW_MASK_DISABLE;

   brw_set_dest(p, insn, dest);
   /* Gen6+ requires the header MRF as an explicit source. */
   if (intel->gen >= 6) {
      brw_set_src0(p, insn, brw_message_reg(msg_reg_nr));
   } else {
      brw_set_src0(p, insn, brw_null_reg());
   }

   brw_set_dp_read_message(p,
			   insn,
			   bind_table_index,
			   0, /* msg_control */
			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
			   1, /* msg_length */
			   1); /* response_length (1 Oword) */
}

/**
 * Read a float[4] constant per vertex from VS constant buffer, with
 * relative addressing.
1966 */ 1967void brw_dp_READ_4_vs_relative(struct brw_compile *p, 1968 struct brw_reg dest, 1969 struct brw_reg addr_reg, 1970 GLuint offset, 1971 GLuint bind_table_index) 1972{ 1973 struct intel_context *intel = &p->brw->intel; 1974 struct brw_reg src = brw_vec8_grf(0, 0); 1975 int msg_type; 1976 1977 /* Setup MRF[1] with offset into const buffer */ 1978 brw_push_insn_state(p); 1979 brw_set_access_mode(p, BRW_ALIGN_1); 1980 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1981 brw_set_mask_control(p, BRW_MASK_DISABLE); 1982 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1983 1984 /* M1.0 is block offset 0, M1.4 is block offset 1, all other 1985 * fields ignored. 1986 */ 1987 brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D), 1988 addr_reg, brw_imm_d(offset)); 1989 brw_pop_insn_state(p); 1990 1991 gen6_resolve_implied_move(p, &src, 0); 1992 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1993 1994 insn->header.predicate_control = BRW_PREDICATE_NONE; 1995 insn->header.compression_control = BRW_COMPRESSION_NONE; 1996 insn->header.destreg__conditionalmod = 0; 1997 insn->header.mask_control = BRW_MASK_DISABLE; 1998 1999 brw_set_dest(p, insn, dest); 2000 brw_set_src0(p, insn, src); 2001 2002 if (intel->gen == 6) 2003 msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; 2004 else if (intel->gen == 5 || intel->is_g4x) 2005 msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; 2006 else 2007 msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; 2008 2009 brw_set_dp_read_message(p, 2010 insn, 2011 bind_table_index, 2012 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, 2013 msg_type, 2014 BRW_DATAPORT_READ_TARGET_DATA_CACHE, 2015 2, /* msg_length */ 2016 1); /* response_length */ 2017} 2018 2019 2020 2021void brw_fb_WRITE(struct brw_compile *p, 2022 int dispatch_width, 2023 GLuint msg_reg_nr, 2024 struct brw_reg src0, 2025 GLuint binding_table_index, 2026 GLuint msg_length, 2027 GLuint response_length, 2028 GLboolean eot, 2029 
GLboolean header_present) 2030{ 2031 struct intel_context *intel = &p->brw->intel; 2032 struct brw_instruction *insn; 2033 GLuint msg_control, msg_type; 2034 struct brw_reg dest; 2035 2036 if (dispatch_width == 16) 2037 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW); 2038 else 2039 dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW); 2040 2041 if (intel->gen >= 6 && binding_table_index == 0) { 2042 insn = next_insn(p, BRW_OPCODE_SENDC); 2043 } else { 2044 insn = next_insn(p, BRW_OPCODE_SEND); 2045 } 2046 /* The execution mask is ignored for render target writes. */ 2047 insn->header.predicate_control = 0; 2048 insn->header.compression_control = BRW_COMPRESSION_NONE; 2049 2050 if (intel->gen >= 6) { 2051 /* headerless version, just submit color payload */ 2052 src0 = brw_message_reg(msg_reg_nr); 2053 2054 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; 2055 } else { 2056 insn->header.destreg__conditionalmod = msg_reg_nr; 2057 2058 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; 2059 } 2060 2061 if (dispatch_width == 16) 2062 msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE; 2063 else 2064 msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01; 2065 2066 brw_set_dest(p, insn, dest); 2067 brw_set_src0(p, insn, src0); 2068 brw_set_dp_write_message(p, 2069 insn, 2070 binding_table_index, 2071 msg_control, 2072 msg_type, 2073 msg_length, 2074 header_present, 2075 1, /* pixel scoreboard */ 2076 response_length, 2077 eot, 2078 0 /* send_commit_msg */); 2079} 2080 2081 2082/** 2083 * Texture sample instruction. 2084 * Note: the msg_type plus msg_length values determine exactly what kind 2085 * of sampling operation is performed. See volume 4, page 161 of docs. 
 */
void brw_SAMPLE(struct brw_compile *p,
		struct brw_reg dest,
		GLuint msg_reg_nr,
		struct brw_reg src0,
		GLuint binding_table_index,
		GLuint sampler,
		GLuint writemask,
		GLuint msg_type,
		GLuint response_length,
		GLuint msg_length,
		GLboolean eot,
		GLuint header_present,
		GLuint simd_mode)
{
   struct intel_context *intel = &p->brw->intel;
   GLboolean need_stall = 0;

   /* Nothing to write: emit no instruction at all. */
   if (writemask == 0) {
      /*printf("%s: zero writemask??\n", __FUNCTION__); */
      return;
   }

   /* Hardware doesn't do destination dependency checking on send
    * instructions properly.  Add a workaround which generates the
    * dependency by other means.  In practice it seems like this bug
    * only crops up for texture samples, and only where registers are
    * written by the send and then written again later without being
    * read in between.  Luckily for us, we already track that
    * information and use it to modify the writemask for the
    * instruction, so that is a guide for whether a workaround is
    * needed.
    */
   if (writemask != WRITEMASK_XYZW) {
      GLuint dst_offset = 0;
      GLuint i, newmask = 0, len = 0;

      /* Count leading disabled channels; dst_offset advances 2 per
       * skipped channel (presumably two SIMD8 regs per channel — confirm
       * against offset() units).
       */
      for (i = 0; i < 4; i++) {
	 if (writemask & (1<<i))
	    break;
	 dst_offset += 2;
      }
      /* Collect the contiguous run of enabled channels that follows. */
      for (; i < 4; i++) {
	 if (!(writemask & (1<<i)))
	    break;
	 newmask |= 1<<i;
	 len++;
      }

      if (newmask != writemask) {
	 /* Enabled channels are not one contiguous run; we cannot express
	  * this with the message channel mask, so fall back to the
	  * dependency-stall workaround emitted at the end.
	  */
	 need_stall = 1;
         /* printf("need stall %x %x\n", newmask , writemask); */
      }
      else {
	 GLboolean dispatch_16 = GL_FALSE;

	 struct brw_reg m1 = brw_message_reg(msg_reg_nr);

	 guess_execution_size(p, p->current, dest);
	 if (p->current->header.execution_size == BRW_EXECUTE_16)
	    dispatch_16 = GL_TRUE;

	 /* Invert the run: NOTE(review) — the value written at bit 12 of
	  * header element 2 appears to be a channel *disable* mask;
	  * confirm against the PRM.
	  */
	 newmask = ~newmask & WRITEMASK_XYZW;

	 brw_push_insn_state(p);

	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
	 brw_set_mask_control(p, BRW_MASK_DISABLE);

	 /* Build a message header from g0 and patch in the channel mask. */
	 brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD),
		 retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD));
  	 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));

	 brw_pop_insn_state(p);

  	 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
	 dest = offset(dest, dst_offset);

	 /* For 16-wide dispatch, masked channels are skipped in the
	  * response.  For 8-wide, masked channels still take up slots,
	  * and are just not written to.
	  */
	 if (dispatch_16)
	    response_length = len * 2;
      }
   }

   {
      struct brw_instruction *insn;

      gen6_resolve_implied_move(p, &src0, msg_reg_nr);

      insn = next_insn(p, BRW_OPCODE_SEND);
      insn->header.predicate_control = 0; /* XXX */
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      if (intel->gen < 6)
	  insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src0);
      brw_set_sampler_message(p, insn,
			      binding_table_index,
			      sampler,
			      msg_type,
			      response_length,
			      msg_length,
			      eot,
			      header_present,
			      simd_mode);
   }

   if (need_stall) {
      /* Dummy self-move of the last register the sample writes; this
       * generates the read-after-write dependency the send itself fails
       * to enforce (see the workaround comment above).
       */
      struct brw_reg reg = vec8(offset(dest, response_length-1));

      /*  mov (8) r9.0<1>:f    r9.0<8;8,1>:f { Align1 }
       */
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, retype(reg, BRW_REGISTER_TYPE_UD),
	      retype(reg, BRW_REGISTER_TYPE_UD));
      brw_pop_insn_state(p);
   }

}

/* All these variables are pretty confusing - we might be better off
 * using bitmasks and macros for this, in the old style.  Or perhaps
 * just having the caller instantiate the fields in dword3 itself.
 */
void brw_urb_WRITE(struct brw_compile *p,
		   struct brw_reg dest,
		   GLuint msg_reg_nr,
		   struct brw_reg src0,
		   GLboolean allocate,
		   GLboolean used,
		   GLuint msg_length,
		   GLuint response_length,
		   GLboolean eot,
		   GLboolean writes_complete,
		   GLuint offset,
		   GLuint swizzle)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   /* On gen6+ a GRF source must first be moved into the message register. */
   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   if (intel->gen == 7) {
      /* Enable Channel Masks in the URB_WRITE_HWORD message header
       * (OR 0xff00 into dword 5 of the header copied from g0).
       */
      brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
		       BRW_REGISTER_TYPE_UD),
	        retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
		brw_imm_ud(0xff00));
   }

   insn = next_insn(p, BRW_OPCODE_SEND);

   /* The payload must fit in the MRF file. */
   assert(msg_length < BRW_MAX_MRF);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));

   if (intel->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_urb_message(p,
		       insn,
		       allocate,
		       used,
		       msg_length,
		       response_length,
		       eot,
		       writes_complete,
		       offset,
		       swizzle);
}

/* Scan forward from 'start' for the instruction that ends the current
 * control-flow block (ENDIF, ELSE or WHILE) and return its index.
 * Asserts if none is found before the end of the program.
 */
static int
brw_find_next_block_end(struct brw_compile *p, int start)
{
   int ip;

   for (ip = start + 1; ip < p->nr_insn; ip++) {
      struct brw_instruction *insn = &p->store[ip];

      switch (insn->header.opcode) {
      case BRW_OPCODE_ENDIF:
      case BRW_OPCODE_ELSE:
      case BRW_OPCODE_WHILE:
	 return ip;
      }
   }
   assert(!"not reached");
   return start + 1;
}

/* There is no DO instruction on gen6, so to find the end of the loop
 * we have to see if the loop is jumping back before our start
 * instruction.
 */
static int
brw_find_loop_end(struct brw_compile *p, int start)
{
   struct intel_context *intel = &p->brw->intel;
   int ip;
   /* Branch offsets are encoded in units of br (2) per instruction —
    * NOTE(review): presumably half-instruction granularity; confirm
    * against the PRM.
    */
   int br = 2;

   for (ip = start + 1; ip < p->nr_insn; ip++) {
      struct brw_instruction *insn = &p->store[ip];

      if (insn->header.opcode == BRW_OPCODE_WHILE) {
	 /* Gen6 stores the backward jump in the gen6 branch field;
	  * later gens use the break_cont JIP field.
	  */
	 int jip = intel->gen == 6 ? insn->bits1.branch_gen6.jump_count
				   : insn->bits3.break_cont.jip;
	 /* A WHILE that jumps back before 'start' closes our loop. */
	 if (ip + jip / br < start)
	    return ip;
      }
   }
   assert(!"not reached");
   return start + 1;
}

/* After program generation, go back and update the UIP and JIP of
 * BREAK and CONT instructions to their correct locations.
 */
void
brw_set_uip_jip(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   int ip;
   int br = 2;   /* offsets are scaled by 2 per instruction, as above */

   /* Pre-gen6 break/continue are patched by different means. */
   if (intel->gen < 6)
      return;

   for (ip = 0; ip < p->nr_insn; ip++) {
      struct brw_instruction *insn = &p->store[ip];

      switch (insn->header.opcode) {
      case BRW_OPCODE_BREAK:
	 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
	 /* Gen7 UIP points to WHILE; Gen6 points just after it */
	 insn->bits3.break_cont.uip =
	    br * (brw_find_loop_end(p, ip) - ip + (intel->gen == 6 ? 1 : 0));
	 break;
      case BRW_OPCODE_CONTINUE:
	 /* JIP is set at CONTINUE emit time, since that's when we
	  * know where the start of the loop is.
	  */
	 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
	 assert(insn->bits3.break_cont.uip != 0);
	 assert(insn->bits3.break_cont.jip != 0);
	 break;
      }
   }
}

/* Emit an FF_SYNC message (pre-gen6 GS URB handshake). */
void brw_ff_sync(struct brw_compile *p,
		   struct brw_reg dest,
		   GLuint msg_reg_nr,
		   struct brw_reg src0,
		   GLboolean allocate,
		   GLuint response_length,
		   GLboolean eot)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   /* On gen6+ a GRF source must first be moved into the message register. */
   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   insn = next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));

   if (intel->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_ff_sync_message(p,
			   insn,
			   allocate,
			   response_length,
			   eot);
}