brw_eu_emit.c revision c638180fc715aff84422c1092926120af966d417
1/* 2 Copyright (C) Intel Corp. 2006. All Rights Reserved. 3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to 4 develop this 3D driver. 5 6 Permission is hereby granted, free of charge, to any person obtaining 7 a copy of this software and associated documentation files (the 8 "Software"), to deal in the Software without restriction, including 9 without limitation the rights to use, copy, modify, merge, publish, 10 distribute, sublicense, and/or sell copies of the Software, and to 11 permit persons to whom the Software is furnished to do so, subject to 12 the following conditions: 13 14 The above copyright notice and this permission notice (including the 15 next paragraph) shall be included in all copies or substantial 16 portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
25 26 **********************************************************************/ 27 /* 28 * Authors: 29 * Keith Whitwell <keith@tungstengraphics.com> 30 */ 31 32 33#include "brw_context.h" 34#include "brw_defines.h" 35#include "brw_eu.h" 36 37#include "../glsl/ralloc.h" 38 39/*********************************************************************** 40 * Internal helper for constructing instructions 41 */ 42 43static void guess_execution_size(struct brw_compile *p, 44 struct brw_instruction *insn, 45 struct brw_reg reg) 46{ 47 if (reg.width == BRW_WIDTH_8 && p->compressed) 48 insn->header.execution_size = BRW_EXECUTE_16; 49 else 50 insn->header.execution_size = reg.width; /* note - definitions are compatible */ 51} 52 53 54/** 55 * Prior to Sandybridge, the SEND instruction accepted non-MRF source 56 * registers, implicitly moving the operand to a message register. 57 * 58 * On Sandybridge, this is no longer the case. This function performs the 59 * explicit move; it should be called before emitting a SEND instruction. 
60 */ 61static void 62gen6_resolve_implied_move(struct brw_compile *p, 63 struct brw_reg *src, 64 GLuint msg_reg_nr) 65{ 66 struct intel_context *intel = &p->brw->intel; 67 if (intel->gen != 6) 68 return; 69 70 if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) { 71 brw_push_insn_state(p); 72 brw_set_mask_control(p, BRW_MASK_DISABLE); 73 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 74 brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD), 75 retype(*src, BRW_REGISTER_TYPE_UD)); 76 brw_pop_insn_state(p); 77 } 78 *src = brw_message_reg(msg_reg_nr); 79} 80 81 82static void brw_set_dest(struct brw_compile *p, 83 struct brw_instruction *insn, 84 struct brw_reg dest) 85{ 86 if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && 87 dest.file != BRW_MESSAGE_REGISTER_FILE) 88 assert(dest.nr < 128); 89 90 insn->bits1.da1.dest_reg_file = dest.file; 91 insn->bits1.da1.dest_reg_type = dest.type; 92 insn->bits1.da1.dest_address_mode = dest.address_mode; 93 94 if (dest.address_mode == BRW_ADDRESS_DIRECT) { 95 insn->bits1.da1.dest_reg_nr = dest.nr; 96 97 if (insn->header.access_mode == BRW_ALIGN_1) { 98 insn->bits1.da1.dest_subreg_nr = dest.subnr; 99 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) 100 dest.hstride = BRW_HORIZONTAL_STRIDE_1; 101 insn->bits1.da1.dest_horiz_stride = dest.hstride; 102 } 103 else { 104 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; 105 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; 106 /* even ignored in da16, still need to set as '01' */ 107 insn->bits1.da16.dest_horiz_stride = 1; 108 } 109 } 110 else { 111 insn->bits1.ia1.dest_subreg_nr = dest.subnr; 112 113 /* These are different sizes in align1 vs align16: 114 */ 115 if (insn->header.access_mode == BRW_ALIGN_1) { 116 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; 117 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) 118 dest.hstride = BRW_HORIZONTAL_STRIDE_1; 119 insn->bits1.ia1.dest_horiz_stride = dest.hstride; 120 } 121 
else { 122 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; 123 /* even ignored in da16, still need to set as '01' */ 124 insn->bits1.ia16.dest_horiz_stride = 1; 125 } 126 } 127 128 /* NEW: Set the execution size based on dest.width and 129 * insn->compression_control: 130 */ 131 guess_execution_size(p, insn, dest); 132} 133 134extern int reg_type_size[]; 135 136static void 137validate_reg(struct brw_instruction *insn, struct brw_reg reg) 138{ 139 int hstride_for_reg[] = {0, 1, 2, 4}; 140 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256}; 141 int width_for_reg[] = {1, 2, 4, 8, 16}; 142 int execsize_for_reg[] = {1, 2, 4, 8, 16}; 143 int width, hstride, vstride, execsize; 144 145 if (reg.file == BRW_IMMEDIATE_VALUE) { 146 /* 3.3.6: Region Parameters. Restriction: Immediate vectors 147 * mean the destination has to be 128-bit aligned and the 148 * destination horiz stride has to be a word. 149 */ 150 if (reg.type == BRW_REGISTER_TYPE_V) { 151 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] * 152 reg_type_size[insn->bits1.da1.dest_reg_type] == 2); 153 } 154 155 return; 156 } 157 158 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE && 159 reg.file == BRW_ARF_NULL) 160 return; 161 162 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg)); 163 hstride = hstride_for_reg[reg.hstride]; 164 165 if (reg.vstride == 0xf) { 166 vstride = -1; 167 } else { 168 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg)); 169 vstride = vstride_for_reg[reg.vstride]; 170 } 171 172 assert(reg.width >= 0 && reg.width < Elements(width_for_reg)); 173 width = width_for_reg[reg.width]; 174 175 assert(insn->header.execution_size >= 0 && 176 insn->header.execution_size < Elements(execsize_for_reg)); 177 execsize = execsize_for_reg[insn->header.execution_size]; 178 179 /* Restrictions from 3.3.10: Register Region Restrictions. */ 180 /* 3. */ 181 assert(execsize >= width); 182 183 /* 4. 
*/ 184 if (execsize == width && hstride != 0) { 185 assert(vstride == -1 || vstride == width * hstride); 186 } 187 188 /* 5. */ 189 if (execsize == width && hstride == 0) { 190 /* no restriction on vstride. */ 191 } 192 193 /* 6. */ 194 if (width == 1) { 195 assert(hstride == 0); 196 } 197 198 /* 7. */ 199 if (execsize == 1 && width == 1) { 200 assert(hstride == 0); 201 assert(vstride == 0); 202 } 203 204 /* 8. */ 205 if (vstride == 0 && hstride == 0) { 206 assert(width == 1); 207 } 208 209 /* 10. Check destination issues. */ 210} 211 212static void brw_set_src0(struct brw_compile *p, 213 struct brw_instruction *insn, 214 struct brw_reg reg) 215{ 216 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) 217 assert(reg.nr < 128); 218 219 validate_reg(insn, reg); 220 221 insn->bits1.da1.src0_reg_file = reg.file; 222 insn->bits1.da1.src0_reg_type = reg.type; 223 insn->bits2.da1.src0_abs = reg.abs; 224 insn->bits2.da1.src0_negate = reg.negate; 225 insn->bits2.da1.src0_address_mode = reg.address_mode; 226 227 if (reg.file == BRW_IMMEDIATE_VALUE) { 228 insn->bits3.ud = reg.dw1.ud; 229 230 /* Required to set some fields in src1 as well: 231 */ 232 insn->bits1.da1.src1_reg_file = 0; /* arf */ 233 insn->bits1.da1.src1_reg_type = reg.type; 234 } 235 else 236 { 237 if (reg.address_mode == BRW_ADDRESS_DIRECT) { 238 if (insn->header.access_mode == BRW_ALIGN_1) { 239 insn->bits2.da1.src0_subreg_nr = reg.subnr; 240 insn->bits2.da1.src0_reg_nr = reg.nr; 241 } 242 else { 243 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16; 244 insn->bits2.da16.src0_reg_nr = reg.nr; 245 } 246 } 247 else { 248 insn->bits2.ia1.src0_subreg_nr = reg.subnr; 249 250 if (insn->header.access_mode == BRW_ALIGN_1) { 251 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; 252 } 253 else { 254 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; 255 } 256 } 257 258 if (insn->header.access_mode == BRW_ALIGN_1) { 259 if (reg.width == BRW_WIDTH_1 && 260 insn->header.execution_size == 
BRW_EXECUTE_1) { 261 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0; 262 insn->bits2.da1.src0_width = BRW_WIDTH_1; 263 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0; 264 } 265 else { 266 insn->bits2.da1.src0_horiz_stride = reg.hstride; 267 insn->bits2.da1.src0_width = reg.width; 268 insn->bits2.da1.src0_vert_stride = reg.vstride; 269 } 270 } 271 else { 272 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); 273 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); 274 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); 275 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); 276 277 /* This is an oddity of the fact we're using the same 278 * descriptions for registers in align_16 as align_1: 279 */ 280 if (reg.vstride == BRW_VERTICAL_STRIDE_8) 281 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4; 282 else 283 insn->bits2.da16.src0_vert_stride = reg.vstride; 284 } 285 } 286} 287 288 289void brw_set_src1(struct brw_compile *p, 290 struct brw_instruction *insn, 291 struct brw_reg reg) 292{ 293 assert(reg.file != BRW_MESSAGE_REGISTER_FILE); 294 295 assert(reg.nr < 128); 296 297 validate_reg(insn, reg); 298 299 insn->bits1.da1.src1_reg_file = reg.file; 300 insn->bits1.da1.src1_reg_type = reg.type; 301 insn->bits3.da1.src1_abs = reg.abs; 302 insn->bits3.da1.src1_negate = reg.negate; 303 304 /* Only src1 can be immediate in two-argument instructions. 
305 */ 306 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE); 307 308 if (reg.file == BRW_IMMEDIATE_VALUE) { 309 insn->bits3.ud = reg.dw1.ud; 310 } 311 else { 312 /* This is a hardware restriction, which may or may not be lifted 313 * in the future: 314 */ 315 assert (reg.address_mode == BRW_ADDRESS_DIRECT); 316 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */ 317 318 if (insn->header.access_mode == BRW_ALIGN_1) { 319 insn->bits3.da1.src1_subreg_nr = reg.subnr; 320 insn->bits3.da1.src1_reg_nr = reg.nr; 321 } 322 else { 323 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16; 324 insn->bits3.da16.src1_reg_nr = reg.nr; 325 } 326 327 if (insn->header.access_mode == BRW_ALIGN_1) { 328 if (reg.width == BRW_WIDTH_1 && 329 insn->header.execution_size == BRW_EXECUTE_1) { 330 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0; 331 insn->bits3.da1.src1_width = BRW_WIDTH_1; 332 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0; 333 } 334 else { 335 insn->bits3.da1.src1_horiz_stride = reg.hstride; 336 insn->bits3.da1.src1_width = reg.width; 337 insn->bits3.da1.src1_vert_stride = reg.vstride; 338 } 339 } 340 else { 341 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); 342 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); 343 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); 344 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); 345 346 /* This is an oddity of the fact we're using the same 347 * descriptions for registers in align_16 as align_1: 348 */ 349 if (reg.vstride == BRW_VERTICAL_STRIDE_8) 350 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4; 351 else 352 insn->bits3.da16.src1_vert_stride = reg.vstride; 353 } 354 } 355} 356 357 358 359static void brw_set_math_message( struct brw_compile *p, 360 struct brw_instruction *insn, 361 GLuint msg_length, 362 GLuint response_length, 363 GLuint function, 364 GLuint 
integer_type, 365 GLboolean low_precision, 366 GLboolean saturate, 367 GLuint dataType ) 368{ 369 struct brw_context *brw = p->brw; 370 struct intel_context *intel = &brw->intel; 371 brw_set_src1(p, insn, brw_imm_d(0)); 372 373 if (intel->gen == 5) { 374 insn->bits3.math_gen5.function = function; 375 insn->bits3.math_gen5.int_type = integer_type; 376 insn->bits3.math_gen5.precision = low_precision; 377 insn->bits3.math_gen5.saturate = saturate; 378 insn->bits3.math_gen5.data_type = dataType; 379 insn->bits3.math_gen5.snapshot = 0; 380 insn->bits3.math_gen5.header_present = 0; 381 insn->bits3.math_gen5.response_length = response_length; 382 insn->bits3.math_gen5.msg_length = msg_length; 383 insn->bits3.math_gen5.end_of_thread = 0; 384 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH; 385 insn->bits2.send_gen5.end_of_thread = 0; 386 } else { 387 insn->bits3.math.function = function; 388 insn->bits3.math.int_type = integer_type; 389 insn->bits3.math.precision = low_precision; 390 insn->bits3.math.saturate = saturate; 391 insn->bits3.math.data_type = dataType; 392 insn->bits3.math.response_length = response_length; 393 insn->bits3.math.msg_length = msg_length; 394 insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH; 395 insn->bits3.math.end_of_thread = 0; 396 } 397} 398 399 400static void brw_set_ff_sync_message(struct brw_compile *p, 401 struct brw_instruction *insn, 402 GLboolean allocate, 403 GLuint response_length, 404 GLboolean end_of_thread) 405{ 406 struct brw_context *brw = p->brw; 407 struct intel_context *intel = &brw->intel; 408 brw_set_src1(p, insn, brw_imm_d(0)); 409 410 insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */ 411 insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */ 412 insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */ 413 insn->bits3.urb_gen5.allocate = allocate; 414 insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */ 415 insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */ 416 
insn->bits3.urb_gen5.header_present = 1; 417 insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */ 418 insn->bits3.urb_gen5.msg_length = 1; 419 insn->bits3.urb_gen5.end_of_thread = end_of_thread; 420 if (intel->gen >= 6) { 421 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB; 422 } else { 423 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB; 424 insn->bits2.send_gen5.end_of_thread = end_of_thread; 425 } 426} 427 428static void brw_set_urb_message( struct brw_compile *p, 429 struct brw_instruction *insn, 430 GLboolean allocate, 431 GLboolean used, 432 GLuint msg_length, 433 GLuint response_length, 434 GLboolean end_of_thread, 435 GLboolean complete, 436 GLuint offset, 437 GLuint swizzle_control ) 438{ 439 struct brw_context *brw = p->brw; 440 struct intel_context *intel = &brw->intel; 441 brw_set_src1(p, insn, brw_imm_d(0)); 442 443 if (intel->gen >= 5) { 444 insn->bits3.urb_gen5.opcode = 0; /* ? */ 445 insn->bits3.urb_gen5.offset = offset; 446 insn->bits3.urb_gen5.swizzle_control = swizzle_control; 447 insn->bits3.urb_gen5.allocate = allocate; 448 insn->bits3.urb_gen5.used = used; /* ? */ 449 insn->bits3.urb_gen5.complete = complete; 450 insn->bits3.urb_gen5.header_present = 1; 451 insn->bits3.urb_gen5.response_length = response_length; 452 insn->bits3.urb_gen5.msg_length = msg_length; 453 insn->bits3.urb_gen5.end_of_thread = end_of_thread; 454 if (intel->gen >= 6) { 455 /* For SNB, the SFID bits moved to the condmod bits, and 456 * EOT stayed in bits3 above. Does the EOT bit setting 457 * below on Ironlake even do anything? 458 */ 459 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB; 460 } else { 461 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB; 462 insn->bits2.send_gen5.end_of_thread = end_of_thread; 463 } 464 } else { 465 insn->bits3.urb.opcode = 0; /* ? 
*/ 466 insn->bits3.urb.offset = offset; 467 insn->bits3.urb.swizzle_control = swizzle_control; 468 insn->bits3.urb.allocate = allocate; 469 insn->bits3.urb.used = used; /* ? */ 470 insn->bits3.urb.complete = complete; 471 insn->bits3.urb.response_length = response_length; 472 insn->bits3.urb.msg_length = msg_length; 473 insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB; 474 insn->bits3.urb.end_of_thread = end_of_thread; 475 } 476} 477 478static void brw_set_dp_write_message( struct brw_compile *p, 479 struct brw_instruction *insn, 480 GLuint binding_table_index, 481 GLuint msg_control, 482 GLuint msg_type, 483 GLuint msg_length, 484 GLboolean header_present, 485 GLuint pixel_scoreboard_clear, 486 GLuint response_length, 487 GLuint end_of_thread, 488 GLuint send_commit_msg) 489{ 490 struct brw_context *brw = p->brw; 491 struct intel_context *intel = &brw->intel; 492 brw_set_src1(p, insn, brw_imm_ud(0)); 493 494 if (intel->gen >= 6) { 495 insn->bits3.gen6_dp.binding_table_index = binding_table_index; 496 insn->bits3.gen6_dp.msg_control = msg_control; 497 insn->bits3.gen6_dp.pixel_scoreboard_clear = pixel_scoreboard_clear; 498 insn->bits3.gen6_dp.msg_type = msg_type; 499 insn->bits3.gen6_dp.send_commit_msg = send_commit_msg; 500 insn->bits3.gen6_dp.header_present = header_present; 501 insn->bits3.gen6_dp.response_length = response_length; 502 insn->bits3.gen6_dp.msg_length = msg_length; 503 insn->bits3.gen6_dp.end_of_thread = end_of_thread; 504 505 /* We always use the render cache for write messages */ 506 insn->header.destreg__conditionalmod = GEN6_MESSAGE_TARGET_DP_RENDER_CACHE; 507 } else if (intel->gen == 5) { 508 insn->bits3.dp_write_gen5.binding_table_index = binding_table_index; 509 insn->bits3.dp_write_gen5.msg_control = msg_control; 510 insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear; 511 insn->bits3.dp_write_gen5.msg_type = msg_type; 512 insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg; 513 
insn->bits3.dp_write_gen5.header_present = header_present; 514 insn->bits3.dp_write_gen5.response_length = response_length; 515 insn->bits3.dp_write_gen5.msg_length = msg_length; 516 insn->bits3.dp_write_gen5.end_of_thread = end_of_thread; 517 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE; 518 insn->bits2.send_gen5.end_of_thread = end_of_thread; 519 } else { 520 insn->bits3.dp_write.binding_table_index = binding_table_index; 521 insn->bits3.dp_write.msg_control = msg_control; 522 insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear; 523 insn->bits3.dp_write.msg_type = msg_type; 524 insn->bits3.dp_write.send_commit_msg = send_commit_msg; 525 insn->bits3.dp_write.response_length = response_length; 526 insn->bits3.dp_write.msg_length = msg_length; 527 insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE; 528 insn->bits3.dp_write.end_of_thread = end_of_thread; 529 } 530} 531 532static void 533brw_set_dp_read_message(struct brw_compile *p, 534 struct brw_instruction *insn, 535 GLuint binding_table_index, 536 GLuint msg_control, 537 GLuint msg_type, 538 GLuint target_cache, 539 GLuint msg_length, 540 GLuint response_length) 541{ 542 struct brw_context *brw = p->brw; 543 struct intel_context *intel = &brw->intel; 544 brw_set_src1(p, insn, brw_imm_d(0)); 545 546 if (intel->gen >= 6) { 547 uint32_t target_function; 548 549 if (target_cache == BRW_DATAPORT_READ_TARGET_DATA_CACHE) 550 target_function = GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE; 551 else 552 target_function = GEN6_MESSAGE_TARGET_DP_RENDER_CACHE; 553 554 insn->bits3.gen6_dp.binding_table_index = binding_table_index; 555 insn->bits3.gen6_dp.msg_control = msg_control; 556 insn->bits3.gen6_dp.pixel_scoreboard_clear = 0; 557 insn->bits3.gen6_dp.msg_type = msg_type; 558 insn->bits3.gen6_dp.send_commit_msg = 0; 559 insn->bits3.gen6_dp.header_present = 1; 560 insn->bits3.gen6_dp.response_length = response_length; 561 insn->bits3.gen6_dp.msg_length = msg_length; 562 
insn->bits3.gen6_dp.end_of_thread = 0; 563 insn->header.destreg__conditionalmod = target_function; 564 } else if (intel->gen == 5) { 565 insn->bits3.dp_read_gen5.binding_table_index = binding_table_index; 566 insn->bits3.dp_read_gen5.msg_control = msg_control; 567 insn->bits3.dp_read_gen5.msg_type = msg_type; 568 insn->bits3.dp_read_gen5.target_cache = target_cache; 569 insn->bits3.dp_read_gen5.header_present = 1; 570 insn->bits3.dp_read_gen5.response_length = response_length; 571 insn->bits3.dp_read_gen5.msg_length = msg_length; 572 insn->bits3.dp_read_gen5.pad1 = 0; 573 insn->bits3.dp_read_gen5.end_of_thread = 0; 574 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ; 575 insn->bits2.send_gen5.end_of_thread = 0; 576 } else if (intel->is_g4x) { 577 insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/ 578 insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/ 579 insn->bits3.dp_read_g4x.msg_type = msg_type; /*11:13*/ 580 insn->bits3.dp_read_g4x.target_cache = target_cache; /*14:15*/ 581 insn->bits3.dp_read_g4x.response_length = response_length; /*16:19*/ 582 insn->bits3.dp_read_g4x.msg_length = msg_length; /*20:23*/ 583 insn->bits3.dp_read_g4x.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/ 584 insn->bits3.dp_read_g4x.pad1 = 0; 585 insn->bits3.dp_read_g4x.end_of_thread = 0; 586 } else { 587 insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/ 588 insn->bits3.dp_read.msg_control = msg_control; /*8:11*/ 589 insn->bits3.dp_read.msg_type = msg_type; /*12:13*/ 590 insn->bits3.dp_read.target_cache = target_cache; /*14:15*/ 591 insn->bits3.dp_read.response_length = response_length; /*16:19*/ 592 insn->bits3.dp_read.msg_length = msg_length; /*20:23*/ 593 insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/ 594 insn->bits3.dp_read.pad1 = 0; /*28:30*/ 595 insn->bits3.dp_read.end_of_thread = 0; /*31*/ 596 } 597} 598 599static void brw_set_sampler_message(struct brw_compile *p, 600 struct 
brw_instruction *insn, 601 GLuint binding_table_index, 602 GLuint sampler, 603 GLuint msg_type, 604 GLuint response_length, 605 GLuint msg_length, 606 GLboolean eot, 607 GLuint header_present, 608 GLuint simd_mode) 609{ 610 struct brw_context *brw = p->brw; 611 struct intel_context *intel = &brw->intel; 612 assert(eot == 0); 613 brw_set_src1(p, insn, brw_imm_d(0)); 614 615 if (intel->gen >= 5) { 616 insn->bits3.sampler_gen5.binding_table_index = binding_table_index; 617 insn->bits3.sampler_gen5.sampler = sampler; 618 insn->bits3.sampler_gen5.msg_type = msg_type; 619 insn->bits3.sampler_gen5.simd_mode = simd_mode; 620 insn->bits3.sampler_gen5.header_present = header_present; 621 insn->bits3.sampler_gen5.response_length = response_length; 622 insn->bits3.sampler_gen5.msg_length = msg_length; 623 insn->bits3.sampler_gen5.end_of_thread = eot; 624 if (intel->gen >= 6) 625 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_SAMPLER; 626 else { 627 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER; 628 insn->bits2.send_gen5.end_of_thread = eot; 629 } 630 } else if (intel->is_g4x) { 631 insn->bits3.sampler_g4x.binding_table_index = binding_table_index; 632 insn->bits3.sampler_g4x.sampler = sampler; 633 insn->bits3.sampler_g4x.msg_type = msg_type; 634 insn->bits3.sampler_g4x.response_length = response_length; 635 insn->bits3.sampler_g4x.msg_length = msg_length; 636 insn->bits3.sampler_g4x.end_of_thread = eot; 637 insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER; 638 } else { 639 insn->bits3.sampler.binding_table_index = binding_table_index; 640 insn->bits3.sampler.sampler = sampler; 641 insn->bits3.sampler.msg_type = msg_type; 642 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; 643 insn->bits3.sampler.response_length = response_length; 644 insn->bits3.sampler.msg_length = msg_length; 645 insn->bits3.sampler.end_of_thread = eot; 646 insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER; 647 } 648} 649 650 651 652static 
struct brw_instruction *next_insn( struct brw_compile *p, 653 GLuint opcode ) 654{ 655 struct brw_instruction *insn; 656 657 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN); 658 659 insn = &p->store[p->nr_insn++]; 660 memcpy(insn, p->current, sizeof(*insn)); 661 662 /* Reset this one-shot flag: 663 */ 664 665 if (p->current->header.destreg__conditionalmod) { 666 p->current->header.destreg__conditionalmod = 0; 667 p->current->header.predicate_control = BRW_PREDICATE_NORMAL; 668 } 669 670 insn->header.opcode = opcode; 671 return insn; 672} 673 674 675static struct brw_instruction *brw_alu1( struct brw_compile *p, 676 GLuint opcode, 677 struct brw_reg dest, 678 struct brw_reg src ) 679{ 680 struct brw_instruction *insn = next_insn(p, opcode); 681 brw_set_dest(p, insn, dest); 682 brw_set_src0(p, insn, src); 683 return insn; 684} 685 686static struct brw_instruction *brw_alu2(struct brw_compile *p, 687 GLuint opcode, 688 struct brw_reg dest, 689 struct brw_reg src0, 690 struct brw_reg src1 ) 691{ 692 struct brw_instruction *insn = next_insn(p, opcode); 693 brw_set_dest(p, insn, dest); 694 brw_set_src0(p, insn, src0); 695 brw_set_src1(p, insn, src1); 696 return insn; 697} 698 699 700/*********************************************************************** 701 * Convenience routines. 702 */ 703#define ALU1(OP) \ 704struct brw_instruction *brw_##OP(struct brw_compile *p, \ 705 struct brw_reg dest, \ 706 struct brw_reg src0) \ 707{ \ 708 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \ 709} 710 711#define ALU2(OP) \ 712struct brw_instruction *brw_##OP(struct brw_compile *p, \ 713 struct brw_reg dest, \ 714 struct brw_reg src0, \ 715 struct brw_reg src1) \ 716{ \ 717 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \ 718} 719 720/* Rounding operations (other than RNDD) require two instructions - the first 721 * stores a rounded value (possibly the wrong way) in the dest register, but 722 * also sets a per-channel "increment bit" in the flag register. 
A predicated 723 * add of 1.0 fixes dest to contain the desired result. 724 */ 725#define ROUND(OP) \ 726void brw_##OP(struct brw_compile *p, \ 727 struct brw_reg dest, \ 728 struct brw_reg src) \ 729{ \ 730 struct brw_instruction *rnd, *add; \ 731 rnd = next_insn(p, BRW_OPCODE_##OP); \ 732 brw_set_dest(p, rnd, dest); \ 733 brw_set_src0(p, rnd, src); \ 734 rnd->header.destreg__conditionalmod = 0x7; /* turn on round-increments */ \ 735 \ 736 add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \ 737 add->header.predicate_control = BRW_PREDICATE_NORMAL; \ 738} 739 740 741ALU1(MOV) 742ALU2(SEL) 743ALU1(NOT) 744ALU2(AND) 745ALU2(OR) 746ALU2(XOR) 747ALU2(SHR) 748ALU2(SHL) 749ALU2(RSR) 750ALU2(RSL) 751ALU2(ASR) 752ALU1(FRC) 753ALU1(RNDD) 754ALU2(MAC) 755ALU2(MACH) 756ALU1(LZD) 757ALU2(DP4) 758ALU2(DPH) 759ALU2(DP3) 760ALU2(DP2) 761ALU2(LINE) 762ALU2(PLN) 763 764 765ROUND(RNDZ) 766ROUND(RNDE) 767 768 769struct brw_instruction *brw_ADD(struct brw_compile *p, 770 struct brw_reg dest, 771 struct brw_reg src0, 772 struct brw_reg src1) 773{ 774 /* 6.2.2: add */ 775 if (src0.type == BRW_REGISTER_TYPE_F || 776 (src0.file == BRW_IMMEDIATE_VALUE && 777 src0.type == BRW_REGISTER_TYPE_VF)) { 778 assert(src1.type != BRW_REGISTER_TYPE_UD); 779 assert(src1.type != BRW_REGISTER_TYPE_D); 780 } 781 782 if (src1.type == BRW_REGISTER_TYPE_F || 783 (src1.file == BRW_IMMEDIATE_VALUE && 784 src1.type == BRW_REGISTER_TYPE_VF)) { 785 assert(src0.type != BRW_REGISTER_TYPE_UD); 786 assert(src0.type != BRW_REGISTER_TYPE_D); 787 } 788 789 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1); 790} 791 792struct brw_instruction *brw_MUL(struct brw_compile *p, 793 struct brw_reg dest, 794 struct brw_reg src0, 795 struct brw_reg src1) 796{ 797 /* 6.32.38: mul */ 798 if (src0.type == BRW_REGISTER_TYPE_D || 799 src0.type == BRW_REGISTER_TYPE_UD || 800 src1.type == BRW_REGISTER_TYPE_D || 801 src1.type == BRW_REGISTER_TYPE_UD) { 802 assert(dest.type != BRW_REGISTER_TYPE_F); 803 } 804 805 if (src0.type == 
BRW_REGISTER_TYPE_F || 806 (src0.file == BRW_IMMEDIATE_VALUE && 807 src0.type == BRW_REGISTER_TYPE_VF)) { 808 assert(src1.type != BRW_REGISTER_TYPE_UD); 809 assert(src1.type != BRW_REGISTER_TYPE_D); 810 } 811 812 if (src1.type == BRW_REGISTER_TYPE_F || 813 (src1.file == BRW_IMMEDIATE_VALUE && 814 src1.type == BRW_REGISTER_TYPE_VF)) { 815 assert(src0.type != BRW_REGISTER_TYPE_UD); 816 assert(src0.type != BRW_REGISTER_TYPE_D); 817 } 818 819 assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE || 820 src0.nr != BRW_ARF_ACCUMULATOR); 821 assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE || 822 src1.nr != BRW_ARF_ACCUMULATOR); 823 824 return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1); 825} 826 827 828void brw_NOP(struct brw_compile *p) 829{ 830 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP); 831 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 832 brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 833 brw_set_src1(p, insn, brw_imm_ud(0x0)); 834} 835 836 837 838 839 840/*********************************************************************** 841 * Comparisons, if/else/endif 842 */ 843 844struct brw_instruction *brw_JMPI(struct brw_compile *p, 845 struct brw_reg dest, 846 struct brw_reg src0, 847 struct brw_reg src1) 848{ 849 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); 850 851 insn->header.execution_size = 1; 852 insn->header.compression_control = BRW_COMPRESSION_NONE; 853 insn->header.mask_control = BRW_MASK_DISABLE; 854 855 p->current->header.predicate_control = BRW_PREDICATE_NONE; 856 857 return insn; 858} 859 860static void 861push_if_stack(struct brw_compile *p, struct brw_instruction *inst) 862{ 863 p->if_stack[p->if_stack_depth] = inst; 864 865 p->if_stack_depth++; 866 if (p->if_stack_array_size <= p->if_stack_depth) { 867 p->if_stack_array_size *= 2; 868 p->if_stack = reralloc(p->mem_ctx, p->if_stack, struct brw_instruction *, 869 p->if_stack_array_size); 870 } 871} 872 
/* EU takes the value from the flag register and pushes it onto some
 * sort of a stack (presumably merging with any flag value already on
 * the stack).  Within an if block, the flags at the top of the stack
 * control execution on each channel of the unit, eg. on each of the
 * 16 pixel values in our wm programs.
 *
 * When the matching 'else' instruction is reached (presumably by
 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
 *
 * When the matching 'endif' instruction is reached, the flags are
 * popped off.  If the stack is now empty, normal execution resumes.
 */
struct brw_instruction *
brw_IF(struct brw_compile *p, GLuint execute_size)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* Override the defaults for this instruction:
    */
   if (intel->gen < 6) {
      /* Pre-gen6 IF uses IP-relative operands; the jump count is patched
       * in later by patch_IF_ELSE().
       */
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else {
      /* Gen6 encodes the jump count in bits1; sources are null. */
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   }

   insn->header.execution_size = execute_size;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* One-shot: the predicate default must not leak into later insns. */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   push_if_stack(p, insn);
   return insn;
}

/* Gen6-style conditional IF: the comparison is folded into the IF
 * instruction itself (conditional-mod on src0/src1) instead of relying
 * on a previously-set flag register.
 */
struct brw_instruction *
gen6_IF(struct brw_compile *p, uint32_t conditional,
        struct brw_reg src0, struct brw_reg src1)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   brw_set_dest(p, insn, brw_imm_w(0));
   insn->header.execution_size = BRW_EXECUTE_8;
   insn->bits1.branch_gen6.jump_count = 0;
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

   assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
   assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
   insn->header.destreg__conditionalmod = conditional;

   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
   return insn;
}

/**
 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
 */
static void
convert_IF_ELSE_to_ADD(struct brw_compile *p,
                       struct brw_instruction *if_inst,
                       struct brw_instruction *else_inst)
{
   /* The next instruction (where the ENDIF would be, if it existed) */
   struct brw_instruction *next_inst = &p->store[p->nr_insn];

   assert(p->single_program_flow);
   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
   assert(if_inst->header.execution_size == BRW_EXECUTE_1);

   /* Convert IF to an ADD instruction that moves the instruction pointer
    * to the first instruction of the ELSE block.  If there is no ELSE
    * block, point to where ENDIF would be.  Reverse the predicate.
    *
    * There's no need to execute an ENDIF since we don't need to do any
    * stack operations, and if we're currently executing, we just want to
    * continue normally.
    */
   if_inst->header.opcode = BRW_OPCODE_ADD;
   if_inst->header.predicate_inverse = 1;

   if (else_inst != NULL) {
      /* Convert ELSE to an ADD instruction that points where the ENDIF
       * would be.  Offsets are in bytes; each instruction is 16 bytes.
       */
      else_inst->header.opcode = BRW_OPCODE_ADD;

      if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
      else_inst->bits3.ud = (next_inst - else_inst) * 16;
   } else {
      if_inst->bits3.ud = (next_inst - if_inst) * 16;
   }
}

/**
 * Patch IF and ELSE instructions with appropriate jump targets.
 */
static void
patch_IF_ELSE(struct brw_compile *p,
              struct brw_instruction *if_inst,
              struct brw_instruction *else_inst,
              struct brw_instruction *endif_inst)
{
   struct intel_context *intel = &p->brw->intel;

   assert(!p->single_program_flow);
   assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
   assert(endif_inst != NULL);
   assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);

   unsigned br = 1;
   /* Jump count is for 64bit data chunk each, so one 128bit instruction
    * requires 2 chunks.
    */
   if (intel->gen >= 5)
      br = 2;

   assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
   endif_inst->header.execution_size = if_inst->header.execution_size;

   if (else_inst == NULL) {
      /* Patch IF -> ENDIF */
      if (intel->gen < 6) {
         /* Turn it into an IFF, which means no mask stack operations for
          * all-false and jumping past the ENDIF.
          */
         if_inst->header.opcode = BRW_OPCODE_IFF;
         if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
         if_inst->bits3.if_else.pop_count = 0;
         if_inst->bits3.if_else.pad0 = 0;
      } else {
         /* As of gen6, there is no IFF and IF must point to the ENDIF. */
         if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
      }
   } else {
      else_inst->header.execution_size = if_inst->header.execution_size;

      /* Patch IF -> ELSE */
      if (intel->gen < 6) {
         if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
         if_inst->bits3.if_else.pop_count = 0;
         if_inst->bits3.if_else.pad0 = 0;
      } else if (intel->gen == 6) {
         if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
      }

      /* Patch ELSE -> ENDIF */
      if (intel->gen < 6) {
         /* BRW_OPCODE_ELSE pre-gen6 should point just past the
          * matching ENDIF.
          */
         else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
         else_inst->bits3.if_else.pop_count = 1;
         else_inst->bits3.if_else.pad0 = 0;
      } else {
         /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
         else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
      }
   }
}

/* Emit an ELSE; its jump target is patched later when the matching
 * ENDIF is emitted.
 */
void
brw_ELSE(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_ELSE);

   if (intel->gen < 6) {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else {
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   push_if_stack(p, insn);
}

void
brw_ENDIF(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   struct
brw_instruction *else_inst = NULL; 1085 struct brw_instruction *if_inst = NULL; 1086 1087 /* Pop the IF and (optional) ELSE instructions from the stack */ 1088 p->if_stack_depth--; 1089 if (p->if_stack[p->if_stack_depth]->header.opcode == BRW_OPCODE_ELSE) { 1090 else_inst = p->if_stack[p->if_stack_depth]; 1091 p->if_stack_depth--; 1092 } 1093 if_inst = p->if_stack[p->if_stack_depth]; 1094 1095 if (p->single_program_flow) { 1096 /* ENDIF is useless; don't bother emitting it. */ 1097 convert_IF_ELSE_to_ADD(p, if_inst, else_inst); 1098 return; 1099 } 1100 1101 insn = next_insn(p, BRW_OPCODE_ENDIF); 1102 1103 if (intel->gen < 6) { 1104 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 1105 brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 1106 brw_set_src1(p, insn, brw_imm_d(0x0)); 1107 } else { 1108 brw_set_dest(p, insn, brw_imm_w(0)); 1109 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1110 brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1111 } 1112 1113 insn->header.compression_control = BRW_COMPRESSION_NONE; 1114 insn->header.mask_control = BRW_MASK_ENABLE; 1115 insn->header.thread_control = BRW_THREAD_SWITCH; 1116 1117 /* Also pop item off the stack in the endif instruction: */ 1118 if (intel->gen < 6) { 1119 insn->bits3.if_else.jump_count = 0; 1120 insn->bits3.if_else.pop_count = 1; 1121 insn->bits3.if_else.pad0 = 0; 1122 } else { 1123 insn->bits1.branch_gen6.jump_count = 2; 1124 } 1125 patch_IF_ELSE(p, if_inst, else_inst, insn); 1126} 1127 1128struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count) 1129{ 1130 struct intel_context *intel = &p->brw->intel; 1131 struct brw_instruction *insn; 1132 1133 insn = next_insn(p, BRW_OPCODE_BREAK); 1134 if (intel->gen >= 6) { 1135 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1136 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1137 brw_set_src1(p, insn, brw_imm_d(0x0)); 1138 } else 
{ 1139 brw_set_dest(p, insn, brw_ip_reg()); 1140 brw_set_src0(p, insn, brw_ip_reg()); 1141 brw_set_src1(p, insn, brw_imm_d(0x0)); 1142 insn->bits3.if_else.pad0 = 0; 1143 insn->bits3.if_else.pop_count = pop_count; 1144 } 1145 insn->header.compression_control = BRW_COMPRESSION_NONE; 1146 insn->header.execution_size = BRW_EXECUTE_8; 1147 1148 return insn; 1149} 1150 1151struct brw_instruction *gen6_CONT(struct brw_compile *p, 1152 struct brw_instruction *do_insn) 1153{ 1154 struct brw_instruction *insn; 1155 int br = 2; 1156 1157 insn = next_insn(p, BRW_OPCODE_CONTINUE); 1158 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1159 brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1160 brw_set_dest(p, insn, brw_ip_reg()); 1161 brw_set_src0(p, insn, brw_ip_reg()); 1162 brw_set_src1(p, insn, brw_imm_d(0x0)); 1163 1164 insn->bits3.break_cont.uip = br * (do_insn - insn); 1165 1166 insn->header.compression_control = BRW_COMPRESSION_NONE; 1167 insn->header.execution_size = BRW_EXECUTE_8; 1168 return insn; 1169} 1170 1171struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count) 1172{ 1173 struct brw_instruction *insn; 1174 insn = next_insn(p, BRW_OPCODE_CONTINUE); 1175 brw_set_dest(p, insn, brw_ip_reg()); 1176 brw_set_src0(p, insn, brw_ip_reg()); 1177 brw_set_src1(p, insn, brw_imm_d(0x0)); 1178 insn->header.compression_control = BRW_COMPRESSION_NONE; 1179 insn->header.execution_size = BRW_EXECUTE_8; 1180 /* insn->header.mask_control = BRW_MASK_DISABLE; */ 1181 insn->bits3.if_else.pad0 = 0; 1182 insn->bits3.if_else.pop_count = pop_count; 1183 return insn; 1184} 1185 1186/* DO/WHILE loop: 1187 * 1188 * The DO/WHILE is just an unterminated loop -- break or continue are 1189 * used for control within the loop. We have a few ways they can be 1190 * done. 1191 * 1192 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip, 1193 * jip and no DO instruction. 
 *
 * For non-uniform control flow pre-gen6, there's a DO instruction to
 * push the mask, and a WHILE to jump back, and BREAK to get out and
 * pop the mask.
 *
 * For gen6, there's no more mask stack, so no need for DO.  WHILE
 * just points back to the first instruction of the loop.
 */
struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
{
   struct intel_context *intel = &p->brw->intel;

   if (intel->gen >= 6 || p->single_program_flow) {
      /* No DO is emitted; return the address of the next instruction so
       * brw_WHILE() can compute its backwards jump distance.
       */
      return &p->store[p->nr_insn];
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);

      /* Override the defaults for this instruction:
       */
      brw_set_dest(p, insn, brw_null_reg());
      brw_set_src0(p, insn, brw_null_reg());
      brw_set_src1(p, insn, brw_null_reg());

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = execute_size;
      insn->header.predicate_control = BRW_PREDICATE_NONE;
      /* insn->header.mask_control = BRW_MASK_ENABLE; */
      /* insn->header.mask_control = BRW_MASK_DISABLE; */

      return insn;
   }
}



/* Close the loop opened by \p do_insn.  On gen6+ a WHILE jumps straight
 * back to the loop top; pre-gen6 SPF mode uses a plain ADD to ip, and
 * the non-SPF path emits a WHILE whose jump_count points just past DO.
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p,
                                  struct brw_instruction *do_insn)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   /* Jump distances count 64-bit chunks on gen5+ (2 per instruction),
    * whole instructions before that.
    */
   GLuint br = 1;

   if (intel->gen >= 5)
      br = 2;

   if (intel->gen >= 6) {
      insn = next_insn(p, BRW_OPCODE_WHILE);

      brw_set_dest(p, insn, brw_imm_w(0));
      /* Negative distance: jump back to the first loop instruction. */
      insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));

      insn->header.execution_size = do_insn->header.execution_size;
      assert(insn->header.execution_size == BRW_EXECUTE_8);
   } else {
      if (p->single_program_flow) {
	 insn = next_insn(p, BRW_OPCODE_ADD);

	 /* Uniform control flow: the "loop" is just ip arithmetic
	  * (16 bytes per instruction).
	  */
	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
	 insn->header.execution_size = BRW_EXECUTE_1;
      } else {
	 insn = next_insn(p, BRW_OPCODE_WHILE);

	 assert(do_insn->header.opcode == BRW_OPCODE_DO);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(p, insn, brw_ip_reg());
	 brw_set_src1(p, insn, brw_imm_d(0));

	 insn->header.execution_size = do_insn->header.execution_size;
	 /* Jump to the instruction after DO (hence +1). */
	 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
	 insn->bits3.if_else.pop_count = 0;
	 insn->bits3.if_else.pad0 = 0;
      }
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}


/* FORWARD JUMPS:
 *
 * Patch a previously-emitted JMPI so it lands on the next instruction
 * to be emitted.  The JMPI's src1 must be an immediate (the distance).
 */
void brw_land_fwd_jump(struct brw_compile *p,
		       struct brw_instruction *jmp_insn)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *landing = &p->store[p->nr_insn];
   /* JMPI distance counts 64-bit chunks on gen5+ (2 per instruction). */
   GLuint jmpi = 1;

   if (intel->gen >= 5)
      jmpi = 2;

   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
   assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);

   /* -1 because JMPI is relative to the instruction after itself. */
   jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
}



/* To integrate with the above, it makes sense that the comparison
 * instruction should populate the flag register.  It might be simpler
 * just to use the flag reg for most WM tasks?
1302 */ 1303void brw_CMP(struct brw_compile *p, 1304 struct brw_reg dest, 1305 GLuint conditional, 1306 struct brw_reg src0, 1307 struct brw_reg src1) 1308{ 1309 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP); 1310 1311 insn->header.destreg__conditionalmod = conditional; 1312 brw_set_dest(p, insn, dest); 1313 brw_set_src0(p, insn, src0); 1314 brw_set_src1(p, insn, src1); 1315 1316/* guess_execution_size(insn, src0); */ 1317 1318 1319 /* Make it so that future instructions will use the computed flag 1320 * value until brw_set_predicate_control_flag_value() is called 1321 * again. 1322 */ 1323 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE && 1324 dest.nr == 0) { 1325 p->current->header.predicate_control = BRW_PREDICATE_NORMAL; 1326 p->flag_value = 0xff; 1327 } 1328} 1329 1330/* Issue 'wait' instruction for n1, host could program MMIO 1331 to wake up thread. */ 1332void brw_WAIT (struct brw_compile *p) 1333{ 1334 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT); 1335 struct brw_reg src = brw_notification_1_reg(); 1336 1337 brw_set_dest(p, insn, src); 1338 brw_set_src0(p, insn, src); 1339 brw_set_src1(p, insn, brw_null_reg()); 1340 insn->header.execution_size = 0; /* must */ 1341 insn->header.predicate_control = 0; 1342 insn->header.compression_control = 0; 1343} 1344 1345 1346/*********************************************************************** 1347 * Helpers for the various SEND message types: 1348 */ 1349 1350/** Extended math function, float[8]. 
 *
 * On gen6+ this emits a native MATH instruction; on earlier generations
 * it emits a SEND to the extended math shared function, using message
 * register \p msg_reg_nr.
 */
void brw_math( struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       GLuint saturate,
	       GLuint msg_reg_nr,
	       struct brw_reg src,
	       GLuint data_type,
	       GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;

   if (intel->gen >= 6) {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

      /* Native MATH only operates on packed GRF operands. */
      assert(dest.file == BRW_GENERAL_REGISTER_FILE);
      assert(src.file == BRW_GENERAL_REGISTER_FILE);

      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
      assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);

      /* Source modifiers are ignored for extended math instructions. */
      assert(!src.negate);
      assert(!src.abs);

      /* All functions except the integer-divide variants take floats. */
      if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
	  function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
	 assert(src.type == BRW_REGISTER_TYPE_F);
      }

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;
      insn->header.saturate = saturate;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      /* POW takes two input values; SINCOS returns two result regs. */
      GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
      GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
      /* Example code doesn't set predicate_control for send
       * instructions.
       */
      insn->header.predicate_control = 0;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_math_message(p,
			   insn,
			   msg_length, response_length,
			   function,
			   BRW_MATH_INTEGER_UNSIGNED,
			   precision,
			   saturate,
			   data_type);
   }
}

/** Extended math function, float[8].
 *
 * Two-source variant of the native gen6+ MATH instruction (e.g. for the
 * integer-divide functions and POW).  Gen6+ only.
 */
void brw_math2(struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       struct brw_reg src0,
	       struct brw_reg src1)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

   assert(intel->gen >= 6);
   (void) intel;   /* only used by the assert in release builds */


   /* Native MATH only operates on packed GRF operands. */
   assert(dest.file == BRW_GENERAL_REGISTER_FILE);
   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.file == BRW_GENERAL_REGISTER_FILE);

   assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
   assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
   assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);

   /* All functions except the integer-divide variants take floats. */
   if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
       function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
      assert(src0.type == BRW_REGISTER_TYPE_F);
      assert(src1.type == BRW_REGISTER_TYPE_F);
   }

   /* Source modifiers are ignored for extended math instructions. */
   assert(!src0.negate);
   assert(!src0.abs);
   assert(!src1.negate);
   assert(!src1.abs);

   /* Math is the same ISA format as other opcodes, except that CondModifier
    * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
    */
   insn->header.destreg__conditionalmod = function;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
}

/**
 * Extended math function, float[16].
 * Use 2 send instructions.
 */
void brw_math_16( struct brw_compile *p,
		  struct brw_reg dest,
		  GLuint function,
		  GLuint saturate,
		  GLuint msg_reg_nr,
		  struct brw_reg src,
		  GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   /* POW takes two input values; SINCOS returns two result regs. */
   GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
   GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;

   if (intel->gen >= 6) {
      /* Gen6+ handles 16-wide with a single native MATH instruction. */
      insn = next_insn(p, BRW_OPCODE_MATH);

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;
      insn->header.saturate = saturate;

      /* Source modifiers are ignored for extended math instructions. */
      assert(!src.negate);
      assert(!src.abs);

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src);
      brw_set_src1(p, insn, brw_null_reg());
      return;
   }

   /* Pre-gen6: the math unit only handles 8 channels per message, so
    * issue two uncompressed SENDs, one per half.
    *
    * First instruction:
    */
   brw_push_insn_state(p);
   brw_set_predicate_control_flag_value(p, 0xff);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);
   brw_set_math_message(p,
			insn,
			msg_length, response_length,
			function,
			BRW_MATH_INTEGER_UNSIGNED,
			precision,
			saturate,
			BRW_MATH_DATA_VECTOR);

   /* Second instruction: operates on the second halves of the registers.
    */
   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
   insn->header.destreg__conditionalmod = msg_reg_nr+1;

   brw_set_dest(p, insn, offset(dest,1));
   brw_set_src0(p, insn, src);
   brw_set_math_message(p,
			insn,
			msg_length, response_length,
			function,
			BRW_MATH_INTEGER_UNSIGNED,
			precision,
			saturate,
			BRW_MATH_DATA_VECTOR);

   brw_pop_insn_state(p);
}


/**
 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
 * using a constant offset per channel.
 *
 * The offset must be aligned to oword size (16 bytes).  Used for
 * register spilling.
 */
void brw_oword_block_write_scratch(struct brw_compile *p,
				   struct brw_reg mrf,
				   int num_regs,
				   GLuint offset)
{
   struct intel_context *intel = &p->brw->intel;
   uint32_t msg_control, msg_type;
   int mlen;

   /* On gen6+ the message takes the offset in owords, not bytes. */
   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* One GRF of data = 2 owords; the header occupies one more MRF. */
   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      mlen = 2;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      mlen = 3;
   }

   /* Set up the message header.  This is g0, with g0.2 filled with
    * the offset.  We don't want to leave our offset around in g0 or
    * it'll screw up texture samples, so set it up inside the message
    * reg.
    */
   {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
				  mrf.nr,
				  2), BRW_REGISTER_TYPE_UD),
	      brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_reg dest;
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      int send_commit_msg;
      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
					 BRW_REGISTER_TYPE_UW);

      /* SEND itself is never compressed; force it off if inherited. */
      if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
	 insn->header.compression_control = BRW_COMPRESSION_NONE;
	 src_header = vec16(src_header);
      }
      assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
      insn->header.destreg__conditionalmod = mrf.nr;

      /* Until gen6, writes followed by reads from the same location
       * are not guaranteed to be ordered unless write_commit is set.
       * If set, then a no-op write is issued to the destination
       * register to set a dependency, and a read from the destination
       * can be used to ensure the ordering.
       *
       * For gen6, only writes between different threads need ordering
       * protection.  Our use of DP writes is all about register
       * spilling within a thread.
       */
      if (intel->gen >= 6) {
	 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
	 send_commit_msg = 0;
      } else {
	 dest = src_header;
	 send_commit_msg = 1;
      }

      brw_set_dest(p, insn, dest);
      if (intel->gen >= 6) {
	 brw_set_src0(p, insn, mrf);
      } else {
	 brw_set_src0(p, insn, brw_null_reg());
      }

      if (intel->gen >= 6)
	 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
      else
	 msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;

      brw_set_dp_write_message(p,
			       insn,
			       255, /* binding table index (255=stateless) */
			       msg_control,
			       msg_type,
			       mlen,
			       GL_TRUE, /* header_present */
			       0, /* pixel scoreboard */
			       send_commit_msg, /* response_length */
			       0, /* eot */
			       send_commit_msg);
   }
}


/**
 * Read a block of owords (half a GRF each) from the scratch buffer
 * using a constant index per channel.
 *
 * Offset must be aligned to oword size (16 bytes).  Used for register
 * spilling.
 */
void
brw_oword_block_read_scratch(struct brw_compile *p,
			     struct brw_reg dest,
			     struct brw_reg mrf,
			     int num_regs,
			     GLuint offset)
{
   struct intel_context *intel = &p->brw->intel;
   uint32_t msg_control;
   int rlen;

   /* On gen6+ the message takes the offset in owords, not bytes. */
   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
   dest = retype(dest, BRW_REGISTER_TYPE_UW);

   /* One GRF of result = 2 owords. */
   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      rlen = 1;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      rlen = 2;
   }

   /* Build the message header in the MRF: a copy of g0 with the
    * global offset placed in element 2.
    */
   {
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_mask_control(p, BRW_MASK_DISABLE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
				  mrf.nr,
				  2), BRW_REGISTER_TYPE_UD),
	      brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      assert(insn->header.predicate_control == 0);
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.destreg__conditionalmod = mrf.nr;

      brw_set_dest(p, insn, dest);	/* UW? */
      if (intel->gen >= 6) {
	 brw_set_src0(p, insn, mrf);
      } else {
	 brw_set_src0(p, insn, brw_null_reg());
      }

      brw_set_dp_read_message(p,
			      insn,
			      255, /* binding table index (255=stateless) */
			      msg_control,
			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			      BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
			      1, /* msg_length */
			      rlen);
   }
}

/**
 * Read a float[4] vector from the data port Data Cache (const buffer).
 * Location (in buffer) should be a multiple of 16.
 * Used for fetching shader constants.
 */
void brw_oword_block_read(struct brw_compile *p,
			  struct brw_reg dest,
			  struct brw_reg mrf,
			  uint32_t offset,
			  uint32_t bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;

   /* On newer hardware, offset is in units of owords. */
   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* Build the message header in the MRF: a copy of g0 with the
    * global offset placed in element 2.
    */
   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);

   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

   /* set message header global offset field (reg 0, element 2) */
   brw_MOV(p,
	   retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
			       mrf.nr,
			       2), BRW_REGISTER_TYPE_UD),
	   brw_imm_ud(offset));

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = mrf.nr;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(p, insn, dest);
   if (intel->gen >= 6) {
      brw_set_src0(p, insn, mrf);
   } else {
      brw_set_src0(p, insn, brw_null_reg());
   }

   brw_set_dp_read_message(p,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
			   1, /* msg_length */
			   1); /* response_length (1 reg, 2 owords!) */

   brw_pop_insn_state(p);
}

/**
 * Read a set of dwords from the data port Data Cache (const buffer).
 *
 * Location (in buffer) appears as UD offsets in the register after
 * the provided mrf header reg.
 */
void brw_dword_scattered_read(struct brw_compile *p,
			      struct brw_reg dest,
			      struct brw_reg mrf,
			      uint32_t bind_table_index)
{
   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* Message header is a plain copy of g0; the per-channel offsets are
    * expected to already be in the register after \p mrf.
    */
   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
   brw_pop_insn_state(p);

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = mrf.nr;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, brw_null_reg());

   brw_set_dp_read_message(p,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS,
			   BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ,
			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
			   2, /* msg_length */
			   1); /* response_length */
}



/**
 * Read float[4] constant(s) from VS constant buffer.
 * For relative addressing, two float[4] constants will be read into 'dest'.
 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
 */
void brw_dp_READ_4_vs(struct brw_compile *p,
		      struct brw_reg dest,
		      GLuint location,
		      GLuint bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint msg_reg_nr = 1;

   /* On gen6+ the message takes the location in owords, not bytes. */
   if (intel->gen >= 6)
      location /= 16;

   /* Setup MRF[1] with location/offset into const buffer */
   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2),
		     BRW_REGISTER_TYPE_UD),
	   brw_imm_ud(location));
   brw_pop_insn_state(p);

   insn = next_insn(p, BRW_OPCODE_SEND);

   insn->header.predicate_control = BRW_PREDICATE_NONE;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.destreg__conditionalmod = msg_reg_nr;
   insn->header.mask_control = BRW_MASK_DISABLE;

   brw_set_dest(p, insn, dest);
   if (intel->gen >= 6) {
      brw_set_src0(p, insn, brw_message_reg(msg_reg_nr));
   } else {
      brw_set_src0(p, insn, brw_null_reg());
   }

   brw_set_dp_read_message(p,
			   insn,
			   bind_table_index,
			   0,
			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
			   1, /* msg_length */
			   1); /* response_length (1 Oword) */
}

/**
 * Read a float[4] constant per vertex from VS constant buffer, with
 * relative addressing.
 */
void brw_dp_READ_4_vs_relative(struct brw_compile *p,
			       struct brw_reg dest,
			       struct brw_reg addr_reg,
			       GLuint offset,
			       GLuint bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_reg src = brw_vec8_grf(0, 0);
   int msg_type;

   /* Setup MRF[1] with offset into const buffer */
   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);

   /* M1.0 is block offset 0, M1.4 is block offset 1, all other
    * fields ignored.
    */
   brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D),
	   addr_reg, brw_imm_d(offset));
   brw_pop_insn_state(p);

   /* On gen6 the g0 header has to be moved into an MRF explicitly. */
   gen6_resolve_implied_move(p, &src, 0);
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

   insn->header.predicate_control = BRW_PREDICATE_NONE;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.destreg__conditionalmod = 0;
   insn->header.mask_control = BRW_MASK_DISABLE;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src);

   /* The dual-block read message id differs per generation. */
   if (intel->gen == 6)
      msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else if (intel->gen == 5 || intel->is_g4x)
      msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else
      msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;

   brw_set_dp_read_message(p,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
			   msg_type,
			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
			   2, /* msg_length */
			   1); /* response_length */
}



/* Emit a render-target write message to the framebuffer.  On gen6+ a
 * headerless message (or SENDC for RT 0) is used; earlier generations
 * send from \p msg_reg_nr.
 */
void brw_fb_WRITE(struct brw_compile *p,
		  int dispatch_width,
                  GLuint msg_reg_nr,
                  struct brw_reg src0,
                  GLuint binding_table_index,
                  GLuint msg_length,
                  GLuint response_length,
                  GLboolean eot,
                  GLboolean header_present)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint msg_control, msg_type;
   struct brw_reg dest;

   if (dispatch_width == 16)
      dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
   else
      dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);

   if (intel->gen >= 6 && binding_table_index == 0) {
      insn = next_insn(p, BRW_OPCODE_SENDC);
   } else {
      insn = next_insn(p, BRW_OPCODE_SEND);
   }
   /* The execution mask is ignored for render target writes. */
   insn->header.predicate_control = 0;
   insn->header.compression_control = BRW_COMPRESSION_NONE;

   if (intel->gen >= 6) {
      /* headerless version, just submit color payload */
      src0 = brw_message_reg(msg_reg_nr);

      msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   } else {
      insn->header.destreg__conditionalmod = msg_reg_nr;

      msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   }

   if (dispatch_width == 16)
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
   else
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_dp_write_message(p,
			    insn,
			    binding_table_index,
			    msg_control,
			    msg_type,
			    msg_length,
			    header_present,
			    1, /* pixel scoreboard */
			    response_length,
			    eot,
			    0 /* send_commit_msg */);
}


/**
 * Texture sample instruction.
 * Note: the msg_type plus msg_length values determine exactly what kind
 * of sampling operation is performed.  See volume 4, page 161 of docs.
 */
void brw_SAMPLE(struct brw_compile *p,
                struct brw_reg dest,
                GLuint msg_reg_nr,
                struct brw_reg src0,
                GLuint binding_table_index,
                GLuint sampler,
                GLuint writemask,
                GLuint msg_type,
                GLuint response_length,
                GLuint msg_length,
                GLboolean eot,
                GLuint header_present,
                GLuint simd_mode)
{
   struct intel_context *intel = &p->brw->intel;
   GLboolean need_stall = 0;

   /* Nothing to write back: skip the sample entirely. */
   if (writemask == 0) {
      /*printf("%s: zero writemask??\n", __FUNCTION__); */
      return;
   }

   /* Hardware doesn't do destination dependency checking on send
    * instructions properly.  Add a workaround which generates the
    * dependency by other means.  In practice it seems like this bug
    * only crops up for texture samples, and only where registers are
    * written by the send and then written again later without being
    * read in between.  Luckily for us, we already track that
    * information and use it to modify the writemask for the
    * instruction, so that is a guide for whether a workaround is
    * needed.
    */
   if (writemask != WRITEMASK_XYZW) {
      GLuint dst_offset = 0;
      GLuint i, newmask = 0, len = 0;

      /* Find the first enabled channel; each skipped channel offsets
       * the destination by two registers.
       */
      for (i = 0; i < 4; i++) {
         if (writemask & (1<<i))
            break;
         dst_offset += 2;
      }
      /* Collect the contiguous run of enabled channels that follows. */
      for (; i < 4; i++) {
         if (!(writemask & (1<<i)))
            break;
         newmask |= 1<<i;
         len++;
      }

      if (newmask != writemask) {
         /* Non-contiguous writemask: can't express it via the message
          * header, so force the dependency with a stall afterwards.
          */
         need_stall = 1;
         /* printf("need stall %x %x\n", newmask , writemask); */
      }
      else {
         GLboolean dispatch_16 = GL_FALSE;

         struct brw_reg m1 = brw_message_reg(msg_reg_nr);

         guess_execution_size(p, p->current, dest);
         if (p->current->header.execution_size == BRW_EXECUTE_16)
            dispatch_16 = GL_TRUE;

         /* Invert: the header field holds the channels to *disable*.
          * NOTE(review): the inverted mask is written at bits 12..15 of
          * header dword 2 (m1.2) -- confirm against the sampler message
          * header layout in the PRM.
          */
         newmask = ~newmask & WRITEMASK_XYZW;

         brw_push_insn_state(p);

         brw_set_compression_control(p, BRW_COMPRESSION_NONE);
         brw_set_mask_control(p, BRW_MASK_DISABLE);

         /* Copy g0 into the header, then patch in the channel mask. */
         brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD),
                 retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD));
         brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));

         brw_pop_insn_state(p);

         src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
         dest = offset(dest, dst_offset);

         /* For 16-wide dispatch, masked channels are skipped in the
          * response. For 8-wide, masked channels still take up slots,
          * and are just not written to.
          */
         if (dispatch_16)
            response_length = len * 2;
      }
   }

   {
      struct brw_instruction *insn;

      gen6_resolve_implied_move(p, &src0, msg_reg_nr);

      insn = next_insn(p, BRW_OPCODE_SEND);
      insn->header.predicate_control = 0; /* XXX */
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      /* Pre-gen6 only: SEND carries the message register number here. */
      if (intel->gen < 6)
          insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(p, insn, src0);
      brw_set_sampler_message(p, insn,
                              binding_table_index,
                              sampler,
                              msg_type,
                              response_length,
                              msg_length,
                              eot,
                              header_present,
                              simd_mode);
   }

   if (need_stall) {
      /* Self-move of the last response register: forces the hardware to
       * wait for the sampler writeback before later writes to dest.
       */
      struct brw_reg reg = vec8(offset(dest, response_length-1));

      /*  mov (8) r9.0<1>:f    r9.0<8;8,1>:f   { Align1 }
       */
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, retype(reg, BRW_REGISTER_TYPE_UD),
              retype(reg, BRW_REGISTER_TYPE_UD));
      brw_pop_insn_state(p);
   }

}

/* All these variables are pretty confusing - we might be better off
 * using bitmasks and macros for this, in the old style.  Or perhaps
 * just having the caller instantiate the fields in dword3 itself.
2121 */ 2122void brw_urb_WRITE(struct brw_compile *p, 2123 struct brw_reg dest, 2124 GLuint msg_reg_nr, 2125 struct brw_reg src0, 2126 GLboolean allocate, 2127 GLboolean used, 2128 GLuint msg_length, 2129 GLuint response_length, 2130 GLboolean eot, 2131 GLboolean writes_complete, 2132 GLuint offset, 2133 GLuint swizzle) 2134{ 2135 struct intel_context *intel = &p->brw->intel; 2136 struct brw_instruction *insn; 2137 2138 gen6_resolve_implied_move(p, &src0, msg_reg_nr); 2139 2140 insn = next_insn(p, BRW_OPCODE_SEND); 2141 2142 assert(msg_length < BRW_MAX_MRF); 2143 2144 brw_set_dest(p, insn, dest); 2145 brw_set_src0(p, insn, src0); 2146 brw_set_src1(p, insn, brw_imm_d(0)); 2147 2148 if (intel->gen < 6) 2149 insn->header.destreg__conditionalmod = msg_reg_nr; 2150 2151 brw_set_urb_message(p, 2152 insn, 2153 allocate, 2154 used, 2155 msg_length, 2156 response_length, 2157 eot, 2158 writes_complete, 2159 offset, 2160 swizzle); 2161} 2162 2163static int 2164brw_find_next_block_end(struct brw_compile *p, int start) 2165{ 2166 int ip; 2167 2168 for (ip = start + 1; ip < p->nr_insn; ip++) { 2169 struct brw_instruction *insn = &p->store[ip]; 2170 2171 switch (insn->header.opcode) { 2172 case BRW_OPCODE_ENDIF: 2173 case BRW_OPCODE_ELSE: 2174 case BRW_OPCODE_WHILE: 2175 return ip; 2176 } 2177 } 2178 assert(!"not reached"); 2179 return start + 1; 2180} 2181 2182/* There is no DO instruction on gen6, so to find the end of the loop 2183 * we have to see if the loop is jumping back before our start 2184 * instruction. 
 */
static int
brw_find_loop_end(struct brw_compile *p, int start)
{
   int ip;
   /* Gen6 jump counts are expressed in 64-bit units, i.e. two per
    * 128-bit instruction, so divide by br to get instruction counts.
    */
   int br = 2;

   for (ip = start + 1; ip < p->nr_insn; ip++) {
      struct brw_instruction *insn = &p->store[ip];

      if (insn->header.opcode == BRW_OPCODE_WHILE) {
         /* jump_count is negative for the backward branch; a WHILE whose
          * target lies before our start instruction is the end of the
          * loop enclosing it.
          * NOTE(review): the strict '<' means a WHILE jumping exactly to
          * 'start' is not matched -- confirm a loop body can never begin
          * at the BREAK/CONT itself.
          */
         if (ip + insn->bits1.branch_gen6.jump_count / br < start)
            return ip;
      }
   }
   assert(!"not reached");
   return start + 1;
}

/* After program generation, go back and update the UIP and JIP of
 * BREAK and CONT instructions to their correct locations.
 */
void
brw_set_uip_jip(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   int ip;
   /* Distances are scaled by 2: jump offsets count 64-bit units. */
   int br = 2;

   /* UIP/JIP only exist on gen6+ control flow instructions. */
   if (intel->gen < 6)
      return;

   for (ip = 0; ip < p->nr_insn; ip++) {
      struct brw_instruction *insn = &p->store[ip];

      switch (insn->header.opcode) {
      case BRW_OPCODE_BREAK:
         /* JIP: end of the current block; UIP: just past the loop's
          * WHILE (hence the +1), where execution resumes.
          */
         insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
         insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip + 1);
         break;
      case BRW_OPCODE_CONTINUE:
         /* JIP is set at CONTINUE emit time, since that's when we
          * know where the start of the loop is.
          */
         insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
         assert(insn->bits3.break_cont.uip != 0);
         assert(insn->bits3.break_cont.jip != 0);
         break;
      }
   }
}

/**
 * Emit an FF sync SEND message.
 * NOTE(review): ff_sync appears to be the gen5 fixed-function thread
 * synchronization/URB-allocation message -- confirm the exact semantics
 * against brw_set_ff_sync_message and the PRM.
 *
 * \param allocate         whether to request a URB handle allocation.
 * \param response_length  registers written back by the message.
 * \param eot              GL_TRUE for the final message of the thread.
 */
void brw_ff_sync(struct brw_compile *p,
                 struct brw_reg dest,
                 GLuint msg_reg_nr,
                 struct brw_reg src0,
                 GLboolean allocate,
                 GLuint response_length,
                 GLboolean eot)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   /* On gen6, SEND requires an MRF source; move src0 there explicitly. */
   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   insn = next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   /* src1 is overwritten by the message descriptor set below. */
   brw_set_src1(p, insn, brw_imm_d(0));

   /* Pre-gen6 only: SEND carries the message register number here. */
   if (intel->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_ff_sync_message(p,
                           insn,
                           allocate,
                           response_length,
                           eot);
}