/* brw_eu_emit.c — revision 59c6b775a6aacfe03c84dae62c2fd45d4af9d70b */
/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keith@tungstengraphics.com>
 */


#include "brw_context.h"
#include "brw_defines.h"
#include "brw_eu.h"




/***********************************************************************
 * Internal helper for constructing instructions
 */

/**
 * Pick an execution size for \p insn from the width of \p reg.
 *
 * If the register is 8 wide and the compiler is emitting compressed
 * (SIMD16) code, the instruction executes on 16 channels; otherwise the
 * encoded width value is used directly (BRW_WIDTH_* and BRW_EXECUTE_*
 * encodings are compatible, per the comment below).
 */
static void guess_execution_size(struct brw_compile *p,
                                 struct brw_instruction *insn,
                                 struct brw_reg reg)
{
   if (reg.width == BRW_WIDTH_8 && p->compressed)
      insn->header.execution_size = BRW_EXECUTE_16;
   else
      insn->header.execution_size = reg.width;	/* note - definitions are compatible */
}


/**
 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
 * registers, implicitly moving the operand to a message register.
 *
 * On Sandybridge, this is no longer the case.  This function performs the
 * explicit move; it should be called before emitting a SEND instruction.
61 */ 62static void 63gen6_resolve_implied_move(struct brw_compile *p, 64 struct brw_reg *src, 65 GLuint msg_reg_nr) 66{ 67 struct intel_context *intel = &p->brw->intel; 68 if (intel->gen != 6) 69 return; 70 71 if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) { 72 brw_push_insn_state(p); 73 brw_set_mask_control(p, BRW_MASK_DISABLE); 74 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 75 brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD), 76 retype(*src, BRW_REGISTER_TYPE_UD)); 77 brw_pop_insn_state(p); 78 } 79 *src = brw_message_reg(msg_reg_nr); 80} 81 82 83static void brw_set_dest(struct brw_compile *p, 84 struct brw_instruction *insn, 85 struct brw_reg dest) 86{ 87 if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && 88 dest.file != BRW_MESSAGE_REGISTER_FILE) 89 assert(dest.nr < 128); 90 91 insn->bits1.da1.dest_reg_file = dest.file; 92 insn->bits1.da1.dest_reg_type = dest.type; 93 insn->bits1.da1.dest_address_mode = dest.address_mode; 94 95 if (dest.address_mode == BRW_ADDRESS_DIRECT) { 96 insn->bits1.da1.dest_reg_nr = dest.nr; 97 98 if (insn->header.access_mode == BRW_ALIGN_1) { 99 insn->bits1.da1.dest_subreg_nr = dest.subnr; 100 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) 101 dest.hstride = BRW_HORIZONTAL_STRIDE_1; 102 insn->bits1.da1.dest_horiz_stride = dest.hstride; 103 } 104 else { 105 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; 106 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; 107 /* even ignored in da16, still need to set as '01' */ 108 insn->bits1.da16.dest_horiz_stride = 1; 109 } 110 } 111 else { 112 insn->bits1.ia1.dest_subreg_nr = dest.subnr; 113 114 /* These are different sizes in align1 vs align16: 115 */ 116 if (insn->header.access_mode == BRW_ALIGN_1) { 117 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; 118 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) 119 dest.hstride = BRW_HORIZONTAL_STRIDE_1; 120 insn->bits1.ia1.dest_horiz_stride = dest.hstride; 121 } 122 
else { 123 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; 124 /* even ignored in da16, still need to set as '01' */ 125 insn->bits1.ia16.dest_horiz_stride = 1; 126 } 127 } 128 129 /* NEW: Set the execution size based on dest.width and 130 * insn->compression_control: 131 */ 132 guess_execution_size(p, insn, dest); 133} 134 135extern int reg_type_size[]; 136 137static void 138validate_reg(struct brw_instruction *insn, struct brw_reg reg) 139{ 140 int hstride_for_reg[] = {0, 1, 2, 4}; 141 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256}; 142 int width_for_reg[] = {1, 2, 4, 8, 16}; 143 int execsize_for_reg[] = {1, 2, 4, 8, 16}; 144 int width, hstride, vstride, execsize; 145 146 if (reg.file == BRW_IMMEDIATE_VALUE) { 147 /* 3.3.6: Region Parameters. Restriction: Immediate vectors 148 * mean the destination has to be 128-bit aligned and the 149 * destination horiz stride has to be a word. 150 */ 151 if (reg.type == BRW_REGISTER_TYPE_V) { 152 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] * 153 reg_type_size[insn->bits1.da1.dest_reg_type] == 2); 154 } 155 156 return; 157 } 158 159 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE && 160 reg.file == BRW_ARF_NULL) 161 return; 162 163 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg)); 164 hstride = hstride_for_reg[reg.hstride]; 165 166 if (reg.vstride == 0xf) { 167 vstride = -1; 168 } else { 169 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg)); 170 vstride = vstride_for_reg[reg.vstride]; 171 } 172 173 assert(reg.width >= 0 && reg.width < Elements(width_for_reg)); 174 width = width_for_reg[reg.width]; 175 176 assert(insn->header.execution_size >= 0 && 177 insn->header.execution_size < Elements(execsize_for_reg)); 178 execsize = execsize_for_reg[insn->header.execution_size]; 179 180 /* Restrictions from 3.3.10: Register Region Restrictions. */ 181 /* 3. */ 182 assert(execsize >= width); 183 184 /* 4. 
*/ 185 if (execsize == width && hstride != 0) { 186 assert(vstride == -1 || vstride == width * hstride); 187 } 188 189 /* 5. */ 190 if (execsize == width && hstride == 0) { 191 /* no restriction on vstride. */ 192 } 193 194 /* 6. */ 195 if (width == 1) { 196 assert(hstride == 0); 197 } 198 199 /* 7. */ 200 if (execsize == 1 && width == 1) { 201 assert(hstride == 0); 202 assert(vstride == 0); 203 } 204 205 /* 8. */ 206 if (vstride == 0 && hstride == 0) { 207 assert(width == 1); 208 } 209 210 /* 10. Check destination issues. */ 211} 212 213static void brw_set_src0( struct brw_instruction *insn, 214 struct brw_reg reg ) 215{ 216 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) 217 assert(reg.nr < 128); 218 219 validate_reg(insn, reg); 220 221 insn->bits1.da1.src0_reg_file = reg.file; 222 insn->bits1.da1.src0_reg_type = reg.type; 223 insn->bits2.da1.src0_abs = reg.abs; 224 insn->bits2.da1.src0_negate = reg.negate; 225 insn->bits2.da1.src0_address_mode = reg.address_mode; 226 227 if (reg.file == BRW_IMMEDIATE_VALUE) { 228 insn->bits3.ud = reg.dw1.ud; 229 230 /* Required to set some fields in src1 as well: 231 */ 232 insn->bits1.da1.src1_reg_file = 0; /* arf */ 233 insn->bits1.da1.src1_reg_type = reg.type; 234 } 235 else 236 { 237 if (reg.address_mode == BRW_ADDRESS_DIRECT) { 238 if (insn->header.access_mode == BRW_ALIGN_1) { 239 insn->bits2.da1.src0_subreg_nr = reg.subnr; 240 insn->bits2.da1.src0_reg_nr = reg.nr; 241 } 242 else { 243 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16; 244 insn->bits2.da16.src0_reg_nr = reg.nr; 245 } 246 } 247 else { 248 insn->bits2.ia1.src0_subreg_nr = reg.subnr; 249 250 if (insn->header.access_mode == BRW_ALIGN_1) { 251 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; 252 } 253 else { 254 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; 255 } 256 } 257 258 if (insn->header.access_mode == BRW_ALIGN_1) { 259 if (reg.width == BRW_WIDTH_1 && 260 insn->header.execution_size == BRW_EXECUTE_1) { 261 
insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0; 262 insn->bits2.da1.src0_width = BRW_WIDTH_1; 263 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0; 264 } 265 else { 266 insn->bits2.da1.src0_horiz_stride = reg.hstride; 267 insn->bits2.da1.src0_width = reg.width; 268 insn->bits2.da1.src0_vert_stride = reg.vstride; 269 } 270 } 271 else { 272 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); 273 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); 274 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); 275 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); 276 277 /* This is an oddity of the fact we're using the same 278 * descriptions for registers in align_16 as align_1: 279 */ 280 if (reg.vstride == BRW_VERTICAL_STRIDE_8) 281 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4; 282 else 283 insn->bits2.da16.src0_vert_stride = reg.vstride; 284 } 285 } 286} 287 288 289void brw_set_src1( struct brw_instruction *insn, 290 struct brw_reg reg ) 291{ 292 assert(reg.file != BRW_MESSAGE_REGISTER_FILE); 293 294 assert(reg.nr < 128); 295 296 validate_reg(insn, reg); 297 298 insn->bits1.da1.src1_reg_file = reg.file; 299 insn->bits1.da1.src1_reg_type = reg.type; 300 insn->bits3.da1.src1_abs = reg.abs; 301 insn->bits3.da1.src1_negate = reg.negate; 302 303 /* Only src1 can be immediate in two-argument instructions. 
    */
   assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);

   if (reg.file == BRW_IMMEDIATE_VALUE) {
      insn->bits3.ud = reg.dw1.ud;
   }
   else {
      /* This is a hardware restriction, which may or may not be lifted
       * in the future:
       */
      assert (reg.address_mode == BRW_ADDRESS_DIRECT);
      /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 insn->bits3.da1.src1_subreg_nr = reg.subnr;
	 insn->bits3.da1.src1_reg_nr = reg.nr;
      }
      else {
	 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
	 insn->bits3.da16.src1_reg_nr = reg.nr;
      }

      if (insn->header.access_mode == BRW_ALIGN_1) {
	 if (reg.width == BRW_WIDTH_1 &&
	     insn->header.execution_size == BRW_EXECUTE_1) {
	    /* Scalar source: force a <0;1,0> region. */
	    insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
	    insn->bits3.da1.src1_width = BRW_WIDTH_1;
	    insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
	 }
	 else {
	    insn->bits3.da1.src1_horiz_stride = reg.hstride;
	    insn->bits3.da1.src1_width = reg.width;
	    insn->bits3.da1.src1_vert_stride = reg.vstride;
	 }
      }
      else {
	 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
	 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
	 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
	 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);

	 /* This is an oddity of the fact we're using the same
	  * descriptions for registers in align_16 as align_1:
	  */
	 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
	    insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
	 else
	    insn->bits3.da16.src1_vert_stride = reg.vstride;
      }
   }
}



/**
 * Fill in the SEND message descriptor (src1/bits3) for a message to the
 * extended math function unit.  Gen5 uses the math_gen5 layout with the
 * SFID in bits2; earlier gens carry the message target in bits3.
 */
static void brw_set_math_message( struct brw_context *brw,
				  struct brw_instruction *insn,
				  GLuint msg_length,
				  GLuint response_length,
				  GLuint function,
				  GLuint integer_type,
				  GLboolean low_precision,
				  GLboolean saturate,
				  GLuint dataType )
{
   struct intel_context *intel = &brw->intel;
   /* Zero src1 first; the descriptor fields below overlay bits3. */
   brw_set_src1(insn, brw_imm_d(0));

   if (intel->gen == 5) {
      insn->bits3.math_gen5.function = function;
      insn->bits3.math_gen5.int_type = integer_type;
      insn->bits3.math_gen5.precision = low_precision;
      insn->bits3.math_gen5.saturate = saturate;
      insn->bits3.math_gen5.data_type = dataType;
      insn->bits3.math_gen5.snapshot = 0;
      insn->bits3.math_gen5.header_present = 0;
      insn->bits3.math_gen5.response_length = response_length;
      insn->bits3.math_gen5.msg_length = msg_length;
      insn->bits3.math_gen5.end_of_thread = 0;
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH;
      insn->bits2.send_gen5.end_of_thread = 0;
   } else {
      insn->bits3.math.function = function;
      insn->bits3.math.int_type = integer_type;
      insn->bits3.math.precision = low_precision;
      insn->bits3.math.saturate = saturate;
      insn->bits3.math.data_type = dataType;
      insn->bits3.math.response_length = response_length;
      insn->bits3.math.msg_length = msg_length;
      insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
      insn->bits3.math.end_of_thread = 0;
   }
}


/**
 * Fill in the message descriptor for an FF_SYNC URB message (Gen5+
 * urb_gen5 layout; most fields are unused by FF_SYNC).
 */
static void brw_set_ff_sync_message(struct brw_context *brw,
				    struct brw_instruction *insn,
				    GLboolean allocate,
				    GLuint response_length,
				    GLboolean end_of_thread)
{
   struct intel_context *intel = &brw->intel;
   brw_set_src1(insn, brw_imm_d(0));

   insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
   insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.allocate = allocate;
   insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
   insn->bits3.urb_gen5.header_present = 1;
   insn->bits3.urb_gen5.response_length = response_length; /* may
 be 1 or 0 */
   insn->bits3.urb_gen5.msg_length = 1;
   insn->bits3.urb_gen5.end_of_thread = end_of_thread;
   if (intel->gen >= 6) {
      /* On Gen6 the SFID lives in the destreg/condmod header bits. */
      insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
   } else {
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
      insn->bits2.send_gen5.end_of_thread = end_of_thread;
   }
}

/**
 * Fill in the message descriptor for a URB write message.  Gen5+ uses the
 * urb_gen5 layout (with the SFID in the header bits on Gen6, in bits2 on
 * Gen5); earlier gens use the urb layout in bits3.
 */
static void brw_set_urb_message( struct brw_context *brw,
				 struct brw_instruction *insn,
				 GLboolean allocate,
				 GLboolean used,
				 GLuint msg_length,
				 GLuint response_length,
				 GLboolean end_of_thread,
				 GLboolean complete,
				 GLuint offset,
				 GLuint swizzle_control )
{
   struct intel_context *intel = &brw->intel;
   /* Zero src1 first; the descriptor fields below overlay bits3. */
   brw_set_src1(insn, brw_imm_d(0));

   if (intel->gen >= 5) {
      insn->bits3.urb_gen5.opcode = 0;	/* ? */
      insn->bits3.urb_gen5.offset = offset;
      insn->bits3.urb_gen5.swizzle_control = swizzle_control;
      insn->bits3.urb_gen5.allocate = allocate;
      insn->bits3.urb_gen5.used = used;	/* ? */
      insn->bits3.urb_gen5.complete = complete;
      insn->bits3.urb_gen5.header_present = 1;
      insn->bits3.urb_gen5.response_length = response_length;
      insn->bits3.urb_gen5.msg_length = msg_length;
      insn->bits3.urb_gen5.end_of_thread = end_of_thread;
      if (intel->gen >= 6) {
	 /* For SNB, the SFID bits moved to the condmod bits, and
	  * EOT stayed in bits3 above.  Does the EOT bit setting
	  * below on Ironlake even do anything?
	  */
	 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
      } else {
	 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
	 insn->bits2.send_gen5.end_of_thread = end_of_thread;
      }
   } else {
      insn->bits3.urb.opcode = 0;	/* ? */
      insn->bits3.urb.offset = offset;
      insn->bits3.urb.swizzle_control = swizzle_control;
      insn->bits3.urb.allocate = allocate;
      insn->bits3.urb.used = used;	/* ? */
      insn->bits3.urb.complete = complete;
      insn->bits3.urb.response_length = response_length;
      insn->bits3.urb.msg_length = msg_length;
      insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
      insn->bits3.urb.end_of_thread = end_of_thread;
   }
}

/**
 * Fill in the message descriptor for a data port write message.  Gen6
 * always targets the render cache (dp_render_cache layout); Gen5 uses
 * dp_write_gen5; earlier gens use dp_write.
 */
static void brw_set_dp_write_message( struct brw_context *brw,
				      struct brw_instruction *insn,
				      GLuint binding_table_index,
				      GLuint msg_control,
				      GLuint msg_type,
				      GLuint msg_length,
				      GLboolean header_present,
				      GLuint pixel_scoreboard_clear,
				      GLuint response_length,
				      GLuint end_of_thread,
				      GLuint send_commit_msg)
{
   struct intel_context *intel = &brw->intel;
   /* Zero src1 first; the descriptor fields below overlay bits3. */
   brw_set_src1(insn, brw_imm_ud(0));

   if (intel->gen >= 6) {
      insn->bits3.dp_render_cache.binding_table_index = binding_table_index;
      insn->bits3.dp_render_cache.msg_control = msg_control;
      insn->bits3.dp_render_cache.pixel_scoreboard_clear = pixel_scoreboard_clear;
      insn->bits3.dp_render_cache.msg_type = msg_type;
      insn->bits3.dp_render_cache.send_commit_msg = send_commit_msg;
      insn->bits3.dp_render_cache.header_present = header_present;
      insn->bits3.dp_render_cache.response_length = response_length;
      insn->bits3.dp_render_cache.msg_length = msg_length;
      insn->bits3.dp_render_cache.end_of_thread = end_of_thread;

      /* We always use the render cache for write messages */
      insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
      /* XXX really need below?
       */
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
      insn->bits2.send_gen5.end_of_thread = end_of_thread;
   } else if (intel->gen == 5) {
      insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_write_gen5.msg_control = msg_control;
      insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear;
      insn->bits3.dp_write_gen5.msg_type = msg_type;
      insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
      insn->bits3.dp_write_gen5.header_present = header_present;
      insn->bits3.dp_write_gen5.response_length = response_length;
      insn->bits3.dp_write_gen5.msg_length = msg_length;
      insn->bits3.dp_write_gen5.end_of_thread = end_of_thread;
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
      insn->bits2.send_gen5.end_of_thread = end_of_thread;
   } else {
      insn->bits3.dp_write.binding_table_index = binding_table_index;
      insn->bits3.dp_write.msg_control = msg_control;
      insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
      insn->bits3.dp_write.msg_type = msg_type;
      insn->bits3.dp_write.send_commit_msg = send_commit_msg;
      insn->bits3.dp_write.response_length = response_length;
      insn->bits3.dp_write.msg_length = msg_length;
      insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
      insn->bits3.dp_write.end_of_thread = end_of_thread;
   }
}

/**
 * Fill in the message descriptor for a data port read message.  On Gen6
 * the target function depends on which cache is addressed; Gen5, G4X and
 * original 965 each have their own descriptor layout.
 */
static void
brw_set_dp_read_message(struct brw_context *brw,
			struct brw_instruction *insn,
			GLuint binding_table_index,
			GLuint msg_control,
			GLuint msg_type,
			GLuint target_cache,
			GLuint msg_length,
			GLuint response_length)
{
   struct intel_context *intel = &brw->intel;
   /* Zero src1 first; the descriptor fields below overlay bits3. */
   brw_set_src1(insn, brw_imm_d(0));

   if (intel->gen >= 6) {
      uint32_t target_function;

      if (target_cache == BRW_DATAPORT_READ_TARGET_DATA_CACHE)
	 target_function = BRW_MESSAGE_TARGET_DATAPORT_READ; /* data cache */
      else
	 target_function = BRW_MESSAGE_TARGET_DATAPORT_WRITE; /* render cache */

      insn->bits3.dp_render_cache.binding_table_index = binding_table_index;
      insn->bits3.dp_render_cache.msg_control = msg_control;
      insn->bits3.dp_render_cache.pixel_scoreboard_clear = 0;
      insn->bits3.dp_render_cache.msg_type = msg_type;
      insn->bits3.dp_render_cache.send_commit_msg = 0;
      insn->bits3.dp_render_cache.header_present = 1;
      insn->bits3.dp_render_cache.response_length = response_length;
      insn->bits3.dp_render_cache.msg_length = msg_length;
      insn->bits3.dp_render_cache.end_of_thread = 0;
      insn->header.destreg__conditionalmod = target_function;
      /* XXX really need below? */
      insn->bits2.send_gen5.sfid = target_function;
      insn->bits2.send_gen5.end_of_thread = 0;
   } else if (intel->gen == 5) {
      insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
      insn->bits3.dp_read_gen5.msg_control = msg_control;
      insn->bits3.dp_read_gen5.msg_type = msg_type;
      insn->bits3.dp_read_gen5.target_cache = target_cache;
      insn->bits3.dp_read_gen5.header_present = 1;
      insn->bits3.dp_read_gen5.response_length = response_length;
      insn->bits3.dp_read_gen5.msg_length = msg_length;
      insn->bits3.dp_read_gen5.pad1 = 0;
      insn->bits3.dp_read_gen5.end_of_thread = 0;
      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
      insn->bits2.send_gen5.end_of_thread = 0;
   } else if (intel->is_g4x) {
      insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read_g4x.msg_control = msg_control;  /*8:10*/
      insn->bits3.dp_read_g4x.msg_type = msg_type;  /*11:13*/
      insn->bits3.dp_read_g4x.target_cache = target_cache;  /*14:15*/
      insn->bits3.dp_read_g4x.response_length = response_length; /*16:19*/
      insn->bits3.dp_read_g4x.msg_length = msg_length;  /*20:23*/
      insn->bits3.dp_read_g4x.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
      insn->bits3.dp_read_g4x.pad1 = 0;
      insn->bits3.dp_read_g4x.end_of_thread = 0;
   } else {
      insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
      insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
      insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
      insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
      insn->bits3.dp_read.response_length = response_length;  /*16:19*/
      insn->bits3.dp_read.msg_length = msg_length;  /*20:23*/
      insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
      insn->bits3.dp_read.pad1 = 0;  /*28:30*/
      insn->bits3.dp_read.end_of_thread = 0;  /*31*/
   }
}

/**
 * Fill in the message descriptor for a sampler message.  Gen5+ uses the
 * sampler_gen5 layout (SFID in header bits on Gen6, bits2 on Gen5); G4X
 * and original 965 have their own layouts.  EOT is never set here.
 */
static void brw_set_sampler_message(struct brw_context *brw,
                                    struct brw_instruction *insn,
                                    GLuint binding_table_index,
                                    GLuint sampler,
                                    GLuint msg_type,
                                    GLuint response_length,
                                    GLuint msg_length,
                                    GLboolean eot,
                                    GLuint header_present,
                                    GLuint simd_mode)
{
   struct intel_context *intel = &brw->intel;
   assert(eot == 0);
   /* Zero src1 first; the descriptor fields below overlay bits3. */
   brw_set_src1(insn, brw_imm_d(0));

   if (intel->gen >= 5) {
      insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
      insn->bits3.sampler_gen5.sampler = sampler;
      insn->bits3.sampler_gen5.msg_type = msg_type;
      insn->bits3.sampler_gen5.simd_mode = simd_mode;
      insn->bits3.sampler_gen5.header_present = header_present;
      insn->bits3.sampler_gen5.response_length = response_length;
      insn->bits3.sampler_gen5.msg_length = msg_length;
      insn->bits3.sampler_gen5.end_of_thread = eot;
      if (intel->gen >= 6)
	 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_SAMPLER;
      else {
	 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER;
	 insn->bits2.send_gen5.end_of_thread = eot;
      }
   } else if (intel->is_g4x) {
      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
      insn->bits3.sampler_g4x.sampler = sampler;
      insn->bits3.sampler_g4x.msg_type = msg_type;
      insn->bits3.sampler_g4x.response_length = response_length;
      insn->bits3.sampler_g4x.msg_length = msg_length;
      insn->bits3.sampler_g4x.end_of_thread = eot;
      insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
   } else {
      insn->bits3.sampler.binding_table_index = binding_table_index;
      insn->bits3.sampler.sampler = sampler;
      insn->bits3.sampler.msg_type = msg_type;
      insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
      insn->bits3.sampler.response_length = response_length;
      insn->bits3.sampler.msg_length = msg_length;
      insn->bits3.sampler.end_of_thread = eot;
      insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
   }
}



/**
 * Append a new instruction to the program store, copying the current
 * default instruction state (p->current) into it, and set its opcode.
 * Any one-shot destreg/condmod state in the defaults is cleared after
 * being consumed.
 */
static struct brw_instruction *next_insn( struct brw_compile *p,
					  GLuint opcode )
{
   struct brw_instruction *insn;

   assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);

   insn = &p->store[p->nr_insn++];
   memcpy(insn, p->current, sizeof(*insn));

   /* Reset this one-shot flag:
    */

   if (p->current->header.destreg__conditionalmod) {
      p->current->header.destreg__conditionalmod = 0;
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
   }

   insn->header.opcode = opcode;
   return insn;
}

/* Emit a one-source ALU instruction. */
static struct brw_instruction *brw_alu1( struct brw_compile *p,
					 GLuint opcode,
					 struct brw_reg dest,
					 struct brw_reg src )
{
   struct brw_instruction *insn = next_insn(p, opcode);
   brw_set_dest(p, insn, dest);
   brw_set_src0(insn, src);
   return insn;
}

/* Emit a two-source ALU instruction. */
static struct brw_instruction *brw_alu2(struct brw_compile *p,
					GLuint opcode,
					struct brw_reg dest,
					struct brw_reg src0,
					struct brw_reg src1 )
{
   struct brw_instruction *insn = next_insn(p, opcode);
   brw_set_dest(p, insn, dest);
   brw_set_src0(insn, src0);
   brw_set_src1(insn, src1);
   return insn;
}


/***********************************************************************
 * Convenience routines.
 */
#define ALU1(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0)   			\
{							\
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);    	\
}

#define ALU2(OP)					\
struct brw_instruction *brw_##OP(struct brw_compile *p,	\
	      struct brw_reg dest,			\
	      struct brw_reg src0,			\
	      struct brw_reg src1)   			\
{							\
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
}

/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 */
#define ROUND(OP)							      \
void brw_##OP(struct brw_compile *p,					      \
	      struct brw_reg dest,					      \
	      struct brw_reg src)					      \
{									      \
   struct brw_instruction *rnd, *add;					      \
   rnd = next_insn(p, BRW_OPCODE_##OP);					      \
   brw_set_dest(p, rnd, dest);						      \
   brw_set_src0(rnd, src);						      \
   rnd->header.destreg__conditionalmod = 0x7; /* turn on round-increments */ \
									      \
   add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));			      \
   add->header.predicate_control = BRW_PREDICATE_NORMAL;		      \
}


ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(RSR)
ALU2(RSL)
ALU2(ASR)
ALU1(FRC)
ALU1(RNDD)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
ALU2(PLN)


ROUND(RNDZ)
ROUND(RNDE)


/**
 * Emit an ADD, asserting the PRM's restriction that float and
 * (unsigned) doubleword operands may not be mixed.
 */
struct brw_instruction *brw_ADD(struct brw_compile *p,
				struct brw_reg dest,
				struct brw_reg src0,
				struct brw_reg src1)
{
   /* 6.2.2: add */
   if (src0.type == BRW_REGISTER_TYPE_F ||
       (src0.file == BRW_IMMEDIATE_VALUE &&
	src0.type == BRW_REGISTER_TYPE_VF)) {
      assert(src1.type != BRW_REGISTER_TYPE_UD);
      assert(src1.type != BRW_REGISTER_TYPE_D);
   }

   if (src1.type == BRW_REGISTER_TYPE_F ||
       (src1.file == BRW_IMMEDIATE_VALUE &&
	src1.type == BRW_REGISTER_TYPE_VF)) {
      assert(src0.type != BRW_REGISTER_TYPE_UD);
      assert(src0.type != BRW_REGISTER_TYPE_D);
   }

   return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
}

/**
 * Emit a MUL, asserting the PRM's restrictions: no mixing of float and
 * (unsigned) doubleword types, and the accumulator may not be a source.
 */
struct brw_instruction *brw_MUL(struct brw_compile *p,
				struct brw_reg dest,
				struct brw_reg src0,
				struct brw_reg src1)
{
   /* 6.32.38: mul */
   if (src0.type == BRW_REGISTER_TYPE_D ||
       src0.type == BRW_REGISTER_TYPE_UD ||
       src1.type == BRW_REGISTER_TYPE_D ||
       src1.type == BRW_REGISTER_TYPE_UD) {
      assert(dest.type != BRW_REGISTER_TYPE_F);
   }

   if (src0.type == BRW_REGISTER_TYPE_F ||
       (src0.file == BRW_IMMEDIATE_VALUE &&
	src0.type == BRW_REGISTER_TYPE_VF)) {
      assert(src1.type != BRW_REGISTER_TYPE_UD);
      assert(src1.type != BRW_REGISTER_TYPE_D);
   }

   if (src1.type == BRW_REGISTER_TYPE_F ||
       (src1.file == BRW_IMMEDIATE_VALUE &&
	src1.type == BRW_REGISTER_TYPE_VF)) {
      assert(src0.type != BRW_REGISTER_TYPE_UD);
      assert(src0.type != BRW_REGISTER_TYPE_D);
   }

   assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
	  src0.nr != BRW_ARF_ACCUMULATOR);
   assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
	  src1.nr != BRW_ARF_ACCUMULATOR);

   return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
}


/** Emit a NOP (with g0 as dest/src0 and a zero immediate src1). */
void brw_NOP(struct brw_compile *p)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
   brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
   brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
   brw_set_src1(insn, brw_imm_ud(0x0));
}





/***********************************************************************
 * Comparisons, if/else/endif
 */

/**
 * Emit a JMPI (jump indexed) instruction; always scalar, uncompressed,
 * with masking disabled.  Also clears any pending predication from the
 * default instruction state.
 */
struct brw_instruction *brw_JMPI(struct brw_compile *p,
                                 struct brw_reg dest,
                                 struct brw_reg src0,
                                 struct brw_reg src1)
{
   struct brw_instruction *insn
      = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);

   insn->header.execution_size = 1;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_DISABLE;

   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}

/* EU takes the value from the flag register and pushes it onto some
 * sort of a stack (presumably merging with any flag value already on
 * the stack).  Within an if block, the flags at the top of the stack
 * control execution on each channel of the unit, eg. on each of the
 * 16 pixel values in our wm programs.
 *
 * When the matching 'else' instruction is reached (presumably by
 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevent flags are inverted.
 *
 * When the matching 'endif' instruction is reached, the flags are
 * popped off.  If the stack is now empty, normal execution resumes.
 *
 * No attempt is made to deal with stack overflow (14 elements?).
 */
/**
 * Emit an IF instruction (or a predicated ADD in single-program-flow
 * mode).  The jump target is left zero; brw_ELSE/brw_ENDIF patch it
 * once the block's extent is known.
 */
struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   if (p->single_program_flow) {
      assert(execute_size == BRW_EXECUTE_1);

      /* In single-program-flow mode an inverted, predicated ADD to the
       * IP register stands in for the IF: it skips the block when the
       * condition fails.
       */
      insn = next_insn(p, BRW_OPCODE_ADD);
      insn->header.predicate_inverse = 1;
   } else {
      insn = next_insn(p, BRW_OPCODE_IF);
   }

   /* Override the defaults for this instruction:
    */
   if (intel->gen < 6) {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(insn, brw_ip_reg());
      brw_set_src1(insn, brw_imm_d(0x0));
   } else {
      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   }

   insn->header.execution_size = execute_size;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Predication was consumed by the IF itself; clear it from the
    * default state.
    */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}

/**
 * Emit a Gen6 IF that performs the comparison itself (conditional
 * modifier on null sources) rather than consuming an existing flag
 * value.  The jump count is patched later by brw_ELSE/brw_ENDIF.
 */
struct brw_instruction *
gen6_IF(struct brw_compile *p, uint32_t conditional,
	struct brw_reg src0, struct brw_reg src1)
{
   struct brw_instruction *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   brw_set_dest(p, insn, brw_imm_w(0));
   insn->header.execution_size = BRW_EXECUTE_8;
   insn->bits1.branch_gen6.jump_count = 0;
   brw_set_src0(insn, src0);
   brw_set_src1(insn, src1);

   assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
   assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
   insn->header.destreg__conditionalmod = conditional;

   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   return insn;
}

struct
brw_instruction *brw_ELSE(struct brw_compile *p, 938 struct brw_instruction *if_insn) 939{ 940 struct intel_context *intel = &p->brw->intel; 941 struct brw_instruction *insn; 942 GLuint br = 1; 943 944 /* jump count is for 64bit data chunk each, so one 128bit 945 instruction requires 2 chunks. */ 946 if (intel->gen >= 5) 947 br = 2; 948 949 if (p->single_program_flow) { 950 insn = next_insn(p, BRW_OPCODE_ADD); 951 } else { 952 insn = next_insn(p, BRW_OPCODE_ELSE); 953 } 954 955 if (intel->gen < 6) { 956 brw_set_dest(p, insn, brw_ip_reg()); 957 brw_set_src0(insn, brw_ip_reg()); 958 brw_set_src1(insn, brw_imm_d(0x0)); 959 } else { 960 brw_set_dest(p, insn, brw_imm_w(0)); 961 insn->bits1.branch_gen6.jump_count = 0; 962 brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 963 brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 964 } 965 966 insn->header.compression_control = BRW_COMPRESSION_NONE; 967 insn->header.execution_size = if_insn->header.execution_size; 968 insn->header.mask_control = BRW_MASK_ENABLE; 969 if (!p->single_program_flow) 970 insn->header.thread_control = BRW_THREAD_SWITCH; 971 972 /* Patch the if instruction to point at this instruction. 
973 */ 974 if (p->single_program_flow) { 975 assert(if_insn->header.opcode == BRW_OPCODE_ADD); 976 977 if_insn->bits3.ud = (insn - if_insn + 1) * 16; 978 } else { 979 assert(if_insn->header.opcode == BRW_OPCODE_IF); 980 981 if (intel->gen < 6) { 982 if_insn->bits3.if_else.jump_count = br * (insn - if_insn); 983 if_insn->bits3.if_else.pop_count = 0; 984 if_insn->bits3.if_else.pad0 = 0; 985 } else { 986 if_insn->bits1.branch_gen6.jump_count = br * (insn - if_insn + 1); 987 } 988 } 989 990 return insn; 991} 992 993void brw_ENDIF(struct brw_compile *p, 994 struct brw_instruction *patch_insn) 995{ 996 struct intel_context *intel = &p->brw->intel; 997 GLuint br = 1; 998 999 if (intel->gen >= 5) 1000 br = 2; 1001 1002 if (p->single_program_flow) { 1003 /* In single program flow mode, there's no need to execute an ENDIF, 1004 * since we don't need to do any stack operations, and if we're executing 1005 * currently, we want to just continue executing. 1006 */ 1007 struct brw_instruction *next = &p->store[p->nr_insn]; 1008 1009 assert(patch_insn->header.opcode == BRW_OPCODE_ADD); 1010 1011 patch_insn->bits3.ud = (next - patch_insn) * 16; 1012 } else { 1013 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF); 1014 1015 if (intel->gen < 6) { 1016 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 1017 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 1018 brw_set_src1(insn, brw_imm_d(0x0)); 1019 } else { 1020 brw_set_dest(p, insn, brw_imm_w(0)); 1021 brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1022 brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1023 } 1024 1025 insn->header.compression_control = BRW_COMPRESSION_NONE; 1026 insn->header.execution_size = patch_insn->header.execution_size; 1027 insn->header.mask_control = BRW_MASK_ENABLE; 1028 insn->header.thread_control = BRW_THREAD_SWITCH; 1029 1030 if (intel->gen < 6) 1031 assert(patch_insn->bits3.if_else.jump_count == 0); 1032 else 1033 
assert(patch_insn->bits1.branch_gen6.jump_count == 0); 1034 1035 /* Patch the if or else instructions to point at this or the next 1036 * instruction respectively. 1037 */ 1038 if (patch_insn->header.opcode == BRW_OPCODE_IF) { 1039 if (intel->gen < 6) { 1040 /* Turn it into an IFF, which means no mask stack operations for 1041 * all-false and jumping past the ENDIF. 1042 */ 1043 patch_insn->header.opcode = BRW_OPCODE_IFF; 1044 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); 1045 patch_insn->bits3.if_else.pop_count = 0; 1046 patch_insn->bits3.if_else.pad0 = 0; 1047 } else { 1048 /* As of gen6, there is no IFF and IF must point to the ENDIF. */ 1049 patch_insn->bits1.branch_gen6.jump_count = br * (insn - patch_insn); 1050 } 1051 } else { 1052 assert(patch_insn->header.opcode == BRW_OPCODE_ELSE); 1053 if (intel->gen < 6) { 1054 /* BRW_OPCODE_ELSE pre-gen6 should point just past the 1055 * matching ENDIF. 1056 */ 1057 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); 1058 patch_insn->bits3.if_else.pop_count = 1; 1059 patch_insn->bits3.if_else.pad0 = 0; 1060 } else { 1061 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. 
*/ 1062 patch_insn->bits1.branch_gen6.jump_count = br * (insn - patch_insn); 1063 } 1064 } 1065 1066 /* Also pop item off the stack in the endif instruction: 1067 */ 1068 if (intel->gen < 6) { 1069 insn->bits3.if_else.jump_count = 0; 1070 insn->bits3.if_else.pop_count = 1; 1071 insn->bits3.if_else.pad0 = 0; 1072 } else { 1073 insn->bits1.branch_gen6.jump_count = 2; 1074 } 1075 } 1076} 1077 1078struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count) 1079{ 1080 struct intel_context *intel = &p->brw->intel; 1081 struct brw_instruction *insn; 1082 1083 insn = next_insn(p, BRW_OPCODE_BREAK); 1084 if (intel->gen >= 6) { 1085 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1086 brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1087 brw_set_src1(insn, brw_imm_d(0x0)); 1088 } else { 1089 brw_set_dest(p, insn, brw_ip_reg()); 1090 brw_set_src0(insn, brw_ip_reg()); 1091 brw_set_src1(insn, brw_imm_d(0x0)); 1092 insn->bits3.if_else.pad0 = 0; 1093 insn->bits3.if_else.pop_count = pop_count; 1094 } 1095 insn->header.compression_control = BRW_COMPRESSION_NONE; 1096 insn->header.execution_size = BRW_EXECUTE_8; 1097 1098 return insn; 1099} 1100 1101struct brw_instruction *gen6_CONT(struct brw_compile *p, 1102 struct brw_instruction *do_insn) 1103{ 1104 struct brw_instruction *insn; 1105 int br = 2; 1106 1107 insn = next_insn(p, BRW_OPCODE_CONTINUE); 1108 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1109 brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1110 brw_set_dest(p, insn, brw_ip_reg()); 1111 brw_set_src0(insn, brw_ip_reg()); 1112 brw_set_src1(insn, brw_imm_d(0x0)); 1113 1114 insn->bits3.break_cont.uip = br * (do_insn - insn); 1115 1116 insn->header.compression_control = BRW_COMPRESSION_NONE; 1117 insn->header.execution_size = BRW_EXECUTE_8; 1118 return insn; 1119} 1120 1121struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count) 1122{ 1123 struct brw_instruction *insn; 1124 
insn = next_insn(p, BRW_OPCODE_CONTINUE); 1125 brw_set_dest(p, insn, brw_ip_reg()); 1126 brw_set_src0(insn, brw_ip_reg()); 1127 brw_set_src1(insn, brw_imm_d(0x0)); 1128 insn->header.compression_control = BRW_COMPRESSION_NONE; 1129 insn->header.execution_size = BRW_EXECUTE_8; 1130 /* insn->header.mask_control = BRW_MASK_DISABLE; */ 1131 insn->bits3.if_else.pad0 = 0; 1132 insn->bits3.if_else.pop_count = pop_count; 1133 return insn; 1134} 1135 1136/* DO/WHILE loop: 1137 * 1138 * The DO/WHILE is just an unterminated loop -- break or continue are 1139 * used for control within the loop. We have a few ways they can be 1140 * done. 1141 * 1142 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip, 1143 * jip and no DO instruction. 1144 * 1145 * For non-uniform control flow pre-gen6, there's a DO instruction to 1146 * push the mask, and a WHILE to jump back, and BREAK to get out and 1147 * pop the mask. 1148 * 1149 * For gen6, there's no more mask stack, so no need for DO. WHILE 1150 * just points back to the first instruction of the loop. 
 */
/**
 * Begin a DO/WHILE loop.
 *
 * On gen6+ (or in single-program-flow mode) no DO instruction is emitted;
 * the returned pointer is simply the address of the next instruction slot,
 * used by brw_WHILE() as the jump-back target.  Pre-gen6, an actual DO
 * instruction is emitted to push the execution mask.
 */
struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
{
   struct intel_context *intel = &p->brw->intel;

   if (intel->gen >= 6 || p->single_program_flow) {
      /* No instruction emitted: just mark the loop top. */
      return &p->store[p->nr_insn];
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);

      /* Override the defaults for this instruction:
       */
      brw_set_dest(p, insn, brw_null_reg());
      brw_set_src0(insn, brw_null_reg());
      brw_set_src1(insn, brw_null_reg());

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = execute_size;
      insn->header.predicate_control = BRW_PREDICATE_NONE;
      /* insn->header.mask_control = BRW_MASK_ENABLE; */
      /* insn->header.mask_control = BRW_MASK_DISABLE; */

      return insn;
   }
}



/**
 * Close a DO/WHILE loop, jumping back to do_insn (the value returned by
 * brw_DO()).  Jump offsets are counted in 64-bit chunks on gen5+, hence
 * br = 2 (one 128-bit instruction = 2 chunks).
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p,
                                  struct brw_instruction *do_insn)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint br = 1;

   if (intel->gen >= 5)
      br = 2;

   if (intel->gen >= 6) {
      /* Gen6: backward jump encoded in bits1; do_insn is the loop top
       * itself (no DO instruction exists), so the offset is negative.
       */
      insn = next_insn(p, BRW_OPCODE_WHILE);

      brw_set_dest(p, insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
      brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));

      insn->header.execution_size = do_insn->header.execution_size;
      assert(insn->header.execution_size == BRW_EXECUTE_8);
   } else {
      if (p->single_program_flow) {
	 /* Uniform control flow: the WHILE is just an ADD to IP
	  * (byte offset, 16 bytes per instruction).
	  */
	 insn = next_insn(p, BRW_OPCODE_ADD);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(insn, brw_ip_reg());
	 brw_set_src1(insn, brw_imm_d((do_insn - insn) * 16));
	 insn->header.execution_size = BRW_EXECUTE_1;
      } else {
	 insn = next_insn(p, BRW_OPCODE_WHILE);

	 assert(do_insn->header.opcode == BRW_OPCODE_DO);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(insn, brw_ip_reg());
	 brw_set_src1(insn, brw_imm_d(0));

	 insn->header.execution_size = do_insn->header.execution_size;
	 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
	 insn->bits3.if_else.pop_count = 0;
	 insn->bits3.if_else.pad0 = 0;
      }
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}


/* FORWARD JUMPS:
 */
/**
 * Patch a previously emitted JMPI (with immediate src1) to land on the
 * next instruction to be emitted.  The -1 accounts for JMPI's offset
 * being relative to the incremented IP; gen5+ counts in 64-bit chunks.
 */
void brw_land_fwd_jump(struct brw_compile *p,
		       struct brw_instruction *jmp_insn)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *landing = &p->store[p->nr_insn];
   GLuint jmpi = 1;

   if (intel->gen >= 5)
      jmpi = 2;

   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
   assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);

   jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
}



/* To integrate with the above, it makes sense that the comparison
 * instruction should populate the flag register.  It might be simpler
 * just to use the flag reg for most WM tasks?
 */
/**
 * Emit a CMP with the given conditional modifier.  If dest is the null
 * register (ARF, nr 0), subsequent instructions default to predicating
 * on the flag value this comparison produces.
 */
void brw_CMP(struct brw_compile *p,
	     struct brw_reg dest,
	     GLuint conditional,
	     struct brw_reg src0,
	     struct brw_reg src1)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);

   insn->header.destreg__conditionalmod = conditional;
   brw_set_dest(p, insn, dest);
   brw_set_src0(insn, src0);
   brw_set_src1(insn, src1);

/*    guess_execution_size(insn, src0); */


   /* Make it so that future instructions will use the computed flag
    * value until brw_set_predicate_control_flag_value() is called
    * again.
    */
   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.nr == 0) {
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
      p->flag_value = 0xff;
   }
}

/* Issue 'wait' instruction for n1, host could program MMIO
   to wake up thread. */
void brw_WAIT (struct brw_compile *p)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
   struct brw_reg src = brw_notification_1_reg();

   brw_set_dest(p, insn, src);
   brw_set_src0(insn, src);
   brw_set_src1(insn, brw_null_reg());
   insn->header.execution_size = 0; /* must */
   insn->header.predicate_control = 0;
   insn->header.compression_control = 0;
}


/***********************************************************************
 * Helpers for the various SEND message types:
 */

/** Extended math function, float[8].
 *
 * On gen6+ this is a native MATH instruction (CondModifier carries the
 * function code); pre-gen6 it is a SEND to the shared math unit, with
 * the operand implicitly moved to message register msg_reg_nr.
 */
void brw_math( struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       GLuint saturate,
	       GLuint msg_reg_nr,
	       struct brw_reg src,
	       GLuint data_type,
	       GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;

   if (intel->gen >= 6) {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

      assert(dest.file == BRW_GENERAL_REGISTER_FILE);
      assert(src.file == BRW_GENERAL_REGISTER_FILE);

      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
      assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);

      /* Source modifiers are ignored for extended math instructions. */
      assert(!src.negate);
      assert(!src.abs);

      if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
	  function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
	 assert(src.type == BRW_REGISTER_TYPE_F);
      }

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;
      insn->header.saturate = saturate;

      brw_set_dest(p, insn, dest);
      brw_set_src0(insn, src);
      brw_set_src1(insn, brw_null_reg());
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      /* POW takes two operands (two message registers); SINCOS returns
       * both results (two response registers).
       */
      GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
      GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
      /* Example code doesn't set predicate_control for send
       * instructions.
       */
      insn->header.predicate_control = 0;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(insn, src);
      brw_set_math_message(p->brw,
			   insn,
			   msg_length, response_length,
			   function,
			   BRW_MATH_INTEGER_UNSIGNED,
			   precision,
			   saturate,
			   data_type);
   }
}

/** Extended math function, float[8].
 *
 * Two-operand gen6-only variant (e.g. POW, INT_DIV) using the native
 * MATH instruction.
 */
void brw_math2(struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       struct brw_reg src0,
	       struct brw_reg src1)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

   assert(intel->gen >= 6);
   (void) intel;


   assert(dest.file == BRW_GENERAL_REGISTER_FILE);
   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.file == BRW_GENERAL_REGISTER_FILE);

   assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
   assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
   assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);

   if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
       function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
      assert(src0.type == BRW_REGISTER_TYPE_F);
      assert(src1.type == BRW_REGISTER_TYPE_F);
   }

   /* Source modifiers are ignored for extended math instructions.
    */
   assert(!src0.negate);
   assert(!src0.abs);
   assert(!src1.negate);
   assert(!src1.abs);

   /* Math is the same ISA format as other opcodes, except that CondModifier
    * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
    */
   insn->header.destreg__conditionalmod = function;

   brw_set_dest(p, insn, dest);
   brw_set_src0(insn, src0);
   brw_set_src1(insn, src1);
}

/**
 * Extended math function, float[16].
 * Use 2 send instructions.
 *
 * Pre-gen6 the math unit only handles 8 channels per message, so a
 * SIMD16 operation is split into two uncompressed SENDs, the second
 * covering the upper half (2NDHALF) at msg_reg_nr+1 / offset(dest, 1).
 * On gen6+ a single native MATH instruction suffices.
 */
void brw_math_16( struct brw_compile *p,
		  struct brw_reg dest,
		  GLuint function,
		  GLuint saturate,
		  GLuint msg_reg_nr,
		  struct brw_reg src,
		  GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
   GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;

   if (intel->gen >= 6) {
      insn = next_insn(p, BRW_OPCODE_MATH);

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;
      insn->header.saturate = saturate;

      /* Source modifiers are ignored for extended math instructions.
       */
      assert(!src.negate);
      assert(!src.abs);

      brw_set_dest(p, insn, dest);
      brw_set_src0(insn, src);
      brw_set_src1(insn, brw_null_reg());
      return;
   }

   /* First instruction:
    */
   brw_push_insn_state(p);
   brw_set_predicate_control_flag_value(p, 0xff);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_dest(p, insn, dest);
   brw_set_src0(insn, src);
   brw_set_math_message(p->brw,
			insn,
			msg_length, response_length,
			function,
			BRW_MATH_INTEGER_UNSIGNED,
			precision,
			saturate,
			BRW_MATH_DATA_VECTOR);

   /* Second instruction:
    */
   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
   insn->header.destreg__conditionalmod = msg_reg_nr+1;

   brw_set_dest(p, insn, offset(dest,1));
   brw_set_src0(insn, src);
   brw_set_math_message(p->brw,
			insn,
			msg_length, response_length,
			function,
			BRW_MATH_INTEGER_UNSIGNED,
			precision,
			saturate,
			BRW_MATH_DATA_VECTOR);

   brw_pop_insn_state(p);
}


/**
 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
 * using a constant offset per channel.
 *
 * The offset must be aligned to oword size (16 bytes).  Used for
 * register spilling.
 */
void brw_oword_block_write_scratch(struct brw_compile *p,
				   struct brw_reg mrf,
				   int num_regs,
				   GLuint offset)
{
   struct intel_context *intel = &p->brw->intel;
   uint32_t msg_control, msg_type;
   int mlen;

   /* Gen6+ takes the offset in owords rather than bytes. */
   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* Message length: header reg plus num_regs payload regs (presumably;
    * mlen = 1 + num_regs for the two cases handled -- TODO confirm).
    */
   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      mlen = 2;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      mlen = 3;
   }

   /* Set up the message header.  This is g0, with g0.2 filled with
    * the offset.  We don't want to leave our offset around in g0 or
    * it'll screw up texture samples, so set it up inside the message
    * reg.
    */
   {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
				  mrf.nr,
				  2), BRW_REGISTER_TYPE_UD),
	      brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_reg dest;
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      int send_commit_msg;
      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
					 BRW_REGISTER_TYPE_UW);

      /* SEND must not be compressed; if the current state is compressed,
       * widen the header view instead.
       */
      if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
	 insn->header.compression_control = BRW_COMPRESSION_NONE;
	 src_header = vec16(src_header);
      }
      assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
      insn->header.destreg__conditionalmod = mrf.nr;

      /* Until gen6, writes followed by reads from the same location
       * are not guaranteed to be ordered unless write_commit is set.
       * If set, then a no-op write is issued to the destination
       * register to set a dependency, and a read from the destination
       * can be used to ensure the ordering.
       *
       * For gen6, only writes between different threads need ordering
       * protection.  Our use of DP writes is all about register
       * spilling within a thread.
       */
      if (intel->gen >= 6) {
	 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
	 send_commit_msg = 0;
      } else {
	 dest = src_header;
	 send_commit_msg = 1;
      }

      brw_set_dest(p, insn, dest);
      if (intel->gen >= 6) {
	 brw_set_src0(insn, mrf);
      } else {
	 brw_set_src0(insn, brw_null_reg());
      }

      if (intel->gen >= 6)
	 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
      else
	 msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;

      brw_set_dp_write_message(p->brw,
			       insn,
			       255, /* binding table index (255=stateless) */
			       msg_control,
			       msg_type,
			       mlen,
			       GL_TRUE, /* header_present */
			       0, /* pixel scoreboard */
			       send_commit_msg, /* response_length */
			       0, /* eot */
			       send_commit_msg);
   }
}


/**
 * Read a block of owords (half a GRF each) from the scratch buffer
 * using a constant index per channel.
 *
 * Offset must be aligned to oword size (16 bytes).  Used for register
 * spilling.
 */
void
brw_oword_block_read_scratch(struct brw_compile *p,
			     struct brw_reg dest,
			     struct brw_reg mrf,
			     int num_regs,
			     GLuint offset)
{
   struct intel_context *intel = &p->brw->intel;
   uint32_t msg_control;
   int rlen;

   /* Gen6+ takes the offset in owords rather than bytes. */
   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
   dest = retype(dest, BRW_REGISTER_TYPE_UW);

   /* 2 owords = 1 response reg, 4 owords = 2 response regs. */
   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      rlen = 1;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      rlen = 2;
   }

   /* Build the message header in the MRF: a copy of g0 with the global
    * offset written into element 2 (same scheme as the scratch write).
    */
   {
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_mask_control(p, BRW_MASK_DISABLE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
				  mrf.nr,
				  2), BRW_REGISTER_TYPE_UD),
	      brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      assert(insn->header.predicate_control == 0);
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.destreg__conditionalmod = mrf.nr;

      brw_set_dest(p, insn, dest);	/* UW? */
      if (intel->gen >= 6) {
	 brw_set_src0(insn, mrf);
      } else {
	 brw_set_src0(insn, brw_null_reg());
      }

      brw_set_dp_read_message(p->brw,
			      insn,
			      255, /* binding table index (255=stateless) */
			      msg_control,
			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			      BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
			      1, /* msg_length */
			      rlen);
   }
}

/**
 * Read a float[4] vector from the data port Data Cache (const buffer).
 * Location (in buffer) should be a multiple of 16.
 * Used for fetching shader constants.
 */
void brw_oword_block_read(struct brw_compile *p,
			  struct brw_reg dest,
			  struct brw_reg mrf,
			  uint32_t offset,
			  uint32_t bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;

   /* On newer hardware, offset is in units of owords. */
   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);

   /* Message header: g0 copy with the read offset in element 2. */
   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

   /* set message header global offset field (reg 0, element 2) */
   brw_MOV(p,
	   retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
			       mrf.nr,
			       2), BRW_REGISTER_TYPE_UD),
	   brw_imm_ud(offset));

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = mrf.nr;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(p, insn, dest);
   if (intel->gen >= 6) {
      brw_set_src0(insn, mrf);
   } else {
      brw_set_src0(insn, brw_null_reg());
   }

   brw_set_dp_read_message(p->brw,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
			   0, /* source cache = data cache */
			   1, /* msg_length */
			   1); /* response_length (1 reg, 2 owords!) */

   brw_pop_insn_state(p);
}

/**
 * Read a set of dwords from the data port Data Cache (const buffer).
 *
 * Location (in buffer) appears as UD offsets in the register after
 * the provided mrf header reg.
 */
void brw_dword_scattered_read(struct brw_compile *p,
			      struct brw_reg dest,
			      struct brw_reg mrf,
			      uint32_t bind_table_index)
{
   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* Header is just a copy of g0; the per-channel offsets are expected
    * to already be in the register after mrf (hence msg_length = 2).
    */
   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
   brw_pop_insn_state(p);

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = mrf.nr;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(p, insn, dest);
   brw_set_src0(insn, brw_null_reg());

   brw_set_dp_read_message(p->brw,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS,
			   BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ,
			   0, /* source cache = data cache */
			   2, /* msg_length */
			   1); /* response_length */
}



/**
 * Read float[4] constant(s) from VS constant buffer.
 * For relative addressing, two float[4] constants will be read into 'dest'.
 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
 */
void brw_dp_READ_4_vs(struct brw_compile *p,
                      struct brw_reg dest,
                      GLuint location,
                      GLuint bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint msg_reg_nr = 1;

   /* Gen6+ takes the location in owords rather than bytes. */
   if (intel->gen >= 6)
      location /= 16;

   /* Setup MRF[1] with location/offset into const buffer */
   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2),
		     BRW_REGISTER_TYPE_UD),
	   brw_imm_ud(location));
   brw_pop_insn_state(p);

   insn = next_insn(p, BRW_OPCODE_SEND);

   insn->header.predicate_control = BRW_PREDICATE_NONE;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.destreg__conditionalmod = msg_reg_nr;
   insn->header.mask_control = BRW_MASK_DISABLE;

   brw_set_dest(p, insn, dest);
   if (intel->gen >= 6) {
      brw_set_src0(insn, brw_message_reg(msg_reg_nr));
   } else {
      brw_set_src0(insn, brw_null_reg());
   }

   brw_set_dp_read_message(p->brw,
			   insn,
			   bind_table_index,
			   0,
			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			   0, /* source cache = data cache */
			   1, /* msg_length */
			   1); /* response_length (1 Oword) */
}

/**
 * Read a float[4] constant per vertex from VS constant buffer, with
 * relative addressing.
 */
void brw_dp_READ_4_vs_relative(struct brw_compile *p,
			       struct brw_reg dest,
			       struct brw_reg addr_reg,
			       GLuint offset,
			       GLuint bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_reg src = brw_vec8_grf(0, 0);
   int msg_type;

   /* Setup MRF[1] with offset into const buffer */
   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);

   /* M1.0 is block offset 0, M1.4 is block offset 1, all other
    * fields ignored.
    */
   brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D),
	   addr_reg, brw_imm_d(offset));
   brw_pop_insn_state(p);

   /* On gen6 the g0 header must be explicitly copied to an MRF first. */
   gen6_resolve_implied_move(p, &src, 0);
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

   insn->header.predicate_control = BRW_PREDICATE_NONE;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.destreg__conditionalmod = 0;
   insn->header.mask_control = BRW_MASK_DISABLE;

   brw_set_dest(p, insn, dest);
   brw_set_src0(insn, src);

   /* The oword dual-block read message id differs per generation. */
   if (intel->gen == 6)
      msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else if (intel->gen == 5 || intel->is_g4x)
      msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else
      msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;

   brw_set_dp_read_message(p->brw,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
			   msg_type,
			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
			   2, /* msg_length */
			   1); /* response_length */
}



/**
 * Emit a render-target write message to the framebuffer.
 *
 * On gen6 with binding_table_index 0 a SENDC is used instead of SEND
 * (dependency on previous RT writes); gen6 also sends headerless with
 * the color payload taken directly from msg_reg_nr.
 */
void brw_fb_WRITE(struct brw_compile *p,
		  int dispatch_width,
                  struct brw_reg dest,
                  GLuint msg_reg_nr,
                  struct brw_reg src0,
                  GLuint binding_table_index,
                  GLuint msg_length,
                  GLuint response_length,
                  GLboolean eot,
                  GLboolean header_present)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint msg_control, msg_type;

   if (intel->gen >= 6 && binding_table_index == 0) {
      insn = next_insn(p, BRW_OPCODE_SENDC);
   } else {
      insn = next_insn(p, BRW_OPCODE_SEND);
   }
   /* The execution mask is ignored for render target writes. */
   insn->header.predicate_control = 0;
   insn->header.compression_control = BRW_COMPRESSION_NONE;

   if (intel->gen >= 6) {
      /* headerless version, just submit color payload */
      src0 = brw_message_reg(msg_reg_nr);

      msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   } else {
      insn->header.destreg__conditionalmod = msg_reg_nr;

      msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   }

   if (dispatch_width == 16)
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
   else
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;

   brw_set_dest(p, insn, dest);
   brw_set_src0(insn, src0);
   brw_set_dp_write_message(p->brw,
			    insn,
			    binding_table_index,
			    msg_control,
			    msg_type,
			    msg_length,
			    header_present,
			    1, /* pixel scoreboard */
			    response_length,
			    eot,
			    0 /* send_commit_msg */);
}


/**
 * Texture sample instruction.
 * Note: the msg_type plus msg_length values determine exactly what kind
 * of sampling operation is performed.  See volume 4, page 161 of docs.
 */
void brw_SAMPLE(struct brw_compile *p,
		struct brw_reg dest,
		GLuint msg_reg_nr,
		struct brw_reg src0,
		GLuint binding_table_index,
		GLuint sampler,
		GLuint writemask,
		GLuint msg_type,
		GLuint response_length,
		GLuint msg_length,
		GLboolean eot,
		GLuint header_present,
		GLuint simd_mode)
{
   struct intel_context *intel = &p->brw->intel;
   GLboolean need_stall = 0;

   /* Nothing would be written back; the whole sample is a no-op. */
   if (writemask == 0) {
      /*printf("%s: zero writemask??\n", __FUNCTION__); */
      return;
   }

   /* Hardware doesn't do destination dependency checking on send
    * instructions properly.  Add a workaround which generates the
    * dependency by other means.  In practice it seems like this bug
    * only crops up for texture samples, and only where registers are
    * written by the send and then written again later without being
    * read in between.  Luckily for us, we already track that
    * information and use it to modify the writemask for the
    * instruction, so that is a guide for whether a workaround is
    * needed.
    */
   if (writemask != WRITEMASK_XYZW) {
      GLuint dst_offset = 0;
      GLuint i, newmask = 0, len = 0;

      /* Skip over leading disabled channels.  Each skipped channel
       * advances the destination by two GRFs.
       * NOTE(review): the two-GRF stride assumes a SIMD16 response
       * layout; presumably the leading-skip case never arises for
       * SIMD8 — confirm against callers.
       */
      for (i = 0; i < 4; i++) {
	 if (writemask & (1<<i))
	    break;
	 dst_offset += 2;
      }
      /* Collect the contiguous run of enabled channels that starts at
       * the first enabled one; 'len' counts the channels in the run.
       */
      for (; i < 4; i++) {
	 if (!(writemask & (1<<i)))
	    break;
	 newmask |= 1<<i;
	 len++;
      }

      if (newmask != writemask) {
	 /* The enabled channels are not one contiguous run (there is a
	  * hole in the writemask), so the mask can't be expressed via
	  * the message header.  Fall back to the dependency-stall
	  * workaround emitted after the send below.
	  */
	 need_stall = 1;
         /* printf("need stall %x %x\n", newmask , writemask); */
      }
      else {
	 GLboolean dispatch_16 = GL_FALSE;

	 struct brw_reg m1 = brw_message_reg(msg_reg_nr);

	 /* Determine whether this sample executes 16-wide; that decides
	  * how the masked channels affect the response layout below.
	  */
	 guess_execution_size(p, p->current, dest);
	 if (p->current->header.execution_size == BRW_EXECUTE_16)
	    dispatch_16 = GL_TRUE;

	 /* Invert: the header wants the channels to DISABLE, not the
	  * ones to write.
	  */
	 newmask = ~newmask & WRITEMASK_XYZW;

	 brw_push_insn_state(p);

	 /* Header setup must not be predicated or compressed. */
	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
	 brw_set_mask_control(p, BRW_MASK_DISABLE);

	 /* Build a message header in m1: copy r0, then write the
	  * channel-disable mask into dword 2.
	  * NOTE(review): the << 12 places the mask in the header's
	  * channel-mask field — verify bit position against the
	  * sampler message header definition in the PRM.
	  */
	 brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD),
		 retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD));
	 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));

	 brw_pop_insn_state(p);

	 /* The payload is now implicit in the MRF; the send's src0 is
	  * unused, and the destination starts past the skipped channels.
	  */
	 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
	 dest = offset(dest, dst_offset);

	 /* For 16-wide dispatch, masked channels are skipped in the
	  * response.  For 8-wide, masked channels still take up slots,
	  * and are just not written to.
	  */
	 if (dispatch_16)
	    response_length = len * 2;
      }
   }

   /* Emit the actual sampler SEND. */
   {
      struct brw_instruction *insn;

      /* On gen6+ the payload must already sit in MRF space. */
      gen6_resolve_implied_move(p, &src0, msg_reg_nr);

      insn = next_insn(p, BRW_OPCODE_SEND);
      insn->header.predicate_control = 0; /* XXX */
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      if (intel->gen < 6)
	 insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(insn, src0);
      brw_set_sampler_message(p->brw, insn,
			      binding_table_index,
			      sampler,
			      msg_type,
			      response_length,
			      msg_length,
			      eot,
			      header_present,
			      simd_mode);
   }

   if (need_stall) {
      /* Self-move of the last register of the response forces a read
       * of the send's destination, manufacturing the write-after-write
       * dependency the hardware fails to track on its own.
       */
      struct brw_reg reg = vec8(offset(dest, response_length-1));

      /*  mov (8) r9.0<1>:f    r9.0<8;8,1>:f    { Align1 }
       */
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, retype(reg, BRW_REGISTER_TYPE_UD),
	      retype(reg, BRW_REGISTER_TYPE_UD));
      brw_pop_insn_state(p);
   }

}

/* All these variables are pretty confusing - we might be better off
 * using bitmasks and macros for this, in the old style.  Or perhaps
 * just having the caller instantiate the fields in dword3 itself.
2066 */ 2067void brw_urb_WRITE(struct brw_compile *p, 2068 struct brw_reg dest, 2069 GLuint msg_reg_nr, 2070 struct brw_reg src0, 2071 GLboolean allocate, 2072 GLboolean used, 2073 GLuint msg_length, 2074 GLuint response_length, 2075 GLboolean eot, 2076 GLboolean writes_complete, 2077 GLuint offset, 2078 GLuint swizzle) 2079{ 2080 struct intel_context *intel = &p->brw->intel; 2081 struct brw_instruction *insn; 2082 2083 gen6_resolve_implied_move(p, &src0, msg_reg_nr); 2084 2085 insn = next_insn(p, BRW_OPCODE_SEND); 2086 2087 assert(msg_length < BRW_MAX_MRF); 2088 2089 brw_set_dest(p, insn, dest); 2090 brw_set_src0(insn, src0); 2091 brw_set_src1(insn, brw_imm_d(0)); 2092 2093 if (intel->gen < 6) 2094 insn->header.destreg__conditionalmod = msg_reg_nr; 2095 2096 brw_set_urb_message(p->brw, 2097 insn, 2098 allocate, 2099 used, 2100 msg_length, 2101 response_length, 2102 eot, 2103 writes_complete, 2104 offset, 2105 swizzle); 2106} 2107 2108static int 2109brw_find_next_block_end(struct brw_compile *p, int start) 2110{ 2111 int ip; 2112 2113 for (ip = start + 1; ip < p->nr_insn; ip++) { 2114 struct brw_instruction *insn = &p->store[ip]; 2115 2116 switch (insn->header.opcode) { 2117 case BRW_OPCODE_ENDIF: 2118 case BRW_OPCODE_ELSE: 2119 case BRW_OPCODE_WHILE: 2120 return ip; 2121 } 2122 } 2123 assert(!"not reached"); 2124 return start + 1; 2125} 2126 2127/* There is no DO instruction on gen6, so to find the end of the loop 2128 * we have to see if the loop is jumping back before our start 2129 * instruction. 
 */
static int
brw_find_loop_end(struct brw_compile *p, int start)
{
   int ip;
   /* Gen6 jump counts are in half-instruction units, so divide by 2 to
    * convert a jump_count into an instruction-index delta.
    */
   int br = 2;

   /* Scan forward for a WHILE whose (backward, negative) jump lands
    * before 'start' — that WHILE closes the loop containing 'start'.
    */
   for (ip = start + 1; ip < p->nr_insn; ip++) {
      struct brw_instruction *insn = &p->store[ip];

      if (insn->header.opcode == BRW_OPCODE_WHILE) {
	 if (ip + insn->bits1.branch_gen6.jump_count / br < start)
	    return ip;
      }
   }
   assert(!"not reached");
   return start + 1;
}

/* After program generation, go back and update the UIP and JIP of
 * BREAK and CONT instructions to their correct locations.
 */
void
brw_set_uip_jip(struct brw_compile *p)
{
   struct intel_context *intel = &p->brw->intel;
   int ip;
   /* Scale instruction-index deltas into the half-instruction units
    * the gen6 encodings expect.
    */
   int br = 2;

   /* JIP/UIP only exist in the gen6 flow-control encoding. */
   if (intel->gen < 6)
      return;

   for (ip = 0; ip < p->nr_insn; ip++) {
      struct brw_instruction *insn = &p->store[ip];

      switch (insn->header.opcode) {
      case BRW_OPCODE_BREAK:
	 /* BREAK: JIP points at the end of the current block, UIP just
	  * past the WHILE that closes the loop.
	  */
	 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
	 insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip + 1);
	 break;
      case BRW_OPCODE_CONTINUE:
	 /* Only JIP is computed here; UIP was already filled in when
	  * the CONT was emitted, since that's when the start of the
	  * loop was known (the assert below relies on that).
	  */
	 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
	 assert(insn->bits3.break_cont.uip != 0);
	 assert(insn->bits3.break_cont.jip != 0);
	 break;
      }
   }
}

void brw_ff_sync(struct brw_compile *p,
		 struct brw_reg dest,
		 GLuint msg_reg_nr,
		 struct brw_reg src0,
		 GLboolean allocate,
		 GLuint response_length,
		 GLboolean eot)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   /* On gen6 the payload must sit in MRF space before the SEND. */
   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   insn = next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, insn, dest);
   brw_set_src0(insn, src0);
   brw_set_src1(insn, brw_imm_d(0));

   /* Pre-gen6, the message register number is carried in the SEND's
    * destreg/conditionalmod field.
    */
   if (intel->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_ff_sync_message(p->brw,
			   insn,
			   allocate,
			   response_length,
			   eot);
}