brw_eu_emit.c revision 9a21bc640188e4078075b9f8e6701853a4f0bbe4
1/* 2 Copyright (C) Intel Corp. 2006. All Rights Reserved. 3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to 4 develop this 3D driver. 5 6 Permission is hereby granted, free of charge, to any person obtaining 7 a copy of this software and associated documentation files (the 8 "Software"), to deal in the Software without restriction, including 9 without limitation the rights to use, copy, modify, merge, publish, 10 distribute, sublicense, and/or sell copies of the Software, and to 11 permit persons to whom the Software is furnished to do so, subject to 12 the following conditions: 13 14 The above copyright notice and this permission notice (including the 15 next paragraph) shall be included in all copies or substantial 16 portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
25 26 **********************************************************************/ 27 /* 28 * Authors: 29 * Keith Whitwell <keith@tungstengraphics.com> 30 */ 31 32 33#include "brw_context.h" 34#include "brw_defines.h" 35#include "brw_eu.h" 36 37 38 39 40/*********************************************************************** 41 * Internal helper for constructing instructions 42 */ 43 44static void guess_execution_size(struct brw_compile *p, 45 struct brw_instruction *insn, 46 struct brw_reg reg) 47{ 48 if (reg.width == BRW_WIDTH_8 && p->compressed) 49 insn->header.execution_size = BRW_EXECUTE_16; 50 else 51 insn->header.execution_size = reg.width; /* note - definitions are compatible */ 52} 53 54 55/** 56 * Prior to Sandybridge, the SEND instruction accepted non-MRF source 57 * registers, implicitly moving the operand to a message register. 58 * 59 * On Sandybridge, this is no longer the case. This function performs the 60 * explicit move; it should be called before emitting a SEND instruction. 
61 */ 62static void 63gen6_resolve_implied_move(struct brw_compile *p, 64 struct brw_reg *src, 65 GLuint msg_reg_nr) 66{ 67 struct intel_context *intel = &p->brw->intel; 68 if (intel->gen != 6) 69 return; 70 71 if (src->file == BRW_ARCHITECTURE_REGISTER_FILE && src->nr == BRW_ARF_NULL) 72 return; 73 74 brw_push_insn_state(p); 75 brw_set_mask_control(p, BRW_MASK_DISABLE); 76 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 77 brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD), 78 retype(*src, BRW_REGISTER_TYPE_UD)); 79 brw_pop_insn_state(p); 80 *src = brw_message_reg(msg_reg_nr); 81} 82 83 84static void brw_set_dest(struct brw_compile *p, 85 struct brw_instruction *insn, 86 struct brw_reg dest) 87{ 88 if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && 89 dest.file != BRW_MESSAGE_REGISTER_FILE) 90 assert(dest.nr < 128); 91 92 insn->bits1.da1.dest_reg_file = dest.file; 93 insn->bits1.da1.dest_reg_type = dest.type; 94 insn->bits1.da1.dest_address_mode = dest.address_mode; 95 96 if (dest.address_mode == BRW_ADDRESS_DIRECT) { 97 insn->bits1.da1.dest_reg_nr = dest.nr; 98 99 if (insn->header.access_mode == BRW_ALIGN_1) { 100 insn->bits1.da1.dest_subreg_nr = dest.subnr; 101 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) 102 dest.hstride = BRW_HORIZONTAL_STRIDE_1; 103 insn->bits1.da1.dest_horiz_stride = dest.hstride; 104 } 105 else { 106 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; 107 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; 108 /* even ignored in da16, still need to set as '01' */ 109 insn->bits1.da16.dest_horiz_stride = 1; 110 } 111 } 112 else { 113 insn->bits1.ia1.dest_subreg_nr = dest.subnr; 114 115 /* These are different sizes in align1 vs align16: 116 */ 117 if (insn->header.access_mode == BRW_ALIGN_1) { 118 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; 119 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) 120 dest.hstride = BRW_HORIZONTAL_STRIDE_1; 121 insn->bits1.ia1.dest_horiz_stride = dest.hstride; 
122 } 123 else { 124 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; 125 /* even ignored in da16, still need to set as '01' */ 126 insn->bits1.ia16.dest_horiz_stride = 1; 127 } 128 } 129 130 /* NEW: Set the execution size based on dest.width and 131 * insn->compression_control: 132 */ 133 guess_execution_size(p, insn, dest); 134} 135 136extern int reg_type_size[]; 137 138static void 139validate_reg(struct brw_instruction *insn, struct brw_reg reg) 140{ 141 int hstride_for_reg[] = {0, 1, 2, 4}; 142 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256}; 143 int width_for_reg[] = {1, 2, 4, 8, 16}; 144 int execsize_for_reg[] = {1, 2, 4, 8, 16}; 145 int width, hstride, vstride, execsize; 146 147 if (reg.file == BRW_IMMEDIATE_VALUE) { 148 /* 3.3.6: Region Parameters. Restriction: Immediate vectors 149 * mean the destination has to be 128-bit aligned and the 150 * destination horiz stride has to be a word. 151 */ 152 if (reg.type == BRW_REGISTER_TYPE_V) { 153 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] * 154 reg_type_size[insn->bits1.da1.dest_reg_type] == 2); 155 } 156 157 return; 158 } 159 160 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE && 161 reg.file == BRW_ARF_NULL) 162 return; 163 164 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg)); 165 hstride = hstride_for_reg[reg.hstride]; 166 167 if (reg.vstride == 0xf) { 168 vstride = -1; 169 } else { 170 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg)); 171 vstride = vstride_for_reg[reg.vstride]; 172 } 173 174 assert(reg.width >= 0 && reg.width < Elements(width_for_reg)); 175 width = width_for_reg[reg.width]; 176 177 assert(insn->header.execution_size >= 0 && 178 insn->header.execution_size < Elements(execsize_for_reg)); 179 execsize = execsize_for_reg[insn->header.execution_size]; 180 181 /* Restrictions from 3.3.10: Register Region Restrictions. */ 182 /* 3. */ 183 assert(execsize >= width); 184 185 /* 4. 
*/ 186 if (execsize == width && hstride != 0) { 187 assert(vstride == -1 || vstride == width * hstride); 188 } 189 190 /* 5. */ 191 if (execsize == width && hstride == 0) { 192 /* no restriction on vstride. */ 193 } 194 195 /* 6. */ 196 if (width == 1) { 197 assert(hstride == 0); 198 } 199 200 /* 7. */ 201 if (execsize == 1 && width == 1) { 202 assert(hstride == 0); 203 assert(vstride == 0); 204 } 205 206 /* 8. */ 207 if (vstride == 0 && hstride == 0) { 208 assert(width == 1); 209 } 210 211 /* 10. Check destination issues. */ 212} 213 214static void brw_set_src0( struct brw_instruction *insn, 215 struct brw_reg reg ) 216{ 217 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) 218 assert(reg.nr < 128); 219 220 validate_reg(insn, reg); 221 222 insn->bits1.da1.src0_reg_file = reg.file; 223 insn->bits1.da1.src0_reg_type = reg.type; 224 insn->bits2.da1.src0_abs = reg.abs; 225 insn->bits2.da1.src0_negate = reg.negate; 226 insn->bits2.da1.src0_address_mode = reg.address_mode; 227 228 if (reg.file == BRW_IMMEDIATE_VALUE) { 229 insn->bits3.ud = reg.dw1.ud; 230 231 /* Required to set some fields in src1 as well: 232 */ 233 insn->bits1.da1.src1_reg_file = 0; /* arf */ 234 insn->bits1.da1.src1_reg_type = reg.type; 235 } 236 else 237 { 238 if (reg.address_mode == BRW_ADDRESS_DIRECT) { 239 if (insn->header.access_mode == BRW_ALIGN_1) { 240 insn->bits2.da1.src0_subreg_nr = reg.subnr; 241 insn->bits2.da1.src0_reg_nr = reg.nr; 242 } 243 else { 244 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16; 245 insn->bits2.da16.src0_reg_nr = reg.nr; 246 } 247 } 248 else { 249 insn->bits2.ia1.src0_subreg_nr = reg.subnr; 250 251 if (insn->header.access_mode == BRW_ALIGN_1) { 252 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; 253 } 254 else { 255 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; 256 } 257 } 258 259 if (insn->header.access_mode == BRW_ALIGN_1) { 260 if (reg.width == BRW_WIDTH_1 && 261 insn->header.execution_size == BRW_EXECUTE_1) { 262 
insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0; 263 insn->bits2.da1.src0_width = BRW_WIDTH_1; 264 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0; 265 } 266 else { 267 insn->bits2.da1.src0_horiz_stride = reg.hstride; 268 insn->bits2.da1.src0_width = reg.width; 269 insn->bits2.da1.src0_vert_stride = reg.vstride; 270 } 271 } 272 else { 273 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); 274 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); 275 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); 276 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); 277 278 /* This is an oddity of the fact we're using the same 279 * descriptions for registers in align_16 as align_1: 280 */ 281 if (reg.vstride == BRW_VERTICAL_STRIDE_8) 282 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4; 283 else 284 insn->bits2.da16.src0_vert_stride = reg.vstride; 285 } 286 } 287} 288 289 290void brw_set_src1( struct brw_instruction *insn, 291 struct brw_reg reg ) 292{ 293 assert(reg.file != BRW_MESSAGE_REGISTER_FILE); 294 295 assert(reg.nr < 128); 296 297 validate_reg(insn, reg); 298 299 insn->bits1.da1.src1_reg_file = reg.file; 300 insn->bits1.da1.src1_reg_type = reg.type; 301 insn->bits3.da1.src1_abs = reg.abs; 302 insn->bits3.da1.src1_negate = reg.negate; 303 304 /* Only src1 can be immediate in two-argument instructions. 
305 */ 306 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE); 307 308 if (reg.file == BRW_IMMEDIATE_VALUE) { 309 insn->bits3.ud = reg.dw1.ud; 310 } 311 else { 312 /* This is a hardware restriction, which may or may not be lifted 313 * in the future: 314 */ 315 assert (reg.address_mode == BRW_ADDRESS_DIRECT); 316 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */ 317 318 if (insn->header.access_mode == BRW_ALIGN_1) { 319 insn->bits3.da1.src1_subreg_nr = reg.subnr; 320 insn->bits3.da1.src1_reg_nr = reg.nr; 321 } 322 else { 323 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16; 324 insn->bits3.da16.src1_reg_nr = reg.nr; 325 } 326 327 if (insn->header.access_mode == BRW_ALIGN_1) { 328 if (reg.width == BRW_WIDTH_1 && 329 insn->header.execution_size == BRW_EXECUTE_1) { 330 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0; 331 insn->bits3.da1.src1_width = BRW_WIDTH_1; 332 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0; 333 } 334 else { 335 insn->bits3.da1.src1_horiz_stride = reg.hstride; 336 insn->bits3.da1.src1_width = reg.width; 337 insn->bits3.da1.src1_vert_stride = reg.vstride; 338 } 339 } 340 else { 341 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); 342 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); 343 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); 344 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); 345 346 /* This is an oddity of the fact we're using the same 347 * descriptions for registers in align_16 as align_1: 348 */ 349 if (reg.vstride == BRW_VERTICAL_STRIDE_8) 350 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4; 351 else 352 insn->bits3.da16.src1_vert_stride = reg.vstride; 353 } 354 } 355} 356 357 358 359static void brw_set_math_message( struct brw_context *brw, 360 struct brw_instruction *insn, 361 GLuint msg_length, 362 GLuint response_length, 363 GLuint function, 364 GLuint 
integer_type, 365 GLboolean low_precision, 366 GLboolean saturate, 367 GLuint dataType ) 368{ 369 struct intel_context *intel = &brw->intel; 370 brw_set_src1(insn, brw_imm_d(0)); 371 372 if (intel->gen == 5) { 373 insn->bits3.math_gen5.function = function; 374 insn->bits3.math_gen5.int_type = integer_type; 375 insn->bits3.math_gen5.precision = low_precision; 376 insn->bits3.math_gen5.saturate = saturate; 377 insn->bits3.math_gen5.data_type = dataType; 378 insn->bits3.math_gen5.snapshot = 0; 379 insn->bits3.math_gen5.header_present = 0; 380 insn->bits3.math_gen5.response_length = response_length; 381 insn->bits3.math_gen5.msg_length = msg_length; 382 insn->bits3.math_gen5.end_of_thread = 0; 383 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH; 384 insn->bits2.send_gen5.end_of_thread = 0; 385 } else { 386 insn->bits3.math.function = function; 387 insn->bits3.math.int_type = integer_type; 388 insn->bits3.math.precision = low_precision; 389 insn->bits3.math.saturate = saturate; 390 insn->bits3.math.data_type = dataType; 391 insn->bits3.math.response_length = response_length; 392 insn->bits3.math.msg_length = msg_length; 393 insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH; 394 insn->bits3.math.end_of_thread = 0; 395 } 396} 397 398 399static void brw_set_ff_sync_message(struct brw_context *brw, 400 struct brw_instruction *insn, 401 GLboolean allocate, 402 GLuint response_length, 403 GLboolean end_of_thread) 404{ 405 struct intel_context *intel = &brw->intel; 406 brw_set_src1(insn, brw_imm_d(0)); 407 408 insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */ 409 insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */ 410 insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */ 411 insn->bits3.urb_gen5.allocate = allocate; 412 insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */ 413 insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */ 414 insn->bits3.urb_gen5.header_present = 1; 415 insn->bits3.urb_gen5.response_length = response_length; /* may 
be 1 or 0 */ 416 insn->bits3.urb_gen5.msg_length = 1; 417 insn->bits3.urb_gen5.end_of_thread = end_of_thread; 418 if (intel->gen >= 6) { 419 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB; 420 } else { 421 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB; 422 insn->bits2.send_gen5.end_of_thread = end_of_thread; 423 } 424} 425 426static void brw_set_urb_message( struct brw_context *brw, 427 struct brw_instruction *insn, 428 GLboolean allocate, 429 GLboolean used, 430 GLuint msg_length, 431 GLuint response_length, 432 GLboolean end_of_thread, 433 GLboolean complete, 434 GLuint offset, 435 GLuint swizzle_control ) 436{ 437 struct intel_context *intel = &brw->intel; 438 brw_set_src1(insn, brw_imm_d(0)); 439 440 if (intel->gen >= 5) { 441 insn->bits3.urb_gen5.opcode = 0; /* ? */ 442 insn->bits3.urb_gen5.offset = offset; 443 insn->bits3.urb_gen5.swizzle_control = swizzle_control; 444 insn->bits3.urb_gen5.allocate = allocate; 445 insn->bits3.urb_gen5.used = used; /* ? */ 446 insn->bits3.urb_gen5.complete = complete; 447 insn->bits3.urb_gen5.header_present = 1; 448 insn->bits3.urb_gen5.response_length = response_length; 449 insn->bits3.urb_gen5.msg_length = msg_length; 450 insn->bits3.urb_gen5.end_of_thread = end_of_thread; 451 if (intel->gen >= 6) { 452 /* For SNB, the SFID bits moved to the condmod bits, and 453 * EOT stayed in bits3 above. Does the EOT bit setting 454 * below on Ironlake even do anything? 455 */ 456 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB; 457 } else { 458 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB; 459 insn->bits2.send_gen5.end_of_thread = end_of_thread; 460 } 461 } else { 462 insn->bits3.urb.opcode = 0; /* ? */ 463 insn->bits3.urb.offset = offset; 464 insn->bits3.urb.swizzle_control = swizzle_control; 465 insn->bits3.urb.allocate = allocate; 466 insn->bits3.urb.used = used; /* ? 
*/ 467 insn->bits3.urb.complete = complete; 468 insn->bits3.urb.response_length = response_length; 469 insn->bits3.urb.msg_length = msg_length; 470 insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB; 471 insn->bits3.urb.end_of_thread = end_of_thread; 472 } 473} 474 475static void brw_set_dp_write_message( struct brw_context *brw, 476 struct brw_instruction *insn, 477 GLuint binding_table_index, 478 GLuint msg_control, 479 GLuint msg_type, 480 GLuint msg_length, 481 GLboolean header_present, 482 GLuint pixel_scoreboard_clear, 483 GLuint response_length, 484 GLuint end_of_thread, 485 GLuint send_commit_msg) 486{ 487 struct intel_context *intel = &brw->intel; 488 brw_set_src1(insn, brw_imm_ud(0)); 489 490 if (intel->gen >= 6) { 491 insn->bits3.dp_render_cache.binding_table_index = binding_table_index; 492 insn->bits3.dp_render_cache.msg_control = msg_control; 493 insn->bits3.dp_render_cache.pixel_scoreboard_clear = pixel_scoreboard_clear; 494 insn->bits3.dp_render_cache.msg_type = msg_type; 495 insn->bits3.dp_render_cache.send_commit_msg = send_commit_msg; 496 insn->bits3.dp_render_cache.header_present = header_present; 497 insn->bits3.dp_render_cache.response_length = response_length; 498 insn->bits3.dp_render_cache.msg_length = msg_length; 499 insn->bits3.dp_render_cache.end_of_thread = end_of_thread; 500 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_WRITE; 501 /* XXX really need below? 
*/ 502 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE; 503 insn->bits2.send_gen5.end_of_thread = end_of_thread; 504 } else if (intel->gen == 5) { 505 insn->bits3.dp_write_gen5.binding_table_index = binding_table_index; 506 insn->bits3.dp_write_gen5.msg_control = msg_control; 507 insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear; 508 insn->bits3.dp_write_gen5.msg_type = msg_type; 509 insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg; 510 insn->bits3.dp_write_gen5.header_present = header_present; 511 insn->bits3.dp_write_gen5.response_length = response_length; 512 insn->bits3.dp_write_gen5.msg_length = msg_length; 513 insn->bits3.dp_write_gen5.end_of_thread = end_of_thread; 514 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE; 515 insn->bits2.send_gen5.end_of_thread = end_of_thread; 516 } else { 517 insn->bits3.dp_write.binding_table_index = binding_table_index; 518 insn->bits3.dp_write.msg_control = msg_control; 519 insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear; 520 insn->bits3.dp_write.msg_type = msg_type; 521 insn->bits3.dp_write.send_commit_msg = send_commit_msg; 522 insn->bits3.dp_write.response_length = response_length; 523 insn->bits3.dp_write.msg_length = msg_length; 524 insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE; 525 insn->bits3.dp_write.end_of_thread = end_of_thread; 526 } 527} 528 529static void 530brw_set_dp_read_message(struct brw_context *brw, 531 struct brw_instruction *insn, 532 GLuint binding_table_index, 533 GLuint msg_control, 534 GLuint msg_type, 535 GLuint target_cache, 536 GLuint msg_length, 537 GLuint response_length) 538{ 539 struct intel_context *intel = &brw->intel; 540 brw_set_src1(insn, brw_imm_d(0)); 541 542 if (intel->gen >= 6) { 543 insn->bits3.dp_render_cache.binding_table_index = binding_table_index; 544 insn->bits3.dp_render_cache.msg_control = msg_control; 545 insn->bits3.dp_render_cache.pixel_scoreboard_clear = 0; 546 
insn->bits3.dp_render_cache.msg_type = msg_type; 547 insn->bits3.dp_render_cache.send_commit_msg = 0; 548 insn->bits3.dp_render_cache.header_present = 1; 549 insn->bits3.dp_render_cache.response_length = response_length; 550 insn->bits3.dp_render_cache.msg_length = msg_length; 551 insn->bits3.dp_render_cache.end_of_thread = 0; 552 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_READ; 553 /* XXX really need below? */ 554 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ; 555 insn->bits2.send_gen5.end_of_thread = 0; 556 } else if (intel->gen == 5) { 557 insn->bits3.dp_read_gen5.binding_table_index = binding_table_index; 558 insn->bits3.dp_read_gen5.msg_control = msg_control; 559 insn->bits3.dp_read_gen5.msg_type = msg_type; 560 insn->bits3.dp_read_gen5.target_cache = target_cache; 561 insn->bits3.dp_read_gen5.header_present = 1; 562 insn->bits3.dp_read_gen5.response_length = response_length; 563 insn->bits3.dp_read_gen5.msg_length = msg_length; 564 insn->bits3.dp_read_gen5.pad1 = 0; 565 insn->bits3.dp_read_gen5.end_of_thread = 0; 566 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ; 567 insn->bits2.send_gen5.end_of_thread = 0; 568 } else if (intel->is_g4x) { 569 insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/ 570 insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/ 571 insn->bits3.dp_read_g4x.msg_type = msg_type; /*11:13*/ 572 insn->bits3.dp_read_g4x.target_cache = target_cache; /*14:15*/ 573 insn->bits3.dp_read_g4x.response_length = response_length; /*16:19*/ 574 insn->bits3.dp_read_g4x.msg_length = msg_length; /*20:23*/ 575 insn->bits3.dp_read_g4x.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/ 576 insn->bits3.dp_read_g4x.pad1 = 0; 577 insn->bits3.dp_read_g4x.end_of_thread = 0; 578 } else { 579 insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/ 580 insn->bits3.dp_read.msg_control = msg_control; /*8:11*/ 581 insn->bits3.dp_read.msg_type = msg_type; /*12:13*/ 
582 insn->bits3.dp_read.target_cache = target_cache; /*14:15*/ 583 insn->bits3.dp_read.response_length = response_length; /*16:19*/ 584 insn->bits3.dp_read.msg_length = msg_length; /*20:23*/ 585 insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/ 586 insn->bits3.dp_read.pad1 = 0; /*28:30*/ 587 insn->bits3.dp_read.end_of_thread = 0; /*31*/ 588 } 589} 590 591static void brw_set_sampler_message(struct brw_context *brw, 592 struct brw_instruction *insn, 593 GLuint binding_table_index, 594 GLuint sampler, 595 GLuint msg_type, 596 GLuint response_length, 597 GLuint msg_length, 598 GLboolean eot, 599 GLuint header_present, 600 GLuint simd_mode) 601{ 602 struct intel_context *intel = &brw->intel; 603 assert(eot == 0); 604 brw_set_src1(insn, brw_imm_d(0)); 605 606 if (intel->gen >= 5) { 607 insn->bits3.sampler_gen5.binding_table_index = binding_table_index; 608 insn->bits3.sampler_gen5.sampler = sampler; 609 insn->bits3.sampler_gen5.msg_type = msg_type; 610 insn->bits3.sampler_gen5.simd_mode = simd_mode; 611 insn->bits3.sampler_gen5.header_present = header_present; 612 insn->bits3.sampler_gen5.response_length = response_length; 613 insn->bits3.sampler_gen5.msg_length = msg_length; 614 insn->bits3.sampler_gen5.end_of_thread = eot; 615 if (intel->gen >= 6) 616 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_SAMPLER; 617 else { 618 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER; 619 insn->bits2.send_gen5.end_of_thread = eot; 620 } 621 } else if (intel->is_g4x) { 622 insn->bits3.sampler_g4x.binding_table_index = binding_table_index; 623 insn->bits3.sampler_g4x.sampler = sampler; 624 insn->bits3.sampler_g4x.msg_type = msg_type; 625 insn->bits3.sampler_g4x.response_length = response_length; 626 insn->bits3.sampler_g4x.msg_length = msg_length; 627 insn->bits3.sampler_g4x.end_of_thread = eot; 628 insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER; 629 } else { 630 insn->bits3.sampler.binding_table_index = binding_table_index; 
631 insn->bits3.sampler.sampler = sampler; 632 insn->bits3.sampler.msg_type = msg_type; 633 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; 634 insn->bits3.sampler.response_length = response_length; 635 insn->bits3.sampler.msg_length = msg_length; 636 insn->bits3.sampler.end_of_thread = eot; 637 insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER; 638 } 639} 640 641 642 643static struct brw_instruction *next_insn( struct brw_compile *p, 644 GLuint opcode ) 645{ 646 struct brw_instruction *insn; 647 648 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN); 649 650 insn = &p->store[p->nr_insn++]; 651 memcpy(insn, p->current, sizeof(*insn)); 652 653 /* Reset this one-shot flag: 654 */ 655 656 if (p->current->header.destreg__conditionalmod) { 657 p->current->header.destreg__conditionalmod = 0; 658 p->current->header.predicate_control = BRW_PREDICATE_NORMAL; 659 } 660 661 insn->header.opcode = opcode; 662 return insn; 663} 664 665 666static struct brw_instruction *brw_alu1( struct brw_compile *p, 667 GLuint opcode, 668 struct brw_reg dest, 669 struct brw_reg src ) 670{ 671 struct brw_instruction *insn = next_insn(p, opcode); 672 brw_set_dest(p, insn, dest); 673 brw_set_src0(insn, src); 674 return insn; 675} 676 677static struct brw_instruction *brw_alu2(struct brw_compile *p, 678 GLuint opcode, 679 struct brw_reg dest, 680 struct brw_reg src0, 681 struct brw_reg src1 ) 682{ 683 struct brw_instruction *insn = next_insn(p, opcode); 684 brw_set_dest(p, insn, dest); 685 brw_set_src0(insn, src0); 686 brw_set_src1(insn, src1); 687 return insn; 688} 689 690 691/*********************************************************************** 692 * Convenience routines. 
693 */ 694#define ALU1(OP) \ 695struct brw_instruction *brw_##OP(struct brw_compile *p, \ 696 struct brw_reg dest, \ 697 struct brw_reg src0) \ 698{ \ 699 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \ 700} 701 702#define ALU2(OP) \ 703struct brw_instruction *brw_##OP(struct brw_compile *p, \ 704 struct brw_reg dest, \ 705 struct brw_reg src0, \ 706 struct brw_reg src1) \ 707{ \ 708 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \ 709} 710 711/* Rounding operations (other than RNDD) require two instructions - the first 712 * stores a rounded value (possibly the wrong way) in the dest register, but 713 * also sets a per-channel "increment bit" in the flag register. A predicated 714 * add of 1.0 fixes dest to contain the desired result. 715 */ 716#define ROUND(OP) \ 717void brw_##OP(struct brw_compile *p, \ 718 struct brw_reg dest, \ 719 struct brw_reg src) \ 720{ \ 721 struct brw_instruction *rnd, *add; \ 722 rnd = next_insn(p, BRW_OPCODE_##OP); \ 723 brw_set_dest(p, rnd, dest); \ 724 brw_set_src0(rnd, src); \ 725 rnd->header.destreg__conditionalmod = 0x7; /* turn on round-increments */ \ 726 \ 727 add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \ 728 add->header.predicate_control = BRW_PREDICATE_NORMAL; \ 729} 730 731 732ALU1(MOV) 733ALU2(SEL) 734ALU1(NOT) 735ALU2(AND) 736ALU2(OR) 737ALU2(XOR) 738ALU2(SHR) 739ALU2(SHL) 740ALU2(RSR) 741ALU2(RSL) 742ALU2(ASR) 743ALU1(FRC) 744ALU1(RNDD) 745ALU2(MAC) 746ALU2(MACH) 747ALU1(LZD) 748ALU2(DP4) 749ALU2(DPH) 750ALU2(DP3) 751ALU2(DP2) 752ALU2(LINE) 753ALU2(PLN) 754 755 756ROUND(RNDZ) 757ROUND(RNDE) 758 759 760struct brw_instruction *brw_ADD(struct brw_compile *p, 761 struct brw_reg dest, 762 struct brw_reg src0, 763 struct brw_reg src1) 764{ 765 /* 6.2.2: add */ 766 if (src0.type == BRW_REGISTER_TYPE_F || 767 (src0.file == BRW_IMMEDIATE_VALUE && 768 src0.type == BRW_REGISTER_TYPE_VF)) { 769 assert(src1.type != BRW_REGISTER_TYPE_UD); 770 assert(src1.type != BRW_REGISTER_TYPE_D); 771 } 772 773 if (src1.type == 
BRW_REGISTER_TYPE_F || 774 (src1.file == BRW_IMMEDIATE_VALUE && 775 src1.type == BRW_REGISTER_TYPE_VF)) { 776 assert(src0.type != BRW_REGISTER_TYPE_UD); 777 assert(src0.type != BRW_REGISTER_TYPE_D); 778 } 779 780 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1); 781} 782 783struct brw_instruction *brw_MUL(struct brw_compile *p, 784 struct brw_reg dest, 785 struct brw_reg src0, 786 struct brw_reg src1) 787{ 788 /* 6.32.38: mul */ 789 if (src0.type == BRW_REGISTER_TYPE_D || 790 src0.type == BRW_REGISTER_TYPE_UD || 791 src1.type == BRW_REGISTER_TYPE_D || 792 src1.type == BRW_REGISTER_TYPE_UD) { 793 assert(dest.type != BRW_REGISTER_TYPE_F); 794 } 795 796 if (src0.type == BRW_REGISTER_TYPE_F || 797 (src0.file == BRW_IMMEDIATE_VALUE && 798 src0.type == BRW_REGISTER_TYPE_VF)) { 799 assert(src1.type != BRW_REGISTER_TYPE_UD); 800 assert(src1.type != BRW_REGISTER_TYPE_D); 801 } 802 803 if (src1.type == BRW_REGISTER_TYPE_F || 804 (src1.file == BRW_IMMEDIATE_VALUE && 805 src1.type == BRW_REGISTER_TYPE_VF)) { 806 assert(src0.type != BRW_REGISTER_TYPE_UD); 807 assert(src0.type != BRW_REGISTER_TYPE_D); 808 } 809 810 assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE || 811 src0.nr != BRW_ARF_ACCUMULATOR); 812 assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE || 813 src1.nr != BRW_ARF_ACCUMULATOR); 814 815 return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1); 816} 817 818 819void brw_NOP(struct brw_compile *p) 820{ 821 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP); 822 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 823 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 824 brw_set_src1(insn, brw_imm_ud(0x0)); 825} 826 827 828 829 830 831/*********************************************************************** 832 * Comparisons, if/else/endif 833 */ 834 835struct brw_instruction *brw_JMPI(struct brw_compile *p, 836 struct brw_reg dest, 837 struct brw_reg src0, 838 struct brw_reg src1) 839{ 840 struct brw_instruction *insn 
= brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); 841 842 insn->header.execution_size = 1; 843 insn->header.compression_control = BRW_COMPRESSION_NONE; 844 insn->header.mask_control = BRW_MASK_DISABLE; 845 846 p->current->header.predicate_control = BRW_PREDICATE_NONE; 847 848 return insn; 849} 850 851/* EU takes the value from the flag register and pushes it onto some 852 * sort of a stack (presumably merging with any flag value already on 853 * the stack). Within an if block, the flags at the top of the stack 854 * control execution on each channel of the unit, eg. on each of the 855 * 16 pixel values in our wm programs. 856 * 857 * When the matching 'else' instruction is reached (presumably by 858 * countdown of the instruction count patched in by our ELSE/ENDIF 859 * functions), the relevent flags are inverted. 860 * 861 * When the matching 'endif' instruction is reached, the flags are 862 * popped off. If the stack is now empty, normal execution resumes. 863 * 864 * No attempt is made to deal with stack overflow (14 elements?). 
865 */ 866struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size) 867{ 868 struct intel_context *intel = &p->brw->intel; 869 struct brw_instruction *insn; 870 871 if (p->single_program_flow) { 872 assert(execute_size == BRW_EXECUTE_1); 873 874 insn = next_insn(p, BRW_OPCODE_ADD); 875 insn->header.predicate_inverse = 1; 876 } else { 877 insn = next_insn(p, BRW_OPCODE_IF); 878 } 879 880 /* Override the defaults for this instruction: 881 */ 882 if (intel->gen < 6) { 883 brw_set_dest(p, insn, brw_ip_reg()); 884 brw_set_src0(insn, brw_ip_reg()); 885 brw_set_src1(insn, brw_imm_d(0x0)); 886 } else { 887 brw_set_dest(p, insn, brw_imm_w(0)); 888 insn->bits1.branch_gen6.jump_count = 0; 889 brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 890 brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 891 } 892 893 insn->header.execution_size = execute_size; 894 insn->header.compression_control = BRW_COMPRESSION_NONE; 895 insn->header.predicate_control = BRW_PREDICATE_NORMAL; 896 insn->header.mask_control = BRW_MASK_ENABLE; 897 if (!p->single_program_flow) 898 insn->header.thread_control = BRW_THREAD_SWITCH; 899 900 p->current->header.predicate_control = BRW_PREDICATE_NONE; 901 902 return insn; 903} 904 905struct brw_instruction * 906gen6_IF(struct brw_compile *p, uint32_t conditional, 907 struct brw_reg src0, struct brw_reg src1) 908{ 909 struct brw_instruction *insn; 910 911 insn = next_insn(p, BRW_OPCODE_IF); 912 913 brw_set_dest(p, insn, brw_imm_w(0)); 914 insn->header.execution_size = BRW_EXECUTE_8; 915 insn->bits1.branch_gen6.jump_count = 0; 916 brw_set_src0(insn, src0); 917 brw_set_src1(insn, src1); 918 919 assert(insn->header.compression_control == BRW_COMPRESSION_NONE); 920 assert(insn->header.predicate_control == BRW_PREDICATE_NONE); 921 insn->header.destreg__conditionalmod = conditional; 922 923 if (!p->single_program_flow) 924 insn->header.thread_control = BRW_THREAD_SWITCH; 925 926 return insn; 927} 928 929struct 
brw_instruction *brw_ELSE(struct brw_compile *p, 930 struct brw_instruction *if_insn) 931{ 932 struct intel_context *intel = &p->brw->intel; 933 struct brw_instruction *insn; 934 GLuint br = 1; 935 936 /* jump count is for 64bit data chunk each, so one 128bit 937 instruction requires 2 chunks. */ 938 if (intel->gen >= 5) 939 br = 2; 940 941 if (p->single_program_flow) { 942 insn = next_insn(p, BRW_OPCODE_ADD); 943 } else { 944 insn = next_insn(p, BRW_OPCODE_ELSE); 945 } 946 947 if (intel->gen < 6) { 948 brw_set_dest(p, insn, brw_ip_reg()); 949 brw_set_src0(insn, brw_ip_reg()); 950 brw_set_src1(insn, brw_imm_d(0x0)); 951 } else { 952 brw_set_dest(p, insn, brw_imm_w(0)); 953 insn->bits1.branch_gen6.jump_count = 0; 954 brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 955 brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 956 } 957 958 insn->header.compression_control = BRW_COMPRESSION_NONE; 959 insn->header.execution_size = if_insn->header.execution_size; 960 insn->header.mask_control = BRW_MASK_ENABLE; 961 if (!p->single_program_flow) 962 insn->header.thread_control = BRW_THREAD_SWITCH; 963 964 /* Patch the if instruction to point at this instruction. 
965 */ 966 if (p->single_program_flow) { 967 assert(if_insn->header.opcode == BRW_OPCODE_ADD); 968 969 if_insn->bits3.ud = (insn - if_insn + 1) * 16; 970 } else { 971 assert(if_insn->header.opcode == BRW_OPCODE_IF); 972 973 if (intel->gen < 6) { 974 if_insn->bits3.if_else.jump_count = br * (insn - if_insn); 975 if_insn->bits3.if_else.pop_count = 0; 976 if_insn->bits3.if_else.pad0 = 0; 977 } else { 978 if_insn->bits1.branch_gen6.jump_count = br * (insn - if_insn + 1); 979 } 980 } 981 982 return insn; 983} 984 985void brw_ENDIF(struct brw_compile *p, 986 struct brw_instruction *patch_insn) 987{ 988 struct intel_context *intel = &p->brw->intel; 989 GLuint br = 1; 990 991 if (intel->gen >= 5) 992 br = 2; 993 994 if (p->single_program_flow) { 995 /* In single program flow mode, there's no need to execute an ENDIF, 996 * since we don't need to do any stack operations, and if we're executing 997 * currently, we want to just continue executing. 998 */ 999 struct brw_instruction *next = &p->store[p->nr_insn]; 1000 1001 assert(patch_insn->header.opcode == BRW_OPCODE_ADD); 1002 1003 patch_insn->bits3.ud = (next - patch_insn) * 16; 1004 } else { 1005 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF); 1006 1007 if (intel->gen < 6) { 1008 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 1009 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 1010 brw_set_src1(insn, brw_imm_d(0x0)); 1011 } else { 1012 brw_set_dest(p, insn, brw_imm_w(0)); 1013 brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1014 brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1015 } 1016 1017 insn->header.compression_control = BRW_COMPRESSION_NONE; 1018 insn->header.execution_size = patch_insn->header.execution_size; 1019 insn->header.mask_control = BRW_MASK_ENABLE; 1020 insn->header.thread_control = BRW_THREAD_SWITCH; 1021 1022 if (intel->gen < 6) 1023 assert(patch_insn->bits3.if_else.jump_count == 0); 1024 else 1025 
assert(patch_insn->bits1.branch_gen6.jump_count == 0); 1026 1027 /* Patch the if or else instructions to point at this or the next 1028 * instruction respectively. 1029 */ 1030 if (patch_insn->header.opcode == BRW_OPCODE_IF) { 1031 if (intel->gen < 6) { 1032 /* Turn it into an IFF, which means no mask stack operations for 1033 * all-false and jumping past the ENDIF. 1034 */ 1035 patch_insn->header.opcode = BRW_OPCODE_IFF; 1036 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); 1037 patch_insn->bits3.if_else.pop_count = 0; 1038 patch_insn->bits3.if_else.pad0 = 0; 1039 } else { 1040 /* As of gen6, there is no IFF and IF must point to the ENDIF. */ 1041 patch_insn->bits1.branch_gen6.jump_count = br * (insn - patch_insn); 1042 } 1043 } else { 1044 assert(patch_insn->header.opcode == BRW_OPCODE_ELSE); 1045 if (intel->gen < 6) { 1046 /* BRW_OPCODE_ELSE pre-gen6 should point just past the 1047 * matching ENDIF. 1048 */ 1049 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); 1050 patch_insn->bits3.if_else.pop_count = 1; 1051 patch_insn->bits3.if_else.pad0 = 0; 1052 } else { 1053 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. 
*/ 1054 patch_insn->bits1.branch_gen6.jump_count = br * (insn - patch_insn); 1055 } 1056 } 1057 1058 /* Also pop item off the stack in the endif instruction: 1059 */ 1060 if (intel->gen < 6) { 1061 insn->bits3.if_else.jump_count = 0; 1062 insn->bits3.if_else.pop_count = 1; 1063 insn->bits3.if_else.pad0 = 0; 1064 } else { 1065 insn->bits1.branch_gen6.jump_count = 2; 1066 } 1067 } 1068} 1069 1070struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count) 1071{ 1072 struct intel_context *intel = &p->brw->intel; 1073 struct brw_instruction *insn; 1074 1075 insn = next_insn(p, BRW_OPCODE_BREAK); 1076 if (intel->gen >= 6) { 1077 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1078 brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1079 brw_set_src1(insn, brw_imm_d(0x0)); 1080 } else { 1081 brw_set_dest(p, insn, brw_ip_reg()); 1082 brw_set_src0(insn, brw_ip_reg()); 1083 brw_set_src1(insn, brw_imm_d(0x0)); 1084 insn->bits3.if_else.pad0 = 0; 1085 insn->bits3.if_else.pop_count = pop_count; 1086 } 1087 insn->header.compression_control = BRW_COMPRESSION_NONE; 1088 insn->header.execution_size = BRW_EXECUTE_8; 1089 1090 return insn; 1091} 1092 1093struct brw_instruction *gen6_CONT(struct brw_compile *p, 1094 struct brw_instruction *do_insn) 1095{ 1096 struct brw_instruction *insn; 1097 int br = 2; 1098 1099 insn = next_insn(p, BRW_OPCODE_CONTINUE); 1100 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1101 brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1102 brw_set_dest(p, insn, brw_ip_reg()); 1103 brw_set_src0(insn, brw_ip_reg()); 1104 brw_set_src1(insn, brw_imm_d(0x0)); 1105 1106 insn->bits3.break_cont.uip = br * (do_insn - insn); 1107 1108 insn->header.compression_control = BRW_COMPRESSION_NONE; 1109 insn->header.execution_size = BRW_EXECUTE_8; 1110 return insn; 1111} 1112 1113struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count) 1114{ 1115 struct brw_instruction *insn; 1116 
insn = next_insn(p, BRW_OPCODE_CONTINUE); 1117 brw_set_dest(p, insn, brw_ip_reg()); 1118 brw_set_src0(insn, brw_ip_reg()); 1119 brw_set_src1(insn, brw_imm_d(0x0)); 1120 insn->header.compression_control = BRW_COMPRESSION_NONE; 1121 insn->header.execution_size = BRW_EXECUTE_8; 1122 /* insn->header.mask_control = BRW_MASK_DISABLE; */ 1123 insn->bits3.if_else.pad0 = 0; 1124 insn->bits3.if_else.pop_count = pop_count; 1125 return insn; 1126} 1127 1128/* DO/WHILE loop: 1129 * 1130 * The DO/WHILE is just an unterminated loop -- break or continue are 1131 * used for control within the loop. We have a few ways they can be 1132 * done. 1133 * 1134 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip, 1135 * jip and no DO instruction. 1136 * 1137 * For non-uniform control flow pre-gen6, there's a DO instruction to 1138 * push the mask, and a WHILE to jump back, and BREAK to get out and 1139 * pop the mask. 1140 * 1141 * For gen6, there's no more mask stack, so no need for DO. WHILE 1142 * just points back to the first instruction of the loop. 
 */
/**
 * Begin a DO/WHILE loop.  On gen6 or in single-program-flow mode no
 * instruction is emitted; the address of the next instruction slot is
 * returned so brw_WHILE() can jump back to it.
 */
struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
{
   struct intel_context *intel = &p->brw->intel;

   if (intel->gen >= 6 || p->single_program_flow) {
      return &p->store[p->nr_insn];
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);

      /* Override the defaults for this instruction:
       */
      brw_set_dest(p, insn, brw_null_reg());
      brw_set_src0(insn, brw_null_reg());
      brw_set_src1(insn, brw_null_reg());

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = execute_size;
      insn->header.predicate_control = BRW_PREDICATE_NONE;
      /* insn->header.mask_control = BRW_MASK_ENABLE; */
      /* insn->header.mask_control = BRW_MASK_DISABLE; */

      return insn;
   }
}



/**
 * Close a DO/WHILE loop, emitting the backward jump to \p do_insn.
 * The jump-count units are 64-bit chunks (hence br = 2 on gen5+, where
 * counts are per 64-bit chunk of the 128-bit instruction).
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p,
                                  struct brw_instruction *do_insn)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint br = 1;

   if (intel->gen >= 5)
      br = 2;

   if (intel->gen >= 6) {
      insn = next_insn(p, BRW_OPCODE_WHILE);

      brw_set_dest(p, insn, brw_imm_w(0));
      /* Negative distance: points back to the top of the loop. */
      insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
      brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));

      insn->header.execution_size = do_insn->header.execution_size;
      assert(insn->header.execution_size == BRW_EXECUTE_8);
   } else {
      if (p->single_program_flow) {
	 insn = next_insn(p, BRW_OPCODE_ADD);

	 /* Uniform flow: a plain ADD to IP, distance in bytes. */
	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(insn, brw_ip_reg());
	 brw_set_src1(insn, brw_imm_d((do_insn - insn) * 16));
	 insn->header.execution_size = BRW_EXECUTE_1;
      } else {
	 insn = next_insn(p, BRW_OPCODE_WHILE);

	 assert(do_insn->header.opcode == BRW_OPCODE_DO);

	 brw_set_dest(p, insn, brw_ip_reg());
	 brw_set_src0(insn, brw_ip_reg());
	 brw_set_src1(insn, brw_imm_d(0));

	 insn->header.execution_size = do_insn->header.execution_size;
	 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
	 insn->bits3.if_else.pop_count = 0;
	 insn->bits3.if_else.pad0 = 0;
      }
   }
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}


/* FORWARD JUMPS:
 */
/**
 * Patch a previously emitted JMPI to land on the next instruction slot.
 * JMPI distances are relative to the instruction after the jump, hence
 * the "- 1".
 */
void brw_land_fwd_jump(struct brw_compile *p,
		       struct brw_instruction *jmp_insn)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *landing = &p->store[p->nr_insn];
   GLuint jmpi = 1;

   if (intel->gen >= 5)
      jmpi = 2;

   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
   assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);

   jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
}



/* To integrate with the above, it makes sense that the comparison
 * instruction should populate the flag register.  It might be simpler
 * just to use the flag reg for most WM tasks?
 */
void brw_CMP(struct brw_compile *p,
	     struct brw_reg dest,
	     GLuint conditional,
	     struct brw_reg src0,
	     struct brw_reg src1)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);

   insn->header.destreg__conditionalmod = conditional;
   brw_set_dest(p, insn, dest);
   brw_set_src0(insn, src0);
   brw_set_src1(insn, src1);

/*    guess_execution_size(insn, src0); */


   /* Make it so that future instructions will use the computed flag
    * value until brw_set_predicate_control_flag_value() is called
    * again.
    */
   /* dest.nr == 0 in the ARF file is the null register: a CMP whose
    * result is discarded exists only to set the flag.
    */
   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.nr == 0) {
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
      p->flag_value = 0xff;
   }
}

/* Issue 'wait' instruction for n1, host could program MMIO
   to wake up thread. */
void brw_WAIT (struct brw_compile *p)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
   struct brw_reg src = brw_notification_1_reg();

   brw_set_dest(p, insn, src);
   brw_set_src0(insn, src);
   brw_set_src1(insn, brw_null_reg());
   insn->header.execution_size = 0; /* must */
   insn->header.predicate_control = 0;
   insn->header.compression_control = 0;
}


/***********************************************************************
 * Helpers for the various SEND message types:
 */

/** Extended math function, float[8].
 */
void brw_math( struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       GLuint saturate,
	       GLuint msg_reg_nr,
	       struct brw_reg src,
	       GLuint data_type,
	       GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;

   if (intel->gen >= 6) {
      /* Gen6+: math is a regular EU instruction, not a SEND to the
       * extended math unit; msg_reg_nr/data_type/precision are unused.
       */
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

      assert(dest.file == BRW_GENERAL_REGISTER_FILE);
      assert(src.file == BRW_GENERAL_REGISTER_FILE);

      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
      assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);

      /* Source modifiers are ignored for extended math instructions. */
      assert(!src.negate);
      assert(!src.abs);

      if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
	  function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
	 assert(src.type == BRW_REGISTER_TYPE_F);
      }

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;
      insn->header.saturate = saturate;

      brw_set_dest(p, insn, dest);
      brw_set_src0(insn, src);
      brw_set_src1(insn, brw_null_reg());
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
      GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
      /* Example code doesn't set predicate_control for send
       * instructions.
       */
      insn->header.predicate_control = 0;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(insn, src);
      brw_set_math_message(p->brw,
			   insn,
			   msg_length, response_length,
			   function,
			   BRW_MATH_INTEGER_UNSIGNED,
			   precision,
			   saturate,
			   data_type);
   }
}

/** Extended math function, float[8].
 */
/* Two-source variant (gen6+ only), used e.g. for POW and INT_DIV. */
void brw_math2(struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       struct brw_reg src0,
	       struct brw_reg src1)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

   assert(intel->gen >= 6);
   (void) intel;


   assert(dest.file == BRW_GENERAL_REGISTER_FILE);
   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.file == BRW_GENERAL_REGISTER_FILE);

   assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
   assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
   assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);

   if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
       function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
      assert(src0.type == BRW_REGISTER_TYPE_F);
      assert(src1.type == BRW_REGISTER_TYPE_F);
   }

   /* Source modifiers are ignored for extended math instructions.
    */
   assert(!src0.negate);
   assert(!src0.abs);
   assert(!src1.negate);
   assert(!src1.abs);

   /* Math is the same ISA format as other opcodes, except that CondModifier
    * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
    */
   insn->header.destreg__conditionalmod = function;

   brw_set_dest(p, insn, dest);
   brw_set_src0(insn, src0);
   brw_set_src1(insn, src1);
}

/**
 * Extended math function, float[16].
 * Use 2 send instructions.
 */
void brw_math_16( struct brw_compile *p,
		  struct brw_reg dest,
		  GLuint function,
		  GLuint saturate,
		  GLuint msg_reg_nr,
		  struct brw_reg src,
		  GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
   GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;

   if (intel->gen >= 6) {
      /* Gen6+ handles 16-wide math with a single instruction. */
      insn = next_insn(p, BRW_OPCODE_MATH);

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;
      insn->header.saturate = saturate;

      /* Source modifiers are ignored for extended math instructions.
       */
      assert(!src.negate);
      assert(!src.abs);

      brw_set_dest(p, insn, dest);
      brw_set_src0(insn, src);
      brw_set_src1(insn, brw_null_reg());
      return;
   }

   /* First instruction:
    */
   brw_push_insn_state(p);
   brw_set_predicate_control_flag_value(p, 0xff);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_dest(p, insn, dest);
   brw_set_src0(insn, src);
   brw_set_math_message(p->brw,
			insn,
			msg_length, response_length,
			function,
			BRW_MATH_INTEGER_UNSIGNED,
			precision,
			saturate,
			BRW_MATH_DATA_VECTOR);

   /* Second instruction:
    */
   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
   insn->header.destreg__conditionalmod = msg_reg_nr+1;

   brw_set_dest(p, insn, offset(dest,1));
   brw_set_src0(insn, src);
   brw_set_math_message(p->brw,
			insn,
			msg_length, response_length,
			function,
			BRW_MATH_INTEGER_UNSIGNED,
			precision,
			saturate,
			BRW_MATH_DATA_VECTOR);

   brw_pop_insn_state(p);
}


/**
 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
 * using a constant offset per channel.
 *
 * The offset must be aligned to oword size (16 bytes).  Used for
 * register spilling.
 */
void brw_oword_block_write_scratch(struct brw_compile *p,
				   struct brw_reg mrf,
				   int num_regs,
				   GLuint offset)
{
   struct intel_context *intel = &p->brw->intel;
   uint32_t msg_control;
   int mlen;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      mlen = 2;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      mlen = 3;
   }

   /* Set up the message header.  This is g0, with g0.2 filled with
    * the offset.  We don't want to leave our offset around in g0 or
    * it'll screw up texture samples, so set it up inside the message
    * reg.
    */
   {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
				  mrf.nr,
				  2), BRW_REGISTER_TYPE_UD),
	      brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_reg dest;
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      int send_commit_msg;
      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
					 BRW_REGISTER_TYPE_UW);

      if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
	 insn->header.compression_control = BRW_COMPRESSION_NONE;
	 src_header = vec16(src_header);
      }
      assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
      insn->header.destreg__conditionalmod = mrf.nr;

      /* Until gen6, writes followed by reads from the same location
       * are not guaranteed to be ordered unless write_commit is set.
       * If set, then a no-op write is issued to the destination
       * register to set a dependency, and a read from the destination
       * can be used to ensure the ordering.
       *
       * For gen6, only writes between different threads need ordering
       * protection.  Our use of DP writes is all about register
       * spilling within a thread.
       */
      if (intel->gen >= 6) {
	 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
	 send_commit_msg = 0;
      } else {
	 dest = src_header;
	 send_commit_msg = 1;
      }

      brw_set_dest(p, insn, dest);
      brw_set_src0(insn, brw_null_reg());

      brw_set_dp_write_message(p->brw,
			       insn,
			       255, /* binding table index (255=stateless) */
			       msg_control,
			       BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
			       mlen,
			       GL_TRUE, /* header_present */
			       0, /* pixel scoreboard */
			       send_commit_msg, /* response_length */
			       0, /* eot */
			       send_commit_msg);
   }
}


/**
 * Read a block of owords (half a GRF each) from the scratch buffer
 * using a constant index per channel.
 *
 * Offset must be aligned to oword size (16 bytes).  Used for register
 * spilling.
 */
void
brw_oword_block_read_scratch(struct brw_compile *p,
			     struct brw_reg dest,
			     struct brw_reg mrf,
			     int num_regs,
			     GLuint offset)
{
   uint32_t msg_control;
   int rlen;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
   dest = retype(dest, BRW_REGISTER_TYPE_UW);

   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      rlen = 1;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      rlen = 2;
   }

   /* Build the message header in the MRF, as in the write path above. */
   {
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_mask_control(p, BRW_MASK_DISABLE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
				  mrf.nr,
				  2), BRW_REGISTER_TYPE_UD),
	      brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      assert(insn->header.predicate_control == 0);
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.destreg__conditionalmod = mrf.nr;

      brw_set_dest(p, insn, dest);	/* UW? */
      brw_set_src0(insn, brw_null_reg());

      brw_set_dp_read_message(p->brw,
			      insn,
			      255, /* binding table index (255=stateless) */
			      msg_control,
			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			      1, /* target cache (render/scratch) */
			      1, /* msg_length */
			      rlen);
   }
}

/**
 * Read a float[4] vector from the data port Data Cache (const buffer).
 * Location (in buffer) should be a multiple of 16.
 * Used for fetching shader constants.
 */
void brw_oword_block_read(struct brw_compile *p,
			  struct brw_reg dest,
			  struct brw_reg mrf,
			  uint32_t offset,
			  uint32_t bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;

   /* On newer hardware, offset is in units of owords. */
   if (intel->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);

   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

   /* set message header global offset field (reg 0, element 2) */
   brw_MOV(p,
	   retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
			       mrf.nr,
			       2), BRW_REGISTER_TYPE_UD),
	   brw_imm_ud(offset));

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = mrf.nr;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(p, insn, dest);
   if (intel->gen >= 6) {
      brw_set_src0(insn, mrf);
   } else {
      brw_set_src0(insn, brw_null_reg());
   }

   brw_set_dp_read_message(p->brw,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
			   0, /* source cache = data cache */
			   1, /* msg_length */
			   1); /* response_length (1 reg, 2 owords!) */

   brw_pop_insn_state(p);
}

/**
 * Read a set of dwords from the data port Data Cache (const buffer).
 *
 * Location (in buffer) appears as UD offsets in the register after
 * the provided mrf header reg.
 */
void brw_dword_scattered_read(struct brw_compile *p,
			      struct brw_reg dest,
			      struct brw_reg mrf,
			      uint32_t bind_table_index)
{
   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   brw_push_insn_state(p);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
   brw_pop_insn_state(p);

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = mrf.nr;

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(p, insn, dest);
   brw_set_src0(insn, brw_null_reg());

   brw_set_dp_read_message(p->brw,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS,
			   BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ,
			   0, /* source cache = data cache */
			   2, /* msg_length */
			   1); /* response_length */
}



/**
 * Read float[4] constant(s) from VS constant buffer.
 * For relative addressing, two float[4] constants will be read into 'dest'.
 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1744 */ 1745void brw_dp_READ_4_vs(struct brw_compile *p, 1746 struct brw_reg dest, 1747 GLuint location, 1748 GLuint bind_table_index) 1749{ 1750 struct intel_context *intel = &p->brw->intel; 1751 struct brw_instruction *insn; 1752 GLuint msg_reg_nr = 1; 1753 1754 if (intel->gen >= 6) 1755 location /= 16; 1756 1757 /* Setup MRF[1] with location/offset into const buffer */ 1758 brw_push_insn_state(p); 1759 brw_set_access_mode(p, BRW_ALIGN_1); 1760 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1761 brw_set_mask_control(p, BRW_MASK_DISABLE); 1762 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1763 brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2), 1764 BRW_REGISTER_TYPE_UD), 1765 brw_imm_ud(location)); 1766 brw_pop_insn_state(p); 1767 1768 insn = next_insn(p, BRW_OPCODE_SEND); 1769 1770 insn->header.predicate_control = BRW_PREDICATE_NONE; 1771 insn->header.compression_control = BRW_COMPRESSION_NONE; 1772 insn->header.destreg__conditionalmod = msg_reg_nr; 1773 insn->header.mask_control = BRW_MASK_DISABLE; 1774 1775 brw_set_dest(p, insn, dest); 1776 if (intel->gen >= 6) { 1777 brw_set_src0(insn, brw_message_reg(msg_reg_nr)); 1778 } else { 1779 brw_set_src0(insn, brw_null_reg()); 1780 } 1781 1782 brw_set_dp_read_message(p->brw, 1783 insn, 1784 bind_table_index, 1785 0, 1786 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ 1787 0, /* source cache = data cache */ 1788 1, /* msg_length */ 1789 1); /* response_length (1 Oword) */ 1790} 1791 1792/** 1793 * Read a float[4] constant per vertex from VS constant buffer, with 1794 * relative addressing. 
 */
void brw_dp_READ_4_vs_relative(struct brw_compile *p,
			       struct brw_reg dest,
			       struct brw_reg addr_reg,
			       GLuint offset,
			       GLuint bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;
   int msg_type;

   /* Setup MRF[1] with offset into const buffer */
   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);

   /* M1.0 is block offset 0, M1.4 is block offset 1, all other
    * fields ignored.
    */
   brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D),
	   addr_reg, brw_imm_d(offset));
   brw_pop_insn_state(p);

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

   insn->header.predicate_control = BRW_PREDICATE_NONE;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.destreg__conditionalmod = 0;
   insn->header.mask_control = BRW_MASK_DISABLE;

   brw_set_dest(p, insn, dest);
   brw_set_src0(insn, brw_vec8_grf(0, 0));

   /* The dual-block read message type was re-encoded on g4x/gen5 and
    * again on gen6; pick the encoding for the current generation.
    */
   if (intel->gen == 6)
      msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else if (intel->gen == 5 || intel->is_g4x)
      msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else
      msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;

   brw_set_dp_read_message(p->brw,
			   insn,
			   bind_table_index,
			   BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
			   msg_type,
			   0, /* source cache = data cache */
			   2, /* msg_length */
			   1); /* response_length */
}



/**
 * Emit a render-target write SEND (SENDC for RT 0 on gen6+, so that the
 * write is serialized against previous writes to the same pixels).
 */
void brw_fb_WRITE(struct brw_compile *p,
		  int dispatch_width,
                  struct brw_reg dest,
                  GLuint msg_reg_nr,
                  struct brw_reg src0,
                  GLuint binding_table_index,
                  GLuint msg_length,
                  GLuint response_length,
                  GLboolean eot,
                  GLboolean header_present)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint msg_control, msg_type;

   if (intel->gen >= 6 && binding_table_index == 0) {
      insn = next_insn(p, BRW_OPCODE_SENDC);
   } else {
      insn = next_insn(p, BRW_OPCODE_SEND);
   }
   /* The execution mask is ignored for render target writes. */
   insn->header.predicate_control = 0;
   insn->header.compression_control = BRW_COMPRESSION_NONE;

   if (intel->gen >= 6) {
      /* headerless version, just submit color payload */
      src0 = brw_message_reg(msg_reg_nr);

      msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   } else {
      insn->header.destreg__conditionalmod = msg_reg_nr;

      msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   }

   if (dispatch_width == 16)
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
   else
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;

   brw_set_dest(p, insn, dest);
   brw_set_src0(insn, src0);
   brw_set_dp_write_message(p->brw,
			    insn,
			    binding_table_index,
			    msg_control,
			    msg_type,
			    msg_length,
			    header_present,
			    1, /* pixel scoreboard */
			    response_length,
			    eot,
			    0 /* send_commit_msg */);
}


/**
 * Texture sample instruction.
 * Note: the msg_type plus msg_length values determine exactly what kind
 * of sampling operation is performed.  See volume 4, page 161 of docs.
 */
void brw_SAMPLE(struct brw_compile *p,
		struct brw_reg dest,
		GLuint msg_reg_nr,
		struct brw_reg src0,
		GLuint binding_table_index,
		GLuint sampler,
		GLuint writemask,
		GLuint msg_type,
		GLuint response_length,
		GLuint msg_length,
		GLboolean eot,
		GLuint header_present,
		GLuint simd_mode)
{
   struct intel_context *intel = &p->brw->intel;
   GLboolean need_stall = 0;

   if (writemask == 0) {
      /*printf("%s: zero writemask??\n", __FUNCTION__); */
      return;
   }

   /* Hardware doesn't do destination dependency checking on send
    * instructions properly.  Add a workaround which generates the
    * dependency by other means.  In practice it seems like this bug
    * only crops up for texture samples, and only where registers are
    * written by the send and then written again later without being
    * read in between.  Luckily for us, we already track that
    * information and use it to modify the writemask for the
    * instruction, so that is a guide for whether a workaround is
    * needed.
    */
   if (writemask != WRITEMASK_XYZW) {
      GLuint dst_offset = 0;
      GLuint i, newmask = 0, len = 0;

      /* Skip leading masked-off channels; each skipped channel shifts
       * the destination by two registers (one per SIMD8 half).
       */
      for (i = 0; i < 4; i++) {
	 if (writemask & (1<<i))
	    break;
	 dst_offset += 2;
      }
      /* Collect the contiguous run of enabled channels that follows. */
      for (; i < 4; i++) {
	 if (!(writemask & (1<<i)))
	    break;
	 newmask |= 1<<i;
	 len++;
      }

      if (newmask != writemask) {
	 /* Non-contiguous writemask: can't express it via the message
	  * header, so fall back to a stalling read-after-write below.
	  */
	 need_stall = 1;
         /* printf("need stall %x %x\n", newmask , writemask); */
      }
      else {
	 GLboolean dispatch_16 = GL_FALSE;

	 struct brw_reg m1 = brw_message_reg(msg_reg_nr);

	 guess_execution_size(p, p->current, dest);
	 if (p->current->header.execution_size == BRW_EXECUTE_16)
	    dispatch_16 = GL_TRUE;

	 /* The header's channel-mask bits disable channels, so invert. */
	 newmask = ~newmask & WRITEMASK_XYZW;

	 brw_push_insn_state(p);

	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
	 brw_set_mask_control(p, BRW_MASK_DISABLE);

	 brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD),
		 retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD));
  	 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));

	 brw_pop_insn_state(p);

  	 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
	 dest = offset(dest, dst_offset);

	 /* For 16-wide dispatch, masked channels are skipped in the
	  * response.  For 8-wide, masked channels still take up slots,
	  * and are just not written to.
	  */
	 if (dispatch_16)
	    response_length = len * 2;
      }
   }

   {
      struct brw_instruction *insn;

      gen6_resolve_implied_move(p, &src0, msg_reg_nr);

      insn = next_insn(p, BRW_OPCODE_SEND);
      insn->header.predicate_control = 0; /* XXX */
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      if (intel->gen < 6)
	 insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(p, insn, dest);
      brw_set_src0(insn, src0);
      brw_set_sampler_message(p->brw, insn,
			      binding_table_index,
			      sampler,
			      msg_type,
			      response_length,
			      msg_length,
			      eot,
			      header_present,
			      simd_mode);
   }

   if (need_stall) {
      struct brw_reg reg = vec8(offset(dest, response_length-1));

      /*  mov (8) r9.0<1>:f    r9.0<8;8,1>:f    { Align1 }
       */
      /* A self-move of the last response register forces the EU to wait
       * for the sample result, creating the missing dependency.
       */
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, retype(reg, BRW_REGISTER_TYPE_UD),
	      retype(reg, BRW_REGISTER_TYPE_UD));
      brw_pop_insn_state(p);
   }

}

/* All these variables are pretty confusing - we might be better off
 * using bitmasks and macros for this, in the old style.  Or perhaps
 * just having the caller instantiate the fields in dword3 itself.
2036 */ 2037void brw_urb_WRITE(struct brw_compile *p, 2038 struct brw_reg dest, 2039 GLuint msg_reg_nr, 2040 struct brw_reg src0, 2041 GLboolean allocate, 2042 GLboolean used, 2043 GLuint msg_length, 2044 GLuint response_length, 2045 GLboolean eot, 2046 GLboolean writes_complete, 2047 GLuint offset, 2048 GLuint swizzle) 2049{ 2050 struct intel_context *intel = &p->brw->intel; 2051 struct brw_instruction *insn; 2052 2053 gen6_resolve_implied_move(p, &src0, msg_reg_nr); 2054 2055 insn = next_insn(p, BRW_OPCODE_SEND); 2056 2057 assert(msg_length < BRW_MAX_MRF); 2058 2059 brw_set_dest(p, insn, dest); 2060 brw_set_src0(insn, src0); 2061 brw_set_src1(insn, brw_imm_d(0)); 2062 2063 if (intel->gen < 6) 2064 insn->header.destreg__conditionalmod = msg_reg_nr; 2065 2066 brw_set_urb_message(p->brw, 2067 insn, 2068 allocate, 2069 used, 2070 msg_length, 2071 response_length, 2072 eot, 2073 writes_complete, 2074 offset, 2075 swizzle); 2076} 2077 2078static int 2079brw_find_next_block_end(struct brw_compile *p, int start) 2080{ 2081 int ip; 2082 2083 for (ip = start + 1; ip < p->nr_insn; ip++) { 2084 struct brw_instruction *insn = &p->store[ip]; 2085 2086 switch (insn->header.opcode) { 2087 case BRW_OPCODE_ENDIF: 2088 case BRW_OPCODE_ELSE: 2089 case BRW_OPCODE_WHILE: 2090 return ip; 2091 } 2092 } 2093 assert(!"not reached"); 2094 return start + 1; 2095} 2096 2097/* There is no DO instruction on gen6, so to find the end of the loop 2098 * we have to see if the loop is jumping back before our start 2099 * instruction. 
2100 */ 2101static int 2102brw_find_loop_end(struct brw_compile *p, int start) 2103{ 2104 int ip; 2105 int br = 2; 2106 2107 for (ip = start + 1; ip < p->nr_insn; ip++) { 2108 struct brw_instruction *insn = &p->store[ip]; 2109 2110 if (insn->header.opcode == BRW_OPCODE_WHILE) { 2111 if (ip + insn->bits1.branch_gen6.jump_count / br < start) 2112 return ip; 2113 } 2114 } 2115 assert(!"not reached"); 2116 return start + 1; 2117} 2118 2119/* After program generation, go back and update the UIP and JIP of 2120 * BREAK and CONT instructions to their correct locations. 2121 */ 2122void 2123brw_set_uip_jip(struct brw_compile *p) 2124{ 2125 struct intel_context *intel = &p->brw->intel; 2126 int ip; 2127 int br = 2; 2128 2129 if (intel->gen < 6) 2130 return; 2131 2132 for (ip = 0; ip < p->nr_insn; ip++) { 2133 struct brw_instruction *insn = &p->store[ip]; 2134 2135 switch (insn->header.opcode) { 2136 case BRW_OPCODE_BREAK: 2137 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip); 2138 insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip + 1); 2139 break; 2140 case BRW_OPCODE_CONTINUE: 2141 /* JIP is set at CONTINUE emit time, since that's when we 2142 * know where the start of the loop is. 
2143 */ 2144 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip); 2145 assert(insn->bits3.break_cont.uip != 0); 2146 assert(insn->bits3.break_cont.jip != 0); 2147 break; 2148 } 2149 } 2150} 2151 2152void brw_ff_sync(struct brw_compile *p, 2153 struct brw_reg dest, 2154 GLuint msg_reg_nr, 2155 struct brw_reg src0, 2156 GLboolean allocate, 2157 GLuint response_length, 2158 GLboolean eot) 2159{ 2160 struct intel_context *intel = &p->brw->intel; 2161 struct brw_instruction *insn; 2162 2163 gen6_resolve_implied_move(p, &src0, msg_reg_nr); 2164 2165 insn = next_insn(p, BRW_OPCODE_SEND); 2166 brw_set_dest(p, insn, dest); 2167 brw_set_src0(insn, src0); 2168 brw_set_src1(insn, brw_imm_d(0)); 2169 2170 if (intel->gen < 6) 2171 insn->header.destreg__conditionalmod = msg_reg_nr; 2172 2173 brw_set_ff_sync_message(p->brw, 2174 insn, 2175 allocate, 2176 response_length, 2177 eot); 2178} 2179