brw_eu_emit.c revision 0b77d57394a3712851ec271aa7ad353d56f302a1
1/* 2 Copyright (C) Intel Corp. 2006. All Rights Reserved. 3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to 4 develop this 3D driver. 5 6 Permission is hereby granted, free of charge, to any person obtaining 7 a copy of this software and associated documentation files (the 8 "Software"), to deal in the Software without restriction, including 9 without limitation the rights to use, copy, modify, merge, publish, 10 distribute, sublicense, and/or sell copies of the Software, and to 11 permit persons to whom the Software is furnished to do so, subject to 12 the following conditions: 13 14 The above copyright notice and this permission notice (including the 15 next paragraph) shall be included in all copies or substantial 16 portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
25 26 **********************************************************************/ 27 /* 28 * Authors: 29 * Keith Whitwell <keith@tungstengraphics.com> 30 */ 31 32 33#include "brw_context.h" 34#include "brw_defines.h" 35#include "brw_eu.h" 36 37 38 39 40/*********************************************************************** 41 * Internal helper for constructing instructions 42 */ 43 44static void guess_execution_size( struct brw_instruction *insn, 45 struct brw_reg reg ) 46{ 47 if (reg.width == BRW_WIDTH_8 && 48 insn->header.compression_control == BRW_COMPRESSION_COMPRESSED) 49 insn->header.execution_size = BRW_EXECUTE_16; 50 else 51 insn->header.execution_size = reg.width; /* note - definitions are compatible */ 52} 53 54 55static void brw_set_dest( struct brw_instruction *insn, 56 struct brw_reg dest ) 57{ 58 if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && 59 dest.file != BRW_MESSAGE_REGISTER_FILE) 60 assert(dest.nr < 128); 61 62 insn->bits1.da1.dest_reg_file = dest.file; 63 insn->bits1.da1.dest_reg_type = dest.type; 64 insn->bits1.da1.dest_address_mode = dest.address_mode; 65 66 if (dest.address_mode == BRW_ADDRESS_DIRECT) { 67 insn->bits1.da1.dest_reg_nr = dest.nr; 68 69 if (insn->header.access_mode == BRW_ALIGN_1) { 70 insn->bits1.da1.dest_subreg_nr = dest.subnr; 71 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) 72 dest.hstride = BRW_HORIZONTAL_STRIDE_1; 73 insn->bits1.da1.dest_horiz_stride = dest.hstride; 74 } 75 else { 76 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; 77 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; 78 /* even ignored in da16, still need to set as '01' */ 79 insn->bits1.da16.dest_horiz_stride = 1; 80 } 81 } 82 else { 83 insn->bits1.ia1.dest_subreg_nr = dest.subnr; 84 85 /* These are different sizes in align1 vs align16: 86 */ 87 if (insn->header.access_mode == BRW_ALIGN_1) { 88 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; 89 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) 90 dest.hstride = 
BRW_HORIZONTAL_STRIDE_1; 91 insn->bits1.ia1.dest_horiz_stride = dest.hstride; 92 } 93 else { 94 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; 95 /* even ignored in da16, still need to set as '01' */ 96 insn->bits1.ia16.dest_horiz_stride = 1; 97 } 98 } 99 100 /* NEW: Set the execution size based on dest.width and 101 * insn->compression_control: 102 */ 103 guess_execution_size(insn, dest); 104} 105 106extern int reg_type_size[]; 107 108static void 109validate_reg(struct brw_instruction *insn, struct brw_reg reg) 110{ 111 int hstride_for_reg[] = {0, 1, 2, 4}; 112 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256}; 113 int width_for_reg[] = {1, 2, 4, 8, 16}; 114 int execsize_for_reg[] = {1, 2, 4, 8, 16}; 115 int width, hstride, vstride, execsize; 116 117 if (reg.file == BRW_IMMEDIATE_VALUE) { 118 /* 3.3.6: Region Parameters. Restriction: Immediate vectors 119 * mean the destination has to be 128-bit aligned and the 120 * destination horiz stride has to be a word. 121 */ 122 if (reg.type == BRW_REGISTER_TYPE_V) { 123 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] * 124 reg_type_size[insn->bits1.da1.dest_reg_type] == 2); 125 } 126 127 return; 128 } 129 130 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE && 131 reg.file == BRW_ARF_NULL) 132 return; 133 134 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg)); 135 hstride = hstride_for_reg[reg.hstride]; 136 137 if (reg.vstride == 0xf) { 138 vstride = -1; 139 } else { 140 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg)); 141 vstride = vstride_for_reg[reg.vstride]; 142 } 143 144 assert(reg.width >= 0 && reg.width < Elements(width_for_reg)); 145 width = width_for_reg[reg.width]; 146 147 assert(insn->header.execution_size >= 0 && 148 insn->header.execution_size < Elements(execsize_for_reg)); 149 execsize = execsize_for_reg[insn->header.execution_size]; 150 151 /* Restrictions from 3.3.10: Register Region Restrictions. */ 152 /* 3. 
*/ 153 assert(execsize >= width); 154 155 /* 4. */ 156 if (execsize == width && hstride != 0) { 157 assert(vstride == -1 || vstride == width * hstride); 158 } 159 160 /* 5. */ 161 if (execsize == width && hstride == 0) { 162 /* no restriction on vstride. */ 163 } 164 165 /* 6. */ 166 if (width == 1) { 167 assert(hstride == 0); 168 } 169 170 /* 7. */ 171 if (execsize == 1 && width == 1) { 172 assert(hstride == 0); 173 assert(vstride == 0); 174 } 175 176 /* 8. */ 177 if (vstride == 0 && hstride == 0) { 178 assert(width == 1); 179 } 180 181 /* 10. Check destination issues. */ 182} 183 184static void brw_set_src0( struct brw_instruction *insn, 185 struct brw_reg reg ) 186{ 187 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) 188 assert(reg.nr < 128); 189 190 validate_reg(insn, reg); 191 192 insn->bits1.da1.src0_reg_file = reg.file; 193 insn->bits1.da1.src0_reg_type = reg.type; 194 insn->bits2.da1.src0_abs = reg.abs; 195 insn->bits2.da1.src0_negate = reg.negate; 196 insn->bits2.da1.src0_address_mode = reg.address_mode; 197 198 if (reg.file == BRW_IMMEDIATE_VALUE) { 199 insn->bits3.ud = reg.dw1.ud; 200 201 /* Required to set some fields in src1 as well: 202 */ 203 insn->bits1.da1.src1_reg_file = 0; /* arf */ 204 insn->bits1.da1.src1_reg_type = reg.type; 205 } 206 else 207 { 208 if (reg.address_mode == BRW_ADDRESS_DIRECT) { 209 if (insn->header.access_mode == BRW_ALIGN_1) { 210 insn->bits2.da1.src0_subreg_nr = reg.subnr; 211 insn->bits2.da1.src0_reg_nr = reg.nr; 212 } 213 else { 214 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16; 215 insn->bits2.da16.src0_reg_nr = reg.nr; 216 } 217 } 218 else { 219 insn->bits2.ia1.src0_subreg_nr = reg.subnr; 220 221 if (insn->header.access_mode == BRW_ALIGN_1) { 222 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; 223 } 224 else { 225 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; 226 } 227 } 228 229 if (insn->header.access_mode == BRW_ALIGN_1) { 230 if (reg.width == BRW_WIDTH_1 && 231 
insn->header.execution_size == BRW_EXECUTE_1) { 232 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0; 233 insn->bits2.da1.src0_width = BRW_WIDTH_1; 234 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0; 235 } 236 else { 237 insn->bits2.da1.src0_horiz_stride = reg.hstride; 238 insn->bits2.da1.src0_width = reg.width; 239 insn->bits2.da1.src0_vert_stride = reg.vstride; 240 } 241 } 242 else { 243 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); 244 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); 245 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); 246 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); 247 248 /* This is an oddity of the fact we're using the same 249 * descriptions for registers in align_16 as align_1: 250 */ 251 if (reg.vstride == BRW_VERTICAL_STRIDE_8) 252 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4; 253 else 254 insn->bits2.da16.src0_vert_stride = reg.vstride; 255 } 256 } 257} 258 259 260void brw_set_src1( struct brw_instruction *insn, 261 struct brw_reg reg ) 262{ 263 assert(reg.file != BRW_MESSAGE_REGISTER_FILE); 264 265 assert(reg.nr < 128); 266 267 validate_reg(insn, reg); 268 269 insn->bits1.da1.src1_reg_file = reg.file; 270 insn->bits1.da1.src1_reg_type = reg.type; 271 insn->bits3.da1.src1_abs = reg.abs; 272 insn->bits3.da1.src1_negate = reg.negate; 273 274 /* Only src1 can be immediate in two-argument instructions. 
275 */ 276 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE); 277 278 if (reg.file == BRW_IMMEDIATE_VALUE) { 279 insn->bits3.ud = reg.dw1.ud; 280 } 281 else { 282 /* This is a hardware restriction, which may or may not be lifted 283 * in the future: 284 */ 285 assert (reg.address_mode == BRW_ADDRESS_DIRECT); 286 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */ 287 288 if (insn->header.access_mode == BRW_ALIGN_1) { 289 insn->bits3.da1.src1_subreg_nr = reg.subnr; 290 insn->bits3.da1.src1_reg_nr = reg.nr; 291 } 292 else { 293 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16; 294 insn->bits3.da16.src1_reg_nr = reg.nr; 295 } 296 297 if (insn->header.access_mode == BRW_ALIGN_1) { 298 if (reg.width == BRW_WIDTH_1 && 299 insn->header.execution_size == BRW_EXECUTE_1) { 300 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0; 301 insn->bits3.da1.src1_width = BRW_WIDTH_1; 302 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0; 303 } 304 else { 305 insn->bits3.da1.src1_horiz_stride = reg.hstride; 306 insn->bits3.da1.src1_width = reg.width; 307 insn->bits3.da1.src1_vert_stride = reg.vstride; 308 } 309 } 310 else { 311 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); 312 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); 313 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); 314 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); 315 316 /* This is an oddity of the fact we're using the same 317 * descriptions for registers in align_16 as align_1: 318 */ 319 if (reg.vstride == BRW_VERTICAL_STRIDE_8) 320 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4; 321 else 322 insn->bits3.da16.src1_vert_stride = reg.vstride; 323 } 324 } 325} 326 327 328 329static void brw_set_math_message( struct brw_context *brw, 330 struct brw_instruction *insn, 331 GLuint msg_length, 332 GLuint response_length, 333 GLuint function, 334 GLuint 
integer_type, 335 GLboolean low_precision, 336 GLboolean saturate, 337 GLuint dataType ) 338{ 339 struct intel_context *intel = &brw->intel; 340 brw_set_src1(insn, brw_imm_d(0)); 341 342 if (intel->gen == 5) { 343 insn->bits3.math_gen5.function = function; 344 insn->bits3.math_gen5.int_type = integer_type; 345 insn->bits3.math_gen5.precision = low_precision; 346 insn->bits3.math_gen5.saturate = saturate; 347 insn->bits3.math_gen5.data_type = dataType; 348 insn->bits3.math_gen5.snapshot = 0; 349 insn->bits3.math_gen5.header_present = 0; 350 insn->bits3.math_gen5.response_length = response_length; 351 insn->bits3.math_gen5.msg_length = msg_length; 352 insn->bits3.math_gen5.end_of_thread = 0; 353 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH; 354 insn->bits2.send_gen5.end_of_thread = 0; 355 } else { 356 insn->bits3.math.function = function; 357 insn->bits3.math.int_type = integer_type; 358 insn->bits3.math.precision = low_precision; 359 insn->bits3.math.saturate = saturate; 360 insn->bits3.math.data_type = dataType; 361 insn->bits3.math.response_length = response_length; 362 insn->bits3.math.msg_length = msg_length; 363 insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH; 364 insn->bits3.math.end_of_thread = 0; 365 } 366} 367 368 369static void brw_set_ff_sync_message(struct brw_context *brw, 370 struct brw_instruction *insn, 371 GLboolean allocate, 372 GLuint response_length, 373 GLboolean end_of_thread) 374{ 375 struct intel_context *intel = &brw->intel; 376 brw_set_src1(insn, brw_imm_d(0)); 377 378 insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */ 379 insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */ 380 insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */ 381 insn->bits3.urb_gen5.allocate = allocate; 382 insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */ 383 insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */ 384 insn->bits3.urb_gen5.header_present = 1; 385 insn->bits3.urb_gen5.response_length = response_length; /* may 
be 1 or 0 */ 386 insn->bits3.urb_gen5.msg_length = 1; 387 insn->bits3.urb_gen5.end_of_thread = end_of_thread; 388 if (intel->gen >= 6) { 389 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB; 390 } else { 391 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB; 392 insn->bits2.send_gen5.end_of_thread = end_of_thread; 393 } 394} 395 396static void brw_set_urb_message( struct brw_context *brw, 397 struct brw_instruction *insn, 398 GLboolean allocate, 399 GLboolean used, 400 GLuint msg_length, 401 GLuint response_length, 402 GLboolean end_of_thread, 403 GLboolean complete, 404 GLuint offset, 405 GLuint swizzle_control ) 406{ 407 struct intel_context *intel = &brw->intel; 408 brw_set_src1(insn, brw_imm_d(0)); 409 410 if (intel->gen >= 5) { 411 insn->bits3.urb_gen5.opcode = 0; /* ? */ 412 insn->bits3.urb_gen5.offset = offset; 413 insn->bits3.urb_gen5.swizzle_control = swizzle_control; 414 insn->bits3.urb_gen5.allocate = allocate; 415 insn->bits3.urb_gen5.used = used; /* ? */ 416 insn->bits3.urb_gen5.complete = complete; 417 insn->bits3.urb_gen5.header_present = 1; 418 insn->bits3.urb_gen5.response_length = response_length; 419 insn->bits3.urb_gen5.msg_length = msg_length; 420 insn->bits3.urb_gen5.end_of_thread = end_of_thread; 421 if (intel->gen >= 6) { 422 /* For SNB, the SFID bits moved to the condmod bits, and 423 * EOT stayed in bits3 above. Does the EOT bit setting 424 * below on Ironlake even do anything? 425 */ 426 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB; 427 } else { 428 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB; 429 insn->bits2.send_gen5.end_of_thread = end_of_thread; 430 } 431 } else { 432 insn->bits3.urb.opcode = 0; /* ? */ 433 insn->bits3.urb.offset = offset; 434 insn->bits3.urb.swizzle_control = swizzle_control; 435 insn->bits3.urb.allocate = allocate; 436 insn->bits3.urb.used = used; /* ? 
*/ 437 insn->bits3.urb.complete = complete; 438 insn->bits3.urb.response_length = response_length; 439 insn->bits3.urb.msg_length = msg_length; 440 insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB; 441 insn->bits3.urb.end_of_thread = end_of_thread; 442 } 443} 444 445static void brw_set_dp_write_message( struct brw_context *brw, 446 struct brw_instruction *insn, 447 GLuint binding_table_index, 448 GLuint msg_control, 449 GLuint msg_type, 450 GLuint msg_length, 451 GLboolean header_present, 452 GLuint pixel_scoreboard_clear, 453 GLuint response_length, 454 GLuint end_of_thread, 455 GLuint send_commit_msg) 456{ 457 struct intel_context *intel = &brw->intel; 458 brw_set_src1(insn, brw_imm_ud(0)); 459 460 if (intel->gen >= 6) { 461 insn->bits3.dp_render_cache.binding_table_index = binding_table_index; 462 insn->bits3.dp_render_cache.msg_control = msg_control; 463 insn->bits3.dp_render_cache.pixel_scoreboard_clear = pixel_scoreboard_clear; 464 insn->bits3.dp_render_cache.msg_type = msg_type; 465 insn->bits3.dp_render_cache.send_commit_msg = send_commit_msg; 466 insn->bits3.dp_render_cache.header_present = header_present; 467 insn->bits3.dp_render_cache.response_length = response_length; 468 insn->bits3.dp_render_cache.msg_length = msg_length; 469 insn->bits3.dp_render_cache.end_of_thread = end_of_thread; 470 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_WRITE; 471 /* XXX really need below? 
*/ 472 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE; 473 insn->bits2.send_gen5.end_of_thread = end_of_thread; 474 } else if (intel->gen == 5) { 475 insn->bits3.dp_write_gen5.binding_table_index = binding_table_index; 476 insn->bits3.dp_write_gen5.msg_control = msg_control; 477 insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear; 478 insn->bits3.dp_write_gen5.msg_type = msg_type; 479 insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg; 480 insn->bits3.dp_write_gen5.header_present = header_present; 481 insn->bits3.dp_write_gen5.response_length = response_length; 482 insn->bits3.dp_write_gen5.msg_length = msg_length; 483 insn->bits3.dp_write_gen5.end_of_thread = end_of_thread; 484 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE; 485 insn->bits2.send_gen5.end_of_thread = end_of_thread; 486 } else { 487 insn->bits3.dp_write.binding_table_index = binding_table_index; 488 insn->bits3.dp_write.msg_control = msg_control; 489 insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear; 490 insn->bits3.dp_write.msg_type = msg_type; 491 insn->bits3.dp_write.send_commit_msg = send_commit_msg; 492 insn->bits3.dp_write.response_length = response_length; 493 insn->bits3.dp_write.msg_length = msg_length; 494 insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE; 495 insn->bits3.dp_write.end_of_thread = end_of_thread; 496 } 497} 498 499static void brw_set_dp_read_message( struct brw_context *brw, 500 struct brw_instruction *insn, 501 GLuint binding_table_index, 502 GLuint msg_control, 503 GLuint msg_type, 504 GLuint target_cache, 505 GLuint msg_length, 506 GLuint response_length, 507 GLuint end_of_thread ) 508{ 509 struct intel_context *intel = &brw->intel; 510 brw_set_src1(insn, brw_imm_d(0)); 511 512 if (intel->gen == 5) { 513 insn->bits3.dp_read_gen5.binding_table_index = binding_table_index; 514 insn->bits3.dp_read_gen5.msg_control = msg_control; 515 insn->bits3.dp_read_gen5.msg_type = 
msg_type; 516 insn->bits3.dp_read_gen5.target_cache = target_cache; 517 insn->bits3.dp_read_gen5.header_present = 1; 518 insn->bits3.dp_read_gen5.response_length = response_length; 519 insn->bits3.dp_read_gen5.msg_length = msg_length; 520 insn->bits3.dp_read_gen5.pad1 = 0; 521 insn->bits3.dp_read_gen5.end_of_thread = end_of_thread; 522 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ; 523 insn->bits2.send_gen5.end_of_thread = end_of_thread; 524 } else { 525 insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/ 526 insn->bits3.dp_read.msg_control = msg_control; /*8:11*/ 527 insn->bits3.dp_read.msg_type = msg_type; /*12:13*/ 528 insn->bits3.dp_read.target_cache = target_cache; /*14:15*/ 529 insn->bits3.dp_read.response_length = response_length; /*16:19*/ 530 insn->bits3.dp_read.msg_length = msg_length; /*20:23*/ 531 insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/ 532 insn->bits3.dp_read.pad1 = 0; /*28:30*/ 533 insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/ 534 } 535} 536 537static void brw_set_sampler_message(struct brw_context *brw, 538 struct brw_instruction *insn, 539 GLuint binding_table_index, 540 GLuint sampler, 541 GLuint msg_type, 542 GLuint response_length, 543 GLuint msg_length, 544 GLboolean eot, 545 GLuint header_present, 546 GLuint simd_mode) 547{ 548 struct intel_context *intel = &brw->intel; 549 assert(eot == 0); 550 brw_set_src1(insn, brw_imm_d(0)); 551 552 if (intel->gen >= 5) { 553 insn->bits3.sampler_gen5.binding_table_index = binding_table_index; 554 insn->bits3.sampler_gen5.sampler = sampler; 555 insn->bits3.sampler_gen5.msg_type = msg_type; 556 insn->bits3.sampler_gen5.simd_mode = simd_mode; 557 insn->bits3.sampler_gen5.header_present = header_present; 558 insn->bits3.sampler_gen5.response_length = response_length; 559 insn->bits3.sampler_gen5.msg_length = msg_length; 560 insn->bits3.sampler_gen5.end_of_thread = eot; 561 if (intel->gen >= 6) 562 
insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_SAMPLER; 563 else { 564 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER; 565 insn->bits2.send_gen5.end_of_thread = eot; 566 } 567 } else if (intel->is_g4x) { 568 insn->bits3.sampler_g4x.binding_table_index = binding_table_index; 569 insn->bits3.sampler_g4x.sampler = sampler; 570 insn->bits3.sampler_g4x.msg_type = msg_type; 571 insn->bits3.sampler_g4x.response_length = response_length; 572 insn->bits3.sampler_g4x.msg_length = msg_length; 573 insn->bits3.sampler_g4x.end_of_thread = eot; 574 insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER; 575 } else { 576 insn->bits3.sampler.binding_table_index = binding_table_index; 577 insn->bits3.sampler.sampler = sampler; 578 insn->bits3.sampler.msg_type = msg_type; 579 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; 580 insn->bits3.sampler.response_length = response_length; 581 insn->bits3.sampler.msg_length = msg_length; 582 insn->bits3.sampler.end_of_thread = eot; 583 insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER; 584 } 585} 586 587 588 589static struct brw_instruction *next_insn( struct brw_compile *p, 590 GLuint opcode ) 591{ 592 struct brw_instruction *insn; 593 594 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN); 595 596 insn = &p->store[p->nr_insn++]; 597 memcpy(insn, p->current, sizeof(*insn)); 598 599 /* Reset this one-shot flag: 600 */ 601 602 if (p->current->header.destreg__conditionalmod) { 603 p->current->header.destreg__conditionalmod = 0; 604 p->current->header.predicate_control = BRW_PREDICATE_NORMAL; 605 } 606 607 insn->header.opcode = opcode; 608 return insn; 609} 610 611 612static struct brw_instruction *brw_alu1( struct brw_compile *p, 613 GLuint opcode, 614 struct brw_reg dest, 615 struct brw_reg src ) 616{ 617 struct brw_instruction *insn = next_insn(p, opcode); 618 brw_set_dest(insn, dest); 619 brw_set_src0(insn, src); 620 return insn; 621} 622 623static struct brw_instruction *brw_alu2(struct 
brw_compile *p, 624 GLuint opcode, 625 struct brw_reg dest, 626 struct brw_reg src0, 627 struct brw_reg src1 ) 628{ 629 struct brw_instruction *insn = next_insn(p, opcode); 630 brw_set_dest(insn, dest); 631 brw_set_src0(insn, src0); 632 brw_set_src1(insn, src1); 633 return insn; 634} 635 636 637/*********************************************************************** 638 * Convenience routines. 639 */ 640#define ALU1(OP) \ 641struct brw_instruction *brw_##OP(struct brw_compile *p, \ 642 struct brw_reg dest, \ 643 struct brw_reg src0) \ 644{ \ 645 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \ 646} 647 648#define ALU2(OP) \ 649struct brw_instruction *brw_##OP(struct brw_compile *p, \ 650 struct brw_reg dest, \ 651 struct brw_reg src0, \ 652 struct brw_reg src1) \ 653{ \ 654 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \ 655} 656 657/* Rounding operations (other than RNDD) require two instructions - the first 658 * stores a rounded value (possibly the wrong way) in the dest register, but 659 * also sets a per-channel "increment bit" in the flag register. A predicated 660 * add of 1.0 fixes dest to contain the desired result. 
661 */ 662#define ROUND(OP) \ 663void brw_##OP(struct brw_compile *p, \ 664 struct brw_reg dest, \ 665 struct brw_reg src) \ 666{ \ 667 struct brw_instruction *rnd, *add; \ 668 rnd = next_insn(p, BRW_OPCODE_##OP); \ 669 brw_set_dest(rnd, dest); \ 670 brw_set_src0(rnd, src); \ 671 rnd->header.destreg__conditionalmod = 0x7; /* turn on round-increments */ \ 672 \ 673 add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \ 674 add->header.predicate_control = BRW_PREDICATE_NORMAL; \ 675} 676 677 678ALU1(MOV) 679ALU2(SEL) 680ALU1(NOT) 681ALU2(AND) 682ALU2(OR) 683ALU2(XOR) 684ALU2(SHR) 685ALU2(SHL) 686ALU2(RSR) 687ALU2(RSL) 688ALU2(ASR) 689ALU1(FRC) 690ALU1(RNDD) 691ALU2(MAC) 692ALU2(MACH) 693ALU1(LZD) 694ALU2(DP4) 695ALU2(DPH) 696ALU2(DP3) 697ALU2(DP2) 698ALU2(LINE) 699ALU2(PLN) 700 701 702ROUND(RNDZ) 703ROUND(RNDE) 704 705 706struct brw_instruction *brw_ADD(struct brw_compile *p, 707 struct brw_reg dest, 708 struct brw_reg src0, 709 struct brw_reg src1) 710{ 711 /* 6.2.2: add */ 712 if (src0.type == BRW_REGISTER_TYPE_F || 713 (src0.file == BRW_IMMEDIATE_VALUE && 714 src0.type == BRW_REGISTER_TYPE_VF)) { 715 assert(src1.type != BRW_REGISTER_TYPE_UD); 716 assert(src1.type != BRW_REGISTER_TYPE_D); 717 } 718 719 if (src1.type == BRW_REGISTER_TYPE_F || 720 (src1.file == BRW_IMMEDIATE_VALUE && 721 src1.type == BRW_REGISTER_TYPE_VF)) { 722 assert(src0.type != BRW_REGISTER_TYPE_UD); 723 assert(src0.type != BRW_REGISTER_TYPE_D); 724 } 725 726 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1); 727} 728 729struct brw_instruction *brw_MUL(struct brw_compile *p, 730 struct brw_reg dest, 731 struct brw_reg src0, 732 struct brw_reg src1) 733{ 734 /* 6.32.38: mul */ 735 if (src0.type == BRW_REGISTER_TYPE_D || 736 src0.type == BRW_REGISTER_TYPE_UD || 737 src1.type == BRW_REGISTER_TYPE_D || 738 src1.type == BRW_REGISTER_TYPE_UD) { 739 assert(dest.type != BRW_REGISTER_TYPE_F); 740 } 741 742 if (src0.type == BRW_REGISTER_TYPE_F || 743 (src0.file == BRW_IMMEDIATE_VALUE && 744 src0.type == 
BRW_REGISTER_TYPE_VF)) { 745 assert(src1.type != BRW_REGISTER_TYPE_UD); 746 assert(src1.type != BRW_REGISTER_TYPE_D); 747 } 748 749 if (src1.type == BRW_REGISTER_TYPE_F || 750 (src1.file == BRW_IMMEDIATE_VALUE && 751 src1.type == BRW_REGISTER_TYPE_VF)) { 752 assert(src0.type != BRW_REGISTER_TYPE_UD); 753 assert(src0.type != BRW_REGISTER_TYPE_D); 754 } 755 756 assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE || 757 src0.nr != BRW_ARF_ACCUMULATOR); 758 assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE || 759 src1.nr != BRW_ARF_ACCUMULATOR); 760 761 return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1); 762} 763 764 765void brw_NOP(struct brw_compile *p) 766{ 767 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP); 768 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 769 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 770 brw_set_src1(insn, brw_imm_ud(0x0)); 771} 772 773 774 775 776 777/*********************************************************************** 778 * Comparisons, if/else/endif 779 */ 780 781struct brw_instruction *brw_JMPI(struct brw_compile *p, 782 struct brw_reg dest, 783 struct brw_reg src0, 784 struct brw_reg src1) 785{ 786 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); 787 788 insn->header.execution_size = 1; 789 insn->header.compression_control = BRW_COMPRESSION_NONE; 790 insn->header.mask_control = BRW_MASK_DISABLE; 791 792 p->current->header.predicate_control = BRW_PREDICATE_NONE; 793 794 return insn; 795} 796 797/* EU takes the value from the flag register and pushes it onto some 798 * sort of a stack (presumably merging with any flag value already on 799 * the stack). Within an if block, the flags at the top of the stack 800 * control execution on each channel of the unit, eg. on each of the 801 * 16 pixel values in our wm programs. 
802 * 803 * When the matching 'else' instruction is reached (presumably by 804 * countdown of the instruction count patched in by our ELSE/ENDIF 805 * functions), the relevent flags are inverted. 806 * 807 * When the matching 'endif' instruction is reached, the flags are 808 * popped off. If the stack is now empty, normal execution resumes. 809 * 810 * No attempt is made to deal with stack overflow (14 elements?). 811 */ 812struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size) 813{ 814 struct intel_context *intel = &p->brw->intel; 815 struct brw_instruction *insn; 816 817 if (p->single_program_flow) { 818 assert(execute_size == BRW_EXECUTE_1); 819 820 insn = next_insn(p, BRW_OPCODE_ADD); 821 insn->header.predicate_inverse = 1; 822 } else { 823 insn = next_insn(p, BRW_OPCODE_IF); 824 } 825 826 /* Override the defaults for this instruction: 827 */ 828 if (intel->gen < 6) { 829 brw_set_dest(insn, brw_ip_reg()); 830 brw_set_src0(insn, brw_ip_reg()); 831 brw_set_src1(insn, brw_imm_d(0x0)); 832 } else { 833 brw_set_dest(insn, brw_imm_w(0)); 834 insn->bits1.branch_gen6.jump_count = 0; 835 brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 836 brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 837 } 838 839 insn->header.execution_size = execute_size; 840 insn->header.compression_control = BRW_COMPRESSION_NONE; 841 insn->header.predicate_control = BRW_PREDICATE_NORMAL; 842 insn->header.mask_control = BRW_MASK_ENABLE; 843 if (!p->single_program_flow) 844 insn->header.thread_control = BRW_THREAD_SWITCH; 845 846 p->current->header.predicate_control = BRW_PREDICATE_NONE; 847 848 return insn; 849} 850 851struct brw_instruction * 852brw_IF_gen6(struct brw_compile *p, uint32_t conditional, 853 struct brw_reg src0, struct brw_reg src1) 854{ 855 struct brw_instruction *insn; 856 857 insn = next_insn(p, BRW_OPCODE_IF); 858 859 brw_set_dest(insn, brw_imm_w(0)); 860 insn->header.execution_size = BRW_EXECUTE_8; 861 
   /* NOTE(review): tail of the IF emitter — the function's opening (and the
    * gen6 branch that this jump_count write belongs to) is above this chunk.
    */
   insn->bits1.branch_gen6.jump_count = 0;
   brw_set_src0(insn, src0);
   brw_set_src1(insn, src1);

   assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
   assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
   insn->header.destreg__conditionalmod = conditional;

   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   return insn;
}

/**
 * Emit an ELSE instruction and patch the matching IF to jump to it.
 *
 * In single_program_flow mode an ADD-to-IP is emitted instead of a real
 * ELSE (no mask-stack manipulation is wanted), and the placeholder ADD
 * emitted for the IF is patched with a byte offset.  Otherwise the IF's
 * jump_count is patched in instruction-sized chunks.
 *
 * Returns the new instruction so brw_ENDIF() can patch it in turn.
 */
struct brw_instruction *brw_ELSE(struct brw_compile *p,
                                 struct brw_instruction *if_insn)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint br = 1;

   /* jump count is for 64bit data chunk each, so one 128bit
      instruction requires 2 chunks. */
   if (intel->gen >= 5)
      br = 2;

   if (p->single_program_flow) {
      insn = next_insn(p, BRW_OPCODE_ADD);
   } else {
      insn = next_insn(p, BRW_OPCODE_ELSE);
   }

   if (intel->gen < 6) {
      brw_set_dest(insn, brw_ip_reg());
      brw_set_src0(insn, brw_ip_reg());
      brw_set_src1(insn, brw_imm_d(0x0));
   } else {
      /* Gen6 branch encoding: dest is an immediate word and the jump
       * target lives in bits1.branch_gen6.jump_count (patched by ENDIF).
       */
      brw_set_dest(insn, brw_imm_w(0));
      insn->bits1.branch_gen6.jump_count = 0;
      brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   }

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = if_insn->header.execution_size;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Patch the if instruction to point at this instruction.
    */
   if (p->single_program_flow) {
      assert(if_insn->header.opcode == BRW_OPCODE_ADD);

      /* Byte offset: instructions are 16 bytes each. */
      if_insn->bits3.ud = (insn - if_insn + 1) * 16;
   } else {
      assert(if_insn->header.opcode == BRW_OPCODE_IF);

      if (intel->gen < 6) {
         if_insn->bits3.if_else.jump_count = br * (insn - if_insn);
         if_insn->bits3.if_else.pop_count = 0;
         if_insn->bits3.if_else.pad0 = 0;
      } else {
         if_insn->bits1.branch_gen6.jump_count = br * (insn - if_insn + 1);
      }
   }

   return insn;
}

/**
 * Terminate an IF/ELSE block.
 *
 * In single_program_flow mode nothing is emitted; the IF's placeholder
 * ADD is patched to jump here.  Otherwise an ENDIF is emitted and the
 * matching IF (turned into IFF pre-gen6) or ELSE has its jump_count
 * patched to reach this instruction.
 */
void brw_ENDIF(struct brw_compile *p,
               struct brw_instruction *patch_insn)
{
   struct intel_context *intel = &p->brw->intel;
   GLuint br = 1;

   /* Pre-gen5 jump counts are in 64-bit chunks; gen5+ counts whole
    * 128-bit instructions as 2.
    */
   if (intel->gen >= 5)
      br = 2;

   if (p->single_program_flow) {
      /* In single program flow mode, there's no need to execute an ENDIF,
       * since we don't need to do any stack operations, and if we're executing
       * currently, we want to just continue executing.
       */
      struct brw_instruction *next = &p->store[p->nr_insn];

      assert(patch_insn->header.opcode == BRW_OPCODE_ADD);

      patch_insn->bits3.ud = (next - patch_insn) * 16;
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);

      if (intel->gen < 6) {
         brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
         brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
         brw_set_src1(insn, brw_imm_d(0x0));
      } else {
         brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_W));
         brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
         brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      }

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = patch_insn->header.execution_size;
      insn->header.mask_control = BRW_MASK_ENABLE;
      insn->header.thread_control = BRW_THREAD_SWITCH;

      /* The IF/ELSE must still carry its emit-time placeholder. */
      if (intel->gen < 6)
         assert(patch_insn->bits3.if_else.jump_count == 0);
      else
         assert(patch_insn->bits1.branch_gen6.jump_count == 0);

      /* Patch the if or else instructions to point at this or the next
       * instruction respectively.
       */
      if (patch_insn->header.opcode == BRW_OPCODE_IF) {
         if (intel->gen < 6) {
            /* Turn it into an IFF, which means no mask stack operations for
             * all-false and jumping past the ENDIF.
             */
            patch_insn->header.opcode = BRW_OPCODE_IFF;
            patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
            patch_insn->bits3.if_else.pop_count = 0;
            patch_insn->bits3.if_else.pad0 = 0;
         } else {
            /* As of gen6, there is no IFF and IF must point to the ENDIF. */
            patch_insn->bits1.branch_gen6.jump_count = br * (insn - patch_insn);
         }
      } else {
         assert(patch_insn->header.opcode == BRW_OPCODE_ELSE);
         if (intel->gen < 6) {
            /* BRW_OPCODE_ELSE pre-gen6 should point just past the
             * matching ENDIF.
             */
            patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
            patch_insn->bits3.if_else.pop_count = 1;
            patch_insn->bits3.if_else.pad0 = 0;
         } else {
            /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
            patch_insn->bits1.branch_gen6.jump_count = br * (insn - patch_insn);
         }
      }

      /* Also pop item off the stack in the endif instruction:
       */
      if (intel->gen < 6) {
         insn->bits3.if_else.jump_count = 0;
         insn->bits3.if_else.pop_count = 1;
         insn->bits3.if_else.pad0 = 0;
      } else {
         insn->bits1.branch_gen6.jump_count = 2;
      }
   }
}

/**
 * Emit a BREAK; pop_count is the number of nesting levels to pop off
 * the mask stack when the break is taken.
 */
struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
{
   struct brw_instruction *insn;
   insn = next_insn(p, BRW_OPCODE_BREAK);
   brw_set_dest(insn, brw_ip_reg());
   brw_set_src0(insn, brw_ip_reg());
   brw_set_src1(insn, brw_imm_d(0x0));
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;
   /* insn->header.mask_control = BRW_MASK_DISABLE; */
   insn->bits3.if_else.pad0 = 0;
   insn->bits3.if_else.pop_count = pop_count;
   return insn;
}

/**
 * Emit a CONTINUE; mirrors brw_BREAK() but with the CONTINUE opcode.
 */
struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
{
   struct brw_instruction *insn;
   insn = next_insn(p, BRW_OPCODE_CONTINUE);
   brw_set_dest(insn, brw_ip_reg());
   brw_set_src0(insn, brw_ip_reg());
   brw_set_src1(insn, brw_imm_d(0x0));
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;
   /* insn->header.mask_control = BRW_MASK_DISABLE; */
   insn->bits3.if_else.pad0 = 0;
   insn->bits3.if_else.pop_count = pop_count;
   return insn;
}

/* DO/WHILE loop:
 *
 * In single_program_flow mode no DO instruction is emitted at all; the
 * address of the next instruction slot is returned as the loop-top
 * marker for brw_WHILE() to jump back to.
 */
struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
{
   if (p->single_program_flow) {
      return &p->store[p->nr_insn];
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);

      /* Override the defaults for this instruction:
       */
      brw_set_dest(insn, brw_null_reg());
      brw_set_src0(insn, brw_null_reg());
      brw_set_src1(insn, brw_null_reg());

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = execute_size;
      insn->header.predicate_control = BRW_PREDICATE_NONE;
      /* insn->header.mask_control = BRW_MASK_ENABLE; */
      /* insn->header.mask_control = BRW_MASK_DISABLE; */

      return insn;
   }
}

/**
 * Close a DO/WHILE loop.
 *
 * In single_program_flow mode a predicated ADD-to-IP with a (negative)
 * byte offset back to do_insn is emitted instead of a real WHILE.
 * Otherwise the WHILE's jump_count is encoded as a backwards count of
 * instruction chunks to the DO.
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p,
                                  struct brw_instruction *do_insn)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint br = 1;

   /* gen5+ counts one 128-bit instruction as 2 jump chunks. */
   if (intel->gen >= 5)
      br = 2;

   if (p->single_program_flow)
      insn = next_insn(p, BRW_OPCODE_ADD);
   else
      insn = next_insn(p, BRW_OPCODE_WHILE);

   brw_set_dest(insn, brw_ip_reg());
   brw_set_src0(insn, brw_ip_reg());
   brw_set_src1(insn, brw_imm_d(0x0));

   insn->header.compression_control = BRW_COMPRESSION_NONE;

   if (p->single_program_flow) {
      insn->header.execution_size = BRW_EXECUTE_1;

      /* Signed byte offset back to the loop top (16 bytes/insn). */
      insn->bits3.d = (do_insn - insn) * 16;
   } else {
      insn->header.execution_size = do_insn->header.execution_size;

      assert(do_insn->header.opcode == BRW_OPCODE_DO);
      insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
      insn->bits3.if_else.pop_count = 0;
      insn->bits3.if_else.pad0 = 0;
   }

/* insn->header.mask_control = BRW_MASK_ENABLE; */

   /* insn->header.mask_control = BRW_MASK_DISABLE; */
   /* The WHILE itself consumed the loop predicate; clear predication in
    * the default instruction state so subsequent instructions are
    * unpredicated (p->current is the template used by next_insn —
    * NOTE(review): confirm against the state-stack helpers).
    */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;
   return insn;
}


/* FORWARD JUMPS:
 */
/**
 * Patch a previously emitted forward JMPI (with immediate src1) so it
 * lands on the next instruction to be emitted.  The -1 accounts for
 * JMPI's offset being relative to the incremented IP.
 */
void brw_land_fwd_jump(struct brw_compile *p,
                       struct brw_instruction *jmp_insn)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *landing = &p->store[p->nr_insn];
   GLuint jmpi = 1;

   /* gen5+ counts a 128-bit instruction as 2 jump units. */
   if (intel->gen >= 5)
      jmpi = 2;

   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
   assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);

   jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
}



/* To integrate with the above, it makes sense that the comparison
 * instruction should populate the flag register.  It might be simpler
 * just to use the flag reg for most WM tasks?
 */
/**
 * Emit a CMP with the given conditional modifier.  When dest is the
 * null register (ARF, nr 0) the result only lands in the flag register,
 * and the default state is switched to normal predication.
 */
void brw_CMP(struct brw_compile *p,
             struct brw_reg dest,
             GLuint conditional,
             struct brw_reg src0,
             struct brw_reg src1)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);

   insn->header.destreg__conditionalmod = conditional;
   brw_set_dest(insn, dest);
   brw_set_src0(insn, src0);
   brw_set_src1(insn, src1);

/*    guess_execution_size(insn, src0); */


   /* Make it so that future instructions will use the computed flag
    * value until brw_set_predicate_control_flag_value() is called
    * again.
    */
   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.nr == 0) {
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
      p->flag_value = 0xff;
   }
}

/* Issue 'wait' instruction for n1, host could program MMIO
   to wake up thread. */
void brw_WAIT (struct brw_compile *p)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
   struct brw_reg src = brw_notification_1_reg();

   /* WAIT uses the notification register as both dest and src0. */
   brw_set_dest(insn, src);
   brw_set_src0(insn, src);
   brw_set_src1(insn, brw_null_reg());
   insn->header.execution_size = 0; /* must */
   insn->header.predicate_control = 0;
   insn->header.compression_control = 0;
}


/***********************************************************************
 * Helpers for the various SEND message types:
 */

/** Extended math function, float[8].
 *
 * On gen6+ this emits a native MATH instruction (GRF-to-GRF, function
 * selected via the CondModifier/ThreadCtrl bits); on earlier gens it
 * emits a SEND to the shared math unit via msg_reg_nr.
 */
void brw_math( struct brw_compile *p,
               struct brw_reg dest,
               GLuint function,
               GLuint saturate,
               GLuint msg_reg_nr,
               struct brw_reg src,
               GLuint data_type,
               GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;

   if (intel->gen >= 6) {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

      /* The gen6 MATH instruction only operates on packed GRFs. */
      assert(dest.file == BRW_GENERAL_REGISTER_FILE);
      assert(src.file == BRW_GENERAL_REGISTER_FILE);

      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
      assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);

      /* All functions except integer divide take float sources. */
      if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
          function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
         assert(src.type == BRW_REGISTER_TYPE_F);
      }

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;

      brw_set_dest(insn, dest);
      brw_set_src0(insn, src);
      brw_set_src1(insn, brw_null_reg());
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      /* POW takes two operands, SINCOS returns two results. */
      GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
      GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
      /* Example code doesn't set predicate_control for send
       * instructions.
       */
      insn->header.predicate_control = 0;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(insn, dest);
      brw_set_src0(insn, src);
      brw_set_math_message(p->brw,
                           insn,
                           msg_length, response_length,
                           function,
                           BRW_MATH_INTEGER_UNSIGNED,
                           precision,
                           saturate,
                           data_type);
   }
}

/** Extended math function, float[8].
 *
 * Two-source variant (e.g. POW, INT_DIV); gen6+ only, since only the
 * native MATH instruction takes two register operands.
 */
void brw_math2(struct brw_compile *p,
               struct brw_reg dest,
               GLuint function,
               struct brw_reg src0,
               struct brw_reg src1)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

   assert(intel->gen >= 6);
   (void) intel;   /* only referenced by the assert in release builds */


   /* MATH only operates on packed GRFs. */
   assert(dest.file == BRW_GENERAL_REGISTER_FILE);
   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.file == BRW_GENERAL_REGISTER_FILE);

   assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
   assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
   assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);

   /* All functions except integer divide take float sources. */
   if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
       function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
      assert(src0.type == BRW_REGISTER_TYPE_F);
      assert(src1.type == BRW_REGISTER_TYPE_F);
   }

   /* Math is the same ISA format as other opcodes, except that CondModifier
    * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
    */
   insn->header.destreg__conditionalmod = function;

   brw_set_dest(insn, dest);
   brw_set_src0(insn, src0);
   brw_set_src1(insn, src1);
}

/**
 * Extended math function, float[16].
 * Use 2 send instructions.
 */
void brw_math_16( struct brw_compile *p,
                  struct brw_reg dest,
                  GLuint function,
                  GLuint saturate,
                  GLuint msg_reg_nr,
                  struct brw_reg src,
                  GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   /* POW takes two operands, SINCOS returns two results. */
   GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
   GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;

   if (intel->gen >= 6) {
      /* gen6+ handles SIMD16 in one native MATH instruction. */
      insn = next_insn(p, BRW_OPCODE_MATH);

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;

      brw_set_dest(insn, dest);
      brw_set_src0(insn, src);
      brw_set_src1(insn, brw_null_reg());
      return;
   }

   /* First instruction:
    */
   brw_push_insn_state(p);
   brw_set_predicate_control_flag_value(p, 0xff);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_dest(insn, dest);
   brw_set_src0(insn, src);
   brw_set_math_message(p->brw,
                        insn,
                        msg_length, response_length,
                        function,
                        BRW_MATH_INTEGER_UNSIGNED,
                        precision,
                        saturate,
                        BRW_MATH_DATA_VECTOR);

   /* Second instruction:
    */
   insn = next_insn(p, BRW_OPCODE_SEND);
   /* Second half of the SIMD16 operation: next MRF, next dest reg. */
   insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
   insn->header.destreg__conditionalmod = msg_reg_nr+1;

   brw_set_dest(insn, offset(dest,1));
   brw_set_src0(insn, src);
   brw_set_math_message(p->brw,
                        insn,
                        msg_length, response_length,
                        function,
                        BRW_MATH_INTEGER_UNSIGNED,
                        precision,
                        saturate,
                        BRW_MATH_DATA_VECTOR);

   brw_pop_insn_state(p);
}


/**
 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
 * using a constant offset per channel.
 *
 * The offset must be aligned to oword size (16 bytes).  Used for
 * register spilling.
 *
 * \param mrf       first message register; the header is built in it and
 *                  the payload is expected in the following MRF(s)
 * \param num_regs  number of GRFs to write (1 or 2)
 * \param offset    oword-aligned byte offset into the scratch buffer
 */
void brw_oword_block_write(struct brw_compile *p,
                           struct brw_reg mrf,
                           int num_regs,
                           GLuint offset)
{
   struct intel_context *intel = &p->brw->intel;
   uint32_t msg_control;
   int mlen;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* mlen = 1 header reg + num_regs payload regs. */
   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      mlen = 2;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      mlen = 3;
   }

   /* Set up the message header.  This is g0, with g0.2 filled with
    * the offset.  We don't want to leave our offset around in g0 or
    * it'll screw up texture samples, so set it up inside the message
    * reg.
    */
   {
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
              retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                                  mrf.nr,
                                  2), BRW_REGISTER_TYPE_UD),
              brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_reg dest;
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      int send_commit_msg;
      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
                                         BRW_REGISTER_TYPE_UW);

      if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
         insn->header.compression_control = BRW_COMPRESSION_NONE;
         src_header = vec16(src_header);
      }
      assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
      insn->header.destreg__conditionalmod = mrf.nr;

      /* Until gen6, writes followed by reads from the same location
       * are not guaranteed to be ordered unless write_commit is set.
       * If set, then a no-op write is issued to the destination
       * register to set a dependency, and a read from the destination
       * can be used to ensure the ordering.
       *
       * For gen6, only writes between different threads need ordering
       * protection.  Our use of DP writes is all about register
       * spilling within a thread.
       */
      if (intel->gen >= 6) {
         dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
         send_commit_msg = 0;
      } else {
         dest = src_header;
         send_commit_msg = 1;
      }

      brw_set_dest(insn, dest);
      brw_set_src0(insn, brw_null_reg());

      brw_set_dp_write_message(p->brw,
                               insn,
                               255, /* binding table index (255=stateless) */
                               msg_control,
                               BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
                               mlen,
                               GL_TRUE, /* header_present */
                               0, /* pixel scoreboard */
                               send_commit_msg, /* response_length */
                               0, /* eot */
                               send_commit_msg);
   }
}


/**
 * Read a block of owords (half a GRF each) from the scratch buffer
 * using a constant index per channel.
 *
 * Offset must be aligned to oword size (16 bytes).  Used for register
 * spilling.
 *
 * \param dest      first GRF to receive the data
 * \param mrf       message register used to build the header
 * \param num_regs  number of GRFs to read (1 or 2)
 * \param offset    oword-aligned byte offset into the scratch buffer
 */
void
brw_oword_block_read(struct brw_compile *p,
                     struct brw_reg dest,
                     struct brw_reg mrf,
                     int num_regs,
                     GLuint offset)
{
   uint32_t msg_control;
   int rlen;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
   dest = retype(dest, BRW_REGISTER_TYPE_UW);

   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      rlen = 1;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      rlen = 2;
   }

   /* Build the message header: g0 copied into the MRF with the scratch
    * offset written to element 2 (same scheme as brw_oword_block_write).
    */
   {
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_mask_control(p, BRW_MASK_DISABLE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
              retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                                  mrf.nr,
                                  2), BRW_REGISTER_TYPE_UD),
              brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      assert(insn->header.predicate_control == 0);
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.destreg__conditionalmod = mrf.nr;

      brw_set_dest(insn, dest);        /* UW? */
      brw_set_src0(insn, brw_null_reg());

      brw_set_dp_read_message(p->brw,
                              insn,
                              255, /* binding table index (255=stateless) */
                              msg_control,
                              BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
                              1, /* target cache (render/scratch) */
                              1, /* msg_length */
                              rlen,
                              0); /* eot */
   }
}


/**
 * Read a float[4] vector from the data port Data Cache (const buffer).
 * Location (in buffer) should be a multiple of 16.
 * Used for fetching shader constants.
 * If relAddr is true, we'll do an indirect fetch using the address register.
 */
void brw_dp_READ_4( struct brw_compile *p,
                    struct brw_reg dest,
                    GLboolean relAddr,
                    GLuint location,
                    GLuint bind_table_index )
{
   /* XXX: relAddr not implemented */
   GLuint msg_reg_nr = 1;
   {
      struct brw_reg b;
      brw_push_insn_state(p);
      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_mask_control(p, BRW_MASK_DISABLE);

      /* Setup MRF[1] with location/offset into const buffer */
      b = brw_message_reg(msg_reg_nr);
      b = retype(b, BRW_REGISTER_TYPE_UD);
      /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
       * when the docs say only dword[2] should be set.  Hmmm.  But it works.
       */
      brw_MOV(p, b, brw_imm_ud(location));
      brw_pop_insn_state(p);
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      insn->header.predicate_control = BRW_PREDICATE_NONE;
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.destreg__conditionalmod = msg_reg_nr;
      insn->header.mask_control = BRW_MASK_DISABLE;

      /* cast dest to a uword[8] vector */
      dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

      brw_set_dest(insn, dest);
      brw_set_src0(insn, brw_null_reg());

      brw_set_dp_read_message(p->brw,
                              insn,
                              bind_table_index,
                              0,  /* msg_control (0 means 1 Oword) */
                              BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
                              0, /* source cache = data cache */
                              1, /* msg_length */
                              1, /* response_length (1 Oword) */
                              0); /* eot */
   }
}


/**
 * Read float[4] constant(s) from VS constant buffer.
 * For relative addressing, two float[4] constants will be read into 'dest'.
 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
 */
void brw_dp_READ_4_vs(struct brw_compile *p,
                      struct brw_reg dest,
                      GLuint location,
                      GLuint bind_table_index)
{
   struct brw_instruction *insn;
   GLuint msg_reg_nr = 1;
   struct brw_reg b;

   /*
   printf("vs const read msg, location %u, msg_reg_nr %d\n",
          location, msg_reg_nr);
   */

   /* Setup MRF[1] with location/offset into const buffer */
   brw_push_insn_state(p);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);

   /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
    * when the docs say only dword[2] should be set.  Hmmm.  But it works.
    */
   b = brw_message_reg(msg_reg_nr);
   b = retype(b, BRW_REGISTER_TYPE_UD);
   /*b = get_element_ud(b, 2);*/
   brw_MOV(p, b, brw_imm_ud(location));

   brw_pop_insn_state(p);

   insn = next_insn(p, BRW_OPCODE_SEND);

   insn->header.predicate_control = BRW_PREDICATE_NONE;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.destreg__conditionalmod = msg_reg_nr;
   insn->header.mask_control = BRW_MASK_DISABLE;

   brw_set_dest(insn, dest);
   brw_set_src0(insn, brw_null_reg());

   brw_set_dp_read_message(p->brw,
                           insn,
                           bind_table_index,
                           0,
                           BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
                           0, /* source cache = data cache */
                           1, /* msg_length */
                           1, /* response_length (1 Oword) */
                           0); /* eot */
}

/**
 * Read a float[4] constant per vertex from VS constant buffer, with
 * relative addressing.
 */
void brw_dp_READ_4_vs_relative(struct brw_compile *p,
                               struct brw_reg dest,
                               struct brw_reg addr_reg,
                               GLuint offset,
                               GLuint bind_table_index)
{
   struct intel_context *intel = &p->brw->intel;
   int msg_type;

   /* Setup MRF[1] with offset into const buffer */
   brw_push_insn_state(p);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_predicate_control(p, BRW_PREDICATE_NONE);

   /* M1.0 is block offset 0, M1.4 is block offset 1, all other
    * fields ignored.
    */
   brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD),
           addr_reg, brw_imm_d(offset));
   brw_pop_insn_state(p);

   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

   insn->header.predicate_control = BRW_PREDICATE_NONE;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.destreg__conditionalmod = 0;
   insn->header.mask_control = BRW_MASK_DISABLE;

   brw_set_dest(insn, dest);
   brw_set_src0(insn, brw_vec8_grf(0, 0));

   /* The dual-block-read message type encoding changed on G4x/gen5 and
    * again on gen6.
    */
   if (intel->gen == 6)
      msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else if (intel->gen == 5 || intel->is_g4x)
      msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   else
      msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;

   brw_set_dp_read_message(p->brw,
                           insn,
                           bind_table_index,
                           BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
                           msg_type,
                           0, /* source cache = data cache */
                           2, /* msg_length */
                           1, /* response_length */
                           0); /* eot */
}


/**
 * Emit a render-target write SEND.
 *
 * On gen6+ a 4-register message is sent headerless and the payload is
 * taken from msg_reg_nr; pre-gen6 the starting MRF goes in
 * destreg__conditionalmod and a header is always present.
 */
void brw_fb_WRITE(struct brw_compile *p,
                  int dispatch_width,
                  struct brw_reg dest,
                  GLuint msg_reg_nr,
                  struct brw_reg src0,
                  GLuint binding_table_index,
                  GLuint msg_length,
                  GLuint response_length,
                  GLboolean eot)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint msg_control, msg_type;
   GLboolean header_present = GL_TRUE;

   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.predicate_control = 0; /* XXX */
   insn->header.compression_control = BRW_COMPRESSION_NONE;

   if (intel->gen >= 6) {
      if (msg_length == 4)
         header_present = GL_FALSE;

      /* headerless version, just submit color payload */
      src0 = brw_message_reg(msg_reg_nr);

      msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE_GEN6;
   } else {
      insn->header.destreg__conditionalmod = msg_reg_nr;

      msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
   }

   if (dispatch_width == 16)
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
   else
      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;

   brw_set_dest(insn, dest);
   brw_set_src0(insn, src0);
   brw_set_dp_write_message(p->brw,
                            insn,
                            binding_table_index,
                            msg_control,
                            msg_type,
                            msg_length,
                            header_present,
                            1,  /* pixel scoreboard */
                            response_length,
                            eot,
                            0 /* send_commit_msg */);
}


/**
 * Texture sample instruction.
 * Note: the msg_type plus msg_length values determine exactly what kind
 * of sampling operation is performed.  See volume 4, page 161 of docs.
 */
void brw_SAMPLE(struct brw_compile *p,
                struct brw_reg dest,
                GLuint msg_reg_nr,
                struct brw_reg src0,
                GLuint binding_table_index,
                GLuint sampler,
                GLuint writemask,
                GLuint msg_type,
                GLuint response_length,
                GLuint msg_length,
                GLboolean eot,
                GLuint header_present,
                GLuint simd_mode)
{
   struct intel_context *intel = &p->brw->intel;
   GLboolean need_stall = 0;

   if (writemask == 0) {
      /*printf("%s: zero writemask??\n", __FUNCTION__); */
      return;
   }

   /* Hardware doesn't do destination dependency checking on send
    * instructions properly.  Add a workaround which generates the
    * dependency by other means.  In practice it seems like this bug
    * only crops up for texture samples, and only where registers are
    * written by the send and then written again later without being
    * read in between.  Luckily for us, we already track that
    * information and use it to modify the writemask for the
    * instruction, so that is a guide for whether a workaround is
    * needed.
    */
   if (writemask != WRITEMASK_XYZW) {
      GLuint dst_offset = 0;
      GLuint i, newmask = 0, len = 0;

      /* Skip leading disabled channels, counting 2 regs per channel of
       * destination offset.
       */
      for (i = 0; i < 4; i++) {
         if (writemask & (1<<i))
            break;
         dst_offset += 2;
      }
      /* Collect the contiguous run of enabled channels. */
      for (; i < 4; i++) {
         if (!(writemask & (1<<i)))
            break;
         newmask |= 1<<i;
         len++;
      }

      if (newmask != writemask) {
         /* Non-contiguous mask: fall back to a read-after stall. */
         need_stall = 1;
         /* printf("need stall %x %x\n", newmask , writemask); */
      }
      else {
         GLboolean dispatch_16 = GL_FALSE;

         struct brw_reg m1 = brw_message_reg(msg_reg_nr);

         guess_execution_size(p->current, dest);
         if (p->current->header.execution_size == BRW_EXECUTE_16)
            dispatch_16 = GL_TRUE;

         /* Invert: the message header channel mask marks channels to
          * DISABLE.
          */
         newmask = ~newmask & WRITEMASK_XYZW;

         brw_push_insn_state(p);

         brw_set_compression_control(p, BRW_COMPRESSION_NONE);
         brw_set_mask_control(p, BRW_MASK_DISABLE);

         brw_MOV(p, m1, brw_vec8_grf(0,0));
         brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));

         brw_pop_insn_state(p);

         src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
         dest = offset(dest, dst_offset);

         /* For 16-wide dispatch, masked channels are skipped in the
          * response.  For 8-wide, masked channels still take up slots,
          * and are just not written to.
          */
         if (dispatch_16)
            response_length = len * 2;
      }
   }

   {
      struct brw_instruction *insn;

      /* Sandybridge doesn't have the implied move for SENDs,
       * and the first message register index comes from src0.
       */
      if (intel->gen >= 6) {
         brw_push_insn_state(p);
         brw_set_mask_control( p, BRW_MASK_DISABLE );
         /* m1 contains header?
          */
         brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
         brw_pop_insn_state(p);
         src0 = brw_message_reg(msg_reg_nr);
      }

      insn = next_insn(p, BRW_OPCODE_SEND);
      insn->header.predicate_control = 0; /* XXX */
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      if (intel->gen < 6)
          insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(insn, dest);
      brw_set_src0(insn, src0);
      brw_set_sampler_message(p->brw, insn,
                              binding_table_index,
                              sampler,
                              msg_type,
                              response_length,
                              msg_length,
                              eot,
                              header_present,
                              simd_mode);
   }

   if (need_stall) {
      /* Read back the last response register to force the send to
       * complete before later writers touch the destination (see the
       * dependency-checking workaround comment above).
       */
      struct brw_reg reg = vec8(offset(dest, response_length-1));

      /*  mov (8) r9.0<1>:f    r9.0<8;8,1>:f    { Align1 }
       */
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, reg, reg);
      brw_pop_insn_state(p);
   }

}

/* All these variables are pretty confusing - we might be better off
 * using bitmasks and macros for this, in the old style.  Or perhaps
 * just having the caller instantiate the fields in dword3 itself.
 */
/**
 * Emit a URB write SEND.  See brw_set_urb_message() for how the
 * allocate/used/complete/offset/swizzle parameters are packed into the
 * message descriptor.
 */
void brw_urb_WRITE(struct brw_compile *p,
                   struct brw_reg dest,
                   GLuint msg_reg_nr,
                   struct brw_reg src0,
                   GLboolean allocate,
                   GLboolean used,
                   GLuint msg_length,
                   GLuint response_length,
                   GLboolean eot,
                   GLboolean writes_complete,
                   GLuint offset,
                   GLuint swizzle)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   /* Sandybridge doesn't have the implied move for SENDs,
    * and the first message register index comes from src0.
    */
   if (intel->gen >= 6) {
      brw_push_insn_state(p);
      brw_set_mask_control( p, BRW_MASK_DISABLE );
      brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
      brw_pop_insn_state(p);
      src0 = brw_message_reg(msg_reg_nr);
   }

   insn = next_insn(p, BRW_OPCODE_SEND);

   assert(msg_length < BRW_MAX_MRF);

   brw_set_dest(insn, dest);
   brw_set_src0(insn, src0);
   brw_set_src1(insn, brw_imm_d(0));

   /* Pre-gen6 the starting MRF lives in destreg__conditionalmod. */
   if (intel->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_urb_message(p->brw,
                       insn,
                       allocate,
                       used,
                       msg_length,
                       response_length,
                       eot,
                       writes_complete,
                       offset,
                       swizzle);
}

/**
 * Emit an FF_SYNC URB message (thread ordering for the clip/SF fixed
 * function pipeline); see brw_set_ff_sync_message() for the descriptor.
 */
void brw_ff_sync(struct brw_compile *p,
                 struct brw_reg dest,
                 GLuint msg_reg_nr,
                 struct brw_reg src0,
                 GLboolean allocate,
                 GLuint response_length,
                 GLboolean eot)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   /* Sandybridge doesn't have the implied move for SENDs,
    * and the first message register index comes from src0.
    */
   if (intel->gen >= 6) {
      brw_push_insn_state(p);
      brw_set_mask_control( p, BRW_MASK_DISABLE );
      brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
              retype(src0, BRW_REGISTER_TYPE_UD));
      brw_pop_insn_state(p);
      src0 = brw_message_reg(msg_reg_nr);
   }

   insn = next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(insn, dest);
   brw_set_src0(insn, src0);
   brw_set_src1(insn, brw_imm_d(0));

   /* Pre-gen6 the starting MRF lives in destreg__conditionalmod. */
   if (intel->gen < 6)
      insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_ff_sync_message(p->brw,
                           insn,
                           allocate,
                           response_length,
                           eot);
}