brw_eu_emit.c revision 72845d206e692581b6084c56b8d1f3bc689e8a03
1/* 2 Copyright (C) Intel Corp. 2006. All Rights Reserved. 3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to 4 develop this 3D driver. 5 6 Permission is hereby granted, free of charge, to any person obtaining 7 a copy of this software and associated documentation files (the 8 "Software"), to deal in the Software without restriction, including 9 without limitation the rights to use, copy, modify, merge, publish, 10 distribute, sublicense, and/or sell copies of the Software, and to 11 permit persons to whom the Software is furnished to do so, subject to 12 the following conditions: 13 14 The above copyright notice and this permission notice (including the 15 next paragraph) shall be included in all copies or substantial 16 portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 26 **********************************************************************/ 27 /* 28 * Authors: 29 * Keith Whitwell <keith@tungstengraphics.com> 30 */ 31 32 33#include "brw_context.h" 34#include "brw_defines.h" 35#include "brw_eu.h" 36 37 38 39 40/*********************************************************************** 41 * Internal helper for constructing instructions 42 */ 43 44static void guess_execution_size(struct brw_compile *p, 45 struct brw_instruction *insn, 46 struct brw_reg reg) 47{ 48 if (reg.width == BRW_WIDTH_8 && p->compressed) 49 insn->header.execution_size = BRW_EXECUTE_16; 50 else 51 insn->header.execution_size = reg.width; /* note - definitions are compatible */ 52} 53 54 55static void brw_set_dest(struct brw_compile *p, 56 struct brw_instruction *insn, 57 struct brw_reg dest) 58{ 59 if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && 60 dest.file != BRW_MESSAGE_REGISTER_FILE) 61 assert(dest.nr < 128); 62 63 insn->bits1.da1.dest_reg_file = dest.file; 64 insn->bits1.da1.dest_reg_type = dest.type; 65 insn->bits1.da1.dest_address_mode = dest.address_mode; 66 67 if (dest.address_mode == BRW_ADDRESS_DIRECT) { 68 insn->bits1.da1.dest_reg_nr = dest.nr; 69 70 if (insn->header.access_mode == BRW_ALIGN_1) { 71 insn->bits1.da1.dest_subreg_nr = dest.subnr; 72 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) 73 dest.hstride = BRW_HORIZONTAL_STRIDE_1; 74 insn->bits1.da1.dest_horiz_stride = dest.hstride; 75 } 76 else { 77 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; 78 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; 79 /* even ignored in da16, still need to set as '01' */ 80 insn->bits1.da16.dest_horiz_stride = 1; 81 } 82 } 83 else { 84 insn->bits1.ia1.dest_subreg_nr = dest.subnr; 85 86 /* These are different sizes in align1 vs align16: 87 */ 88 if (insn->header.access_mode == BRW_ALIGN_1) { 89 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; 90 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) 91 dest.hstride = BRW_HORIZONTAL_STRIDE_1; 92 insn->bits1.ia1.dest_horiz_stride = dest.hstride; 93 } 94 else { 95 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; 96 /* even ignored in da16, still need to set as '01' */ 97 insn->bits1.ia16.dest_horiz_stride = 1; 98 } 99 } 100 101 /* NEW: Set the execution size based on dest.width and 102 * insn->compression_control: 103 */ 104 guess_execution_size(p, insn, dest); 105} 106 107extern int reg_type_size[]; 108 109static void 110validate_reg(struct brw_instruction *insn, struct brw_reg reg) 111{ 112 int hstride_for_reg[] = {0, 1, 2, 4}; 113 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256}; 114 int width_for_reg[] = {1, 2, 4, 8, 16}; 115 int execsize_for_reg[] = {1, 2, 4, 8, 16}; 116 int width, hstride, vstride, execsize; 117 118 if (reg.file == BRW_IMMEDIATE_VALUE) { 119 /* 3.3.6: Region Parameters. Restriction: Immediate vectors 120 * mean the destination has to be 128-bit aligned and the 121 * destination horiz stride has to be a word. 122 */ 123 if (reg.type == BRW_REGISTER_TYPE_V) { 124 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] * 125 reg_type_size[insn->bits1.da1.dest_reg_type] == 2); 126 } 127 128 return; 129 } 130 131 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE && 132 reg.file == BRW_ARF_NULL) 133 return; 134 135 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg)); 136 hstride = hstride_for_reg[reg.hstride]; 137 138 if (reg.vstride == 0xf) { 139 vstride = -1; 140 } else { 141 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg)); 142 vstride = vstride_for_reg[reg.vstride]; 143 } 144 145 assert(reg.width >= 0 && reg.width < Elements(width_for_reg)); 146 width = width_for_reg[reg.width]; 147 148 assert(insn->header.execution_size >= 0 && 149 insn->header.execution_size < Elements(execsize_for_reg)); 150 execsize = execsize_for_reg[insn->header.execution_size]; 151 152 /* Restrictions from 3.3.10: Register Region Restrictions. */ 153 /* 3. */ 154 assert(execsize >= width); 155 156 /* 4. */ 157 if (execsize == width && hstride != 0) { 158 assert(vstride == -1 || vstride == width * hstride); 159 } 160 161 /* 5. */ 162 if (execsize == width && hstride == 0) { 163 /* no restriction on vstride. */ 164 } 165 166 /* 6. */ 167 if (width == 1) { 168 assert(hstride == 0); 169 } 170 171 /* 7. */ 172 if (execsize == 1 && width == 1) { 173 assert(hstride == 0); 174 assert(vstride == 0); 175 } 176 177 /* 8. */ 178 if (vstride == 0 && hstride == 0) { 179 assert(width == 1); 180 } 181 182 /* 10. Check destination issues. */ 183} 184 185static void brw_set_src0( struct brw_instruction *insn, 186 struct brw_reg reg ) 187{ 188 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) 189 assert(reg.nr < 128); 190 191 validate_reg(insn, reg); 192 193 insn->bits1.da1.src0_reg_file = reg.file; 194 insn->bits1.da1.src0_reg_type = reg.type; 195 insn->bits2.da1.src0_abs = reg.abs; 196 insn->bits2.da1.src0_negate = reg.negate; 197 insn->bits2.da1.src0_address_mode = reg.address_mode; 198 199 if (reg.file == BRW_IMMEDIATE_VALUE) { 200 insn->bits3.ud = reg.dw1.ud; 201 202 /* Required to set some fields in src1 as well: 203 */ 204 insn->bits1.da1.src1_reg_file = 0; /* arf */ 205 insn->bits1.da1.src1_reg_type = reg.type; 206 } 207 else 208 { 209 if (reg.address_mode == BRW_ADDRESS_DIRECT) { 210 if (insn->header.access_mode == BRW_ALIGN_1) { 211 insn->bits2.da1.src0_subreg_nr = reg.subnr; 212 insn->bits2.da1.src0_reg_nr = reg.nr; 213 } 214 else { 215 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16; 216 insn->bits2.da16.src0_reg_nr = reg.nr; 217 } 218 } 219 else { 220 insn->bits2.ia1.src0_subreg_nr = reg.subnr; 221 222 if (insn->header.access_mode == BRW_ALIGN_1) { 223 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; 224 } 225 else { 226 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; 227 } 228 } 229 230 if (insn->header.access_mode == BRW_ALIGN_1) { 231 if (reg.width == BRW_WIDTH_1 && 232 insn->header.execution_size == BRW_EXECUTE_1) { 233 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0; 234 insn->bits2.da1.src0_width = BRW_WIDTH_1; 235 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0; 236 } 237 else { 238 insn->bits2.da1.src0_horiz_stride = reg.hstride; 239 insn->bits2.da1.src0_width = reg.width; 240 insn->bits2.da1.src0_vert_stride = reg.vstride; 241 } 242 } 243 else { 244 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); 245 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); 246 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); 247 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); 248 249 /* This is an oddity of the fact we're using the same 250 * descriptions for registers in align_16 as align_1: 251 */ 252 if (reg.vstride == BRW_VERTICAL_STRIDE_8) 253 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4; 254 else 255 insn->bits2.da16.src0_vert_stride = reg.vstride; 256 } 257 } 258} 259 260 261void brw_set_src1( struct brw_instruction *insn, 262 struct brw_reg reg ) 263{ 264 assert(reg.file != BRW_MESSAGE_REGISTER_FILE); 265 266 assert(reg.nr < 128); 267 268 validate_reg(insn, reg); 269 270 insn->bits1.da1.src1_reg_file = reg.file; 271 insn->bits1.da1.src1_reg_type = reg.type; 272 insn->bits3.da1.src1_abs = reg.abs; 273 insn->bits3.da1.src1_negate = reg.negate; 274 275 /* Only src1 can be immediate in two-argument instructions. 276 */ 277 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE); 278 279 if (reg.file == BRW_IMMEDIATE_VALUE) { 280 insn->bits3.ud = reg.dw1.ud; 281 } 282 else { 283 /* This is a hardware restriction, which may or may not be lifted 284 * in the future: 285 */ 286 assert (reg.address_mode == BRW_ADDRESS_DIRECT); 287 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */ 288 289 if (insn->header.access_mode == BRW_ALIGN_1) { 290 insn->bits3.da1.src1_subreg_nr = reg.subnr; 291 insn->bits3.da1.src1_reg_nr = reg.nr; 292 } 293 else { 294 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16; 295 insn->bits3.da16.src1_reg_nr = reg.nr; 296 } 297 298 if (insn->header.access_mode == BRW_ALIGN_1) { 299 if (reg.width == BRW_WIDTH_1 && 300 insn->header.execution_size == BRW_EXECUTE_1) { 301 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0; 302 insn->bits3.da1.src1_width = BRW_WIDTH_1; 303 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0; 304 } 305 else { 306 insn->bits3.da1.src1_horiz_stride = reg.hstride; 307 insn->bits3.da1.src1_width = reg.width; 308 insn->bits3.da1.src1_vert_stride = reg.vstride; 309 } 310 } 311 else { 312 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); 313 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); 314 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); 315 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); 316 317 /* This is an oddity of the fact we're using the same 318 * descriptions for registers in align_16 as align_1: 319 */ 320 if (reg.vstride == BRW_VERTICAL_STRIDE_8) 321 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4; 322 else 323 insn->bits3.da16.src1_vert_stride = reg.vstride; 324 } 325 } 326} 327 328 329 330static void brw_set_math_message( struct brw_context *brw, 331 struct brw_instruction *insn, 332 GLuint msg_length, 333 GLuint response_length, 334 GLuint function, 335 GLuint integer_type, 336 GLboolean low_precision, 337 GLboolean saturate, 338 GLuint dataType ) 339{ 340 struct intel_context *intel = &brw->intel; 341 brw_set_src1(insn, brw_imm_d(0)); 342 343 if (intel->gen == 5) { 344 insn->bits3.math_gen5.function = function; 345 insn->bits3.math_gen5.int_type = integer_type; 346 insn->bits3.math_gen5.precision = low_precision; 347 insn->bits3.math_gen5.saturate = saturate; 348 insn->bits3.math_gen5.data_type = dataType; 349 insn->bits3.math_gen5.snapshot = 0; 350 insn->bits3.math_gen5.header_present = 0; 351 insn->bits3.math_gen5.response_length = response_length; 352 insn->bits3.math_gen5.msg_length = msg_length; 353 insn->bits3.math_gen5.end_of_thread = 0; 354 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH; 355 insn->bits2.send_gen5.end_of_thread = 0; 356 } else { 357 insn->bits3.math.function = function; 358 insn->bits3.math.int_type = integer_type; 359 insn->bits3.math.precision = low_precision; 360 insn->bits3.math.saturate = saturate; 361 insn->bits3.math.data_type = dataType; 362 insn->bits3.math.response_length = response_length; 363 insn->bits3.math.msg_length = msg_length; 364 insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH; 365 insn->bits3.math.end_of_thread = 0; 366 } 367} 368 369 370static void brw_set_ff_sync_message(struct brw_context *brw, 371 struct brw_instruction *insn, 372 GLboolean allocate, 373 GLuint response_length, 374 GLboolean end_of_thread) 375{ 376 struct intel_context *intel = &brw->intel; 377 brw_set_src1(insn, brw_imm_d(0)); 378 379 insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */ 380 insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */ 381 insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */ 382 insn->bits3.urb_gen5.allocate = allocate; 383 insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */ 384 insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */ 385 insn->bits3.urb_gen5.header_present = 1; 386 insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */ 387 insn->bits3.urb_gen5.msg_length = 1; 388 insn->bits3.urb_gen5.end_of_thread = end_of_thread; 389 if (intel->gen >= 6) { 390 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB; 391 } else { 392 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB; 393 insn->bits2.send_gen5.end_of_thread = end_of_thread; 394 } 395} 396 397static void brw_set_urb_message( struct brw_context *brw, 398 struct brw_instruction *insn, 399 GLboolean allocate, 400 GLboolean used, 401 GLuint msg_length, 402 GLuint response_length, 403 GLboolean end_of_thread, 404 GLboolean complete, 405 GLuint offset, 406 GLuint swizzle_control ) 407{ 408 struct intel_context *intel = &brw->intel; 409 brw_set_src1(insn, brw_imm_d(0)); 410 411 if (intel->gen >= 5) { 412 insn->bits3.urb_gen5.opcode = 0; /* ? */ 413 insn->bits3.urb_gen5.offset = offset; 414 insn->bits3.urb_gen5.swizzle_control = swizzle_control; 415 insn->bits3.urb_gen5.allocate = allocate; 416 insn->bits3.urb_gen5.used = used; /* ? */ 417 insn->bits3.urb_gen5.complete = complete; 418 insn->bits3.urb_gen5.header_present = 1; 419 insn->bits3.urb_gen5.response_length = response_length; 420 insn->bits3.urb_gen5.msg_length = msg_length; 421 insn->bits3.urb_gen5.end_of_thread = end_of_thread; 422 if (intel->gen >= 6) { 423 /* For SNB, the SFID bits moved to the condmod bits, and 424 * EOT stayed in bits3 above. Does the EOT bit setting 425 * below on Ironlake even do anything? 426 */ 427 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB; 428 } else { 429 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB; 430 insn->bits2.send_gen5.end_of_thread = end_of_thread; 431 } 432 } else { 433 insn->bits3.urb.opcode = 0; /* ? */ 434 insn->bits3.urb.offset = offset; 435 insn->bits3.urb.swizzle_control = swizzle_control; 436 insn->bits3.urb.allocate = allocate; 437 insn->bits3.urb.used = used; /* ? */ 438 insn->bits3.urb.complete = complete; 439 insn->bits3.urb.response_length = response_length; 440 insn->bits3.urb.msg_length = msg_length; 441 insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB; 442 insn->bits3.urb.end_of_thread = end_of_thread; 443 } 444} 445 446static void brw_set_dp_write_message( struct brw_context *brw, 447 struct brw_instruction *insn, 448 GLuint binding_table_index, 449 GLuint msg_control, 450 GLuint msg_type, 451 GLuint msg_length, 452 GLboolean header_present, 453 GLuint pixel_scoreboard_clear, 454 GLuint response_length, 455 GLuint end_of_thread, 456 GLuint send_commit_msg) 457{ 458 struct intel_context *intel = &brw->intel; 459 brw_set_src1(insn, brw_imm_ud(0)); 460 461 if (intel->gen >= 6) { 462 insn->bits3.dp_render_cache.binding_table_index = binding_table_index; 463 insn->bits3.dp_render_cache.msg_control = msg_control; 464 insn->bits3.dp_render_cache.pixel_scoreboard_clear = pixel_scoreboard_clear; 465 insn->bits3.dp_render_cache.msg_type = msg_type; 466 insn->bits3.dp_render_cache.send_commit_msg = send_commit_msg; 467 insn->bits3.dp_render_cache.header_present = header_present; 468 insn->bits3.dp_render_cache.response_length = response_length; 469 insn->bits3.dp_render_cache.msg_length = msg_length; 470 insn->bits3.dp_render_cache.end_of_thread = end_of_thread; 471 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_WRITE; 472 /* XXX really need below? */ 473 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE; 474 insn->bits2.send_gen5.end_of_thread = end_of_thread; 475 } else if (intel->gen == 5) { 476 insn->bits3.dp_write_gen5.binding_table_index = binding_table_index; 477 insn->bits3.dp_write_gen5.msg_control = msg_control; 478 insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear; 479 insn->bits3.dp_write_gen5.msg_type = msg_type; 480 insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg; 481 insn->bits3.dp_write_gen5.header_present = header_present; 482 insn->bits3.dp_write_gen5.response_length = response_length; 483 insn->bits3.dp_write_gen5.msg_length = msg_length; 484 insn->bits3.dp_write_gen5.end_of_thread = end_of_thread; 485 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE; 486 insn->bits2.send_gen5.end_of_thread = end_of_thread; 487 } else { 488 insn->bits3.dp_write.binding_table_index = binding_table_index; 489 insn->bits3.dp_write.msg_control = msg_control; 490 insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear; 491 insn->bits3.dp_write.msg_type = msg_type; 492 insn->bits3.dp_write.send_commit_msg = send_commit_msg; 493 insn->bits3.dp_write.response_length = response_length; 494 insn->bits3.dp_write.msg_length = msg_length; 495 insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE; 496 insn->bits3.dp_write.end_of_thread = end_of_thread; 497 } 498} 499 500static void 501brw_set_dp_read_message(struct brw_context *brw, 502 struct brw_instruction *insn, 503 GLuint binding_table_index, 504 GLuint msg_control, 505 GLuint msg_type, 506 GLuint target_cache, 507 GLuint msg_length, 508 GLuint response_length) 509{ 510 struct intel_context *intel = &brw->intel; 511 brw_set_src1(insn, brw_imm_d(0)); 512 513 if (intel->gen >= 6) { 514 insn->bits3.dp_render_cache.binding_table_index = binding_table_index; 515 insn->bits3.dp_render_cache.msg_control = msg_control; 516 insn->bits3.dp_render_cache.pixel_scoreboard_clear = 0; 517 insn->bits3.dp_render_cache.msg_type = msg_type; 518 insn->bits3.dp_render_cache.send_commit_msg = 0; 519 insn->bits3.dp_render_cache.header_present = 1; 520 insn->bits3.dp_render_cache.response_length = response_length; 521 insn->bits3.dp_render_cache.msg_length = msg_length; 522 insn->bits3.dp_render_cache.end_of_thread = 0; 523 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_READ; 524 /* XXX really need below? */ 525 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ; 526 insn->bits2.send_gen5.end_of_thread = 0; 527 } else if (intel->gen == 5) { 528 insn->bits3.dp_read_gen5.binding_table_index = binding_table_index; 529 insn->bits3.dp_read_gen5.msg_control = msg_control; 530 insn->bits3.dp_read_gen5.msg_type = msg_type; 531 insn->bits3.dp_read_gen5.target_cache = target_cache; 532 insn->bits3.dp_read_gen5.header_present = 1; 533 insn->bits3.dp_read_gen5.response_length = response_length; 534 insn->bits3.dp_read_gen5.msg_length = msg_length; 535 insn->bits3.dp_read_gen5.pad1 = 0; 536 insn->bits3.dp_read_gen5.end_of_thread = 0; 537 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ; 538 insn->bits2.send_gen5.end_of_thread = 0; 539 } else { 540 insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/ 541 insn->bits3.dp_read.msg_control = msg_control; /*8:11*/ 542 insn->bits3.dp_read.msg_type = msg_type; /*12:13*/ 543 insn->bits3.dp_read.target_cache = target_cache; /*14:15*/ 544 insn->bits3.dp_read.response_length = response_length; /*16:19*/ 545 insn->bits3.dp_read.msg_length = msg_length; /*20:23*/ 546 insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/ 547 insn->bits3.dp_read.pad1 = 0; /*28:30*/ 548 insn->bits3.dp_read.end_of_thread = 0; /*31*/ 549 } 550} 551 552static void brw_set_sampler_message(struct brw_context *brw, 553 struct brw_instruction *insn, 554 GLuint binding_table_index, 555 GLuint sampler, 556 GLuint msg_type, 557 GLuint response_length, 558 GLuint msg_length, 559 GLboolean eot, 560 GLuint header_present, 561 GLuint simd_mode) 562{ 563 struct intel_context *intel = &brw->intel; 564 assert(eot == 0); 565 brw_set_src1(insn, brw_imm_d(0)); 566 567 if (intel->gen >= 5) { 568 insn->bits3.sampler_gen5.binding_table_index = binding_table_index; 569 insn->bits3.sampler_gen5.sampler = sampler; 570 insn->bits3.sampler_gen5.msg_type = msg_type; 571 insn->bits3.sampler_gen5.simd_mode = simd_mode; 572 insn->bits3.sampler_gen5.header_present = header_present; 573 insn->bits3.sampler_gen5.response_length = response_length; 574 insn->bits3.sampler_gen5.msg_length = msg_length; 575 insn->bits3.sampler_gen5.end_of_thread = eot; 576 if (intel->gen >= 6) 577 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_SAMPLER; 578 else { 579 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER; 580 insn->bits2.send_gen5.end_of_thread = eot; 581 } 582 } else if (intel->is_g4x) { 583 insn->bits3.sampler_g4x.binding_table_index = binding_table_index; 584 insn->bits3.sampler_g4x.sampler = sampler; 585 insn->bits3.sampler_g4x.msg_type = msg_type; 586 insn->bits3.sampler_g4x.response_length = response_length; 587 insn->bits3.sampler_g4x.msg_length = msg_length; 588 insn->bits3.sampler_g4x.end_of_thread = eot; 589 insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER; 590 } else { 591 insn->bits3.sampler.binding_table_index = binding_table_index; 592 insn->bits3.sampler.sampler = sampler; 593 insn->bits3.sampler.msg_type = msg_type; 594 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; 595 insn->bits3.sampler.response_length = response_length; 596 insn->bits3.sampler.msg_length = msg_length; 597 insn->bits3.sampler.end_of_thread = eot; 598 insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER; 599 } 600} 601 602 603 604static struct brw_instruction *next_insn( struct brw_compile *p, 605 GLuint opcode ) 606{ 607 struct brw_instruction *insn; 608 609 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN); 610 611 insn = &p->store[p->nr_insn++]; 612 memcpy(insn, p->current, sizeof(*insn)); 613 614 /* Reset this one-shot flag: 615 */ 616 617 if (p->current->header.destreg__conditionalmod) { 618 p->current->header.destreg__conditionalmod = 0; 619 p->current->header.predicate_control = BRW_PREDICATE_NORMAL; 620 } 621 622 insn->header.opcode = opcode; 623 return insn; 624} 625 626 627static struct brw_instruction *brw_alu1( struct brw_compile *p, 628 GLuint opcode, 629 struct brw_reg dest, 630 struct brw_reg src ) 631{ 632 struct brw_instruction *insn = next_insn(p, opcode); 633 brw_set_dest(p, insn, dest); 634 brw_set_src0(insn, src); 635 return insn; 636} 637 638static struct brw_instruction *brw_alu2(struct brw_compile *p, 639 GLuint opcode, 640 struct brw_reg dest, 641 struct brw_reg src0, 642 struct brw_reg src1 ) 643{ 644 struct brw_instruction *insn = next_insn(p, opcode); 645 brw_set_dest(p, insn, dest); 646 brw_set_src0(insn, src0); 647 brw_set_src1(insn, src1); 648 return insn; 649} 650 651 652/*********************************************************************** 653 * Convenience routines. 654 */ 655#define ALU1(OP) \ 656struct brw_instruction *brw_##OP(struct brw_compile *p, \ 657 struct brw_reg dest, \ 658 struct brw_reg src0) \ 659{ \ 660 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \ 661} 662 663#define ALU2(OP) \ 664struct brw_instruction *brw_##OP(struct brw_compile *p, \ 665 struct brw_reg dest, \ 666 struct brw_reg src0, \ 667 struct brw_reg src1) \ 668{ \ 669 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \ 670} 671 672/* Rounding operations (other than RNDD) require two instructions - the first 673 * stores a rounded value (possibly the wrong way) in the dest register, but 674 * also sets a per-channel "increment bit" in the flag register. A predicated 675 * add of 1.0 fixes dest to contain the desired result. 676 */ 677#define ROUND(OP) \ 678void brw_##OP(struct brw_compile *p, \ 679 struct brw_reg dest, \ 680 struct brw_reg src) \ 681{ \ 682 struct brw_instruction *rnd, *add; \ 683 rnd = next_insn(p, BRW_OPCODE_##OP); \ 684 brw_set_dest(p, rnd, dest); \ 685 brw_set_src0(rnd, src); \ 686 rnd->header.destreg__conditionalmod = 0x7; /* turn on round-increments */ \ 687 \ 688 add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \ 689 add->header.predicate_control = BRW_PREDICATE_NORMAL; \ 690} 691 692 693ALU1(MOV) 694ALU2(SEL) 695ALU1(NOT) 696ALU2(AND) 697ALU2(OR) 698ALU2(XOR) 699ALU2(SHR) 700ALU2(SHL) 701ALU2(RSR) 702ALU2(RSL) 703ALU2(ASR) 704ALU1(FRC) 705ALU1(RNDD) 706ALU2(MAC) 707ALU2(MACH) 708ALU1(LZD) 709ALU2(DP4) 710ALU2(DPH) 711ALU2(DP3) 712ALU2(DP2) 713ALU2(LINE) 714ALU2(PLN) 715 716 717ROUND(RNDZ) 718ROUND(RNDE) 719 720 721struct brw_instruction *brw_ADD(struct brw_compile *p, 722 struct brw_reg dest, 723 struct brw_reg src0, 724 struct brw_reg src1) 725{ 726 /* 6.2.2: add */ 727 if (src0.type == BRW_REGISTER_TYPE_F || 728 (src0.file == BRW_IMMEDIATE_VALUE && 729 src0.type == BRW_REGISTER_TYPE_VF)) { 730 assert(src1.type != BRW_REGISTER_TYPE_UD); 731 assert(src1.type != BRW_REGISTER_TYPE_D); 732 } 733 734 if (src1.type == BRW_REGISTER_TYPE_F || 735 (src1.file == BRW_IMMEDIATE_VALUE && 736 src1.type == BRW_REGISTER_TYPE_VF)) { 737 assert(src0.type != BRW_REGISTER_TYPE_UD); 738 assert(src0.type != BRW_REGISTER_TYPE_D); 739 } 740 741 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1); 742} 743 744struct brw_instruction *brw_MUL(struct brw_compile *p, 745 struct brw_reg dest, 746 struct brw_reg src0, 747 struct brw_reg src1) 748{ 749 /* 6.32.38: mul */ 750 if (src0.type == BRW_REGISTER_TYPE_D || 751 src0.type == BRW_REGISTER_TYPE_UD || 752 src1.type == BRW_REGISTER_TYPE_D || 753 src1.type == BRW_REGISTER_TYPE_UD) { 754 assert(dest.type != BRW_REGISTER_TYPE_F); 755 } 756 757 if (src0.type == BRW_REGISTER_TYPE_F || 758 (src0.file == BRW_IMMEDIATE_VALUE && 759 src0.type == BRW_REGISTER_TYPE_VF)) { 760 assert(src1.type != BRW_REGISTER_TYPE_UD); 761 assert(src1.type != BRW_REGISTER_TYPE_D); 762 } 763 764 if (src1.type == BRW_REGISTER_TYPE_F || 765 (src1.file == BRW_IMMEDIATE_VALUE && 766 src1.type == BRW_REGISTER_TYPE_VF)) { 767 assert(src0.type != BRW_REGISTER_TYPE_UD); 768 assert(src0.type != BRW_REGISTER_TYPE_D); 769 } 770 771 assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE || 772 src0.nr != BRW_ARF_ACCUMULATOR); 773 assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE || 774 src1.nr != BRW_ARF_ACCUMULATOR); 775 776 return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1); 777} 778 779 780void brw_NOP(struct brw_compile *p) 781{ 782 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP); 783 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 784 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 785 brw_set_src1(insn, brw_imm_ud(0x0)); 786} 787 788 789 790 791 792/*********************************************************************** 793 * Comparisons, if/else/endif 794 */ 795 796struct brw_instruction *brw_JMPI(struct brw_compile *p, 797 struct brw_reg dest, 798 struct brw_reg src0, 799 struct brw_reg src1) 800{ 801 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); 802 803 insn->header.execution_size = 1; 804 insn->header.compression_control = BRW_COMPRESSION_NONE; 805 insn->header.mask_control = BRW_MASK_DISABLE; 806 807 p->current->header.predicate_control = BRW_PREDICATE_NONE; 808 809 return insn; 810} 811 812/* EU takes the value from the flag register and pushes it onto some 813 * sort of a stack (presumably merging with any flag value already on 814 * the stack). Within an if block, the flags at the top of the stack 815 * control execution on each channel of the unit, eg. on each of the 816 * 16 pixel values in our wm programs. 817 * 818 * When the matching 'else' instruction is reached (presumably by 819 * countdown of the instruction count patched in by our ELSE/ENDIF 820 * functions), the relevent flags are inverted. 821 * 822 * When the matching 'endif' instruction is reached, the flags are 823 * popped off. If the stack is now empty, normal execution resumes. 824 * 825 * No attempt is made to deal with stack overflow (14 elements?). 826 */ 827struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size) 828{ 829 struct intel_context *intel = &p->brw->intel; 830 struct brw_instruction *insn; 831 832 if (p->single_program_flow) { 833 assert(execute_size == BRW_EXECUTE_1); 834 835 insn = next_insn(p, BRW_OPCODE_ADD); 836 insn->header.predicate_inverse = 1; 837 } else { 838 insn = next_insn(p, BRW_OPCODE_IF); 839 } 840 841 /* Override the defaults for this instruction: 842 */ 843 if (intel->gen < 6) { 844 brw_set_dest(p, insn, brw_ip_reg()); 845 brw_set_src0(insn, brw_ip_reg()); 846 brw_set_src1(insn, brw_imm_d(0x0)); 847 } else { 848 brw_set_dest(p, insn, brw_imm_w(0)); 849 insn->bits1.branch_gen6.jump_count = 0; 850 brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 851 brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 852 } 853 854 insn->header.execution_size = execute_size; 855 insn->header.compression_control = BRW_COMPRESSION_NONE; 856 insn->header.predicate_control = BRW_PREDICATE_NORMAL; 857 insn->header.mask_control = BRW_MASK_ENABLE; 858 if (!p->single_program_flow) 859 insn->header.thread_control = BRW_THREAD_SWITCH; 860 861 p->current->header.predicate_control = BRW_PREDICATE_NONE; 862 863 return insn; 864} 865 866struct brw_instruction * 867brw_IF_gen6(struct brw_compile *p, uint32_t conditional, 868 struct brw_reg src0, struct brw_reg src1) 869{ 870 struct brw_instruction *insn; 871 872 insn = next_insn(p, BRW_OPCODE_IF); 873 874 brw_set_dest(p, insn, brw_imm_w(0)); 875 insn->header.execution_size = BRW_EXECUTE_8; 876 insn->bits1.branch_gen6.jump_count = 0; 877 brw_set_src0(insn, src0); 878 brw_set_src1(insn, src1); 879 880 assert(insn->header.compression_control == BRW_COMPRESSION_NONE); 881 assert(insn->header.predicate_control == BRW_PREDICATE_NONE); 882 insn->header.destreg__conditionalmod = conditional; 883 884 if (!p->single_program_flow) 885 insn->header.thread_control = BRW_THREAD_SWITCH; 886 887 return insn; 888} 889 890struct brw_instruction *brw_ELSE(struct brw_compile *p, 891 struct brw_instruction *if_insn) 892{ 893 struct intel_context *intel = &p->brw->intel; 894 struct brw_instruction *insn; 895 GLuint br = 1; 896 897 /* jump count is for 64bit data chunk each, so one 128bit 898 instruction requires 2 chunks. */ 899 if (intel->gen >= 5) 900 br = 2; 901 902 if (p->single_program_flow) { 903 insn = next_insn(p, BRW_OPCODE_ADD); 904 } else { 905 insn = next_insn(p, BRW_OPCODE_ELSE); 906 } 907 908 if (intel->gen < 6) { 909 brw_set_dest(p, insn, brw_ip_reg()); 910 brw_set_src0(insn, brw_ip_reg()); 911 brw_set_src1(insn, brw_imm_d(0x0)); 912 } else { 913 brw_set_dest(p, insn, brw_imm_w(0)); 914 insn->bits1.branch_gen6.jump_count = 0; 915 brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 916 brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 917 } 918 919 insn->header.compression_control = BRW_COMPRESSION_NONE; 920 insn->header.execution_size = if_insn->header.execution_size; 921 insn->header.mask_control = BRW_MASK_ENABLE; 922 if (!p->single_program_flow) 923 insn->header.thread_control = BRW_THREAD_SWITCH; 924 925 /* Patch the if instruction to point at this instruction. 926 */ 927 if (p->single_program_flow) { 928 assert(if_insn->header.opcode == BRW_OPCODE_ADD); 929 930 if_insn->bits3.ud = (insn - if_insn + 1) * 16; 931 } else { 932 assert(if_insn->header.opcode == BRW_OPCODE_IF); 933 934 if (intel->gen < 6) { 935 if_insn->bits3.if_else.jump_count = br * (insn - if_insn); 936 if_insn->bits3.if_else.pop_count = 0; 937 if_insn->bits3.if_else.pad0 = 0; 938 } else { 939 if_insn->bits1.branch_gen6.jump_count = br * (insn - if_insn + 1); 940 } 941 } 942 943 return insn; 944} 945 946void brw_ENDIF(struct brw_compile *p, 947 struct brw_instruction *patch_insn) 948{ 949 struct intel_context *intel = &p->brw->intel; 950 GLuint br = 1; 951 952 if (intel->gen >= 5) 953 br = 2; 954 955 if (p->single_program_flow) { 956 /* In single program flow mode, there's no need to execute an ENDIF, 957 * since we don't need to do any stack operations, and if we're executing 958 * currently, we want to just continue executing. 959 */ 960 struct brw_instruction *next = &p->store[p->nr_insn]; 961 962 assert(patch_insn->header.opcode == BRW_OPCODE_ADD); 963 964 patch_insn->bits3.ud = (next - patch_insn) * 16; 965 } else { 966 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF); 967 968 if (intel->gen < 6) { 969 brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 970 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 971 brw_set_src1(insn, brw_imm_d(0x0)); 972 } else { 973 brw_set_dest(p, insn, brw_imm_w(0)); 974 brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 975 brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 976 } 977 978 insn->header.compression_control = BRW_COMPRESSION_NONE; 979 insn->header.execution_size = patch_insn->header.execution_size; 980 insn->header.mask_control = BRW_MASK_ENABLE; 981 insn->header.thread_control = BRW_THREAD_SWITCH; 982 983 if (intel->gen < 6) 984 assert(patch_insn->bits3.if_else.jump_count == 0); 985 else 986 assert(patch_insn->bits1.branch_gen6.jump_count == 0); 987 988 /* Patch the if or else instructions to point at this or the next 989 * instruction respectively. 990 */ 991 if (patch_insn->header.opcode == BRW_OPCODE_IF) { 992 if (intel->gen < 6) { 993 /* Turn it into an IFF, which means no mask stack operations for 994 * all-false and jumping past the ENDIF. 995 */ 996 patch_insn->header.opcode = BRW_OPCODE_IFF; 997 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); 998 patch_insn->bits3.if_else.pop_count = 0; 999 patch_insn->bits3.if_else.pad0 = 0; 1000 } else { 1001 /* As of gen6, there is no IFF and IF must point to the ENDIF. */ 1002 patch_insn->bits1.branch_gen6.jump_count = br * (insn - patch_insn); 1003 } 1004 } else { 1005 assert(patch_insn->header.opcode == BRW_OPCODE_ELSE); 1006 if (intel->gen < 6) { 1007 /* BRW_OPCODE_ELSE pre-gen6 should point just past the 1008 * matching ENDIF. 1009 */ 1010 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); 1011 patch_insn->bits3.if_else.pop_count = 1; 1012 patch_insn->bits3.if_else.pad0 = 0; 1013 } else { 1014 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */ 1015 patch_insn->bits1.branch_gen6.jump_count = br * (insn - patch_insn); 1016 } 1017 } 1018 1019 /* Also pop item off the stack in the endif instruction: 1020 */ 1021 if (intel->gen < 6) { 1022 insn->bits3.if_else.jump_count = 0; 1023 insn->bits3.if_else.pop_count = 1; 1024 insn->bits3.if_else.pad0 = 0; 1025 } else { 1026 insn->bits1.branch_gen6.jump_count = 2; 1027 } 1028 } 1029} 1030 1031struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count) 1032{ 1033 struct intel_context *intel = &p->brw->intel; 1034 struct brw_instruction *insn; 1035 1036 insn = next_insn(p, BRW_OPCODE_BREAK); 1037 if (intel->gen >= 6) { 1038 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1039 brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1040 brw_set_src1(insn, brw_imm_d(0x0)); 1041 } else { 1042 brw_set_dest(p, insn, brw_ip_reg()); 1043 brw_set_src0(insn, brw_ip_reg()); 1044 brw_set_src1(insn, brw_imm_d(0x0)); 1045 insn->bits3.if_else.pad0 = 0; 1046 insn->bits3.if_else.pop_count = pop_count; 1047 } 1048 insn->header.compression_control = BRW_COMPRESSION_NONE; 1049 insn->header.execution_size = BRW_EXECUTE_8; 1050 1051 return insn; 1052} 1053 1054struct brw_instruction *brw_CONT_gen6(struct brw_compile *p, 1055 struct brw_instruction *do_insn) 1056{ 1057 struct brw_instruction *insn; 1058 int br = 2; 1059 1060 insn = next_insn(p, BRW_OPCODE_CONTINUE); 1061 brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1062 brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1063 brw_set_dest(p, insn, brw_ip_reg()); 1064 brw_set_src0(insn, brw_ip_reg()); 1065 brw_set_src1(insn, brw_imm_d(0x0)); 1066 1067 insn->bits3.break_cont.uip = br * (do_insn - insn); 1068 1069 insn->header.compression_control = BRW_COMPRESSION_NONE; 1070 insn->header.execution_size = BRW_EXECUTE_8; 1071 return insn; 1072} 1073 1074struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count) 1075{ 1076 struct brw_instruction *insn; 1077 insn = next_insn(p, BRW_OPCODE_CONTINUE); 1078 brw_set_dest(p, insn, brw_ip_reg()); 1079 brw_set_src0(insn, brw_ip_reg()); 1080 brw_set_src1(insn, brw_imm_d(0x0)); 1081 insn->header.compression_control = BRW_COMPRESSION_NONE; 1082 insn->header.execution_size = BRW_EXECUTE_8; 1083 /* insn->header.mask_control = BRW_MASK_DISABLE; */ 1084 insn->bits3.if_else.pad0 = 0; 1085 insn->bits3.if_else.pop_count = pop_count; 1086 return insn; 1087} 1088 1089/* DO/WHILE loop: 1090 * 1091 * The DO/WHILE is just an unterminated loop -- break or continue are 1092 * used for control within the loop. We have a few ways they can be 1093 * done. 1094 * 1095 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip, 1096 * jip and no DO instruction. 1097 * 1098 * For non-uniform control flow pre-gen6, there's a DO instruction to 1099 * push the mask, and a WHILE to jump back, and BREAK to get out and 1100 * pop the mask. 1101 * 1102 * For gen6, there's no more mask stack, so no need for DO. WHILE 1103 * just points back to the first instruction of the loop. 1104 */ 1105struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) 1106{ 1107 struct intel_context *intel = &p->brw->intel; 1108 1109 if (intel->gen >= 6 || p->single_program_flow) { 1110 return &p->store[p->nr_insn]; 1111 } else { 1112 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO); 1113 1114 /* Override the defaults for this instruction: 1115 */ 1116 brw_set_dest(p, insn, brw_null_reg()); 1117 brw_set_src0(insn, brw_null_reg()); 1118 brw_set_src1(insn, brw_null_reg()); 1119 1120 insn->header.compression_control = BRW_COMPRESSION_NONE; 1121 insn->header.execution_size = execute_size; 1122 insn->header.predicate_control = BRW_PREDICATE_NONE; 1123 /* insn->header.mask_control = BRW_MASK_ENABLE; */ 1124 /* insn->header.mask_control = BRW_MASK_DISABLE; */ 1125 1126 return insn; 1127 } 1128} 1129 1130 1131 1132struct brw_instruction *brw_WHILE(struct brw_compile *p, 1133 struct brw_instruction *do_insn) 1134{ 1135 struct intel_context *intel = &p->brw->intel; 1136 struct brw_instruction *insn; 1137 GLuint br = 1; 1138 1139 if (intel->gen >= 5) 1140 br = 2; 1141 1142 if (intel->gen >= 6) { 1143 insn = next_insn(p, BRW_OPCODE_WHILE); 1144 1145 brw_set_dest(p, insn, brw_imm_w(0)); 1146 insn->bits1.branch_gen6.jump_count = br * (do_insn - insn); 1147 brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1148 brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 1149 1150 insn->header.execution_size = do_insn->header.execution_size; 1151 assert(insn->header.execution_size == BRW_EXECUTE_8); 1152 } else { 1153 if (p->single_program_flow) { 1154 insn = next_insn(p, BRW_OPCODE_ADD); 1155 1156 brw_set_dest(p, insn, brw_ip_reg()); 1157 brw_set_src0(insn, brw_ip_reg()); 1158 brw_set_src1(insn, brw_imm_d((do_insn - insn) * 16)); 1159 insn->header.execution_size = BRW_EXECUTE_1; 1160 } else { 1161 insn = next_insn(p, BRW_OPCODE_WHILE); 1162 1163 assert(do_insn->header.opcode == BRW_OPCODE_DO); 1164 1165 brw_set_dest(p, insn, brw_ip_reg()); 1166 brw_set_src0(insn, brw_ip_reg()); 1167 brw_set_src1(insn, brw_imm_d(0)); 1168 1169 insn->header.execution_size = do_insn->header.execution_size; 1170 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1); 1171 insn->bits3.if_else.pop_count = 0; 1172 insn->bits3.if_else.pad0 = 0; 1173 } 1174 } 1175 insn->header.compression_control = BRW_COMPRESSION_NONE; 1176 p->current->header.predicate_control = BRW_PREDICATE_NONE; 1177 1178 return insn; 1179} 1180 1181 1182/* FORWARD JUMPS: 1183 */ 1184void brw_land_fwd_jump(struct brw_compile *p, 1185 struct brw_instruction *jmp_insn) 1186{ 1187 struct intel_context *intel = &p->brw->intel; 1188 struct brw_instruction *landing = &p->store[p->nr_insn]; 1189 GLuint jmpi = 1; 1190 1191 if (intel->gen >= 5) 1192 jmpi = 2; 1193 1194 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); 1195 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE); 1196 1197 jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1); 1198} 1199 1200 1201 1202/* To integrate with the above, it makes sense that the comparison 1203 * instruction should populate the flag register. It might be simpler 1204 * just to use the flag reg for most WM tasks? 1205 */ 1206void brw_CMP(struct brw_compile *p, 1207 struct brw_reg dest, 1208 GLuint conditional, 1209 struct brw_reg src0, 1210 struct brw_reg src1) 1211{ 1212 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP); 1213 1214 insn->header.destreg__conditionalmod = conditional; 1215 brw_set_dest(p, insn, dest); 1216 brw_set_src0(insn, src0); 1217 brw_set_src1(insn, src1); 1218 1219/* guess_execution_size(insn, src0); */ 1220 1221 1222 /* Make it so that future instructions will use the computed flag 1223 * value until brw_set_predicate_control_flag_value() is called 1224 * again. 1225 */ 1226 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE && 1227 dest.nr == 0) { 1228 p->current->header.predicate_control = BRW_PREDICATE_NORMAL; 1229 p->flag_value = 0xff; 1230 } 1231} 1232 1233/* Issue 'wait' instruction for n1, host could program MMIO 1234 to wake up thread. */ 1235void brw_WAIT (struct brw_compile *p) 1236{ 1237 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT); 1238 struct brw_reg src = brw_notification_1_reg(); 1239 1240 brw_set_dest(p, insn, src); 1241 brw_set_src0(insn, src); 1242 brw_set_src1(insn, brw_null_reg()); 1243 insn->header.execution_size = 0; /* must */ 1244 insn->header.predicate_control = 0; 1245 insn->header.compression_control = 0; 1246} 1247 1248 1249/*********************************************************************** 1250 * Helpers for the various SEND message types: 1251 */ 1252 1253/** Extended math function, float[8]. 1254 */ 1255void brw_math( struct brw_compile *p, 1256 struct brw_reg dest, 1257 GLuint function, 1258 GLuint saturate, 1259 GLuint msg_reg_nr, 1260 struct brw_reg src, 1261 GLuint data_type, 1262 GLuint precision ) 1263{ 1264 struct intel_context *intel = &p->brw->intel; 1265 1266 if (intel->gen >= 6) { 1267 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH); 1268 1269 assert(dest.file == BRW_GENERAL_REGISTER_FILE); 1270 assert(src.file == BRW_GENERAL_REGISTER_FILE); 1271 1272 assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1); 1273 assert(src.hstride == BRW_HORIZONTAL_STRIDE_1); 1274 1275 if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT && 1276 function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) { 1277 assert(src.type == BRW_REGISTER_TYPE_F); 1278 } 1279 1280 /* Math is the same ISA format as other opcodes, except that CondModifier 1281 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. 1282 */ 1283 insn->header.destreg__conditionalmod = function; 1284 insn->header.saturate = saturate; 1285 1286 brw_set_dest(p, insn, dest); 1287 brw_set_src0(insn, src); 1288 brw_set_src1(insn, brw_null_reg()); 1289 } else { 1290 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1291 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; 1292 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; 1293 /* Example code doesn't set predicate_control for send 1294 * instructions. 1295 */ 1296 insn->header.predicate_control = 0; 1297 insn->header.destreg__conditionalmod = msg_reg_nr; 1298 1299 brw_set_dest(p, insn, dest); 1300 brw_set_src0(insn, src); 1301 brw_set_math_message(p->brw, 1302 insn, 1303 msg_length, response_length, 1304 function, 1305 BRW_MATH_INTEGER_UNSIGNED, 1306 precision, 1307 saturate, 1308 data_type); 1309 } 1310} 1311 1312/** Extended math function, float[8]. 1313 */ 1314void brw_math2(struct brw_compile *p, 1315 struct brw_reg dest, 1316 GLuint function, 1317 struct brw_reg src0, 1318 struct brw_reg src1) 1319{ 1320 struct intel_context *intel = &p->brw->intel; 1321 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH); 1322 1323 assert(intel->gen >= 6); 1324 (void) intel; 1325 1326 1327 assert(dest.file == BRW_GENERAL_REGISTER_FILE); 1328 assert(src0.file == BRW_GENERAL_REGISTER_FILE); 1329 assert(src1.file == BRW_GENERAL_REGISTER_FILE); 1330 1331 assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1); 1332 assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1); 1333 assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1); 1334 1335 if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT && 1336 function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) { 1337 assert(src0.type == BRW_REGISTER_TYPE_F); 1338 assert(src1.type == BRW_REGISTER_TYPE_F); 1339 } 1340 1341 /* Math is the same ISA format as other opcodes, except that CondModifier 1342 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. 1343 */ 1344 insn->header.destreg__conditionalmod = function; 1345 1346 brw_set_dest(p, insn, dest); 1347 brw_set_src0(insn, src0); 1348 brw_set_src1(insn, src1); 1349} 1350 1351/** 1352 * Extended math function, float[16]. 1353 * Use 2 send instructions. 1354 */ 1355void brw_math_16( struct brw_compile *p, 1356 struct brw_reg dest, 1357 GLuint function, 1358 GLuint saturate, 1359 GLuint msg_reg_nr, 1360 struct brw_reg src, 1361 GLuint precision ) 1362{ 1363 struct intel_context *intel = &p->brw->intel; 1364 struct brw_instruction *insn; 1365 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; 1366 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; 1367 1368 if (intel->gen >= 6) { 1369 insn = next_insn(p, BRW_OPCODE_MATH); 1370 1371 /* Math is the same ISA format as other opcodes, except that CondModifier 1372 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. 1373 */ 1374 insn->header.destreg__conditionalmod = function; 1375 insn->header.saturate = saturate; 1376 1377 brw_set_dest(p, insn, dest); 1378 brw_set_src0(insn, src); 1379 brw_set_src1(insn, brw_null_reg()); 1380 return; 1381 } 1382 1383 /* First instruction: 1384 */ 1385 brw_push_insn_state(p); 1386 brw_set_predicate_control_flag_value(p, 0xff); 1387 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1388 1389 insn = next_insn(p, BRW_OPCODE_SEND); 1390 insn->header.destreg__conditionalmod = msg_reg_nr; 1391 1392 brw_set_dest(p, insn, dest); 1393 brw_set_src0(insn, src); 1394 brw_set_math_message(p->brw, 1395 insn, 1396 msg_length, response_length, 1397 function, 1398 BRW_MATH_INTEGER_UNSIGNED, 1399 precision, 1400 saturate, 1401 BRW_MATH_DATA_VECTOR); 1402 1403 /* Second instruction: 1404 */ 1405 insn = next_insn(p, BRW_OPCODE_SEND); 1406 insn->header.compression_control = BRW_COMPRESSION_2NDHALF; 1407 insn->header.destreg__conditionalmod = msg_reg_nr+1; 1408 1409 brw_set_dest(p, insn, offset(dest,1)); 1410 brw_set_src0(insn, src); 1411 brw_set_math_message(p->brw, 1412 insn, 1413 msg_length, response_length, 1414 function, 1415 BRW_MATH_INTEGER_UNSIGNED, 1416 precision, 1417 saturate, 1418 BRW_MATH_DATA_VECTOR); 1419 1420 brw_pop_insn_state(p); 1421} 1422 1423 1424/** 1425 * Write a block of OWORDs (half a GRF each) from the scratch buffer, 1426 * using a constant offset per channel. 1427 * 1428 * The offset must be aligned to oword size (16 bytes). Used for 1429 * register spilling. 1430 */ 1431void brw_oword_block_write_scratch(struct brw_compile *p, 1432 struct brw_reg mrf, 1433 int num_regs, 1434 GLuint offset) 1435{ 1436 struct intel_context *intel = &p->brw->intel; 1437 uint32_t msg_control; 1438 int mlen; 1439 1440 mrf = retype(mrf, BRW_REGISTER_TYPE_UD); 1441 1442 if (num_regs == 1) { 1443 msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS; 1444 mlen = 2; 1445 } else { 1446 msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS; 1447 mlen = 3; 1448 } 1449 1450 /* Set up the message header. This is g0, with g0.2 filled with 1451 * the offset. We don't want to leave our offset around in g0 or 1452 * it'll screw up texture samples, so set it up inside the message 1453 * reg. 1454 */ 1455 { 1456 brw_push_insn_state(p); 1457 brw_set_mask_control(p, BRW_MASK_DISABLE); 1458 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1459 1460 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); 1461 1462 /* set message header global offset field (reg 0, element 2) */ 1463 brw_MOV(p, 1464 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 1465 mrf.nr, 1466 2), BRW_REGISTER_TYPE_UD), 1467 brw_imm_ud(offset)); 1468 1469 brw_pop_insn_state(p); 1470 } 1471 1472 { 1473 struct brw_reg dest; 1474 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1475 int send_commit_msg; 1476 struct brw_reg src_header = retype(brw_vec8_grf(0, 0), 1477 BRW_REGISTER_TYPE_UW); 1478 1479 if (insn->header.compression_control != BRW_COMPRESSION_NONE) { 1480 insn->header.compression_control = BRW_COMPRESSION_NONE; 1481 src_header = vec16(src_header); 1482 } 1483 assert(insn->header.predicate_control == BRW_PREDICATE_NONE); 1484 insn->header.destreg__conditionalmod = mrf.nr; 1485 1486 /* Until gen6, writes followed by reads from the same location 1487 * are not guaranteed to be ordered unless write_commit is set. 1488 * If set, then a no-op write is issued to the destination 1489 * register to set a dependency, and a read from the destination 1490 * can be used to ensure the ordering. 1491 * 1492 * For gen6, only writes between different threads need ordering 1493 * protection. Our use of DP writes is all about register 1494 * spilling within a thread. 1495 */ 1496 if (intel->gen >= 6) { 1497 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW); 1498 send_commit_msg = 0; 1499 } else { 1500 dest = src_header; 1501 send_commit_msg = 1; 1502 } 1503 1504 brw_set_dest(p, insn, dest); 1505 brw_set_src0(insn, brw_null_reg()); 1506 1507 brw_set_dp_write_message(p->brw, 1508 insn, 1509 255, /* binding table index (255=stateless) */ 1510 msg_control, 1511 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */ 1512 mlen, 1513 GL_TRUE, /* header_present */ 1514 0, /* pixel scoreboard */ 1515 send_commit_msg, /* response_length */ 1516 0, /* eot */ 1517 send_commit_msg); 1518 } 1519} 1520 1521 1522/** 1523 * Read a block of owords (half a GRF each) from the scratch buffer 1524 * using a constant index per channel. 1525 * 1526 * Offset must be aligned to oword size (16 bytes). Used for register 1527 * spilling. 1528 */ 1529void 1530brw_oword_block_read_scratch(struct brw_compile *p, 1531 struct brw_reg dest, 1532 struct brw_reg mrf, 1533 int num_regs, 1534 GLuint offset) 1535{ 1536 uint32_t msg_control; 1537 int rlen; 1538 1539 mrf = retype(mrf, BRW_REGISTER_TYPE_UD); 1540 dest = retype(dest, BRW_REGISTER_TYPE_UW); 1541 1542 if (num_regs == 1) { 1543 msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS; 1544 rlen = 1; 1545 } else { 1546 msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS; 1547 rlen = 2; 1548 } 1549 1550 { 1551 brw_push_insn_state(p); 1552 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1553 brw_set_mask_control(p, BRW_MASK_DISABLE); 1554 1555 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); 1556 1557 /* set message header global offset field (reg 0, element 2) */ 1558 brw_MOV(p, 1559 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 1560 mrf.nr, 1561 2), BRW_REGISTER_TYPE_UD), 1562 brw_imm_ud(offset)); 1563 1564 brw_pop_insn_state(p); 1565 } 1566 1567 { 1568 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1569 1570 assert(insn->header.predicate_control == 0); 1571 insn->header.compression_control = BRW_COMPRESSION_NONE; 1572 insn->header.destreg__conditionalmod = mrf.nr; 1573 1574 brw_set_dest(p, insn, dest); /* UW? */ 1575 brw_set_src0(insn, brw_null_reg()); 1576 1577 brw_set_dp_read_message(p->brw, 1578 insn, 1579 255, /* binding table index (255=stateless) */ 1580 msg_control, 1581 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ 1582 1, /* target cache (render/scratch) */ 1583 1, /* msg_length */ 1584 rlen); 1585 } 1586} 1587 1588/** 1589 * Read a float[4] vector from the data port Data Cache (const buffer). 1590 * Location (in buffer) should be a multiple of 16. 1591 * Used for fetching shader constants. 1592 */ 1593void brw_oword_block_read(struct brw_compile *p, 1594 struct brw_reg dest, 1595 struct brw_reg mrf, 1596 uint32_t offset, 1597 uint32_t bind_table_index) 1598{ 1599 struct intel_context *intel = &p->brw->intel; 1600 1601 /* On newer hardware, offset is in units of owords. */ 1602 if (intel->gen >= 6) 1603 offset /= 16; 1604 1605 mrf = retype(mrf, BRW_REGISTER_TYPE_UD); 1606 1607 brw_push_insn_state(p); 1608 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1609 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1610 brw_set_mask_control(p, BRW_MASK_DISABLE); 1611 1612 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); 1613 1614 /* set message header global offset field (reg 0, element 2) */ 1615 brw_MOV(p, 1616 retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 1617 mrf.nr, 1618 2), BRW_REGISTER_TYPE_UD), 1619 brw_imm_ud(offset)); 1620 1621 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1622 insn->header.destreg__conditionalmod = mrf.nr; 1623 1624 /* cast dest to a uword[8] vector */ 1625 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); 1626 1627 brw_set_dest(p, insn, dest); 1628 if (intel->gen >= 6) { 1629 brw_set_src0(insn, mrf); 1630 } else { 1631 brw_set_src0(insn, brw_null_reg()); 1632 } 1633 1634 brw_set_dp_read_message(p->brw, 1635 insn, 1636 bind_table_index, 1637 BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW, 1638 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, 1639 0, /* source cache = data cache */ 1640 1, /* msg_length */ 1641 1); /* response_length (1 reg, 2 owords!) */ 1642 1643 brw_pop_insn_state(p); 1644} 1645 1646/** 1647 * Read a set of dwords from the data port Data Cache (const buffer). 1648 * 1649 * Location (in buffer) appears as UD offsets in the register after 1650 * the provided mrf header reg. 1651 */ 1652void brw_dword_scattered_read(struct brw_compile *p, 1653 struct brw_reg dest, 1654 struct brw_reg mrf, 1655 uint32_t bind_table_index) 1656{ 1657 mrf = retype(mrf, BRW_REGISTER_TYPE_UD); 1658 1659 brw_push_insn_state(p); 1660 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1661 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1662 brw_set_mask_control(p, BRW_MASK_DISABLE); 1663 brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); 1664 brw_pop_insn_state(p); 1665 1666 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1667 insn->header.destreg__conditionalmod = mrf.nr; 1668 1669 /* cast dest to a uword[8] vector */ 1670 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); 1671 1672 brw_set_dest(p, insn, dest); 1673 brw_set_src0(insn, brw_null_reg()); 1674 1675 brw_set_dp_read_message(p->brw, 1676 insn, 1677 bind_table_index, 1678 BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS, 1679 BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ, 1680 0, /* source cache = data cache */ 1681 2, /* msg_length */ 1682 1); /* response_length */ 1683} 1684 1685 1686 1687/** 1688 * Read float[4] constant(s) from VS constant buffer. 1689 * For relative addressing, two float[4] constants will be read into 'dest'. 1690 * Otherwise, one float[4] constant will be read into the lower half of 'dest'. 1691 */ 1692void brw_dp_READ_4_vs(struct brw_compile *p, 1693 struct brw_reg dest, 1694 GLuint location, 1695 GLuint bind_table_index) 1696{ 1697 struct brw_instruction *insn; 1698 GLuint msg_reg_nr = 1; 1699 struct brw_reg b; 1700 1701 /* 1702 printf("vs const read msg, location %u, msg_reg_nr %d\n", 1703 location, msg_reg_nr); 1704 */ 1705 1706 /* Setup MRF[1] with location/offset into const buffer */ 1707 brw_push_insn_state(p); 1708 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1709 brw_set_mask_control(p, BRW_MASK_DISABLE); 1710 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1711 1712 /* XXX I think we're setting all the dwords of MRF[1] to 'location'. 1713 * when the docs say only dword[2] should be set. Hmmm. But it works. 1714 */ 1715 b = brw_message_reg(msg_reg_nr); 1716 b = retype(b, BRW_REGISTER_TYPE_UD); 1717 /*b = get_element_ud(b, 2);*/ 1718 brw_MOV(p, b, brw_imm_ud(location)); 1719 1720 brw_pop_insn_state(p); 1721 1722 insn = next_insn(p, BRW_OPCODE_SEND); 1723 1724 insn->header.predicate_control = BRW_PREDICATE_NONE; 1725 insn->header.compression_control = BRW_COMPRESSION_NONE; 1726 insn->header.destreg__conditionalmod = msg_reg_nr; 1727 insn->header.mask_control = BRW_MASK_DISABLE; 1728 1729 brw_set_dest(p, insn, dest); 1730 brw_set_src0(insn, brw_null_reg()); 1731 1732 brw_set_dp_read_message(p->brw, 1733 insn, 1734 bind_table_index, 1735 0, 1736 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ 1737 0, /* source cache = data cache */ 1738 1, /* msg_length */ 1739 1); /* response_length (1 Oword) */ 1740} 1741 1742/** 1743 * Read a float[4] constant per vertex from VS constant buffer, with 1744 * relative addressing. 1745 */ 1746void brw_dp_READ_4_vs_relative(struct brw_compile *p, 1747 struct brw_reg dest, 1748 struct brw_reg addr_reg, 1749 GLuint offset, 1750 GLuint bind_table_index) 1751{ 1752 struct intel_context *intel = &p->brw->intel; 1753 int msg_type; 1754 1755 /* Setup MRF[1] with offset into const buffer */ 1756 brw_push_insn_state(p); 1757 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1758 brw_set_mask_control(p, BRW_MASK_DISABLE); 1759 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1760 1761 /* M1.0 is block offset 0, M1.4 is block offset 1, all other 1762 * fields ignored. 1763 */ 1764 brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), 1765 addr_reg, brw_imm_d(offset)); 1766 brw_pop_insn_state(p); 1767 1768 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1769 1770 insn->header.predicate_control = BRW_PREDICATE_NONE; 1771 insn->header.compression_control = BRW_COMPRESSION_NONE; 1772 insn->header.destreg__conditionalmod = 0; 1773 insn->header.mask_control = BRW_MASK_DISABLE; 1774 1775 brw_set_dest(p, insn, dest); 1776 brw_set_src0(insn, brw_vec8_grf(0, 0)); 1777 1778 if (intel->gen == 6) 1779 msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; 1780 else if (intel->gen == 5 || intel->is_g4x) 1781 msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; 1782 else 1783 msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; 1784 1785 brw_set_dp_read_message(p->brw, 1786 insn, 1787 bind_table_index, 1788 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, 1789 msg_type, 1790 0, /* source cache = data cache */ 1791 2, /* msg_length */ 1792 1); /* response_length */ 1793} 1794 1795 1796 1797void brw_fb_WRITE(struct brw_compile *p, 1798 int dispatch_width, 1799 struct brw_reg dest, 1800 GLuint msg_reg_nr, 1801 struct brw_reg src0, 1802 GLuint binding_table_index, 1803 GLuint msg_length, 1804 GLuint response_length, 1805 GLboolean eot) 1806{ 1807 struct intel_context *intel = &p->brw->intel; 1808 struct brw_instruction *insn; 1809 GLuint msg_control, msg_type; 1810 GLboolean header_present = GL_TRUE; 1811 1812 if (intel->gen >= 6 && binding_table_index == 0) { 1813 insn = next_insn(p, BRW_OPCODE_SENDC); 1814 } else { 1815 insn = next_insn(p, BRW_OPCODE_SEND); 1816 } 1817 /* The execution mask is ignored for render target writes. */ 1818 insn->header.predicate_control = 0; 1819 insn->header.compression_control = BRW_COMPRESSION_NONE; 1820 1821 if (intel->gen >= 6) { 1822 if (msg_length == 4) 1823 header_present = GL_FALSE; 1824 1825 /* headerless version, just submit color payload */ 1826 src0 = brw_message_reg(msg_reg_nr); 1827 1828 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE_GEN6; 1829 } else { 1830 insn->header.destreg__conditionalmod = msg_reg_nr; 1831 1832 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; 1833 } 1834 1835 if (dispatch_width == 16) 1836 msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE; 1837 else 1838 msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01; 1839 1840 brw_set_dest(p, insn, dest); 1841 brw_set_src0(insn, src0); 1842 brw_set_dp_write_message(p->brw, 1843 insn, 1844 binding_table_index, 1845 msg_control, 1846 msg_type, 1847 msg_length, 1848 header_present, 1849 1, /* pixel scoreboard */ 1850 response_length, 1851 eot, 1852 0 /* send_commit_msg */); 1853} 1854 1855 1856/** 1857 * Texture sample instruction. 1858 * Note: the msg_type plus msg_length values determine exactly what kind 1859 * of sampling operation is performed. See volume 4, page 161 of docs. 1860 */ 1861void brw_SAMPLE(struct brw_compile *p, 1862 struct brw_reg dest, 1863 GLuint msg_reg_nr, 1864 struct brw_reg src0, 1865 GLuint binding_table_index, 1866 GLuint sampler, 1867 GLuint writemask, 1868 GLuint msg_type, 1869 GLuint response_length, 1870 GLuint msg_length, 1871 GLboolean eot, 1872 GLuint header_present, 1873 GLuint simd_mode) 1874{ 1875 struct intel_context *intel = &p->brw->intel; 1876 GLboolean need_stall = 0; 1877 1878 if (writemask == 0) { 1879 /*printf("%s: zero writemask??\n", __FUNCTION__); */ 1880 return; 1881 } 1882 1883 /* Hardware doesn't do destination dependency checking on send 1884 * instructions properly. Add a workaround which generates the 1885 * dependency by other means. In practice it seems like this bug 1886 * only crops up for texture samples, and only where registers are 1887 * written by the send and then written again later without being 1888 * read in between. Luckily for us, we already track that 1889 * information and use it to modify the writemask for the 1890 * instruction, so that is a guide for whether a workaround is 1891 * needed. 1892 */ 1893 if (writemask != WRITEMASK_XYZW) { 1894 GLuint dst_offset = 0; 1895 GLuint i, newmask = 0, len = 0; 1896 1897 for (i = 0; i < 4; i++) { 1898 if (writemask & (1<<i)) 1899 break; 1900 dst_offset += 2; 1901 } 1902 for (; i < 4; i++) { 1903 if (!(writemask & (1<<i))) 1904 break; 1905 newmask |= 1<<i; 1906 len++; 1907 } 1908 1909 if (newmask != writemask) { 1910 need_stall = 1; 1911 /* printf("need stall %x %x\n", newmask , writemask); */ 1912 } 1913 else { 1914 GLboolean dispatch_16 = GL_FALSE; 1915 1916 struct brw_reg m1 = brw_message_reg(msg_reg_nr); 1917 1918 guess_execution_size(p, p->current, dest); 1919 if (p->current->header.execution_size == BRW_EXECUTE_16) 1920 dispatch_16 = GL_TRUE; 1921 1922 newmask = ~newmask & WRITEMASK_XYZW; 1923 1924 brw_push_insn_state(p); 1925 1926 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1927 brw_set_mask_control(p, BRW_MASK_DISABLE); 1928 1929 brw_MOV(p, m1, brw_vec8_grf(0,0)); 1930 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12)); 1931 1932 brw_pop_insn_state(p); 1933 1934 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); 1935 dest = offset(dest, dst_offset); 1936 1937 /* For 16-wide dispatch, masked channels are skipped in the 1938 * response. For 8-wide, masked channels still take up slots, 1939 * and are just not written to. 1940 */ 1941 if (dispatch_16) 1942 response_length = len * 2; 1943 } 1944 } 1945 1946 { 1947 struct brw_instruction *insn; 1948 1949 /* Sandybridge doesn't have the implied move for SENDs, 1950 * and the first message register index comes from src0. 1951 */ 1952 if (intel->gen >= 6) { 1953 if (src0.file != BRW_ARCHITECTURE_REGISTER_FILE || 1954 src0.nr != BRW_ARF_NULL) { 1955 brw_push_insn_state(p); 1956 brw_set_mask_control( p, BRW_MASK_DISABLE ); 1957 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1958 brw_MOV(p, retype(brw_message_reg(msg_reg_nr), src0.type), src0); 1959 brw_pop_insn_state(p); 1960 } 1961 src0 = brw_message_reg(msg_reg_nr); 1962 } 1963 1964 insn = next_insn(p, BRW_OPCODE_SEND); 1965 insn->header.predicate_control = 0; /* XXX */ 1966 insn->header.compression_control = BRW_COMPRESSION_NONE; 1967 if (intel->gen < 6) 1968 insn->header.destreg__conditionalmod = msg_reg_nr; 1969 1970 brw_set_dest(p, insn, dest); 1971 brw_set_src0(insn, src0); 1972 brw_set_sampler_message(p->brw, insn, 1973 binding_table_index, 1974 sampler, 1975 msg_type, 1976 response_length, 1977 msg_length, 1978 eot, 1979 header_present, 1980 simd_mode); 1981 } 1982 1983 if (need_stall) { 1984 struct brw_reg reg = vec8(offset(dest, response_length-1)); 1985 1986 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 } 1987 */ 1988 brw_push_insn_state(p); 1989 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1990 brw_MOV(p, reg, reg); 1991 brw_pop_insn_state(p); 1992 } 1993 1994} 1995 1996/* All these variables are pretty confusing - we might be better off 1997 * using bitmasks and macros for this, in the old style. Or perhaps 1998 * just having the caller instantiate the fields in dword3 itself. 1999 */ 2000void brw_urb_WRITE(struct brw_compile *p, 2001 struct brw_reg dest, 2002 GLuint msg_reg_nr, 2003 struct brw_reg src0, 2004 GLboolean allocate, 2005 GLboolean used, 2006 GLuint msg_length, 2007 GLuint response_length, 2008 GLboolean eot, 2009 GLboolean writes_complete, 2010 GLuint offset, 2011 GLuint swizzle) 2012{ 2013 struct intel_context *intel = &p->brw->intel; 2014 struct brw_instruction *insn; 2015 2016 /* Sandybridge doesn't have the implied move for SENDs, 2017 * and the first message register index comes from src0. 2018 */ 2019 if (intel->gen >= 6) { 2020 brw_push_insn_state(p); 2021 brw_set_mask_control( p, BRW_MASK_DISABLE ); 2022 brw_MOV(p, brw_message_reg(msg_reg_nr), src0); 2023 brw_pop_insn_state(p); 2024 src0 = brw_message_reg(msg_reg_nr); 2025 } 2026 2027 insn = next_insn(p, BRW_OPCODE_SEND); 2028 2029 assert(msg_length < BRW_MAX_MRF); 2030 2031 brw_set_dest(p, insn, dest); 2032 brw_set_src0(insn, src0); 2033 brw_set_src1(insn, brw_imm_d(0)); 2034 2035 if (intel->gen < 6) 2036 insn->header.destreg__conditionalmod = msg_reg_nr; 2037 2038 brw_set_urb_message(p->brw, 2039 insn, 2040 allocate, 2041 used, 2042 msg_length, 2043 response_length, 2044 eot, 2045 writes_complete, 2046 offset, 2047 swizzle); 2048} 2049 2050static int 2051brw_find_next_block_end(struct brw_compile *p, int start) 2052{ 2053 int ip; 2054 2055 for (ip = start + 1; ip < p->nr_insn; ip++) { 2056 struct brw_instruction *insn = &p->store[ip]; 2057 2058 switch (insn->header.opcode) { 2059 case BRW_OPCODE_ENDIF: 2060 case BRW_OPCODE_ELSE: 2061 case BRW_OPCODE_WHILE: 2062 return ip; 2063 } 2064 } 2065 assert(!"not reached"); 2066 return start + 1; 2067} 2068 2069/* There is no DO instruction on gen6, so to find the end of the loop 2070 * we have to see if the loop is jumping back before our start 2071 * instruction. 2072 */ 2073static int 2074brw_find_loop_end(struct brw_compile *p, int start) 2075{ 2076 int ip; 2077 int br = 2; 2078 2079 for (ip = start + 1; ip < p->nr_insn; ip++) { 2080 struct brw_instruction *insn = &p->store[ip]; 2081 2082 if (insn->header.opcode == BRW_OPCODE_WHILE) { 2083 if (ip + insn->bits1.branch_gen6.jump_count / br < start) 2084 return ip; 2085 } 2086 } 2087 assert(!"not reached"); 2088 return start + 1; 2089} 2090 2091/* After program generation, go back and update the UIP and JIP of 2092 * BREAK and CONT instructions to their correct locations. 2093 */ 2094void 2095brw_set_uip_jip(struct brw_compile *p) 2096{ 2097 struct intel_context *intel = &p->brw->intel; 2098 int ip; 2099 int br = 2; 2100 2101 if (intel->gen < 6) 2102 return; 2103 2104 for (ip = 0; ip < p->nr_insn; ip++) { 2105 struct brw_instruction *insn = &p->store[ip]; 2106 2107 switch (insn->header.opcode) { 2108 case BRW_OPCODE_BREAK: 2109 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip); 2110 insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip + 1); 2111 break; 2112 case BRW_OPCODE_CONTINUE: 2113 /* JIP is set at CONTINUE emit time, since that's when we 2114 * know where the start of the loop is. 2115 */ 2116 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip); 2117 assert(insn->bits3.break_cont.uip != 0); 2118 assert(insn->bits3.break_cont.jip != 0); 2119 break; 2120 } 2121 } 2122} 2123 2124void brw_ff_sync(struct brw_compile *p, 2125 struct brw_reg dest, 2126 GLuint msg_reg_nr, 2127 struct brw_reg src0, 2128 GLboolean allocate, 2129 GLuint response_length, 2130 GLboolean eot) 2131{ 2132 struct intel_context *intel = &p->brw->intel; 2133 struct brw_instruction *insn; 2134 2135 /* Sandybridge doesn't have the implied move for SENDs, 2136 * and the first message register index comes from src0. 2137 */ 2138 if (intel->gen >= 6) { 2139 brw_push_insn_state(p); 2140 brw_set_mask_control( p, BRW_MASK_DISABLE ); 2141 brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD), 2142 retype(src0, BRW_REGISTER_TYPE_UD)); 2143 brw_pop_insn_state(p); 2144 src0 = brw_message_reg(msg_reg_nr); 2145 } 2146 2147 insn = next_insn(p, BRW_OPCODE_SEND); 2148 brw_set_dest(p, insn, dest); 2149 brw_set_src0(insn, src0); 2150 brw_set_src1(insn, brw_imm_d(0)); 2151 2152 if (intel->gen < 6) 2153 insn->header.destreg__conditionalmod = msg_reg_nr; 2154 2155 brw_set_ff_sync_message(p->brw, 2156 insn, 2157 allocate, 2158 response_length, 2159 eot); 2160} 2161