brw_eu_emit.c revision 0002069fd5117b52f0ae2be0b7e3d8e839a3a61c
1/* 2 Copyright (C) Intel Corp. 2006. All Rights Reserved. 3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to 4 develop this 3D driver. 5 6 Permission is hereby granted, free of charge, to any person obtaining 7 a copy of this software and associated documentation files (the 8 "Software"), to deal in the Software without restriction, including 9 without limitation the rights to use, copy, modify, merge, publish, 10 distribute, sublicense, and/or sell copies of the Software, and to 11 permit persons to whom the Software is furnished to do so, subject to 12 the following conditions: 13 14 The above copyright notice and this permission notice (including the 15 next paragraph) shall be included in all copies or substantial 16 portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
25 26 **********************************************************************/ 27 /* 28 * Authors: 29 * Keith Whitwell <keith@tungstengraphics.com> 30 */ 31 32 33#include "brw_context.h" 34#include "brw_defines.h" 35#include "brw_eu.h" 36 37 38 39 40/*********************************************************************** 41 * Internal helper for constructing instructions 42 */ 43 44static void guess_execution_size( struct brw_instruction *insn, 45 struct brw_reg reg ) 46{ 47 if (reg.width == BRW_WIDTH_8 && 48 insn->header.compression_control == BRW_COMPRESSION_COMPRESSED) 49 insn->header.execution_size = BRW_EXECUTE_16; 50 else 51 insn->header.execution_size = reg.width; /* note - definitions are compatible */ 52} 53 54 55static void brw_set_dest( struct brw_instruction *insn, 56 struct brw_reg dest ) 57{ 58 if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && 59 dest.file != BRW_MESSAGE_REGISTER_FILE) 60 assert(dest.nr < 128); 61 62 insn->bits1.da1.dest_reg_file = dest.file; 63 insn->bits1.da1.dest_reg_type = dest.type; 64 insn->bits1.da1.dest_address_mode = dest.address_mode; 65 66 if (dest.address_mode == BRW_ADDRESS_DIRECT) { 67 insn->bits1.da1.dest_reg_nr = dest.nr; 68 69 if (insn->header.access_mode == BRW_ALIGN_1) { 70 insn->bits1.da1.dest_subreg_nr = dest.subnr; 71 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) 72 dest.hstride = BRW_HORIZONTAL_STRIDE_1; 73 insn->bits1.da1.dest_horiz_stride = dest.hstride; 74 } 75 else { 76 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; 77 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; 78 /* even ignored in da16, still need to set as '01' */ 79 insn->bits1.da16.dest_horiz_stride = 1; 80 } 81 } 82 else { 83 insn->bits1.ia1.dest_subreg_nr = dest.subnr; 84 85 /* These are different sizes in align1 vs align16: 86 */ 87 if (insn->header.access_mode == BRW_ALIGN_1) { 88 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; 89 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) 90 dest.hstride = 
BRW_HORIZONTAL_STRIDE_1; 91 insn->bits1.ia1.dest_horiz_stride = dest.hstride; 92 } 93 else { 94 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; 95 /* even ignored in da16, still need to set as '01' */ 96 insn->bits1.ia16.dest_horiz_stride = 1; 97 } 98 } 99 100 /* NEW: Set the execution size based on dest.width and 101 * insn->compression_control: 102 */ 103 guess_execution_size(insn, dest); 104} 105 106extern int reg_type_size[]; 107 108static void 109validate_reg(struct brw_instruction *insn, struct brw_reg reg) 110{ 111 int hstride_for_reg[] = {0, 1, 2, 4}; 112 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256}; 113 int width_for_reg[] = {1, 2, 4, 8, 16}; 114 int execsize_for_reg[] = {1, 2, 4, 8, 16}; 115 int width, hstride, vstride, execsize; 116 117 if (reg.file == BRW_IMMEDIATE_VALUE) { 118 /* 3.3.6: Region Parameters. Restriction: Immediate vectors 119 * mean the destination has to be 128-bit aligned and the 120 * destination horiz stride has to be a word. 121 */ 122 if (reg.type == BRW_REGISTER_TYPE_V) { 123 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] * 124 reg_type_size[insn->bits1.da1.dest_reg_type] == 2); 125 } 126 127 return; 128 } 129 130 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE && 131 reg.file == BRW_ARF_NULL) 132 return; 133 134 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg)); 135 hstride = hstride_for_reg[reg.hstride]; 136 137 if (reg.vstride == 0xf) { 138 vstride = -1; 139 } else { 140 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg)); 141 vstride = vstride_for_reg[reg.vstride]; 142 } 143 144 assert(reg.width >= 0 && reg.width < Elements(width_for_reg)); 145 width = width_for_reg[reg.width]; 146 147 assert(insn->header.execution_size >= 0 && 148 insn->header.execution_size < Elements(execsize_for_reg)); 149 execsize = execsize_for_reg[insn->header.execution_size]; 150 151 /* Restrictions from 3.3.10: Register Region Restrictions. */ 152 /* 3. 
*/ 153 assert(execsize >= width); 154 155 /* 4. */ 156 if (execsize == width && hstride != 0) { 157 assert(vstride == -1 || vstride == width * hstride); 158 } 159 160 /* 5. */ 161 if (execsize == width && hstride == 0) { 162 /* no restriction on vstride. */ 163 } 164 165 /* 6. */ 166 if (width == 1) { 167 assert(hstride == 0); 168 } 169 170 /* 7. */ 171 if (execsize == 1 && width == 1) { 172 assert(hstride == 0); 173 assert(vstride == 0); 174 } 175 176 /* 8. */ 177 if (vstride == 0 && hstride == 0) { 178 assert(width == 1); 179 } 180 181 /* 10. Check destination issues. */ 182} 183 184static void brw_set_src0( struct brw_instruction *insn, 185 struct brw_reg reg ) 186{ 187 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) 188 assert(reg.nr < 128); 189 190 validate_reg(insn, reg); 191 192 insn->bits1.da1.src0_reg_file = reg.file; 193 insn->bits1.da1.src0_reg_type = reg.type; 194 insn->bits2.da1.src0_abs = reg.abs; 195 insn->bits2.da1.src0_negate = reg.negate; 196 insn->bits2.da1.src0_address_mode = reg.address_mode; 197 198 if (reg.file == BRW_IMMEDIATE_VALUE) { 199 insn->bits3.ud = reg.dw1.ud; 200 201 /* Required to set some fields in src1 as well: 202 */ 203 insn->bits1.da1.src1_reg_file = 0; /* arf */ 204 insn->bits1.da1.src1_reg_type = reg.type; 205 } 206 else 207 { 208 if (reg.address_mode == BRW_ADDRESS_DIRECT) { 209 if (insn->header.access_mode == BRW_ALIGN_1) { 210 insn->bits2.da1.src0_subreg_nr = reg.subnr; 211 insn->bits2.da1.src0_reg_nr = reg.nr; 212 } 213 else { 214 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16; 215 insn->bits2.da16.src0_reg_nr = reg.nr; 216 } 217 } 218 else { 219 insn->bits2.ia1.src0_subreg_nr = reg.subnr; 220 221 if (insn->header.access_mode == BRW_ALIGN_1) { 222 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; 223 } 224 else { 225 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; 226 } 227 } 228 229 if (insn->header.access_mode == BRW_ALIGN_1) { 230 if (reg.width == BRW_WIDTH_1 && 231 
insn->header.execution_size == BRW_EXECUTE_1) { 232 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0; 233 insn->bits2.da1.src0_width = BRW_WIDTH_1; 234 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0; 235 } 236 else { 237 insn->bits2.da1.src0_horiz_stride = reg.hstride; 238 insn->bits2.da1.src0_width = reg.width; 239 insn->bits2.da1.src0_vert_stride = reg.vstride; 240 } 241 } 242 else { 243 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); 244 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); 245 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); 246 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); 247 248 /* This is an oddity of the fact we're using the same 249 * descriptions for registers in align_16 as align_1: 250 */ 251 if (reg.vstride == BRW_VERTICAL_STRIDE_8) 252 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4; 253 else 254 insn->bits2.da16.src0_vert_stride = reg.vstride; 255 } 256 } 257} 258 259 260void brw_set_src1( struct brw_instruction *insn, 261 struct brw_reg reg ) 262{ 263 assert(reg.file != BRW_MESSAGE_REGISTER_FILE); 264 265 assert(reg.nr < 128); 266 267 validate_reg(insn, reg); 268 269 insn->bits1.da1.src1_reg_file = reg.file; 270 insn->bits1.da1.src1_reg_type = reg.type; 271 insn->bits3.da1.src1_abs = reg.abs; 272 insn->bits3.da1.src1_negate = reg.negate; 273 274 /* Only src1 can be immediate in two-argument instructions. 
275 */ 276 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE); 277 278 if (reg.file == BRW_IMMEDIATE_VALUE) { 279 insn->bits3.ud = reg.dw1.ud; 280 } 281 else { 282 /* This is a hardware restriction, which may or may not be lifted 283 * in the future: 284 */ 285 assert (reg.address_mode == BRW_ADDRESS_DIRECT); 286 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */ 287 288 if (insn->header.access_mode == BRW_ALIGN_1) { 289 insn->bits3.da1.src1_subreg_nr = reg.subnr; 290 insn->bits3.da1.src1_reg_nr = reg.nr; 291 } 292 else { 293 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16; 294 insn->bits3.da16.src1_reg_nr = reg.nr; 295 } 296 297 if (insn->header.access_mode == BRW_ALIGN_1) { 298 if (reg.width == BRW_WIDTH_1 && 299 insn->header.execution_size == BRW_EXECUTE_1) { 300 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0; 301 insn->bits3.da1.src1_width = BRW_WIDTH_1; 302 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0; 303 } 304 else { 305 insn->bits3.da1.src1_horiz_stride = reg.hstride; 306 insn->bits3.da1.src1_width = reg.width; 307 insn->bits3.da1.src1_vert_stride = reg.vstride; 308 } 309 } 310 else { 311 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); 312 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); 313 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); 314 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); 315 316 /* This is an oddity of the fact we're using the same 317 * descriptions for registers in align_16 as align_1: 318 */ 319 if (reg.vstride == BRW_VERTICAL_STRIDE_8) 320 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4; 321 else 322 insn->bits3.da16.src1_vert_stride = reg.vstride; 323 } 324 } 325} 326 327 328 329static void brw_set_math_message( struct brw_context *brw, 330 struct brw_instruction *insn, 331 GLuint msg_length, 332 GLuint response_length, 333 GLuint function, 334 GLuint 
integer_type, 335 GLboolean low_precision, 336 GLboolean saturate, 337 GLuint dataType ) 338{ 339 struct intel_context *intel = &brw->intel; 340 brw_set_src1(insn, brw_imm_d(0)); 341 342 if (intel->gen == 5) { 343 insn->bits3.math_gen5.function = function; 344 insn->bits3.math_gen5.int_type = integer_type; 345 insn->bits3.math_gen5.precision = low_precision; 346 insn->bits3.math_gen5.saturate = saturate; 347 insn->bits3.math_gen5.data_type = dataType; 348 insn->bits3.math_gen5.snapshot = 0; 349 insn->bits3.math_gen5.header_present = 0; 350 insn->bits3.math_gen5.response_length = response_length; 351 insn->bits3.math_gen5.msg_length = msg_length; 352 insn->bits3.math_gen5.end_of_thread = 0; 353 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH; 354 insn->bits2.send_gen5.end_of_thread = 0; 355 } else { 356 insn->bits3.math.function = function; 357 insn->bits3.math.int_type = integer_type; 358 insn->bits3.math.precision = low_precision; 359 insn->bits3.math.saturate = saturate; 360 insn->bits3.math.data_type = dataType; 361 insn->bits3.math.response_length = response_length; 362 insn->bits3.math.msg_length = msg_length; 363 insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH; 364 insn->bits3.math.end_of_thread = 0; 365 } 366} 367 368 369static void brw_set_ff_sync_message(struct brw_context *brw, 370 struct brw_instruction *insn, 371 GLboolean allocate, 372 GLuint response_length, 373 GLboolean end_of_thread) 374{ 375 struct intel_context *intel = &brw->intel; 376 brw_set_src1(insn, brw_imm_d(0)); 377 378 insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */ 379 insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */ 380 insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */ 381 insn->bits3.urb_gen5.allocate = allocate; 382 insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */ 383 insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */ 384 insn->bits3.urb_gen5.header_present = 1; 385 insn->bits3.urb_gen5.response_length = response_length; /* may 
be 1 or 0 */ 386 insn->bits3.urb_gen5.msg_length = 1; 387 insn->bits3.urb_gen5.end_of_thread = end_of_thread; 388 if (intel->gen >= 6) { 389 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB; 390 } else { 391 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB; 392 insn->bits2.send_gen5.end_of_thread = end_of_thread; 393 } 394} 395 396static void brw_set_urb_message( struct brw_context *brw, 397 struct brw_instruction *insn, 398 GLboolean allocate, 399 GLboolean used, 400 GLuint msg_length, 401 GLuint response_length, 402 GLboolean end_of_thread, 403 GLboolean complete, 404 GLuint offset, 405 GLuint swizzle_control ) 406{ 407 struct intel_context *intel = &brw->intel; 408 brw_set_src1(insn, brw_imm_d(0)); 409 410 if (intel->gen >= 5) { 411 insn->bits3.urb_gen5.opcode = 0; /* ? */ 412 insn->bits3.urb_gen5.offset = offset; 413 insn->bits3.urb_gen5.swizzle_control = swizzle_control; 414 insn->bits3.urb_gen5.allocate = allocate; 415 insn->bits3.urb_gen5.used = used; /* ? */ 416 insn->bits3.urb_gen5.complete = complete; 417 insn->bits3.urb_gen5.header_present = 1; 418 insn->bits3.urb_gen5.response_length = response_length; 419 insn->bits3.urb_gen5.msg_length = msg_length; 420 insn->bits3.urb_gen5.end_of_thread = end_of_thread; 421 if (intel->gen >= 6) { 422 /* For SNB, the SFID bits moved to the condmod bits, and 423 * EOT stayed in bits3 above. Does the EOT bit setting 424 * below on Ironlake even do anything? 425 */ 426 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB; 427 } else { 428 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB; 429 insn->bits2.send_gen5.end_of_thread = end_of_thread; 430 } 431 } else { 432 insn->bits3.urb.opcode = 0; /* ? */ 433 insn->bits3.urb.offset = offset; 434 insn->bits3.urb.swizzle_control = swizzle_control; 435 insn->bits3.urb.allocate = allocate; 436 insn->bits3.urb.used = used; /* ? 
*/ 437 insn->bits3.urb.complete = complete; 438 insn->bits3.urb.response_length = response_length; 439 insn->bits3.urb.msg_length = msg_length; 440 insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB; 441 insn->bits3.urb.end_of_thread = end_of_thread; 442 } 443} 444 445static void brw_set_dp_write_message( struct brw_context *brw, 446 struct brw_instruction *insn, 447 GLuint binding_table_index, 448 GLuint msg_control, 449 GLuint msg_type, 450 GLuint msg_length, 451 GLuint pixel_scoreboard_clear, 452 GLuint response_length, 453 GLuint end_of_thread, 454 GLuint send_commit_msg) 455{ 456 struct intel_context *intel = &brw->intel; 457 brw_set_src1(insn, brw_imm_ud(0)); 458 459 if (intel->gen >= 6) { 460 insn->bits3.dp_render_cache.binding_table_index = binding_table_index; 461 insn->bits3.dp_render_cache.msg_control = msg_control; 462 insn->bits3.dp_render_cache.pixel_scoreboard_clear = pixel_scoreboard_clear; 463 insn->bits3.dp_render_cache.msg_type = msg_type; 464 insn->bits3.dp_render_cache.send_commit_msg = send_commit_msg; 465 insn->bits3.dp_render_cache.header_present = 0; /* XXX */ 466 insn->bits3.dp_render_cache.response_length = response_length; 467 insn->bits3.dp_render_cache.msg_length = msg_length; 468 insn->bits3.dp_render_cache.end_of_thread = end_of_thread; 469 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_WRITE; 470 /* XXX really need below? 
*/ 471 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE; 472 insn->bits2.send_gen5.end_of_thread = end_of_thread; 473 } else if (intel->gen == 5) { 474 insn->bits3.dp_write_gen5.binding_table_index = binding_table_index; 475 insn->bits3.dp_write_gen5.msg_control = msg_control; 476 insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear; 477 insn->bits3.dp_write_gen5.msg_type = msg_type; 478 insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg; 479 insn->bits3.dp_write_gen5.header_present = 1; 480 insn->bits3.dp_write_gen5.response_length = response_length; 481 insn->bits3.dp_write_gen5.msg_length = msg_length; 482 insn->bits3.dp_write_gen5.end_of_thread = end_of_thread; 483 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE; 484 insn->bits2.send_gen5.end_of_thread = end_of_thread; 485 } else { 486 insn->bits3.dp_write.binding_table_index = binding_table_index; 487 insn->bits3.dp_write.msg_control = msg_control; 488 insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear; 489 insn->bits3.dp_write.msg_type = msg_type; 490 insn->bits3.dp_write.send_commit_msg = send_commit_msg; 491 insn->bits3.dp_write.response_length = response_length; 492 insn->bits3.dp_write.msg_length = msg_length; 493 insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE; 494 insn->bits3.dp_write.end_of_thread = end_of_thread; 495 } 496} 497 498static void brw_set_dp_read_message( struct brw_context *brw, 499 struct brw_instruction *insn, 500 GLuint binding_table_index, 501 GLuint msg_control, 502 GLuint msg_type, 503 GLuint target_cache, 504 GLuint msg_length, 505 GLuint response_length, 506 GLuint end_of_thread ) 507{ 508 struct intel_context *intel = &brw->intel; 509 brw_set_src1(insn, brw_imm_d(0)); 510 511 if (intel->gen == 5) { 512 insn->bits3.dp_read_gen5.binding_table_index = binding_table_index; 513 insn->bits3.dp_read_gen5.msg_control = msg_control; 514 insn->bits3.dp_read_gen5.msg_type = msg_type; 515 
insn->bits3.dp_read_gen5.target_cache = target_cache; 516 insn->bits3.dp_read_gen5.header_present = 1; 517 insn->bits3.dp_read_gen5.response_length = response_length; 518 insn->bits3.dp_read_gen5.msg_length = msg_length; 519 insn->bits3.dp_read_gen5.pad1 = 0; 520 insn->bits3.dp_read_gen5.end_of_thread = end_of_thread; 521 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ; 522 insn->bits2.send_gen5.end_of_thread = end_of_thread; 523 } else { 524 insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/ 525 insn->bits3.dp_read.msg_control = msg_control; /*8:11*/ 526 insn->bits3.dp_read.msg_type = msg_type; /*12:13*/ 527 insn->bits3.dp_read.target_cache = target_cache; /*14:15*/ 528 insn->bits3.dp_read.response_length = response_length; /*16:19*/ 529 insn->bits3.dp_read.msg_length = msg_length; /*20:23*/ 530 insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/ 531 insn->bits3.dp_read.pad1 = 0; /*28:30*/ 532 insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/ 533 } 534} 535 536static void brw_set_sampler_message(struct brw_context *brw, 537 struct brw_instruction *insn, 538 GLuint binding_table_index, 539 GLuint sampler, 540 GLuint msg_type, 541 GLuint response_length, 542 GLuint msg_length, 543 GLboolean eot, 544 GLuint header_present, 545 GLuint simd_mode) 546{ 547 struct intel_context *intel = &brw->intel; 548 assert(eot == 0); 549 brw_set_src1(insn, brw_imm_d(0)); 550 551 if (intel->gen == 5) { 552 insn->bits3.sampler_gen5.binding_table_index = binding_table_index; 553 insn->bits3.sampler_gen5.sampler = sampler; 554 insn->bits3.sampler_gen5.msg_type = msg_type; 555 insn->bits3.sampler_gen5.simd_mode = simd_mode; 556 insn->bits3.sampler_gen5.header_present = header_present; 557 insn->bits3.sampler_gen5.response_length = response_length; 558 insn->bits3.sampler_gen5.msg_length = msg_length; 559 insn->bits3.sampler_gen5.end_of_thread = eot; 560 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER; 561 
insn->bits2.send_gen5.end_of_thread = eot; 562 } else if (intel->is_g4x) { 563 insn->bits3.sampler_g4x.binding_table_index = binding_table_index; 564 insn->bits3.sampler_g4x.sampler = sampler; 565 insn->bits3.sampler_g4x.msg_type = msg_type; 566 insn->bits3.sampler_g4x.response_length = response_length; 567 insn->bits3.sampler_g4x.msg_length = msg_length; 568 insn->bits3.sampler_g4x.end_of_thread = eot; 569 insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER; 570 } else { 571 insn->bits3.sampler.binding_table_index = binding_table_index; 572 insn->bits3.sampler.sampler = sampler; 573 insn->bits3.sampler.msg_type = msg_type; 574 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; 575 insn->bits3.sampler.response_length = response_length; 576 insn->bits3.sampler.msg_length = msg_length; 577 insn->bits3.sampler.end_of_thread = eot; 578 insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER; 579 } 580} 581 582 583 584static struct brw_instruction *next_insn( struct brw_compile *p, 585 GLuint opcode ) 586{ 587 struct brw_instruction *insn; 588 589 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN); 590 591 insn = &p->store[p->nr_insn++]; 592 memcpy(insn, p->current, sizeof(*insn)); 593 594 /* Reset this one-shot flag: 595 */ 596 597 if (p->current->header.destreg__conditionalmod) { 598 p->current->header.destreg__conditionalmod = 0; 599 p->current->header.predicate_control = BRW_PREDICATE_NORMAL; 600 } 601 602 insn->header.opcode = opcode; 603 return insn; 604} 605 606 607static struct brw_instruction *brw_alu1( struct brw_compile *p, 608 GLuint opcode, 609 struct brw_reg dest, 610 struct brw_reg src ) 611{ 612 struct brw_instruction *insn = next_insn(p, opcode); 613 brw_set_dest(insn, dest); 614 brw_set_src0(insn, src); 615 return insn; 616} 617 618static struct brw_instruction *brw_alu2(struct brw_compile *p, 619 GLuint opcode, 620 struct brw_reg dest, 621 struct brw_reg src0, 622 struct brw_reg src1 ) 623{ 624 struct brw_instruction *insn = 
next_insn(p, opcode); 625 brw_set_dest(insn, dest); 626 brw_set_src0(insn, src0); 627 brw_set_src1(insn, src1); 628 return insn; 629} 630 631 632/*********************************************************************** 633 * Convenience routines. 634 */ 635#define ALU1(OP) \ 636struct brw_instruction *brw_##OP(struct brw_compile *p, \ 637 struct brw_reg dest, \ 638 struct brw_reg src0) \ 639{ \ 640 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \ 641} 642 643#define ALU2(OP) \ 644struct brw_instruction *brw_##OP(struct brw_compile *p, \ 645 struct brw_reg dest, \ 646 struct brw_reg src0, \ 647 struct brw_reg src1) \ 648{ \ 649 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \ 650} 651 652 653ALU1(MOV) 654ALU2(SEL) 655ALU1(NOT) 656ALU2(AND) 657ALU2(OR) 658ALU2(XOR) 659ALU2(SHR) 660ALU2(SHL) 661ALU2(RSR) 662ALU2(RSL) 663ALU2(ASR) 664ALU2(ADD) 665ALU2(MUL) 666ALU1(FRC) 667ALU1(RNDD) 668ALU1(RNDZ) 669ALU2(MAC) 670ALU2(MACH) 671ALU1(LZD) 672ALU2(DP4) 673ALU2(DPH) 674ALU2(DP3) 675ALU2(DP2) 676ALU2(LINE) 677ALU2(PLN) 678 679 680 681void brw_NOP(struct brw_compile *p) 682{ 683 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP); 684 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 685 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 686 brw_set_src1(insn, brw_imm_ud(0x0)); 687} 688 689 690 691 692 693/*********************************************************************** 694 * Comparisons, if/else/endif 695 */ 696 697struct brw_instruction *brw_JMPI(struct brw_compile *p, 698 struct brw_reg dest, 699 struct brw_reg src0, 700 struct brw_reg src1) 701{ 702 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); 703 704 insn->header.execution_size = 1; 705 insn->header.compression_control = BRW_COMPRESSION_NONE; 706 insn->header.mask_control = BRW_MASK_DISABLE; 707 708 p->current->header.predicate_control = BRW_PREDICATE_NONE; 709 710 return insn; 711} 712 713/* EU takes the value from the flag 
register and pushes it onto some
 * sort of a stack (presumably merging with any flag value already on
 * the stack).  Within an if block, the flags at the top of the stack
 * control execution on each channel of the unit, eg. on each of the
 * 16 pixel values in our wm programs.
 *
 * When the matching 'else' instruction is reached (presumably by
 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
 *
 * When the matching 'endif' instruction is reached, the flags are
 * popped off.  If the stack is now empty, normal execution resumes.
 *
 * No attempt is made to deal with stack overflow (14 elements?).
 *
 * brw_IF() emits the opening instruction and returns it so that
 * brw_ELSE()/brw_ENDIF() can patch its jump target later.  In single
 * program flow mode an ADD on the ip register with inverted predicate is
 * emitted instead of an IF, and execute_size must be BRW_EXECUTE_1.
 * The default predicate control is reset to BRW_PREDICATE_NONE afterwards.
 */
struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
{
   struct brw_instruction *insn;

   if (p->single_program_flow) {
      assert(execute_size == BRW_EXECUTE_1);

      insn = next_insn(p, BRW_OPCODE_ADD);
      insn->header.predicate_inverse = 1;
   } else {
      insn = next_insn(p, BRW_OPCODE_IF);
   }

   /* Override the defaults for this instruction:
    */
   brw_set_dest(insn, brw_ip_reg());
   brw_set_src0(insn, brw_ip_reg());
   brw_set_src1(insn, brw_imm_d(0x0));

   /* Set after the operands: brw_set_dest() overwrites execution_size. */
   insn->header.execution_size = execute_size;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
       insn->header.thread_control = BRW_THREAD_SWITCH;

   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}


/**
 * Emit the ELSE for a previously emitted brw_IF() and patch that IF to
 * jump here.
 *
 * In single program flow mode both instructions are ADDs on the ip
 * register and the IF's immediate becomes a byte offset (16 per
 * instruction) to the instruction after this one.  Otherwise the IF's
 * jump_count is the instruction distance to this ELSE, multiplied by 2
 * on gen 5.  Returns the ELSE so brw_ENDIF() can patch it.
 */
struct brw_instruction *brw_ELSE(struct brw_compile *p,
                                 struct brw_instruction *if_insn)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   /* Jump-count units per instruction: 2 on gen 5, 1 elsewhere. */
   GLuint br = 1;

   if (intel->gen == 5)
      br = 2;

   if (p->single_program_flow) {
      insn = next_insn(p, BRW_OPCODE_ADD);
   } else {
      insn = next_insn(p, BRW_OPCODE_ELSE);
   }

   brw_set_dest(insn, brw_ip_reg());
   brw_set_src0(insn, brw_ip_reg());
   brw_set_src1(insn, brw_imm_d(0x0));

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = if_insn->header.execution_size;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
       insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Patch the if instruction to point at this instruction.
    */
   if (p->single_program_flow) {
      assert(if_insn->header.opcode == BRW_OPCODE_ADD);

      if_insn->bits3.ud = (insn - if_insn + 1) * 16;
   } else {
      assert(if_insn->header.opcode == BRW_OPCODE_IF);

      if_insn->bits3.if_else.jump_count = br * (insn - if_insn);
      if_insn->bits3.if_else.pop_count = 0;
      if_insn->bits3.if_else.pad0 = 0;
   }

   return insn;
}

/**
 * Close an if/else construct and patch \p patch_insn (the IF, or the ELSE
 * when there is one) to jump past it.
 *
 * In single program flow mode no instruction is emitted; the pending ADD
 * is patched with the byte offset to the next instruction slot.
 * Otherwise an ENDIF with pop_count 1 is emitted; an unpatched IF is
 * turned into an IFF, and an ELSE gets pop_count 1.
 */
void brw_ENDIF(struct brw_compile *p,
               struct brw_instruction *patch_insn)
{
   struct intel_context *intel = &p->brw->intel;
   /* Jump-count units per instruction: 2 on gen 5, 1 elsewhere. */
   GLuint br = 1;

   if (intel->gen == 5)
      br = 2;

   if (p->single_program_flow) {
      /* In single program flow mode, there's no need to execute an ENDIF,
       * since we don't need to do any stack operations, and if we're executing
       * currently, we want to just continue executing.
       */
      struct brw_instruction *next = &p->store[p->nr_insn];

      assert(patch_insn->header.opcode == BRW_OPCODE_ADD);

      patch_insn->bits3.ud = (next - patch_insn) * 16;
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);

      brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src1(insn, brw_imm_d(0x0));

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = patch_insn->header.execution_size;
      insn->header.mask_control = BRW_MASK_ENABLE;
      insn->header.thread_control = BRW_THREAD_SWITCH;

      /* The instruction being patched must not have a target yet. */
      assert(patch_insn->bits3.if_else.jump_count == 0);

      /* Patch the if or else instructions to point at this or the next
       * instruction respectively.
       */
      if (patch_insn->header.opcode == BRW_OPCODE_IF) {
         /* Automagically turn it into an IFF:
          */
         patch_insn->header.opcode = BRW_OPCODE_IFF;
         patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
         patch_insn->bits3.if_else.pop_count = 0;
         patch_insn->bits3.if_else.pad0 = 0;
      } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
         patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
         patch_insn->bits3.if_else.pop_count = 1;
         patch_insn->bits3.if_else.pad0 = 0;
      } else {
         assert(0);
      }

      /* Also pop item off the stack in the endif instruction:
       */
      insn->bits3.if_else.jump_count = 0;
      insn->bits3.if_else.pop_count = 1;
      insn->bits3.if_else.pad0 = 0;
   }
}

/**
 * Emit a BREAK with the given pop count.  Operands are the ip register
 * and an immediate zero; execution size is fixed at BRW_EXECUTE_8.  The
 * jump count is not set here.
 */
struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
{
   struct brw_instruction *insn;
   insn = next_insn(p, BRW_OPCODE_BREAK);
   brw_set_dest(insn, brw_ip_reg());
   brw_set_src0(insn, brw_ip_reg());
   brw_set_src1(insn, brw_imm_d(0x0));
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;
   /* insn->header.mask_control = BRW_MASK_DISABLE; */
   insn->bits3.if_else.pad0 = 0;
   insn->bits3.if_else.pop_count = pop_count;
   return insn;
}

/**
 * Emit a CONTINUE with the given pop count.  Encoded exactly like
 * brw_BREAK() apart from the opcode.
 */
struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
{
   struct brw_instruction *insn;
   insn = next_insn(p, BRW_OPCODE_CONTINUE);
   brw_set_dest(insn, brw_ip_reg());
   brw_set_src0(insn, brw_ip_reg());
   brw_set_src1(insn, brw_imm_d(0x0));
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;
   /* insn->header.mask_control = BRW_MASK_DISABLE; */
   insn->bits3.if_else.pad0 = 0;
   insn->bits3.if_else.pop_count = pop_count;
   return insn;
}

/* DO/WHILE loop:
 *
 * brw_DO() marks the top of a loop and returns the instruction that
 * brw_WHILE() will jump back to.  In single program flow mode nothing is
 * emitted and the returned pointer is the next free instruction slot.
 */
struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
{
   if (p->single_program_flow) {
      return &p->store[p->nr_insn];
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);

      /* Override the defaults for this instruction:
       */
      brw_set_dest(insn, brw_null_reg());
      brw_set_src0(insn, brw_null_reg());
      brw_set_src1(insn, brw_null_reg());

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = execute_size;
      insn->header.predicate_control = BRW_PREDICATE_NONE;
      /* insn->header.mask_control = BRW_MASK_ENABLE; */
      /* insn->header.mask_control = BRW_MASK_DISABLE; */

      return insn;
   }
}



/**
 * Emit the loop-closing instruction that jumps back to \p do_insn.
 *
 * In single program flow mode this is an ADD on the ip register with a
 * (negative) byte offset of 16 per instruction and execution size 1.
 * Otherwise it is a WHILE whose jump_count is the instruction distance
 * back to the instruction after the DO, multiplied by 2 on gen 5.  The
 * default predicate control is reset to BRW_PREDICATE_NONE afterwards.
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p,
                                  struct brw_instruction *do_insn)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   /* Jump-count units per instruction: 2 on gen 5, 1 elsewhere. */
   GLuint br = 1;

   if (intel->gen == 5)
      br = 2;

   if (p->single_program_flow)
      insn = next_insn(p, BRW_OPCODE_ADD);
   else
      insn = next_insn(p, BRW_OPCODE_WHILE);

   brw_set_dest(insn, brw_ip_reg());
   brw_set_src0(insn, brw_ip_reg());
   brw_set_src1(insn, brw_imm_d(0x0));

   insn->header.compression_control = BRW_COMPRESSION_NONE;

   if (p->single_program_flow) {
      insn->header.execution_size = BRW_EXECUTE_1;

      insn->bits3.d = (do_insn - insn) * 16;
   } else {
      insn->header.execution_size = do_insn->header.execution_size;

      assert(do_insn->header.opcode == BRW_OPCODE_DO);
      insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
      insn->bits3.if_else.pop_count = 0;
      insn->bits3.if_else.pad0 = 0;
   }

/*    insn->header.mask_control = BRW_MASK_ENABLE; */

   /* insn->header.mask_control = BRW_MASK_DISABLE; */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;
   return insn;
}


/* FORWARD JUMPS:
 *
 * Patch a previously emitted JMPI (whose src1 must be an immediate) so
 * that it lands on the next instruction to be emitted.  The immediate is
 * the instruction distance minus one, multiplied by 2 on gen 5.
 */
void brw_land_fwd_jump(struct brw_compile *p,
                       struct brw_instruction *jmp_insn)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *landing = &p->store[p->nr_insn];
   /* Jump units per instruction: 2 on gen 5, 1 elsewhere. */
   GLuint jmpi = 1;

   if (intel->gen == 5)
       jmpi = 2;

   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
   assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);

   jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
}



/* To integrate with the above, it makes sense that the comparison
 * instruction should populate the flag register.  It might be simpler
 * just to use the flag reg for most WM tasks?
 *
 * brw_CMP() emits a CMP with the given conditional modifier.  The
 * modifier is written before the operands are set.
 */
void brw_CMP(struct brw_compile *p,
             struct brw_reg dest,
             GLuint conditional,
             struct brw_reg src0,
             struct brw_reg src1)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);

   insn->header.destreg__conditionalmod = conditional;
   brw_set_dest(insn, dest);
   brw_set_src0(insn, src0);
   brw_set_src1(insn, src1);

/*    guess_execution_size(insn, src0); */


   /* Make it so that future instructions will use the computed flag
    * value until brw_set_predicate_control_flag_value() is called
    * again.
    */
   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.nr == 0) {
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
      p->flag_value = 0xff;
   }
}

/* Issue 'wait' instruction for n1, host could program MMIO
   to wake up thread. */
void brw_WAIT (struct brw_compile *p)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
   struct brw_reg src = brw_notification_1_reg();

   brw_set_dest(insn, src);
   brw_set_src0(insn, src);
   brw_set_src1(insn, brw_null_reg());
   /* Header overrides go last: brw_set_dest() sets execution_size. */
   insn->header.execution_size = 0; /* must */
   insn->header.predicate_control = 0;
   insn->header.compression_control = 0;
}


/***********************************************************************
 * Helpers for the various SEND message types:
 */

/** Extended math function, float[8].
 *
 * On gen >= 6 this emits a MATH instruction with \p function stored in
 * the conditional-modifier field; \p saturate, \p msg_reg_nr,
 * \p data_type and \p precision are not used on that path.  On older
 * parts it emits a SEND to the math unit with \p msg_reg_nr in the
 * destreg field; the message length is 2 for POW and the response length
 * is 2 for SINCOS, otherwise both are 1.
 */
void brw_math( struct brw_compile *p,
               struct brw_reg dest,
               GLuint function,
               GLuint saturate,
               GLuint msg_reg_nr,
               struct brw_reg src,
               GLuint data_type,
               GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;

   if (intel->gen >= 6) {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
       */
      insn->header.destreg__conditionalmod = function;

      brw_set_dest(insn, dest);
      brw_set_src0(insn, src);
      brw_set_src1(insn, brw_null_reg());
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
      GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
      GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
      /* Example code doesn't set predicate_control for send
       * instructions.
       */
      insn->header.predicate_control = 0;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(insn, dest);
      brw_set_src0(insn, src);
      brw_set_math_message(p->brw,
                           insn,
                           msg_length, response_length,
                           function,
                           BRW_MATH_INTEGER_UNSIGNED,
                           precision,
                           saturate,
                           data_type);
   }
}

/**
 * Extended math function, float[16].
 * Use 2 send instructions.
 */
void brw_math_16( struct brw_compile *p,
                  struct brw_reg dest,
                  GLuint function,
                  GLuint saturate,
                  GLuint msg_reg_nr,
                  struct brw_reg src,
                  GLuint precision )
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
   GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;

   if (intel->gen >= 6) {
      insn = next_insn(p, BRW_OPCODE_MATH);

      /* Math is the same ISA format as other opcodes, except that CondModifier
       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1101 */ 1102 insn->header.destreg__conditionalmod = function; 1103 1104 brw_set_dest(insn, dest); 1105 brw_set_src0(insn, src); 1106 brw_set_src1(insn, brw_null_reg()); 1107 return; 1108 } 1109 1110 /* First instruction: 1111 */ 1112 brw_push_insn_state(p); 1113 brw_set_predicate_control_flag_value(p, 0xff); 1114 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1115 1116 insn = next_insn(p, BRW_OPCODE_SEND); 1117 insn->header.destreg__conditionalmod = msg_reg_nr; 1118 1119 brw_set_dest(insn, dest); 1120 brw_set_src0(insn, src); 1121 brw_set_math_message(p->brw, 1122 insn, 1123 msg_length, response_length, 1124 function, 1125 BRW_MATH_INTEGER_UNSIGNED, 1126 precision, 1127 saturate, 1128 BRW_MATH_DATA_VECTOR); 1129 1130 /* Second instruction: 1131 */ 1132 insn = next_insn(p, BRW_OPCODE_SEND); 1133 insn->header.compression_control = BRW_COMPRESSION_2NDHALF; 1134 insn->header.destreg__conditionalmod = msg_reg_nr+1; 1135 1136 brw_set_dest(insn, offset(dest,1)); 1137 brw_set_src0(insn, src); 1138 brw_set_math_message(p->brw, 1139 insn, 1140 msg_length, response_length, 1141 function, 1142 BRW_MATH_INTEGER_UNSIGNED, 1143 precision, 1144 saturate, 1145 BRW_MATH_DATA_VECTOR); 1146 1147 brw_pop_insn_state(p); 1148} 1149 1150 1151/** 1152 * Write block of 16 dwords/floats to the data port Render Cache scratch buffer. 1153 * Scratch offset should be a multiple of 64. 1154 * Used for register spilling. 
1155 */ 1156void brw_dp_WRITE_16( struct brw_compile *p, 1157 struct brw_reg src, 1158 GLuint scratch_offset ) 1159{ 1160 struct intel_context *intel = &p->brw->intel; 1161 GLuint msg_reg_nr = 1; 1162 { 1163 brw_push_insn_state(p); 1164 brw_set_mask_control(p, BRW_MASK_DISABLE); 1165 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1166 1167 /* set message header global offset field (reg 0, element 2) */ 1168 brw_MOV(p, 1169 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), 1170 brw_imm_d(scratch_offset)); 1171 1172 brw_pop_insn_state(p); 1173 } 1174 1175 { 1176 GLuint msg_length = 3; 1177 struct brw_reg dest; 1178 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1179 int send_commit_msg; 1180 1181 insn->header.predicate_control = 0; /* XXX */ 1182 insn->header.compression_control = BRW_COMPRESSION_NONE; 1183 insn->header.destreg__conditionalmod = msg_reg_nr; 1184 1185 /* Until gen6, writes followed by reads from the same location 1186 * are not guaranteed to be ordered unless write_commit is set. 1187 * If set, then a no-op write is issued to the destination 1188 * register to set a dependency, and a read from the destination 1189 * can be used to ensure the ordering. 1190 * 1191 * For gen6, only writes between different threads need ordering 1192 * protection. Our use of DP writes is all about register 1193 * spilling within a thread. 
1194 */ 1195 if (intel->gen >= 6) { 1196 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW); 1197 send_commit_msg = 0; 1198 } else { 1199 dest = brw_uw16_grf(0, 0); 1200 send_commit_msg = 1; 1201 } 1202 1203 brw_set_dest(insn, dest); 1204 brw_set_src0(insn, src); 1205 1206 brw_set_dp_write_message(p->brw, 1207 insn, 1208 255, /* binding table index (255=stateless) */ 1209 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */ 1210 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */ 1211 msg_length, 1212 0, /* pixel scoreboard */ 1213 send_commit_msg, /* response_length */ 1214 0, /* eot */ 1215 send_commit_msg); 1216 } 1217} 1218 1219 1220/** 1221 * Read block of 16 dwords/floats from the data port Render Cache scratch buffer. 1222 * Scratch offset should be a multiple of 64. 1223 * Used for register spilling. 1224 */ 1225void brw_dp_READ_16( struct brw_compile *p, 1226 struct brw_reg dest, 1227 GLuint scratch_offset ) 1228{ 1229 GLuint msg_reg_nr = 1; 1230 { 1231 brw_push_insn_state(p); 1232 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1233 brw_set_mask_control(p, BRW_MASK_DISABLE); 1234 1235 /* set message header global offset field (reg 0, element 2) */ 1236 brw_MOV(p, 1237 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), 1238 brw_imm_d(scratch_offset)); 1239 1240 brw_pop_insn_state(p); 1241 } 1242 1243 { 1244 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1245 1246 insn->header.predicate_control = 0; /* XXX */ 1247 insn->header.compression_control = BRW_COMPRESSION_NONE; 1248 insn->header.destreg__conditionalmod = msg_reg_nr; 1249 1250 brw_set_dest(insn, dest); /* UW? 
*/ 1251 brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); 1252 1253 brw_set_dp_read_message(p->brw, 1254 insn, 1255 255, /* binding table index (255=stateless) */ 1256 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, 1257 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ 1258 1, /* target cache (render/scratch) */ 1259 1, /* msg_length */ 1260 2, /* response_length */ 1261 0); /* eot */ 1262 } 1263} 1264 1265 1266/** 1267 * Read a float[4] vector from the data port Data Cache (const buffer). 1268 * Location (in buffer) should be a multiple of 16. 1269 * Used for fetching shader constants. 1270 * If relAddr is true, we'll do an indirect fetch using the address register. 1271 */ 1272void brw_dp_READ_4( struct brw_compile *p, 1273 struct brw_reg dest, 1274 GLboolean relAddr, 1275 GLuint location, 1276 GLuint bind_table_index ) 1277{ 1278 /* XXX: relAddr not implemented */ 1279 GLuint msg_reg_nr = 1; 1280 { 1281 struct brw_reg b; 1282 brw_push_insn_state(p); 1283 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1284 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1285 brw_set_mask_control(p, BRW_MASK_DISABLE); 1286 1287 /* Setup MRF[1] with location/offset into const buffer */ 1288 b = brw_message_reg(msg_reg_nr); 1289 b = retype(b, BRW_REGISTER_TYPE_UD); 1290 /* XXX I think we're setting all the dwords of MRF[1] to 'location'. 1291 * when the docs say only dword[2] should be set. Hmmm. But it works. 
1292 */ 1293 brw_MOV(p, b, brw_imm_ud(location)); 1294 brw_pop_insn_state(p); 1295 } 1296 1297 { 1298 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1299 1300 insn->header.predicate_control = BRW_PREDICATE_NONE; 1301 insn->header.compression_control = BRW_COMPRESSION_NONE; 1302 insn->header.destreg__conditionalmod = msg_reg_nr; 1303 insn->header.mask_control = BRW_MASK_DISABLE; 1304 1305 /* cast dest to a uword[8] vector */ 1306 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); 1307 1308 brw_set_dest(insn, dest); 1309 brw_set_src0(insn, brw_null_reg()); 1310 1311 brw_set_dp_read_message(p->brw, 1312 insn, 1313 bind_table_index, 1314 0, /* msg_control (0 means 1 Oword) */ 1315 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ 1316 0, /* source cache = data cache */ 1317 1, /* msg_length */ 1318 1, /* response_length (1 Oword) */ 1319 0); /* eot */ 1320 } 1321} 1322 1323 1324/** 1325 * Read float[4] constant(s) from VS constant buffer. 1326 * For relative addressing, two float[4] constants will be read into 'dest'. 1327 * Otherwise, one float[4] constant will be read into the lower half of 'dest'. 1328 */ 1329void brw_dp_READ_4_vs(struct brw_compile *p, 1330 struct brw_reg dest, 1331 GLuint location, 1332 GLuint bind_table_index) 1333{ 1334 struct brw_instruction *insn; 1335 GLuint msg_reg_nr = 1; 1336 struct brw_reg b; 1337 1338 /* 1339 printf("vs const read msg, location %u, msg_reg_nr %d\n", 1340 location, msg_reg_nr); 1341 */ 1342 1343 /* Setup MRF[1] with location/offset into const buffer */ 1344 brw_push_insn_state(p); 1345 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1346 brw_set_mask_control(p, BRW_MASK_DISABLE); 1347 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1348 1349 /* XXX I think we're setting all the dwords of MRF[1] to 'location'. 1350 * when the docs say only dword[2] should be set. Hmmm. But it works. 
1351 */ 1352 b = brw_message_reg(msg_reg_nr); 1353 b = retype(b, BRW_REGISTER_TYPE_UD); 1354 /*b = get_element_ud(b, 2);*/ 1355 brw_MOV(p, b, brw_imm_ud(location)); 1356 1357 brw_pop_insn_state(p); 1358 1359 insn = next_insn(p, BRW_OPCODE_SEND); 1360 1361 insn->header.predicate_control = BRW_PREDICATE_NONE; 1362 insn->header.compression_control = BRW_COMPRESSION_NONE; 1363 insn->header.destreg__conditionalmod = msg_reg_nr; 1364 insn->header.mask_control = BRW_MASK_DISABLE; 1365 1366 brw_set_dest(insn, dest); 1367 brw_set_src0(insn, brw_null_reg()); 1368 1369 brw_set_dp_read_message(p->brw, 1370 insn, 1371 bind_table_index, 1372 0, 1373 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ 1374 0, /* source cache = data cache */ 1375 1, /* msg_length */ 1376 1, /* response_length (1 Oword) */ 1377 0); /* eot */ 1378} 1379 1380/** 1381 * Read a float[4] constant per vertex from VS constant buffer, with 1382 * relative addressing. 1383 */ 1384void brw_dp_READ_4_vs_relative(struct brw_compile *p, 1385 struct brw_reg dest, 1386 struct brw_reg addr_reg, 1387 GLuint offset, 1388 GLuint bind_table_index) 1389{ 1390 struct intel_context *intel = &p->brw->intel; 1391 int msg_type; 1392 1393 /* Setup MRF[1] with offset into const buffer */ 1394 brw_push_insn_state(p); 1395 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1396 brw_set_mask_control(p, BRW_MASK_DISABLE); 1397 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1398 1399 /* M1.0 is block offset 0, M1.4 is block offset 1, all other 1400 * fields ignored. 
1401 */ 1402 brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), 1403 addr_reg, brw_imm_d(offset)); 1404 brw_pop_insn_state(p); 1405 1406 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1407 1408 insn->header.predicate_control = BRW_PREDICATE_NONE; 1409 insn->header.compression_control = BRW_COMPRESSION_NONE; 1410 insn->header.destreg__conditionalmod = 0; 1411 insn->header.mask_control = BRW_MASK_DISABLE; 1412 1413 brw_set_dest(insn, dest); 1414 brw_set_src0(insn, brw_vec8_grf(0, 0)); 1415 1416 if (intel->gen == 6) 1417 msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; 1418 else if (intel->gen == 5 || intel->is_g4x) 1419 msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; 1420 else 1421 msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; 1422 1423 brw_set_dp_read_message(p->brw, 1424 insn, 1425 bind_table_index, 1426 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, 1427 msg_type, 1428 0, /* source cache = data cache */ 1429 2, /* msg_length */ 1430 1, /* response_length */ 1431 0); /* eot */ 1432} 1433 1434 1435 1436void brw_fb_WRITE(struct brw_compile *p, 1437 int dispatch_width, 1438 struct brw_reg dest, 1439 GLuint msg_reg_nr, 1440 struct brw_reg src0, 1441 GLuint binding_table_index, 1442 GLuint msg_length, 1443 GLuint response_length, 1444 GLboolean eot) 1445{ 1446 struct intel_context *intel = &p->brw->intel; 1447 struct brw_instruction *insn; 1448 GLuint msg_control, msg_type; 1449 1450 insn = next_insn(p, BRW_OPCODE_SEND); 1451 insn->header.predicate_control = 0; /* XXX */ 1452 insn->header.compression_control = BRW_COMPRESSION_NONE; 1453 1454 if (intel->gen >= 6) { 1455 /* headerless version, just submit color payload */ 1456 src0 = brw_message_reg(msg_reg_nr); 1457 1458 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE_GEN6; 1459 } else { 1460 insn->header.destreg__conditionalmod = msg_reg_nr; 1461 1462 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; 1463 } 1464 1465 if (dispatch_width == 16) 1466 
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE; 1467 else 1468 msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01; 1469 1470 brw_set_dest(insn, dest); 1471 brw_set_src0(insn, src0); 1472 brw_set_dp_write_message(p->brw, 1473 insn, 1474 binding_table_index, 1475 msg_control, 1476 msg_type, 1477 msg_length, 1478 1, /* pixel scoreboard */ 1479 response_length, 1480 eot, 1481 0 /* send_commit_msg */); 1482} 1483 1484 1485/** 1486 * Texture sample instruction. 1487 * Note: the msg_type plus msg_length values determine exactly what kind 1488 * of sampling operation is performed. See volume 4, page 161 of docs. 1489 */ 1490void brw_SAMPLE(struct brw_compile *p, 1491 struct brw_reg dest, 1492 GLuint msg_reg_nr, 1493 struct brw_reg src0, 1494 GLuint binding_table_index, 1495 GLuint sampler, 1496 GLuint writemask, 1497 GLuint msg_type, 1498 GLuint response_length, 1499 GLuint msg_length, 1500 GLboolean eot, 1501 GLuint header_present, 1502 GLuint simd_mode) 1503{ 1504 GLboolean need_stall = 0; 1505 1506 if (writemask == 0) { 1507 /*printf("%s: zero writemask??\n", __FUNCTION__); */ 1508 return; 1509 } 1510 1511 /* Hardware doesn't do destination dependency checking on send 1512 * instructions properly. Add a workaround which generates the 1513 * dependency by other means. In practice it seems like this bug 1514 * only crops up for texture samples, and only where registers are 1515 * written by the send and then written again later without being 1516 * read in between. Luckily for us, we already track that 1517 * information and use it to modify the writemask for the 1518 * instruction, so that is a guide for whether a workaround is 1519 * needed. 
1520 */ 1521 if (writemask != WRITEMASK_XYZW) { 1522 GLuint dst_offset = 0; 1523 GLuint i, newmask = 0, len = 0; 1524 1525 for (i = 0; i < 4; i++) { 1526 if (writemask & (1<<i)) 1527 break; 1528 dst_offset += 2; 1529 } 1530 for (; i < 4; i++) { 1531 if (!(writemask & (1<<i))) 1532 break; 1533 newmask |= 1<<i; 1534 len++; 1535 } 1536 1537 if (newmask != writemask) { 1538 need_stall = 1; 1539 /* printf("need stall %x %x\n", newmask , writemask); */ 1540 } 1541 else { 1542 GLboolean dispatch_16 = GL_FALSE; 1543 1544 struct brw_reg m1 = brw_message_reg(msg_reg_nr); 1545 1546 guess_execution_size(p->current, dest); 1547 if (p->current->header.execution_size == BRW_EXECUTE_16) 1548 dispatch_16 = GL_TRUE; 1549 1550 newmask = ~newmask & WRITEMASK_XYZW; 1551 1552 brw_push_insn_state(p); 1553 1554 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1555 brw_set_mask_control(p, BRW_MASK_DISABLE); 1556 1557 brw_MOV(p, m1, brw_vec8_grf(0,0)); 1558 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12)); 1559 1560 brw_pop_insn_state(p); 1561 1562 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); 1563 dest = offset(dest, dst_offset); 1564 1565 /* For 16-wide dispatch, masked channels are skipped in the 1566 * response. For 8-wide, masked channels still take up slots, 1567 * and are just not written to. 
1568 */ 1569 if (dispatch_16) 1570 response_length = len * 2; 1571 } 1572 } 1573 1574 { 1575 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1576 1577 insn->header.predicate_control = 0; /* XXX */ 1578 insn->header.compression_control = BRW_COMPRESSION_NONE; 1579 insn->header.destreg__conditionalmod = msg_reg_nr; 1580 1581 brw_set_dest(insn, dest); 1582 brw_set_src0(insn, src0); 1583 brw_set_sampler_message(p->brw, insn, 1584 binding_table_index, 1585 sampler, 1586 msg_type, 1587 response_length, 1588 msg_length, 1589 eot, 1590 header_present, 1591 simd_mode); 1592 } 1593 1594 if (need_stall) { 1595 struct brw_reg reg = vec8(offset(dest, response_length-1)); 1596 1597 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 } 1598 */ 1599 brw_push_insn_state(p); 1600 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1601 brw_MOV(p, reg, reg); 1602 brw_pop_insn_state(p); 1603 } 1604 1605} 1606 1607/* All these variables are pretty confusing - we might be better off 1608 * using bitmasks and macros for this, in the old style. Or perhaps 1609 * just having the caller instantiate the fields in dword3 itself. 1610 */ 1611void brw_urb_WRITE(struct brw_compile *p, 1612 struct brw_reg dest, 1613 GLuint msg_reg_nr, 1614 struct brw_reg src0, 1615 GLboolean allocate, 1616 GLboolean used, 1617 GLuint msg_length, 1618 GLuint response_length, 1619 GLboolean eot, 1620 GLboolean writes_complete, 1621 GLuint offset, 1622 GLuint swizzle) 1623{ 1624 struct intel_context *intel = &p->brw->intel; 1625 struct brw_instruction *insn; 1626 1627 /* Sandybridge doesn't have the implied move for SENDs, 1628 * and the first message register index comes from src0. 
1629 */ 1630 if (intel->gen >= 6) { 1631 brw_push_insn_state(p); 1632 brw_set_mask_control( p, BRW_MASK_DISABLE ); 1633 brw_MOV(p, brw_message_reg(msg_reg_nr), src0); 1634 brw_pop_insn_state(p); 1635 src0 = brw_message_reg(msg_reg_nr); 1636 } 1637 1638 insn = next_insn(p, BRW_OPCODE_SEND); 1639 1640 assert(msg_length < BRW_MAX_MRF); 1641 1642 brw_set_dest(insn, dest); 1643 brw_set_src0(insn, src0); 1644 brw_set_src1(insn, brw_imm_d(0)); 1645 1646 if (intel->gen < 6) 1647 insn->header.destreg__conditionalmod = msg_reg_nr; 1648 1649 brw_set_urb_message(p->brw, 1650 insn, 1651 allocate, 1652 used, 1653 msg_length, 1654 response_length, 1655 eot, 1656 writes_complete, 1657 offset, 1658 swizzle); 1659} 1660 1661void brw_ff_sync(struct brw_compile *p, 1662 struct brw_reg dest, 1663 GLuint msg_reg_nr, 1664 struct brw_reg src0, 1665 GLboolean allocate, 1666 GLuint response_length, 1667 GLboolean eot) 1668{ 1669 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1670 1671 brw_set_dest(insn, dest); 1672 brw_set_src0(insn, src0); 1673 brw_set_src1(insn, brw_imm_d(0)); 1674 1675 insn->header.destreg__conditionalmod = msg_reg_nr; 1676 1677 brw_set_ff_sync_message(p->brw, 1678 insn, 1679 allocate, 1680 response_length, 1681 eot); 1682} 1683