brw_eu_emit.c revision ea909be58dda7e916cb9ce434ecb78597881ad33
1/* 2 Copyright (C) Intel Corp. 2006. All Rights Reserved. 3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to 4 develop this 3D driver. 5 6 Permission is hereby granted, free of charge, to any person obtaining 7 a copy of this software and associated documentation files (the 8 "Software"), to deal in the Software without restriction, including 9 without limitation the rights to use, copy, modify, merge, publish, 10 distribute, sublicense, and/or sell copies of the Software, and to 11 permit persons to whom the Software is furnished to do so, subject to 12 the following conditions: 13 14 The above copyright notice and this permission notice (including the 15 next paragraph) shall be included in all copies or substantial 16 portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 26 **********************************************************************/ 27 /* 28 * Authors: 29 * Keith Whitwell <keith@tungstengraphics.com> 30 */ 31 32 33#include "brw_context.h" 34#include "brw_defines.h" 35#include "brw_eu.h" 36 37 38 39 40/*********************************************************************** 41 * Internal helper for constructing instructions 42 */ 43 44static void guess_execution_size( struct brw_instruction *insn, 45 struct brw_reg reg ) 46{ 47 if (reg.width == BRW_WIDTH_8 && 48 insn->header.compression_control == BRW_COMPRESSION_COMPRESSED) 49 insn->header.execution_size = BRW_EXECUTE_16; 50 else 51 insn->header.execution_size = reg.width; /* note - definitions are compatible */ 52} 53 54 55static void brw_set_dest( struct brw_instruction *insn, 56 struct brw_reg dest ) 57{ 58 if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && 59 dest.file != BRW_MESSAGE_REGISTER_FILE) 60 assert(dest.nr < 128); 61 62 insn->bits1.da1.dest_reg_file = dest.file; 63 insn->bits1.da1.dest_reg_type = dest.type; 64 insn->bits1.da1.dest_address_mode = dest.address_mode; 65 66 if (dest.address_mode == BRW_ADDRESS_DIRECT) { 67 insn->bits1.da1.dest_reg_nr = dest.nr; 68 69 if (insn->header.access_mode == BRW_ALIGN_1) { 70 insn->bits1.da1.dest_subreg_nr = dest.subnr; 71 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) 72 dest.hstride = BRW_HORIZONTAL_STRIDE_1; 73 insn->bits1.da1.dest_horiz_stride = dest.hstride; 74 } 75 else { 76 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; 77 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; 78 /* even ignored in da16, still need to set as '01' */ 79 insn->bits1.da16.dest_horiz_stride = 1; 80 } 81 } 82 else { 83 insn->bits1.ia1.dest_subreg_nr = dest.subnr; 84 85 /* These are different sizes in align1 vs align16: 86 */ 87 if (insn->header.access_mode == BRW_ALIGN_1) { 88 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; 89 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) 90 dest.hstride = BRW_HORIZONTAL_STRIDE_1; 91 insn->bits1.ia1.dest_horiz_stride = dest.hstride; 92 } 93 else { 94 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; 95 /* even ignored in da16, still need to set as '01' */ 96 insn->bits1.ia16.dest_horiz_stride = 1; 97 } 98 } 99 100 /* NEW: Set the execution size based on dest.width and 101 * insn->compression_control: 102 */ 103 guess_execution_size(insn, dest); 104} 105 106extern int reg_type_size[]; 107 108static void 109validate_reg(struct brw_instruction *insn, struct brw_reg reg) 110{ 111 int hstride_for_reg[] = {0, 1, 2, 4}; 112 int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256}; 113 int width_for_reg[] = {1, 2, 4, 8, 16}; 114 int execsize_for_reg[] = {1, 2, 4, 8, 16}; 115 int width, hstride, vstride, execsize; 116 117 if (reg.file == BRW_IMMEDIATE_VALUE) { 118 /* 3.3.6: Region Parameters. Restriction: Immediate vectors 119 * mean the destination has to be 128-bit aligned and the 120 * destination horiz stride has to be a word. 121 */ 122 if (reg.type == BRW_REGISTER_TYPE_V) { 123 assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] * 124 reg_type_size[insn->bits1.da1.dest_reg_type] == 2); 125 } 126 127 return; 128 } 129 130 if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE && 131 reg.file == BRW_ARF_NULL) 132 return; 133 134 assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg)); 135 hstride = hstride_for_reg[reg.hstride]; 136 137 if (reg.vstride == 0xf) { 138 vstride = -1; 139 } else { 140 assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg)); 141 vstride = vstride_for_reg[reg.vstride]; 142 } 143 144 assert(reg.width >= 0 && reg.width < Elements(width_for_reg)); 145 width = width_for_reg[reg.width]; 146 147 assert(insn->header.execution_size >= 0 && 148 insn->header.execution_size < Elements(execsize_for_reg)); 149 execsize = execsize_for_reg[insn->header.execution_size]; 150 151 /* Restrictions from 3.3.10: Register Region Restrictions. */ 152 /* 3. */ 153 assert(execsize >= width); 154 155 /* 4. */ 156 if (execsize == width && hstride != 0) { 157 assert(vstride == -1 || vstride == width * hstride); 158 } 159 160 /* 5. */ 161 if (execsize == width && hstride == 0) { 162 /* no restriction on vstride. */ 163 } 164 165 /* 6. */ 166 if (width == 1) { 167 assert(hstride == 0); 168 } 169 170 /* 7. */ 171 if (execsize == 1 && width == 1) { 172 assert(hstride == 0); 173 assert(vstride == 0); 174 } 175 176 /* 8. */ 177 if (vstride == 0 && hstride == 0) { 178 assert(width == 1); 179 } 180 181 /* 10. Check destination issues. */ 182} 183 184static void brw_set_src0( struct brw_instruction *insn, 185 struct brw_reg reg ) 186{ 187 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) 188 assert(reg.nr < 128); 189 190 validate_reg(insn, reg); 191 192 insn->bits1.da1.src0_reg_file = reg.file; 193 insn->bits1.da1.src0_reg_type = reg.type; 194 insn->bits2.da1.src0_abs = reg.abs; 195 insn->bits2.da1.src0_negate = reg.negate; 196 insn->bits2.da1.src0_address_mode = reg.address_mode; 197 198 if (reg.file == BRW_IMMEDIATE_VALUE) { 199 insn->bits3.ud = reg.dw1.ud; 200 201 /* Required to set some fields in src1 as well: 202 */ 203 insn->bits1.da1.src1_reg_file = 0; /* arf */ 204 insn->bits1.da1.src1_reg_type = reg.type; 205 } 206 else 207 { 208 if (reg.address_mode == BRW_ADDRESS_DIRECT) { 209 if (insn->header.access_mode == BRW_ALIGN_1) { 210 insn->bits2.da1.src0_subreg_nr = reg.subnr; 211 insn->bits2.da1.src0_reg_nr = reg.nr; 212 } 213 else { 214 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16; 215 insn->bits2.da16.src0_reg_nr = reg.nr; 216 } 217 } 218 else { 219 insn->bits2.ia1.src0_subreg_nr = reg.subnr; 220 221 if (insn->header.access_mode == BRW_ALIGN_1) { 222 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; 223 } 224 else { 225 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; 226 } 227 } 228 229 if (insn->header.access_mode == BRW_ALIGN_1) { 230 if (reg.width == BRW_WIDTH_1 && 231 insn->header.execution_size == BRW_EXECUTE_1) { 232 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0; 233 insn->bits2.da1.src0_width = BRW_WIDTH_1; 234 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0; 235 } 236 else { 237 insn->bits2.da1.src0_horiz_stride = reg.hstride; 238 insn->bits2.da1.src0_width = reg.width; 239 insn->bits2.da1.src0_vert_stride = reg.vstride; 240 } 241 } 242 else { 243 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); 244 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); 245 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); 246 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); 247 248 /* This is an oddity of the fact we're using the same 249 * descriptions for registers in align_16 as align_1: 250 */ 251 if (reg.vstride == BRW_VERTICAL_STRIDE_8) 252 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4; 253 else 254 insn->bits2.da16.src0_vert_stride = reg.vstride; 255 } 256 } 257} 258 259 260void brw_set_src1( struct brw_instruction *insn, 261 struct brw_reg reg ) 262{ 263 assert(reg.file != BRW_MESSAGE_REGISTER_FILE); 264 265 assert(reg.nr < 128); 266 267 validate_reg(insn, reg); 268 269 insn->bits1.da1.src1_reg_file = reg.file; 270 insn->bits1.da1.src1_reg_type = reg.type; 271 insn->bits3.da1.src1_abs = reg.abs; 272 insn->bits3.da1.src1_negate = reg.negate; 273 274 /* Only src1 can be immediate in two-argument instructions. 275 */ 276 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE); 277 278 if (reg.file == BRW_IMMEDIATE_VALUE) { 279 insn->bits3.ud = reg.dw1.ud; 280 } 281 else { 282 /* This is a hardware restriction, which may or may not be lifted 283 * in the future: 284 */ 285 assert (reg.address_mode == BRW_ADDRESS_DIRECT); 286 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */ 287 288 if (insn->header.access_mode == BRW_ALIGN_1) { 289 insn->bits3.da1.src1_subreg_nr = reg.subnr; 290 insn->bits3.da1.src1_reg_nr = reg.nr; 291 } 292 else { 293 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16; 294 insn->bits3.da16.src1_reg_nr = reg.nr; 295 } 296 297 if (insn->header.access_mode == BRW_ALIGN_1) { 298 if (reg.width == BRW_WIDTH_1 && 299 insn->header.execution_size == BRW_EXECUTE_1) { 300 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0; 301 insn->bits3.da1.src1_width = BRW_WIDTH_1; 302 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0; 303 } 304 else { 305 insn->bits3.da1.src1_horiz_stride = reg.hstride; 306 insn->bits3.da1.src1_width = reg.width; 307 insn->bits3.da1.src1_vert_stride = reg.vstride; 308 } 309 } 310 else { 311 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); 312 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); 313 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); 314 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); 315 316 /* This is an oddity of the fact we're using the same 317 * descriptions for registers in align_16 as align_1: 318 */ 319 if (reg.vstride == BRW_VERTICAL_STRIDE_8) 320 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4; 321 else 322 insn->bits3.da16.src1_vert_stride = reg.vstride; 323 } 324 } 325} 326 327 328 329static void brw_set_math_message( struct brw_context *brw, 330 struct brw_instruction *insn, 331 GLuint msg_length, 332 GLuint response_length, 333 GLuint function, 334 GLuint integer_type, 335 GLboolean low_precision, 336 GLboolean saturate, 337 GLuint dataType ) 338{ 339 struct intel_context *intel = &brw->intel; 340 brw_set_src1(insn, brw_imm_d(0)); 341 342 if (intel->gen == 5) { 343 insn->bits3.math_gen5.function = function; 344 insn->bits3.math_gen5.int_type = integer_type; 345 insn->bits3.math_gen5.precision = low_precision; 346 insn->bits3.math_gen5.saturate = saturate; 347 insn->bits3.math_gen5.data_type = dataType; 348 insn->bits3.math_gen5.snapshot = 0; 349 insn->bits3.math_gen5.header_present = 0; 350 insn->bits3.math_gen5.response_length = response_length; 351 insn->bits3.math_gen5.msg_length = msg_length; 352 insn->bits3.math_gen5.end_of_thread = 0; 353 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH; 354 insn->bits2.send_gen5.end_of_thread = 0; 355 } else { 356 insn->bits3.math.function = function; 357 insn->bits3.math.int_type = integer_type; 358 insn->bits3.math.precision = low_precision; 359 insn->bits3.math.saturate = saturate; 360 insn->bits3.math.data_type = dataType; 361 insn->bits3.math.response_length = response_length; 362 insn->bits3.math.msg_length = msg_length; 363 insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH; 364 insn->bits3.math.end_of_thread = 0; 365 } 366} 367 368 369static void brw_set_ff_sync_message(struct brw_context *brw, 370 struct brw_instruction *insn, 371 GLboolean allocate, 372 GLuint response_length, 373 GLboolean end_of_thread) 374{ 375 struct intel_context *intel = &brw->intel; 376 brw_set_src1(insn, brw_imm_d(0)); 377 378 insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */ 379 insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */ 380 insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */ 381 insn->bits3.urb_gen5.allocate = allocate; 382 insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */ 383 insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */ 384 insn->bits3.urb_gen5.header_present = 1; 385 insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */ 386 insn->bits3.urb_gen5.msg_length = 1; 387 insn->bits3.urb_gen5.end_of_thread = end_of_thread; 388 if (intel->gen >= 6) { 389 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB; 390 } else { 391 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB; 392 insn->bits2.send_gen5.end_of_thread = end_of_thread; 393 } 394} 395 396static void brw_set_urb_message( struct brw_context *brw, 397 struct brw_instruction *insn, 398 GLboolean allocate, 399 GLboolean used, 400 GLuint msg_length, 401 GLuint response_length, 402 GLboolean end_of_thread, 403 GLboolean complete, 404 GLuint offset, 405 GLuint swizzle_control ) 406{ 407 struct intel_context *intel = &brw->intel; 408 brw_set_src1(insn, brw_imm_d(0)); 409 410 if (intel->gen >= 5) { 411 insn->bits3.urb_gen5.opcode = 0; /* ? */ 412 insn->bits3.urb_gen5.offset = offset; 413 insn->bits3.urb_gen5.swizzle_control = swizzle_control; 414 insn->bits3.urb_gen5.allocate = allocate; 415 insn->bits3.urb_gen5.used = used; /* ? */ 416 insn->bits3.urb_gen5.complete = complete; 417 insn->bits3.urb_gen5.header_present = 1; 418 insn->bits3.urb_gen5.response_length = response_length; 419 insn->bits3.urb_gen5.msg_length = msg_length; 420 insn->bits3.urb_gen5.end_of_thread = end_of_thread; 421 if (intel->gen >= 6) { 422 /* For SNB, the SFID bits moved to the condmod bits, and 423 * EOT stayed in bits3 above. Does the EOT bit setting 424 * below on Ironlake even do anything? 425 */ 426 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB; 427 } else { 428 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB; 429 insn->bits2.send_gen5.end_of_thread = end_of_thread; 430 } 431 } else { 432 insn->bits3.urb.opcode = 0; /* ? */ 433 insn->bits3.urb.offset = offset; 434 insn->bits3.urb.swizzle_control = swizzle_control; 435 insn->bits3.urb.allocate = allocate; 436 insn->bits3.urb.used = used; /* ? */ 437 insn->bits3.urb.complete = complete; 438 insn->bits3.urb.response_length = response_length; 439 insn->bits3.urb.msg_length = msg_length; 440 insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB; 441 insn->bits3.urb.end_of_thread = end_of_thread; 442 } 443} 444 445static void brw_set_dp_write_message( struct brw_context *brw, 446 struct brw_instruction *insn, 447 GLuint binding_table_index, 448 GLuint msg_control, 449 GLuint msg_type, 450 GLuint msg_length, 451 GLboolean header_present, 452 GLuint pixel_scoreboard_clear, 453 GLuint response_length, 454 GLuint end_of_thread, 455 GLuint send_commit_msg) 456{ 457 struct intel_context *intel = &brw->intel; 458 brw_set_src1(insn, brw_imm_ud(0)); 459 460 if (intel->gen >= 6) { 461 insn->bits3.dp_render_cache.binding_table_index = binding_table_index; 462 insn->bits3.dp_render_cache.msg_control = msg_control; 463 insn->bits3.dp_render_cache.pixel_scoreboard_clear = pixel_scoreboard_clear; 464 insn->bits3.dp_render_cache.msg_type = msg_type; 465 insn->bits3.dp_render_cache.send_commit_msg = send_commit_msg; 466 insn->bits3.dp_render_cache.header_present = header_present; 467 insn->bits3.dp_render_cache.response_length = response_length; 468 insn->bits3.dp_render_cache.msg_length = msg_length; 469 insn->bits3.dp_render_cache.end_of_thread = end_of_thread; 470 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_WRITE; 471 /* XXX really need below? */ 472 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE; 473 insn->bits2.send_gen5.end_of_thread = end_of_thread; 474 } else if (intel->gen == 5) { 475 insn->bits3.dp_write_gen5.binding_table_index = binding_table_index; 476 insn->bits3.dp_write_gen5.msg_control = msg_control; 477 insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear; 478 insn->bits3.dp_write_gen5.msg_type = msg_type; 479 insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg; 480 insn->bits3.dp_write_gen5.header_present = header_present; 481 insn->bits3.dp_write_gen5.response_length = response_length; 482 insn->bits3.dp_write_gen5.msg_length = msg_length; 483 insn->bits3.dp_write_gen5.end_of_thread = end_of_thread; 484 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE; 485 insn->bits2.send_gen5.end_of_thread = end_of_thread; 486 } else { 487 insn->bits3.dp_write.binding_table_index = binding_table_index; 488 insn->bits3.dp_write.msg_control = msg_control; 489 insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear; 490 insn->bits3.dp_write.msg_type = msg_type; 491 insn->bits3.dp_write.send_commit_msg = send_commit_msg; 492 insn->bits3.dp_write.response_length = response_length; 493 insn->bits3.dp_write.msg_length = msg_length; 494 insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE; 495 insn->bits3.dp_write.end_of_thread = end_of_thread; 496 } 497} 498 499static void brw_set_dp_read_message( struct brw_context *brw, 500 struct brw_instruction *insn, 501 GLuint binding_table_index, 502 GLuint msg_control, 503 GLuint msg_type, 504 GLuint target_cache, 505 GLuint msg_length, 506 GLuint response_length, 507 GLuint end_of_thread ) 508{ 509 struct intel_context *intel = &brw->intel; 510 brw_set_src1(insn, brw_imm_d(0)); 511 512 if (intel->gen == 5) { 513 insn->bits3.dp_read_gen5.binding_table_index = binding_table_index; 514 insn->bits3.dp_read_gen5.msg_control = msg_control; 515 insn->bits3.dp_read_gen5.msg_type = msg_type; 516 insn->bits3.dp_read_gen5.target_cache = target_cache; 517 insn->bits3.dp_read_gen5.header_present = 1; 518 insn->bits3.dp_read_gen5.response_length = response_length; 519 insn->bits3.dp_read_gen5.msg_length = msg_length; 520 insn->bits3.dp_read_gen5.pad1 = 0; 521 insn->bits3.dp_read_gen5.end_of_thread = end_of_thread; 522 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ; 523 insn->bits2.send_gen5.end_of_thread = end_of_thread; 524 } else { 525 insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/ 526 insn->bits3.dp_read.msg_control = msg_control; /*8:11*/ 527 insn->bits3.dp_read.msg_type = msg_type; /*12:13*/ 528 insn->bits3.dp_read.target_cache = target_cache; /*14:15*/ 529 insn->bits3.dp_read.response_length = response_length; /*16:19*/ 530 insn->bits3.dp_read.msg_length = msg_length; /*20:23*/ 531 insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/ 532 insn->bits3.dp_read.pad1 = 0; /*28:30*/ 533 insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/ 534 } 535} 536 537static void brw_set_sampler_message(struct brw_context *brw, 538 struct brw_instruction *insn, 539 GLuint binding_table_index, 540 GLuint sampler, 541 GLuint msg_type, 542 GLuint response_length, 543 GLuint msg_length, 544 GLboolean eot, 545 GLuint header_present, 546 GLuint simd_mode) 547{ 548 struct intel_context *intel = &brw->intel; 549 assert(eot == 0); 550 brw_set_src1(insn, brw_imm_d(0)); 551 552 if (intel->gen >= 5) { 553 insn->bits3.sampler_gen5.binding_table_index = binding_table_index; 554 insn->bits3.sampler_gen5.sampler = sampler; 555 insn->bits3.sampler_gen5.msg_type = msg_type; 556 insn->bits3.sampler_gen5.simd_mode = simd_mode; 557 insn->bits3.sampler_gen5.header_present = header_present; 558 insn->bits3.sampler_gen5.response_length = response_length; 559 insn->bits3.sampler_gen5.msg_length = msg_length; 560 insn->bits3.sampler_gen5.end_of_thread = eot; 561 if (intel->gen >= 6) 562 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_SAMPLER; 563 else { 564 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER; 565 insn->bits2.send_gen5.end_of_thread = eot; 566 } 567 } else if (intel->is_g4x) { 568 insn->bits3.sampler_g4x.binding_table_index = binding_table_index; 569 insn->bits3.sampler_g4x.sampler = sampler; 570 insn->bits3.sampler_g4x.msg_type = msg_type; 571 insn->bits3.sampler_g4x.response_length = response_length; 572 insn->bits3.sampler_g4x.msg_length = msg_length; 573 insn->bits3.sampler_g4x.end_of_thread = eot; 574 insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER; 575 } else { 576 insn->bits3.sampler.binding_table_index = binding_table_index; 577 insn->bits3.sampler.sampler = sampler; 578 insn->bits3.sampler.msg_type = msg_type; 579 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; 580 insn->bits3.sampler.response_length = response_length; 581 insn->bits3.sampler.msg_length = msg_length; 582 insn->bits3.sampler.end_of_thread = eot; 583 insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER; 584 } 585} 586 587 588 589static struct brw_instruction *next_insn( struct brw_compile *p, 590 GLuint opcode ) 591{ 592 struct brw_instruction *insn; 593 594 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN); 595 596 insn = &p->store[p->nr_insn++]; 597 memcpy(insn, p->current, sizeof(*insn)); 598 599 /* Reset this one-shot flag: 600 */ 601 602 if (p->current->header.destreg__conditionalmod) { 603 p->current->header.destreg__conditionalmod = 0; 604 p->current->header.predicate_control = BRW_PREDICATE_NORMAL; 605 } 606 607 insn->header.opcode = opcode; 608 return insn; 609} 610 611 612static struct brw_instruction *brw_alu1( struct brw_compile *p, 613 GLuint opcode, 614 struct brw_reg dest, 615 struct brw_reg src ) 616{ 617 struct brw_instruction *insn = next_insn(p, opcode); 618 brw_set_dest(insn, dest); 619 brw_set_src0(insn, src); 620 return insn; 621} 622 623static struct brw_instruction *brw_alu2(struct brw_compile *p, 624 GLuint opcode, 625 struct brw_reg dest, 626 struct brw_reg src0, 627 struct brw_reg src1 ) 628{ 629 struct brw_instruction *insn = next_insn(p, opcode); 630 brw_set_dest(insn, dest); 631 brw_set_src0(insn, src0); 632 brw_set_src1(insn, src1); 633 return insn; 634} 635 636 637/*********************************************************************** 638 * Convenience routines. 639 */ 640#define ALU1(OP) \ 641struct brw_instruction *brw_##OP(struct brw_compile *p, \ 642 struct brw_reg dest, \ 643 struct brw_reg src0) \ 644{ \ 645 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \ 646} 647 648#define ALU2(OP) \ 649struct brw_instruction *brw_##OP(struct brw_compile *p, \ 650 struct brw_reg dest, \ 651 struct brw_reg src0, \ 652 struct brw_reg src1) \ 653{ \ 654 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \ 655} 656 657 658ALU1(MOV) 659ALU2(SEL) 660ALU1(NOT) 661ALU2(AND) 662ALU2(OR) 663ALU2(XOR) 664ALU2(SHR) 665ALU2(SHL) 666ALU2(RSR) 667ALU2(RSL) 668ALU2(ASR) 669ALU1(FRC) 670ALU1(RNDD) 671ALU1(RNDZ) 672ALU2(MAC) 673ALU2(MACH) 674ALU1(LZD) 675ALU2(DP4) 676ALU2(DPH) 677ALU2(DP3) 678ALU2(DP2) 679ALU2(LINE) 680ALU2(PLN) 681 682struct brw_instruction *brw_ADD(struct brw_compile *p, 683 struct brw_reg dest, 684 struct brw_reg src0, 685 struct brw_reg src1) 686{ 687 /* 6.2.2: add */ 688 if (src0.type == BRW_REGISTER_TYPE_F || 689 (src0.file == BRW_IMMEDIATE_VALUE && 690 src0.type == BRW_REGISTER_TYPE_VF)) { 691 assert(src1.type != BRW_REGISTER_TYPE_UD); 692 assert(src1.type != BRW_REGISTER_TYPE_D); 693 } 694 695 if (src1.type == BRW_REGISTER_TYPE_F || 696 (src1.file == BRW_IMMEDIATE_VALUE && 697 src1.type == BRW_REGISTER_TYPE_VF)) { 698 assert(src0.type != BRW_REGISTER_TYPE_UD); 699 assert(src0.type != BRW_REGISTER_TYPE_D); 700 } 701 702 return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1); 703} 704 705struct brw_instruction *brw_MUL(struct brw_compile *p, 706 struct brw_reg dest, 707 struct brw_reg src0, 708 struct brw_reg src1) 709{ 710 /* 6.32.38: mul */ 711 if (src0.type == BRW_REGISTER_TYPE_D || 712 src0.type == BRW_REGISTER_TYPE_UD || 713 src1.type == BRW_REGISTER_TYPE_D || 714 src1.type == BRW_REGISTER_TYPE_UD) { 715 assert(dest.type != BRW_REGISTER_TYPE_F); 716 } 717 718 if (src0.type == BRW_REGISTER_TYPE_F || 719 (src0.file == BRW_IMMEDIATE_VALUE && 720 src0.type == BRW_REGISTER_TYPE_VF)) { 721 assert(src1.type != BRW_REGISTER_TYPE_UD); 722 assert(src1.type != BRW_REGISTER_TYPE_D); 723 } 724 725 if (src1.type == BRW_REGISTER_TYPE_F || 726 (src1.file == BRW_IMMEDIATE_VALUE && 727 src1.type == BRW_REGISTER_TYPE_VF)) { 728 assert(src0.type != BRW_REGISTER_TYPE_UD); 729 assert(src0.type != BRW_REGISTER_TYPE_D); 730 } 731 732 assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE || 733 src0.nr != BRW_ARF_ACCUMULATOR); 734 assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE || 735 src1.nr != BRW_ARF_ACCUMULATOR); 736 737 return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1); 738} 739 740 741void brw_NOP(struct brw_compile *p) 742{ 743 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP); 744 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 745 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 746 brw_set_src1(insn, brw_imm_ud(0x0)); 747} 748 749 750 751 752 753/*********************************************************************** 754 * Comparisons, if/else/endif 755 */ 756 757struct brw_instruction *brw_JMPI(struct brw_compile *p, 758 struct brw_reg dest, 759 struct brw_reg src0, 760 struct brw_reg src1) 761{ 762 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); 763 764 insn->header.execution_size = 1; 765 insn->header.compression_control = BRW_COMPRESSION_NONE; 766 insn->header.mask_control = BRW_MASK_DISABLE; 767 768 p->current->header.predicate_control = BRW_PREDICATE_NONE; 769 770 return insn; 771} 772 773/* EU takes the value from the flag register and pushes it onto some 774 * sort of a stack (presumably merging with any flag value already on 775 * the stack). Within an if block, the flags at the top of the stack 776 * control execution on each channel of the unit, eg. on each of the 777 * 16 pixel values in our wm programs. 778 * 779 * When the matching 'else' instruction is reached (presumably by 780 * countdown of the instruction count patched in by our ELSE/ENDIF 781 * functions), the relevent flags are inverted. 782 * 783 * When the matching 'endif' instruction is reached, the flags are 784 * popped off. If the stack is now empty, normal execution resumes. 785 * 786 * No attempt is made to deal with stack overflow (14 elements?). 787 */ 788struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size) 789{ 790 struct brw_instruction *insn; 791 792 if (p->single_program_flow) { 793 assert(execute_size == BRW_EXECUTE_1); 794 795 insn = next_insn(p, BRW_OPCODE_ADD); 796 insn->header.predicate_inverse = 1; 797 } else { 798 insn = next_insn(p, BRW_OPCODE_IF); 799 } 800 801 /* Override the defaults for this instruction: 802 */ 803 brw_set_dest(insn, brw_ip_reg()); 804 brw_set_src0(insn, brw_ip_reg()); 805 brw_set_src1(insn, brw_imm_d(0x0)); 806 807 insn->header.execution_size = execute_size; 808 insn->header.compression_control = BRW_COMPRESSION_NONE; 809 insn->header.predicate_control = BRW_PREDICATE_NORMAL; 810 insn->header.mask_control = BRW_MASK_ENABLE; 811 if (!p->single_program_flow) 812 insn->header.thread_control = BRW_THREAD_SWITCH; 813 814 p->current->header.predicate_control = BRW_PREDICATE_NONE; 815 816 return insn; 817} 818 819 820struct brw_instruction *brw_ELSE(struct brw_compile *p, 821 struct brw_instruction *if_insn) 822{ 823 struct intel_context *intel = &p->brw->intel; 824 struct brw_instruction *insn; 825 GLuint br = 1; 826 827 /* jump count is for 64bit data chunk each, so one 128bit 828 instruction requires 2 chunks. */ 829 if (intel->gen >= 5) 830 br = 2; 831 832 if (p->single_program_flow) { 833 insn = next_insn(p, BRW_OPCODE_ADD); 834 } else { 835 insn = next_insn(p, BRW_OPCODE_ELSE); 836 } 837 838 brw_set_dest(insn, brw_ip_reg()); 839 brw_set_src0(insn, brw_ip_reg()); 840 brw_set_src1(insn, brw_imm_d(0x0)); 841 842 insn->header.compression_control = BRW_COMPRESSION_NONE; 843 insn->header.execution_size = if_insn->header.execution_size; 844 insn->header.mask_control = BRW_MASK_ENABLE; 845 if (!p->single_program_flow) 846 insn->header.thread_control = BRW_THREAD_SWITCH; 847 848 /* Patch the if instruction to point at this instruction. 849 */ 850 if (p->single_program_flow) { 851 assert(if_insn->header.opcode == BRW_OPCODE_ADD); 852 853 if_insn->bits3.ud = (insn - if_insn + 1) * 16; 854 } else { 855 assert(if_insn->header.opcode == BRW_OPCODE_IF); 856 857 if_insn->bits3.if_else.jump_count = br * (insn - if_insn); 858 if_insn->bits3.if_else.pop_count = 0; 859 if_insn->bits3.if_else.pad0 = 0; 860 } 861 862 return insn; 863} 864 865void brw_ENDIF(struct brw_compile *p, 866 struct brw_instruction *patch_insn) 867{ 868 struct intel_context *intel = &p->brw->intel; 869 GLuint br = 1; 870 871 if (intel->gen >= 5) 872 br = 2; 873 874 if (p->single_program_flow) { 875 /* In single program flow mode, there's no need to execute an ENDIF, 876 * since we don't need to do any stack operations, and if we're executing 877 * currently, we want to just continue executing. 878 */ 879 struct brw_instruction *next = &p->store[p->nr_insn]; 880 881 assert(patch_insn->header.opcode == BRW_OPCODE_ADD); 882 883 patch_insn->bits3.ud = (next - patch_insn) * 16; 884 } else { 885 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF); 886 887 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 888 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 889 brw_set_src1(insn, brw_imm_d(0x0)); 890 891 insn->header.compression_control = BRW_COMPRESSION_NONE; 892 insn->header.execution_size = patch_insn->header.execution_size; 893 insn->header.mask_control = BRW_MASK_ENABLE; 894 insn->header.thread_control = BRW_THREAD_SWITCH; 895 896 assert(patch_insn->bits3.if_else.jump_count == 0); 897 898 /* Patch the if or else instructions to point at this or the next 899 * instruction respectively. 900 */ 901 if (patch_insn->header.opcode == BRW_OPCODE_IF) { 902 /* Automagically turn it into an IFF: 903 */ 904 patch_insn->header.opcode = BRW_OPCODE_IFF; 905 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); 906 patch_insn->bits3.if_else.pop_count = 0; 907 patch_insn->bits3.if_else.pad0 = 0; 908 } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) { 909 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); 910 patch_insn->bits3.if_else.pop_count = 1; 911 patch_insn->bits3.if_else.pad0 = 0; 912 } else { 913 assert(0); 914 } 915 916 /* Also pop item off the stack in the endif instruction: 917 */ 918 insn->bits3.if_else.jump_count = 0; 919 insn->bits3.if_else.pop_count = 1; 920 insn->bits3.if_else.pad0 = 0; 921 } 922} 923 924struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count) 925{ 926 struct brw_instruction *insn; 927 insn = next_insn(p, BRW_OPCODE_BREAK); 928 brw_set_dest(insn, brw_ip_reg()); 929 brw_set_src0(insn, brw_ip_reg()); 930 brw_set_src1(insn, brw_imm_d(0x0)); 931 insn->header.compression_control = BRW_COMPRESSION_NONE; 932 insn->header.execution_size = BRW_EXECUTE_8; 933 /* insn->header.mask_control = BRW_MASK_DISABLE; */ 934 insn->bits3.if_else.pad0 = 0; 935 insn->bits3.if_else.pop_count = pop_count; 936 return insn; 937} 938 939struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count) 940{ 941 struct brw_instruction *insn; 942 insn = next_insn(p, BRW_OPCODE_CONTINUE); 943 brw_set_dest(insn, brw_ip_reg()); 944 brw_set_src0(insn, brw_ip_reg()); 945 brw_set_src1(insn, brw_imm_d(0x0)); 946 insn->header.compression_control = BRW_COMPRESSION_NONE; 947 insn->header.execution_size = BRW_EXECUTE_8; 948 /* insn->header.mask_control = BRW_MASK_DISABLE; */ 949 insn->bits3.if_else.pad0 = 0; 950 insn->bits3.if_else.pop_count = pop_count; 951 return insn; 952} 953 954/* DO/WHILE loop: 955 */ 956struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) 957{ 958 if (p->single_program_flow) { 959 return &p->store[p->nr_insn]; 960 } else { 961 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO); 962 963 /* Override the defaults for this instruction: 964 */ 965 brw_set_dest(insn, brw_null_reg()); 966 brw_set_src0(insn, brw_null_reg()); 967 brw_set_src1(insn, brw_null_reg()); 968 969 insn->header.compression_control = BRW_COMPRESSION_NONE; 970 insn->header.execution_size = execute_size; 971 insn->header.predicate_control = BRW_PREDICATE_NONE; 972 /* insn->header.mask_control = BRW_MASK_ENABLE; */ 973 /* insn->header.mask_control = BRW_MASK_DISABLE; */ 974 975 return insn; 976 } 977} 978 979 980 981struct brw_instruction *brw_WHILE(struct brw_compile *p, 982 struct brw_instruction *do_insn) 983{ 984 struct intel_context *intel = &p->brw->intel; 985 struct brw_instruction *insn; 986 GLuint br = 1; 987 988 if (intel->gen >= 5) 989 br = 2; 990 991 if (p->single_program_flow) 992 insn = next_insn(p, BRW_OPCODE_ADD); 993 else 994 insn = next_insn(p, BRW_OPCODE_WHILE); 995 996 brw_set_dest(insn, brw_ip_reg()); 997 brw_set_src0(insn, brw_ip_reg()); 998 brw_set_src1(insn, brw_imm_d(0x0)); 999 1000 insn->header.compression_control = BRW_COMPRESSION_NONE; 1001 1002 if (p->single_program_flow) { 1003 insn->header.execution_size = BRW_EXECUTE_1; 1004 1005 insn->bits3.d = (do_insn - insn) * 16; 1006 } else { 1007 insn->header.execution_size = do_insn->header.execution_size; 1008 1009 assert(do_insn->header.opcode == BRW_OPCODE_DO); 1010 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1); 1011 insn->bits3.if_else.pop_count = 0; 1012 insn->bits3.if_else.pad0 = 0; 1013 } 1014 1015/* insn->header.mask_control = BRW_MASK_ENABLE; */ 1016 1017 /* insn->header.mask_control = BRW_MASK_DISABLE; */ 1018 p->current->header.predicate_control = BRW_PREDICATE_NONE; 1019 return insn; 1020} 1021 1022 1023/* FORWARD JUMPS: 1024 */ 1025void brw_land_fwd_jump(struct brw_compile *p, 1026 struct brw_instruction *jmp_insn) 1027{ 1028 struct intel_context *intel = &p->brw->intel; 1029 struct brw_instruction *landing = &p->store[p->nr_insn]; 1030 GLuint jmpi = 1; 1031 1032 if (intel->gen >= 5) 1033 jmpi = 2; 1034 1035 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); 1036 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE); 1037 1038 jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1); 1039} 1040 1041 1042 1043/* To integrate with the above, it makes sense that the comparison 1044 * instruction should populate the flag register. It might be simpler 1045 * just to use the flag reg for most WM tasks? 1046 */ 1047void brw_CMP(struct brw_compile *p, 1048 struct brw_reg dest, 1049 GLuint conditional, 1050 struct brw_reg src0, 1051 struct brw_reg src1) 1052{ 1053 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP); 1054 1055 insn->header.destreg__conditionalmod = conditional; 1056 brw_set_dest(insn, dest); 1057 brw_set_src0(insn, src0); 1058 brw_set_src1(insn, src1); 1059 1060/* guess_execution_size(insn, src0); */ 1061 1062 1063 /* Make it so that future instructions will use the computed flag 1064 * value until brw_set_predicate_control_flag_value() is called 1065 * again. 1066 */ 1067 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE && 1068 dest.nr == 0) { 1069 p->current->header.predicate_control = BRW_PREDICATE_NORMAL; 1070 p->flag_value = 0xff; 1071 } 1072} 1073 1074/* Issue 'wait' instruction for n1, host could program MMIO 1075 to wake up thread. */ 1076void brw_WAIT (struct brw_compile *p) 1077{ 1078 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT); 1079 struct brw_reg src = brw_notification_1_reg(); 1080 1081 brw_set_dest(insn, src); 1082 brw_set_src0(insn, src); 1083 brw_set_src1(insn, brw_null_reg()); 1084 insn->header.execution_size = 0; /* must */ 1085 insn->header.predicate_control = 0; 1086 insn->header.compression_control = 0; 1087} 1088 1089 1090/*********************************************************************** 1091 * Helpers for the various SEND message types: 1092 */ 1093 1094/** Extended math function, float[8]. 1095 */ 1096void brw_math( struct brw_compile *p, 1097 struct brw_reg dest, 1098 GLuint function, 1099 GLuint saturate, 1100 GLuint msg_reg_nr, 1101 struct brw_reg src, 1102 GLuint data_type, 1103 GLuint precision ) 1104{ 1105 struct intel_context *intel = &p->brw->intel; 1106 1107 if (intel->gen >= 6) { 1108 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH); 1109 1110 /* Math is the same ISA format as other opcodes, except that CondModifier 1111 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. 1112 */ 1113 insn->header.destreg__conditionalmod = function; 1114 1115 brw_set_dest(insn, dest); 1116 brw_set_src0(insn, src); 1117 brw_set_src1(insn, brw_null_reg()); 1118 } else { 1119 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1120 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; 1121 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; 1122 /* Example code doesn't set predicate_control for send 1123 * instructions. 1124 */ 1125 insn->header.predicate_control = 0; 1126 insn->header.destreg__conditionalmod = msg_reg_nr; 1127 1128 brw_set_dest(insn, dest); 1129 brw_set_src0(insn, src); 1130 brw_set_math_message(p->brw, 1131 insn, 1132 msg_length, response_length, 1133 function, 1134 BRW_MATH_INTEGER_UNSIGNED, 1135 precision, 1136 saturate, 1137 data_type); 1138 } 1139} 1140 1141/** Extended math function, float[8]. 1142 */ 1143void brw_math2(struct brw_compile *p, 1144 struct brw_reg dest, 1145 GLuint function, 1146 struct brw_reg src0, 1147 struct brw_reg src1) 1148{ 1149 struct intel_context *intel = &p->brw->intel; 1150 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH); 1151 1152 assert(intel->gen >= 6); 1153 1154 /* Math is the same ISA format as other opcodes, except that CondModifier 1155 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. 1156 */ 1157 insn->header.destreg__conditionalmod = function; 1158 1159 brw_set_dest(insn, dest); 1160 brw_set_src0(insn, src0); 1161 brw_set_src1(insn, src1); 1162} 1163 1164/** 1165 * Extended math function, float[16]. 1166 * Use 2 send instructions. 1167 */ 1168void brw_math_16( struct brw_compile *p, 1169 struct brw_reg dest, 1170 GLuint function, 1171 GLuint saturate, 1172 GLuint msg_reg_nr, 1173 struct brw_reg src, 1174 GLuint precision ) 1175{ 1176 struct intel_context *intel = &p->brw->intel; 1177 struct brw_instruction *insn; 1178 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; 1179 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; 1180 1181 if (intel->gen >= 6) { 1182 insn = next_insn(p, BRW_OPCODE_MATH); 1183 1184 /* Math is the same ISA format as other opcodes, except that CondModifier 1185 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. 1186 */ 1187 insn->header.destreg__conditionalmod = function; 1188 1189 brw_set_dest(insn, dest); 1190 brw_set_src0(insn, src); 1191 brw_set_src1(insn, brw_null_reg()); 1192 return; 1193 } 1194 1195 /* First instruction: 1196 */ 1197 brw_push_insn_state(p); 1198 brw_set_predicate_control_flag_value(p, 0xff); 1199 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1200 1201 insn = next_insn(p, BRW_OPCODE_SEND); 1202 insn->header.destreg__conditionalmod = msg_reg_nr; 1203 1204 brw_set_dest(insn, dest); 1205 brw_set_src0(insn, src); 1206 brw_set_math_message(p->brw, 1207 insn, 1208 msg_length, response_length, 1209 function, 1210 BRW_MATH_INTEGER_UNSIGNED, 1211 precision, 1212 saturate, 1213 BRW_MATH_DATA_VECTOR); 1214 1215 /* Second instruction: 1216 */ 1217 insn = next_insn(p, BRW_OPCODE_SEND); 1218 insn->header.compression_control = BRW_COMPRESSION_2NDHALF; 1219 insn->header.destreg__conditionalmod = msg_reg_nr+1; 1220 1221 brw_set_dest(insn, offset(dest,1)); 1222 brw_set_src0(insn, src); 1223 brw_set_math_message(p->brw, 1224 insn, 1225 msg_length, response_length, 1226 function, 1227 BRW_MATH_INTEGER_UNSIGNED, 1228 precision, 1229 saturate, 1230 BRW_MATH_DATA_VECTOR); 1231 1232 brw_pop_insn_state(p); 1233} 1234 1235 1236/** 1237 * Write block of 16 dwords/floats to the data port Render Cache scratch buffer. 1238 * Scratch offset should be a multiple of 64. 1239 * Used for register spilling. 1240 */ 1241void brw_dp_WRITE_16( struct brw_compile *p, 1242 struct brw_reg src, 1243 GLuint scratch_offset ) 1244{ 1245 struct intel_context *intel = &p->brw->intel; 1246 GLuint msg_reg_nr = 1; 1247 { 1248 brw_push_insn_state(p); 1249 brw_set_mask_control(p, BRW_MASK_DISABLE); 1250 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1251 1252 /* set message header global offset field (reg 0, element 2) */ 1253 brw_MOV(p, 1254 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), 1255 brw_imm_d(scratch_offset)); 1256 1257 brw_pop_insn_state(p); 1258 } 1259 1260 { 1261 GLuint msg_length = 3; 1262 struct brw_reg dest; 1263 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1264 int send_commit_msg; 1265 1266 insn->header.predicate_control = 0; /* XXX */ 1267 insn->header.compression_control = BRW_COMPRESSION_NONE; 1268 insn->header.destreg__conditionalmod = msg_reg_nr; 1269 1270 /* Until gen6, writes followed by reads from the same location 1271 * are not guaranteed to be ordered unless write_commit is set. 1272 * If set, then a no-op write is issued to the destination 1273 * register to set a dependency, and a read from the destination 1274 * can be used to ensure the ordering. 1275 * 1276 * For gen6, only writes between different threads need ordering 1277 * protection. Our use of DP writes is all about register 1278 * spilling within a thread. 1279 */ 1280 if (intel->gen >= 6) { 1281 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW); 1282 send_commit_msg = 0; 1283 } else { 1284 dest = brw_uw16_grf(0, 0); 1285 send_commit_msg = 1; 1286 } 1287 1288 brw_set_dest(insn, dest); 1289 brw_set_src0(insn, src); 1290 1291 brw_set_dp_write_message(p->brw, 1292 insn, 1293 255, /* binding table index (255=stateless) */ 1294 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */ 1295 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */ 1296 msg_length, 1297 GL_TRUE, /* header_present */ 1298 0, /* pixel scoreboard */ 1299 send_commit_msg, /* response_length */ 1300 0, /* eot */ 1301 send_commit_msg); 1302 } 1303} 1304 1305 1306/** 1307 * Read block of 16 dwords/floats from the data port Render Cache scratch buffer. 1308 * Scratch offset should be a multiple of 64. 1309 * Used for register spilling. 1310 */ 1311void brw_dp_READ_16( struct brw_compile *p, 1312 struct brw_reg dest, 1313 GLuint scratch_offset ) 1314{ 1315 GLuint msg_reg_nr = 1; 1316 { 1317 brw_push_insn_state(p); 1318 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1319 brw_set_mask_control(p, BRW_MASK_DISABLE); 1320 1321 /* set message header global offset field (reg 0, element 2) */ 1322 brw_MOV(p, 1323 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), 1324 brw_imm_d(scratch_offset)); 1325 1326 brw_pop_insn_state(p); 1327 } 1328 1329 { 1330 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1331 1332 insn->header.predicate_control = 0; /* XXX */ 1333 insn->header.compression_control = BRW_COMPRESSION_NONE; 1334 insn->header.destreg__conditionalmod = msg_reg_nr; 1335 1336 brw_set_dest(insn, dest); /* UW? */ 1337 brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); 1338 1339 brw_set_dp_read_message(p->brw, 1340 insn, 1341 255, /* binding table index (255=stateless) */ 1342 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, 1343 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ 1344 1, /* target cache (render/scratch) */ 1345 1, /* msg_length */ 1346 2, /* response_length */ 1347 0); /* eot */ 1348 } 1349} 1350 1351 1352/** 1353 * Read a float[4] vector from the data port Data Cache (const buffer). 1354 * Location (in buffer) should be a multiple of 16. 1355 * Used for fetching shader constants. 1356 * If relAddr is true, we'll do an indirect fetch using the address register. 1357 */ 1358void brw_dp_READ_4( struct brw_compile *p, 1359 struct brw_reg dest, 1360 GLboolean relAddr, 1361 GLuint location, 1362 GLuint bind_table_index ) 1363{ 1364 /* XXX: relAddr not implemented */ 1365 GLuint msg_reg_nr = 1; 1366 { 1367 struct brw_reg b; 1368 brw_push_insn_state(p); 1369 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1370 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1371 brw_set_mask_control(p, BRW_MASK_DISABLE); 1372 1373 /* Setup MRF[1] with location/offset into const buffer */ 1374 b = brw_message_reg(msg_reg_nr); 1375 b = retype(b, BRW_REGISTER_TYPE_UD); 1376 /* XXX I think we're setting all the dwords of MRF[1] to 'location'. 1377 * when the docs say only dword[2] should be set. Hmmm. But it works. 1378 */ 1379 brw_MOV(p, b, brw_imm_ud(location)); 1380 brw_pop_insn_state(p); 1381 } 1382 1383 { 1384 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1385 1386 insn->header.predicate_control = BRW_PREDICATE_NONE; 1387 insn->header.compression_control = BRW_COMPRESSION_NONE; 1388 insn->header.destreg__conditionalmod = msg_reg_nr; 1389 insn->header.mask_control = BRW_MASK_DISABLE; 1390 1391 /* cast dest to a uword[8] vector */ 1392 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); 1393 1394 brw_set_dest(insn, dest); 1395 brw_set_src0(insn, brw_null_reg()); 1396 1397 brw_set_dp_read_message(p->brw, 1398 insn, 1399 bind_table_index, 1400 0, /* msg_control (0 means 1 Oword) */ 1401 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ 1402 0, /* source cache = data cache */ 1403 1, /* msg_length */ 1404 1, /* response_length (1 Oword) */ 1405 0); /* eot */ 1406 } 1407} 1408 1409 1410/** 1411 * Read float[4] constant(s) from VS constant buffer. 1412 * For relative addressing, two float[4] constants will be read into 'dest'. 1413 * Otherwise, one float[4] constant will be read into the lower half of 'dest'. 1414 */ 1415void brw_dp_READ_4_vs(struct brw_compile *p, 1416 struct brw_reg dest, 1417 GLuint location, 1418 GLuint bind_table_index) 1419{ 1420 struct brw_instruction *insn; 1421 GLuint msg_reg_nr = 1; 1422 struct brw_reg b; 1423 1424 /* 1425 printf("vs const read msg, location %u, msg_reg_nr %d\n", 1426 location, msg_reg_nr); 1427 */ 1428 1429 /* Setup MRF[1] with location/offset into const buffer */ 1430 brw_push_insn_state(p); 1431 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1432 brw_set_mask_control(p, BRW_MASK_DISABLE); 1433 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1434 1435 /* XXX I think we're setting all the dwords of MRF[1] to 'location'. 1436 * when the docs say only dword[2] should be set. Hmmm. But it works. 1437 */ 1438 b = brw_message_reg(msg_reg_nr); 1439 b = retype(b, BRW_REGISTER_TYPE_UD); 1440 /*b = get_element_ud(b, 2);*/ 1441 brw_MOV(p, b, brw_imm_ud(location)); 1442 1443 brw_pop_insn_state(p); 1444 1445 insn = next_insn(p, BRW_OPCODE_SEND); 1446 1447 insn->header.predicate_control = BRW_PREDICATE_NONE; 1448 insn->header.compression_control = BRW_COMPRESSION_NONE; 1449 insn->header.destreg__conditionalmod = msg_reg_nr; 1450 insn->header.mask_control = BRW_MASK_DISABLE; 1451 1452 brw_set_dest(insn, dest); 1453 brw_set_src0(insn, brw_null_reg()); 1454 1455 brw_set_dp_read_message(p->brw, 1456 insn, 1457 bind_table_index, 1458 0, 1459 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ 1460 0, /* source cache = data cache */ 1461 1, /* msg_length */ 1462 1, /* response_length (1 Oword) */ 1463 0); /* eot */ 1464} 1465 1466/** 1467 * Read a float[4] constant per vertex from VS constant buffer, with 1468 * relative addressing. 1469 */ 1470void brw_dp_READ_4_vs_relative(struct brw_compile *p, 1471 struct brw_reg dest, 1472 struct brw_reg addr_reg, 1473 GLuint offset, 1474 GLuint bind_table_index) 1475{ 1476 struct intel_context *intel = &p->brw->intel; 1477 int msg_type; 1478 1479 /* Setup MRF[1] with offset into const buffer */ 1480 brw_push_insn_state(p); 1481 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1482 brw_set_mask_control(p, BRW_MASK_DISABLE); 1483 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1484 1485 /* M1.0 is block offset 0, M1.4 is block offset 1, all other 1486 * fields ignored. 1487 */ 1488 brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), 1489 addr_reg, brw_imm_d(offset)); 1490 brw_pop_insn_state(p); 1491 1492 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1493 1494 insn->header.predicate_control = BRW_PREDICATE_NONE; 1495 insn->header.compression_control = BRW_COMPRESSION_NONE; 1496 insn->header.destreg__conditionalmod = 0; 1497 insn->header.mask_control = BRW_MASK_DISABLE; 1498 1499 brw_set_dest(insn, dest); 1500 brw_set_src0(insn, brw_vec8_grf(0, 0)); 1501 1502 if (intel->gen == 6) 1503 msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; 1504 else if (intel->gen == 5 || intel->is_g4x) 1505 msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; 1506 else 1507 msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; 1508 1509 brw_set_dp_read_message(p->brw, 1510 insn, 1511 bind_table_index, 1512 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, 1513 msg_type, 1514 0, /* source cache = data cache */ 1515 2, /* msg_length */ 1516 1, /* response_length */ 1517 0); /* eot */ 1518} 1519 1520 1521 1522void brw_fb_WRITE(struct brw_compile *p, 1523 int dispatch_width, 1524 struct brw_reg dest, 1525 GLuint msg_reg_nr, 1526 struct brw_reg src0, 1527 GLuint binding_table_index, 1528 GLuint msg_length, 1529 GLuint response_length, 1530 GLboolean eot) 1531{ 1532 struct intel_context *intel = &p->brw->intel; 1533 struct brw_instruction *insn; 1534 GLuint msg_control, msg_type; 1535 GLboolean header_present = GL_TRUE; 1536 1537 insn = next_insn(p, BRW_OPCODE_SEND); 1538 insn->header.predicate_control = 0; /* XXX */ 1539 insn->header.compression_control = BRW_COMPRESSION_NONE; 1540 1541 if (intel->gen >= 6) { 1542 if (msg_length == 4) 1543 header_present = GL_FALSE; 1544 1545 /* headerless version, just submit color payload */ 1546 src0 = brw_message_reg(msg_reg_nr); 1547 1548 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE_GEN6; 1549 } else { 1550 insn->header.destreg__conditionalmod = msg_reg_nr; 1551 1552 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; 1553 } 1554 1555 if (dispatch_width == 16) 1556 msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE; 1557 else 1558 msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01; 1559 1560 brw_set_dest(insn, dest); 1561 brw_set_src0(insn, src0); 1562 brw_set_dp_write_message(p->brw, 1563 insn, 1564 binding_table_index, 1565 msg_control, 1566 msg_type, 1567 msg_length, 1568 header_present, 1569 1, /* pixel scoreboard */ 1570 response_length, 1571 eot, 1572 0 /* send_commit_msg */); 1573} 1574 1575 1576/** 1577 * Texture sample instruction. 1578 * Note: the msg_type plus msg_length values determine exactly what kind 1579 * of sampling operation is performed. See volume 4, page 161 of docs. 1580 */ 1581void brw_SAMPLE(struct brw_compile *p, 1582 struct brw_reg dest, 1583 GLuint msg_reg_nr, 1584 struct brw_reg src0, 1585 GLuint binding_table_index, 1586 GLuint sampler, 1587 GLuint writemask, 1588 GLuint msg_type, 1589 GLuint response_length, 1590 GLuint msg_length, 1591 GLboolean eot, 1592 GLuint header_present, 1593 GLuint simd_mode) 1594{ 1595 struct intel_context *intel = &p->brw->intel; 1596 GLboolean need_stall = 0; 1597 1598 if (writemask == 0) { 1599 /*printf("%s: zero writemask??\n", __FUNCTION__); */ 1600 return; 1601 } 1602 1603 /* Hardware doesn't do destination dependency checking on send 1604 * instructions properly. Add a workaround which generates the 1605 * dependency by other means. In practice it seems like this bug 1606 * only crops up for texture samples, and only where registers are 1607 * written by the send and then written again later without being 1608 * read in between. Luckily for us, we already track that 1609 * information and use it to modify the writemask for the 1610 * instruction, so that is a guide for whether a workaround is 1611 * needed. 1612 */ 1613 if (writemask != WRITEMASK_XYZW) { 1614 GLuint dst_offset = 0; 1615 GLuint i, newmask = 0, len = 0; 1616 1617 for (i = 0; i < 4; i++) { 1618 if (writemask & (1<<i)) 1619 break; 1620 dst_offset += 2; 1621 } 1622 for (; i < 4; i++) { 1623 if (!(writemask & (1<<i))) 1624 break; 1625 newmask |= 1<<i; 1626 len++; 1627 } 1628 1629 if (newmask != writemask) { 1630 need_stall = 1; 1631 /* printf("need stall %x %x\n", newmask , writemask); */ 1632 } 1633 else { 1634 GLboolean dispatch_16 = GL_FALSE; 1635 1636 struct brw_reg m1 = brw_message_reg(msg_reg_nr); 1637 1638 guess_execution_size(p->current, dest); 1639 if (p->current->header.execution_size == BRW_EXECUTE_16) 1640 dispatch_16 = GL_TRUE; 1641 1642 newmask = ~newmask & WRITEMASK_XYZW; 1643 1644 brw_push_insn_state(p); 1645 1646 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1647 brw_set_mask_control(p, BRW_MASK_DISABLE); 1648 1649 brw_MOV(p, m1, brw_vec8_grf(0,0)); 1650 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12)); 1651 1652 brw_pop_insn_state(p); 1653 1654 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); 1655 dest = offset(dest, dst_offset); 1656 1657 /* For 16-wide dispatch, masked channels are skipped in the 1658 * response. For 8-wide, masked channels still take up slots, 1659 * and are just not written to. 1660 */ 1661 if (dispatch_16) 1662 response_length = len * 2; 1663 } 1664 } 1665 1666 { 1667 struct brw_instruction *insn; 1668 1669 /* Sandybridge doesn't have the implied move for SENDs, 1670 * and the first message register index comes from src0. 1671 */ 1672 if (intel->gen >= 6) { 1673 brw_push_insn_state(p); 1674 brw_set_mask_control( p, BRW_MASK_DISABLE ); 1675 /* m1 contains header? */ 1676 brw_MOV(p, brw_message_reg(msg_reg_nr), src0); 1677 brw_pop_insn_state(p); 1678 src0 = brw_message_reg(msg_reg_nr); 1679 } 1680 1681 insn = next_insn(p, BRW_OPCODE_SEND); 1682 insn->header.predicate_control = 0; /* XXX */ 1683 insn->header.compression_control = BRW_COMPRESSION_NONE; 1684 if (intel->gen < 6) 1685 insn->header.destreg__conditionalmod = msg_reg_nr; 1686 1687 brw_set_dest(insn, dest); 1688 brw_set_src0(insn, src0); 1689 brw_set_sampler_message(p->brw, insn, 1690 binding_table_index, 1691 sampler, 1692 msg_type, 1693 response_length, 1694 msg_length, 1695 eot, 1696 header_present, 1697 simd_mode); 1698 } 1699 1700 if (need_stall) { 1701 struct brw_reg reg = vec8(offset(dest, response_length-1)); 1702 1703 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 } 1704 */ 1705 brw_push_insn_state(p); 1706 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1707 brw_MOV(p, reg, reg); 1708 brw_pop_insn_state(p); 1709 } 1710 1711} 1712 1713/* All these variables are pretty confusing - we might be better off 1714 * using bitmasks and macros for this, in the old style. Or perhaps 1715 * just having the caller instantiate the fields in dword3 itself. 1716 */ 1717void brw_urb_WRITE(struct brw_compile *p, 1718 struct brw_reg dest, 1719 GLuint msg_reg_nr, 1720 struct brw_reg src0, 1721 GLboolean allocate, 1722 GLboolean used, 1723 GLuint msg_length, 1724 GLuint response_length, 1725 GLboolean eot, 1726 GLboolean writes_complete, 1727 GLuint offset, 1728 GLuint swizzle) 1729{ 1730 struct intel_context *intel = &p->brw->intel; 1731 struct brw_instruction *insn; 1732 1733 /* Sandybridge doesn't have the implied move for SENDs, 1734 * and the first message register index comes from src0. 1735 */ 1736 if (intel->gen >= 6) { 1737 brw_push_insn_state(p); 1738 brw_set_mask_control( p, BRW_MASK_DISABLE ); 1739 brw_MOV(p, brw_message_reg(msg_reg_nr), src0); 1740 brw_pop_insn_state(p); 1741 src0 = brw_message_reg(msg_reg_nr); 1742 } 1743 1744 insn = next_insn(p, BRW_OPCODE_SEND); 1745 1746 assert(msg_length < BRW_MAX_MRF); 1747 1748 brw_set_dest(insn, dest); 1749 brw_set_src0(insn, src0); 1750 brw_set_src1(insn, brw_imm_d(0)); 1751 1752 if (intel->gen < 6) 1753 insn->header.destreg__conditionalmod = msg_reg_nr; 1754 1755 brw_set_urb_message(p->brw, 1756 insn, 1757 allocate, 1758 used, 1759 msg_length, 1760 response_length, 1761 eot, 1762 writes_complete, 1763 offset, 1764 swizzle); 1765} 1766 1767void brw_ff_sync(struct brw_compile *p, 1768 struct brw_reg dest, 1769 GLuint msg_reg_nr, 1770 struct brw_reg src0, 1771 GLboolean allocate, 1772 GLuint response_length, 1773 GLboolean eot) 1774{ 1775 struct intel_context *intel = &p->brw->intel; 1776 struct brw_instruction *insn; 1777 1778 /* Sandybridge doesn't have the implied move for SENDs, 1779 * and the first message register index comes from src0. 1780 */ 1781 if (intel->gen >= 6) { 1782 brw_push_insn_state(p); 1783 brw_set_mask_control( p, BRW_MASK_DISABLE ); 1784 brw_MOV(p, brw_message_reg(msg_reg_nr), src0); 1785 brw_pop_insn_state(p); 1786 src0 = brw_message_reg(msg_reg_nr); 1787 } 1788 1789 insn = next_insn(p, BRW_OPCODE_SEND); 1790 brw_set_dest(insn, dest); 1791 brw_set_src0(insn, src0); 1792 brw_set_src1(insn, brw_imm_d(0)); 1793 1794 if (intel->gen < 6) 1795 insn->header.destreg__conditionalmod = msg_reg_nr; 1796 1797 brw_set_ff_sync_message(p->brw, 1798 insn, 1799 allocate, 1800 response_length, 1801 eot); 1802} 1803