brw_eu_emit.c revision 56ff30a9f97a1a7094432333906544d6138d6bf2
1/* 2 Copyright (C) Intel Corp. 2006. All Rights Reserved. 3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to 4 develop this 3D driver. 5 6 Permission is hereby granted, free of charge, to any person obtaining 7 a copy of this software and associated documentation files (the 8 "Software"), to deal in the Software without restriction, including 9 without limitation the rights to use, copy, modify, merge, publish, 10 distribute, sublicense, and/or sell copies of the Software, and to 11 permit persons to whom the Software is furnished to do so, subject to 12 the following conditions: 13 14 The above copyright notice and this permission notice (including the 15 next paragraph) shall be included in all copies or substantial 16 portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
25 26 **********************************************************************/ 27 /* 28 * Authors: 29 * Keith Whitwell <keith@tungstengraphics.com> 30 */ 31 32 33#include "brw_context.h" 34#include "brw_defines.h" 35#include "brw_eu.h" 36 37 38 39 40/*********************************************************************** 41 * Internal helper for constructing instructions 42 */ 43 44static void guess_execution_size( struct brw_instruction *insn, 45 struct brw_reg reg ) 46{ 47 if (reg.width == BRW_WIDTH_8 && 48 insn->header.compression_control == BRW_COMPRESSION_COMPRESSED) 49 insn->header.execution_size = BRW_EXECUTE_16; 50 else 51 insn->header.execution_size = reg.width; /* note - definitions are compatible */ 52} 53 54 55static void brw_set_dest( struct brw_instruction *insn, 56 struct brw_reg dest ) 57{ 58 if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && 59 dest.file != BRW_MESSAGE_REGISTER_FILE) 60 assert(dest.nr < 128); 61 62 insn->bits1.da1.dest_reg_file = dest.file; 63 insn->bits1.da1.dest_reg_type = dest.type; 64 insn->bits1.da1.dest_address_mode = dest.address_mode; 65 66 if (dest.address_mode == BRW_ADDRESS_DIRECT) { 67 insn->bits1.da1.dest_reg_nr = dest.nr; 68 69 if (insn->header.access_mode == BRW_ALIGN_1) { 70 insn->bits1.da1.dest_subreg_nr = dest.subnr; 71 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) 72 dest.hstride = BRW_HORIZONTAL_STRIDE_1; 73 insn->bits1.da1.dest_horiz_stride = dest.hstride; 74 } 75 else { 76 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; 77 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; 78 } 79 } 80 else { 81 insn->bits1.ia1.dest_subreg_nr = dest.subnr; 82 83 /* These are different sizes in align1 vs align16: 84 */ 85 if (insn->header.access_mode == BRW_ALIGN_1) { 86 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; 87 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) 88 dest.hstride = BRW_HORIZONTAL_STRIDE_1; 89 insn->bits1.ia1.dest_horiz_stride = dest.hstride; 90 } 91 else { 92 
insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; 93 } 94 } 95 96 /* NEW: Set the execution size based on dest.width and 97 * insn->compression_control: 98 */ 99 guess_execution_size(insn, dest); 100} 101 102static void brw_set_src0( struct brw_instruction *insn, 103 struct brw_reg reg ) 104{ 105 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) 106 assert(reg.nr < 128); 107 108 insn->bits1.da1.src0_reg_file = reg.file; 109 insn->bits1.da1.src0_reg_type = reg.type; 110 insn->bits2.da1.src0_abs = reg.abs; 111 insn->bits2.da1.src0_negate = reg.negate; 112 insn->bits2.da1.src0_address_mode = reg.address_mode; 113 114 if (reg.file == BRW_IMMEDIATE_VALUE) { 115 insn->bits3.ud = reg.dw1.ud; 116 117 /* Required to set some fields in src1 as well: 118 */ 119 insn->bits1.da1.src1_reg_file = 0; /* arf */ 120 insn->bits1.da1.src1_reg_type = reg.type; 121 } 122 else 123 { 124 if (reg.address_mode == BRW_ADDRESS_DIRECT) { 125 if (insn->header.access_mode == BRW_ALIGN_1) { 126 insn->bits2.da1.src0_subreg_nr = reg.subnr; 127 insn->bits2.da1.src0_reg_nr = reg.nr; 128 } 129 else { 130 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16; 131 insn->bits2.da16.src0_reg_nr = reg.nr; 132 } 133 } 134 else { 135 insn->bits2.ia1.src0_subreg_nr = reg.subnr; 136 137 if (insn->header.access_mode == BRW_ALIGN_1) { 138 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; 139 } 140 else { 141 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; 142 } 143 } 144 145 if (insn->header.access_mode == BRW_ALIGN_1) { 146 if (reg.width == BRW_WIDTH_1 && 147 insn->header.execution_size == BRW_EXECUTE_1) { 148 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0; 149 insn->bits2.da1.src0_width = BRW_WIDTH_1; 150 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0; 151 } 152 else { 153 insn->bits2.da1.src0_horiz_stride = reg.hstride; 154 insn->bits2.da1.src0_width = reg.width; 155 insn->bits2.da1.src0_vert_stride = reg.vstride; 156 } 157 } 158 else { 
159 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); 160 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); 161 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); 162 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); 163 164 /* This is an oddity of the fact we're using the same 165 * descriptions for registers in align_16 as align_1: 166 */ 167 if (reg.vstride == BRW_VERTICAL_STRIDE_8) 168 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4; 169 else 170 insn->bits2.da16.src0_vert_stride = reg.vstride; 171 } 172 } 173} 174 175 176void brw_set_src1( struct brw_instruction *insn, 177 struct brw_reg reg ) 178{ 179 assert(reg.file != BRW_MESSAGE_REGISTER_FILE); 180 181 assert(reg.nr < 128); 182 183 insn->bits1.da1.src1_reg_file = reg.file; 184 insn->bits1.da1.src1_reg_type = reg.type; 185 insn->bits3.da1.src1_abs = reg.abs; 186 insn->bits3.da1.src1_negate = reg.negate; 187 188 /* Only src1 can be immediate in two-argument instructions. 
189 */ 190 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE); 191 192 if (reg.file == BRW_IMMEDIATE_VALUE) { 193 insn->bits3.ud = reg.dw1.ud; 194 } 195 else { 196 /* This is a hardware restriction, which may or may not be lifted 197 * in the future: 198 */ 199 assert (reg.address_mode == BRW_ADDRESS_DIRECT); 200 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */ 201 202 if (insn->header.access_mode == BRW_ALIGN_1) { 203 insn->bits3.da1.src1_subreg_nr = reg.subnr; 204 insn->bits3.da1.src1_reg_nr = reg.nr; 205 } 206 else { 207 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16; 208 insn->bits3.da16.src1_reg_nr = reg.nr; 209 } 210 211 if (insn->header.access_mode == BRW_ALIGN_1) { 212 if (reg.width == BRW_WIDTH_1 && 213 insn->header.execution_size == BRW_EXECUTE_1) { 214 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0; 215 insn->bits3.da1.src1_width = BRW_WIDTH_1; 216 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0; 217 } 218 else { 219 insn->bits3.da1.src1_horiz_stride = reg.hstride; 220 insn->bits3.da1.src1_width = reg.width; 221 insn->bits3.da1.src1_vert_stride = reg.vstride; 222 } 223 } 224 else { 225 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); 226 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); 227 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); 228 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); 229 230 /* This is an oddity of the fact we're using the same 231 * descriptions for registers in align_16 as align_1: 232 */ 233 if (reg.vstride == BRW_VERTICAL_STRIDE_8) 234 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4; 235 else 236 insn->bits3.da16.src1_vert_stride = reg.vstride; 237 } 238 } 239} 240 241 242 243static void brw_set_math_message( struct brw_context *brw, 244 struct brw_instruction *insn, 245 GLuint msg_length, 246 GLuint response_length, 247 GLuint function, 248 GLuint 
integer_type, 249 GLboolean low_precision, 250 GLboolean saturate, 251 GLuint dataType ) 252{ 253 struct intel_context *intel = &brw->intel; 254 brw_set_src1(insn, brw_imm_d(0)); 255 256 if (intel->is_ironlake) { 257 insn->bits3.math_igdng.function = function; 258 insn->bits3.math_igdng.int_type = integer_type; 259 insn->bits3.math_igdng.precision = low_precision; 260 insn->bits3.math_igdng.saturate = saturate; 261 insn->bits3.math_igdng.data_type = dataType; 262 insn->bits3.math_igdng.snapshot = 0; 263 insn->bits3.math_igdng.header_present = 0; 264 insn->bits3.math_igdng.response_length = response_length; 265 insn->bits3.math_igdng.msg_length = msg_length; 266 insn->bits3.math_igdng.end_of_thread = 0; 267 insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_MATH; 268 insn->bits2.send_igdng.end_of_thread = 0; 269 } else { 270 insn->bits3.math.function = function; 271 insn->bits3.math.int_type = integer_type; 272 insn->bits3.math.precision = low_precision; 273 insn->bits3.math.saturate = saturate; 274 insn->bits3.math.data_type = dataType; 275 insn->bits3.math.response_length = response_length; 276 insn->bits3.math.msg_length = msg_length; 277 insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH; 278 insn->bits3.math.end_of_thread = 0; 279 } 280} 281 282 283static void brw_set_ff_sync_message( struct brw_context *brw, 284 struct brw_instruction *insn, 285 GLboolean allocate, 286 GLboolean used, 287 GLuint msg_length, 288 GLuint response_length, 289 GLboolean end_of_thread, 290 GLboolean complete, 291 GLuint offset, 292 GLuint swizzle_control ) 293{ 294 brw_set_src1(insn, brw_imm_d(0)); 295 296 insn->bits3.urb_igdng.opcode = 1; 297 insn->bits3.urb_igdng.offset = offset; 298 insn->bits3.urb_igdng.swizzle_control = swizzle_control; 299 insn->bits3.urb_igdng.allocate = allocate; 300 insn->bits3.urb_igdng.used = used; 301 insn->bits3.urb_igdng.complete = complete; 302 insn->bits3.urb_igdng.header_present = 1; 303 insn->bits3.urb_igdng.response_length = response_length; 304 
insn->bits3.urb_igdng.msg_length = msg_length; 305 insn->bits3.urb_igdng.end_of_thread = end_of_thread; 306 insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB; 307 insn->bits2.send_igdng.end_of_thread = end_of_thread; 308} 309 310static void brw_set_urb_message( struct brw_context *brw, 311 struct brw_instruction *insn, 312 GLboolean allocate, 313 GLboolean used, 314 GLuint msg_length, 315 GLuint response_length, 316 GLboolean end_of_thread, 317 GLboolean complete, 318 GLuint offset, 319 GLuint swizzle_control ) 320{ 321 struct intel_context *intel = &brw->intel; 322 brw_set_src1(insn, brw_imm_d(0)); 323 324 if (intel->is_ironlake || intel->gen >= 6) { 325 insn->bits3.urb_igdng.opcode = 0; /* ? */ 326 insn->bits3.urb_igdng.offset = offset; 327 insn->bits3.urb_igdng.swizzle_control = swizzle_control; 328 insn->bits3.urb_igdng.allocate = allocate; 329 insn->bits3.urb_igdng.used = used; /* ? */ 330 insn->bits3.urb_igdng.complete = complete; 331 insn->bits3.urb_igdng.header_present = 1; 332 insn->bits3.urb_igdng.response_length = response_length; 333 insn->bits3.urb_igdng.msg_length = msg_length; 334 insn->bits3.urb_igdng.end_of_thread = end_of_thread; 335 if (intel->gen >= 6) { 336 /* For SNB, the SFID bits moved to the condmod bits, and 337 * EOT stayed in bits3 above. Does the EOT bit setting 338 * below on Ironlake even do anything? 339 */ 340 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB; 341 } else { 342 insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB; 343 insn->bits2.send_igdng.end_of_thread = end_of_thread; 344 } 345 } else { 346 insn->bits3.urb.opcode = 0; /* ? */ 347 insn->bits3.urb.offset = offset; 348 insn->bits3.urb.swizzle_control = swizzle_control; 349 insn->bits3.urb.allocate = allocate; 350 insn->bits3.urb.used = used; /* ? 
*/ 351 insn->bits3.urb.complete = complete; 352 insn->bits3.urb.response_length = response_length; 353 insn->bits3.urb.msg_length = msg_length; 354 insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB; 355 insn->bits3.urb.end_of_thread = end_of_thread; 356 } 357} 358 359static void brw_set_dp_write_message( struct brw_context *brw, 360 struct brw_instruction *insn, 361 GLuint binding_table_index, 362 GLuint msg_control, 363 GLuint msg_type, 364 GLuint msg_length, 365 GLuint pixel_scoreboard_clear, 366 GLuint response_length, 367 GLuint end_of_thread ) 368{ 369 struct intel_context *intel = &brw->intel; 370 brw_set_src1(insn, brw_imm_d(0)); 371 372 if (intel->is_ironlake) { 373 insn->bits3.dp_write_igdng.binding_table_index = binding_table_index; 374 insn->bits3.dp_write_igdng.msg_control = msg_control; 375 insn->bits3.dp_write_igdng.pixel_scoreboard_clear = pixel_scoreboard_clear; 376 insn->bits3.dp_write_igdng.msg_type = msg_type; 377 insn->bits3.dp_write_igdng.send_commit_msg = 0; 378 insn->bits3.dp_write_igdng.header_present = 1; 379 insn->bits3.dp_write_igdng.response_length = response_length; 380 insn->bits3.dp_write_igdng.msg_length = msg_length; 381 insn->bits3.dp_write_igdng.end_of_thread = end_of_thread; 382 insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE; 383 insn->bits2.send_igdng.end_of_thread = end_of_thread; 384 } else { 385 insn->bits3.dp_write.binding_table_index = binding_table_index; 386 insn->bits3.dp_write.msg_control = msg_control; 387 insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear; 388 insn->bits3.dp_write.msg_type = msg_type; 389 insn->bits3.dp_write.send_commit_msg = 0; 390 insn->bits3.dp_write.response_length = response_length; 391 insn->bits3.dp_write.msg_length = msg_length; 392 insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE; 393 insn->bits3.dp_write.end_of_thread = end_of_thread; 394 } 395} 396 397static void brw_set_dp_read_message( struct brw_context *brw, 398 struct 
brw_instruction *insn, 399 GLuint binding_table_index, 400 GLuint msg_control, 401 GLuint msg_type, 402 GLuint target_cache, 403 GLuint msg_length, 404 GLuint response_length, 405 GLuint end_of_thread ) 406{ 407 struct intel_context *intel = &brw->intel; 408 brw_set_src1(insn, brw_imm_d(0)); 409 410 if (intel->is_ironlake) { 411 insn->bits3.dp_read_igdng.binding_table_index = binding_table_index; 412 insn->bits3.dp_read_igdng.msg_control = msg_control; 413 insn->bits3.dp_read_igdng.msg_type = msg_type; 414 insn->bits3.dp_read_igdng.target_cache = target_cache; 415 insn->bits3.dp_read_igdng.header_present = 1; 416 insn->bits3.dp_read_igdng.response_length = response_length; 417 insn->bits3.dp_read_igdng.msg_length = msg_length; 418 insn->bits3.dp_read_igdng.pad1 = 0; 419 insn->bits3.dp_read_igdng.end_of_thread = end_of_thread; 420 insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ; 421 insn->bits2.send_igdng.end_of_thread = end_of_thread; 422 } else { 423 insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/ 424 insn->bits3.dp_read.msg_control = msg_control; /*8:11*/ 425 insn->bits3.dp_read.msg_type = msg_type; /*12:13*/ 426 insn->bits3.dp_read.target_cache = target_cache; /*14:15*/ 427 insn->bits3.dp_read.response_length = response_length; /*16:19*/ 428 insn->bits3.dp_read.msg_length = msg_length; /*20:23*/ 429 insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/ 430 insn->bits3.dp_read.pad1 = 0; /*28:30*/ 431 insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/ 432 } 433} 434 435static void brw_set_sampler_message(struct brw_context *brw, 436 struct brw_instruction *insn, 437 GLuint binding_table_index, 438 GLuint sampler, 439 GLuint msg_type, 440 GLuint response_length, 441 GLuint msg_length, 442 GLboolean eot, 443 GLuint header_present, 444 GLuint simd_mode) 445{ 446 struct intel_context *intel = &brw->intel; 447 assert(eot == 0); 448 brw_set_src1(insn, brw_imm_d(0)); 449 450 if (intel->is_ironlake) { 451 
insn->bits3.sampler_igdng.binding_table_index = binding_table_index; 452 insn->bits3.sampler_igdng.sampler = sampler; 453 insn->bits3.sampler_igdng.msg_type = msg_type; 454 insn->bits3.sampler_igdng.simd_mode = simd_mode; 455 insn->bits3.sampler_igdng.header_present = header_present; 456 insn->bits3.sampler_igdng.response_length = response_length; 457 insn->bits3.sampler_igdng.msg_length = msg_length; 458 insn->bits3.sampler_igdng.end_of_thread = eot; 459 insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_SAMPLER; 460 insn->bits2.send_igdng.end_of_thread = eot; 461 } else if (intel->is_g4x) { 462 insn->bits3.sampler_g4x.binding_table_index = binding_table_index; 463 insn->bits3.sampler_g4x.sampler = sampler; 464 insn->bits3.sampler_g4x.msg_type = msg_type; 465 insn->bits3.sampler_g4x.response_length = response_length; 466 insn->bits3.sampler_g4x.msg_length = msg_length; 467 insn->bits3.sampler_g4x.end_of_thread = eot; 468 insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER; 469 } else { 470 insn->bits3.sampler.binding_table_index = binding_table_index; 471 insn->bits3.sampler.sampler = sampler; 472 insn->bits3.sampler.msg_type = msg_type; 473 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; 474 insn->bits3.sampler.response_length = response_length; 475 insn->bits3.sampler.msg_length = msg_length; 476 insn->bits3.sampler.end_of_thread = eot; 477 insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER; 478 } 479} 480 481 482 483static struct brw_instruction *next_insn( struct brw_compile *p, 484 GLuint opcode ) 485{ 486 struct brw_instruction *insn; 487 488 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN); 489 490 insn = &p->store[p->nr_insn++]; 491 memcpy(insn, p->current, sizeof(*insn)); 492 493 /* Reset this one-shot flag: 494 */ 495 496 if (p->current->header.destreg__conditionalmod) { 497 p->current->header.destreg__conditionalmod = 0; 498 p->current->header.predicate_control = BRW_PREDICATE_NORMAL; 499 } 500 501 insn->header.opcode = 
opcode; 502 return insn; 503} 504 505 506static struct brw_instruction *brw_alu1( struct brw_compile *p, 507 GLuint opcode, 508 struct brw_reg dest, 509 struct brw_reg src ) 510{ 511 struct brw_instruction *insn = next_insn(p, opcode); 512 brw_set_dest(insn, dest); 513 brw_set_src0(insn, src); 514 return insn; 515} 516 517static struct brw_instruction *brw_alu2(struct brw_compile *p, 518 GLuint opcode, 519 struct brw_reg dest, 520 struct brw_reg src0, 521 struct brw_reg src1 ) 522{ 523 struct brw_instruction *insn = next_insn(p, opcode); 524 brw_set_dest(insn, dest); 525 brw_set_src0(insn, src0); 526 brw_set_src1(insn, src1); 527 return insn; 528} 529 530 531/*********************************************************************** 532 * Convenience routines. 533 */ 534#define ALU1(OP) \ 535struct brw_instruction *brw_##OP(struct brw_compile *p, \ 536 struct brw_reg dest, \ 537 struct brw_reg src0) \ 538{ \ 539 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \ 540} 541 542#define ALU2(OP) \ 543struct brw_instruction *brw_##OP(struct brw_compile *p, \ 544 struct brw_reg dest, \ 545 struct brw_reg src0, \ 546 struct brw_reg src1) \ 547{ \ 548 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \ 549} 550 551 552ALU1(MOV) 553ALU2(SEL) 554ALU1(NOT) 555ALU2(AND) 556ALU2(OR) 557ALU2(XOR) 558ALU2(SHR) 559ALU2(SHL) 560ALU2(RSR) 561ALU2(RSL) 562ALU2(ASR) 563ALU2(ADD) 564ALU2(MUL) 565ALU1(FRC) 566ALU1(RNDD) 567ALU1(RNDZ) 568ALU2(MAC) 569ALU2(MACH) 570ALU1(LZD) 571ALU2(DP4) 572ALU2(DPH) 573ALU2(DP3) 574ALU2(DP2) 575ALU2(LINE) 576ALU2(PLN) 577 578 579 580void brw_NOP(struct brw_compile *p) 581{ 582 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP); 583 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 584 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 585 brw_set_src1(insn, brw_imm_ud(0x0)); 586} 587 588 589 590 591 592/*********************************************************************** 593 * Comparisons, if/else/endif 
594 */ 595 596struct brw_instruction *brw_JMPI(struct brw_compile *p, 597 struct brw_reg dest, 598 struct brw_reg src0, 599 struct brw_reg src1) 600{ 601 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); 602 603 insn->header.execution_size = 1; 604 insn->header.compression_control = BRW_COMPRESSION_NONE; 605 insn->header.mask_control = BRW_MASK_DISABLE; 606 607 p->current->header.predicate_control = BRW_PREDICATE_NONE; 608 609 return insn; 610} 611 612/* EU takes the value from the flag register and pushes it onto some 613 * sort of a stack (presumably merging with any flag value already on 614 * the stack). Within an if block, the flags at the top of the stack 615 * control execution on each channel of the unit, eg. on each of the 616 * 16 pixel values in our wm programs. 617 * 618 * When the matching 'else' instruction is reached (presumably by 619 * countdown of the instruction count patched in by our ELSE/ENDIF 620 * functions), the relevent flags are inverted. 621 * 622 * When the matching 'endif' instruction is reached, the flags are 623 * popped off. If the stack is now empty, normal execution resumes. 624 * 625 * No attempt is made to deal with stack overflow (14 elements?). 
626 */ 627struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size) 628{ 629 struct brw_instruction *insn; 630 631 if (p->single_program_flow) { 632 assert(execute_size == BRW_EXECUTE_1); 633 634 insn = next_insn(p, BRW_OPCODE_ADD); 635 insn->header.predicate_inverse = 1; 636 } else { 637 insn = next_insn(p, BRW_OPCODE_IF); 638 } 639 640 /* Override the defaults for this instruction: 641 */ 642 brw_set_dest(insn, brw_ip_reg()); 643 brw_set_src0(insn, brw_ip_reg()); 644 brw_set_src1(insn, brw_imm_d(0x0)); 645 646 insn->header.execution_size = execute_size; 647 insn->header.compression_control = BRW_COMPRESSION_NONE; 648 insn->header.predicate_control = BRW_PREDICATE_NORMAL; 649 insn->header.mask_control = BRW_MASK_ENABLE; 650 if (!p->single_program_flow) 651 insn->header.thread_control = BRW_THREAD_SWITCH; 652 653 p->current->header.predicate_control = BRW_PREDICATE_NONE; 654 655 return insn; 656} 657 658 659struct brw_instruction *brw_ELSE(struct brw_compile *p, 660 struct brw_instruction *if_insn) 661{ 662 struct intel_context *intel = &p->brw->intel; 663 struct brw_instruction *insn; 664 GLuint br = 1; 665 666 if (intel->is_ironlake) 667 br = 2; 668 669 if (p->single_program_flow) { 670 insn = next_insn(p, BRW_OPCODE_ADD); 671 } else { 672 insn = next_insn(p, BRW_OPCODE_ELSE); 673 } 674 675 brw_set_dest(insn, brw_ip_reg()); 676 brw_set_src0(insn, brw_ip_reg()); 677 brw_set_src1(insn, brw_imm_d(0x0)); 678 679 insn->header.compression_control = BRW_COMPRESSION_NONE; 680 insn->header.execution_size = if_insn->header.execution_size; 681 insn->header.mask_control = BRW_MASK_ENABLE; 682 if (!p->single_program_flow) 683 insn->header.thread_control = BRW_THREAD_SWITCH; 684 685 /* Patch the if instruction to point at this instruction. 
686 */ 687 if (p->single_program_flow) { 688 assert(if_insn->header.opcode == BRW_OPCODE_ADD); 689 690 if_insn->bits3.ud = (insn - if_insn + 1) * 16; 691 } else { 692 assert(if_insn->header.opcode == BRW_OPCODE_IF); 693 694 if_insn->bits3.if_else.jump_count = br * (insn - if_insn); 695 if_insn->bits3.if_else.pop_count = 0; 696 if_insn->bits3.if_else.pad0 = 0; 697 } 698 699 return insn; 700} 701 702void brw_ENDIF(struct brw_compile *p, 703 struct brw_instruction *patch_insn) 704{ 705 struct intel_context *intel = &p->brw->intel; 706 GLuint br = 1; 707 708 if (intel->is_ironlake) 709 br = 2; 710 711 if (p->single_program_flow) { 712 /* In single program flow mode, there's no need to execute an ENDIF, 713 * since we don't need to do any stack operations, and if we're executing 714 * currently, we want to just continue executing. 715 */ 716 struct brw_instruction *next = &p->store[p->nr_insn]; 717 718 assert(patch_insn->header.opcode == BRW_OPCODE_ADD); 719 720 patch_insn->bits3.ud = (next - patch_insn) * 16; 721 } else { 722 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF); 723 724 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 725 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 726 brw_set_src1(insn, brw_imm_d(0x0)); 727 728 insn->header.compression_control = BRW_COMPRESSION_NONE; 729 insn->header.execution_size = patch_insn->header.execution_size; 730 insn->header.mask_control = BRW_MASK_ENABLE; 731 insn->header.thread_control = BRW_THREAD_SWITCH; 732 733 assert(patch_insn->bits3.if_else.jump_count == 0); 734 735 /* Patch the if or else instructions to point at this or the next 736 * instruction respectively. 
737 */ 738 if (patch_insn->header.opcode == BRW_OPCODE_IF) { 739 /* Automagically turn it into an IFF: 740 */ 741 patch_insn->header.opcode = BRW_OPCODE_IFF; 742 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); 743 patch_insn->bits3.if_else.pop_count = 0; 744 patch_insn->bits3.if_else.pad0 = 0; 745 } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) { 746 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); 747 patch_insn->bits3.if_else.pop_count = 1; 748 patch_insn->bits3.if_else.pad0 = 0; 749 } else { 750 assert(0); 751 } 752 753 /* Also pop item off the stack in the endif instruction: 754 */ 755 insn->bits3.if_else.jump_count = 0; 756 insn->bits3.if_else.pop_count = 1; 757 insn->bits3.if_else.pad0 = 0; 758 } 759} 760 761struct brw_instruction *brw_BREAK(struct brw_compile *p) 762{ 763 struct brw_instruction *insn; 764 insn = next_insn(p, BRW_OPCODE_BREAK); 765 brw_set_dest(insn, brw_ip_reg()); 766 brw_set_src0(insn, brw_ip_reg()); 767 brw_set_src1(insn, brw_imm_d(0x0)); 768 insn->header.compression_control = BRW_COMPRESSION_NONE; 769 insn->header.execution_size = BRW_EXECUTE_8; 770 /* insn->header.mask_control = BRW_MASK_DISABLE; */ 771 insn->bits3.if_else.pad0 = 0; 772 return insn; 773} 774 775struct brw_instruction *brw_CONT(struct brw_compile *p) 776{ 777 struct brw_instruction *insn; 778 insn = next_insn(p, BRW_OPCODE_CONTINUE); 779 brw_set_dest(insn, brw_ip_reg()); 780 brw_set_src0(insn, brw_ip_reg()); 781 brw_set_src1(insn, brw_imm_d(0x0)); 782 insn->header.compression_control = BRW_COMPRESSION_NONE; 783 insn->header.execution_size = BRW_EXECUTE_8; 784 /* insn->header.mask_control = BRW_MASK_DISABLE; */ 785 insn->bits3.if_else.pad0 = 0; 786 return insn; 787} 788 789/* DO/WHILE loop: 790 */ 791struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) 792{ 793 if (p->single_program_flow) { 794 return &p->store[p->nr_insn]; 795 } else { 796 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO); 
797 798 /* Override the defaults for this instruction: 799 */ 800 brw_set_dest(insn, brw_null_reg()); 801 brw_set_src0(insn, brw_null_reg()); 802 brw_set_src1(insn, brw_null_reg()); 803 804 insn->header.compression_control = BRW_COMPRESSION_NONE; 805 insn->header.execution_size = execute_size; 806 insn->header.predicate_control = BRW_PREDICATE_NONE; 807 /* insn->header.mask_control = BRW_MASK_ENABLE; */ 808 /* insn->header.mask_control = BRW_MASK_DISABLE; */ 809 810 return insn; 811 } 812} 813 814 815 816struct brw_instruction *brw_WHILE(struct brw_compile *p, 817 struct brw_instruction *do_insn) 818{ 819 struct intel_context *intel = &p->brw->intel; 820 struct brw_instruction *insn; 821 GLuint br = 1; 822 823 if (intel->is_ironlake) 824 br = 2; 825 826 if (p->single_program_flow) 827 insn = next_insn(p, BRW_OPCODE_ADD); 828 else 829 insn = next_insn(p, BRW_OPCODE_WHILE); 830 831 brw_set_dest(insn, brw_ip_reg()); 832 brw_set_src0(insn, brw_ip_reg()); 833 brw_set_src1(insn, brw_imm_d(0x0)); 834 835 insn->header.compression_control = BRW_COMPRESSION_NONE; 836 837 if (p->single_program_flow) { 838 insn->header.execution_size = BRW_EXECUTE_1; 839 840 insn->bits3.d = (do_insn - insn) * 16; 841 } else { 842 insn->header.execution_size = do_insn->header.execution_size; 843 844 assert(do_insn->header.opcode == BRW_OPCODE_DO); 845 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1); 846 insn->bits3.if_else.pop_count = 0; 847 insn->bits3.if_else.pad0 = 0; 848 } 849 850/* insn->header.mask_control = BRW_MASK_ENABLE; */ 851 852 /* insn->header.mask_control = BRW_MASK_DISABLE; */ 853 p->current->header.predicate_control = BRW_PREDICATE_NONE; 854 return insn; 855} 856 857 858/* FORWARD JUMPS: 859 */ 860void brw_land_fwd_jump(struct brw_compile *p, 861 struct brw_instruction *jmp_insn) 862{ 863 struct intel_context *intel = &p->brw->intel; 864 struct brw_instruction *landing = &p->store[p->nr_insn]; 865 GLuint jmpi = 1; 866 867 if (intel->is_ironlake) 868 jmpi = 2; 869 870 
assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); 871 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE); 872 873 jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1); 874} 875 876 877 878/* To integrate with the above, it makes sense that the comparison 879 * instruction should populate the flag register. It might be simpler 880 * just to use the flag reg for most WM tasks? 881 */ 882void brw_CMP(struct brw_compile *p, 883 struct brw_reg dest, 884 GLuint conditional, 885 struct brw_reg src0, 886 struct brw_reg src1) 887{ 888 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP); 889 890 insn->header.destreg__conditionalmod = conditional; 891 brw_set_dest(insn, dest); 892 brw_set_src0(insn, src0); 893 brw_set_src1(insn, src1); 894 895/* guess_execution_size(insn, src0); */ 896 897 898 /* Make it so that future instructions will use the computed flag 899 * value until brw_set_predicate_control_flag_value() is called 900 * again. 901 */ 902 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE && 903 dest.nr == 0) { 904 p->current->header.predicate_control = BRW_PREDICATE_NORMAL; 905 p->flag_value = 0xff; 906 } 907} 908 909 910 911/*********************************************************************** 912 * Helpers for the various SEND message types: 913 */ 914 915/** Extended math function, float[8]. 916 */ 917void brw_math( struct brw_compile *p, 918 struct brw_reg dest, 919 GLuint function, 920 GLuint saturate, 921 GLuint msg_reg_nr, 922 struct brw_reg src, 923 GLuint data_type, 924 GLuint precision ) 925{ 926 struct intel_context *intel = &p->brw->intel; 927 928 if (intel->gen >= 6) { 929 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH); 930 931 /* Math is the same ISA format as other opcodes, except that CondModifier 932 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. 
933 */ 934 insn->header.destreg__conditionalmod = function; 935 936 brw_set_dest(insn, dest); 937 brw_set_src0(insn, src); 938 brw_set_src1(insn, brw_null_reg()); 939 } else { 940 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 941 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; 942 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; 943 /* Example code doesn't set predicate_control for send 944 * instructions. 945 */ 946 insn->header.predicate_control = 0; 947 insn->header.destreg__conditionalmod = msg_reg_nr; 948 949 brw_set_dest(insn, dest); 950 brw_set_src0(insn, src); 951 brw_set_math_message(p->brw, 952 insn, 953 msg_length, response_length, 954 function, 955 BRW_MATH_INTEGER_UNSIGNED, 956 precision, 957 saturate, 958 data_type); 959 } 960} 961 962/** 963 * Extended math function, float[16]. 964 * Use 2 send instructions. 965 */ 966void brw_math_16( struct brw_compile *p, 967 struct brw_reg dest, 968 GLuint function, 969 GLuint saturate, 970 GLuint msg_reg_nr, 971 struct brw_reg src, 972 GLuint precision ) 973{ 974 struct brw_instruction *insn; 975 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; 976 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 
2 : 1; 977 978 /* First instruction: 979 */ 980 brw_push_insn_state(p); 981 brw_set_predicate_control_flag_value(p, 0xff); 982 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 983 984 insn = next_insn(p, BRW_OPCODE_SEND); 985 insn->header.destreg__conditionalmod = msg_reg_nr; 986 987 brw_set_dest(insn, dest); 988 brw_set_src0(insn, src); 989 brw_set_math_message(p->brw, 990 insn, 991 msg_length, response_length, 992 function, 993 BRW_MATH_INTEGER_UNSIGNED, 994 precision, 995 saturate, 996 BRW_MATH_DATA_VECTOR); 997 998 /* Second instruction: 999 */ 1000 insn = next_insn(p, BRW_OPCODE_SEND); 1001 insn->header.compression_control = BRW_COMPRESSION_2NDHALF; 1002 insn->header.destreg__conditionalmod = msg_reg_nr+1; 1003 1004 brw_set_dest(insn, offset(dest,1)); 1005 brw_set_src0(insn, src); 1006 brw_set_math_message(p->brw, 1007 insn, 1008 msg_length, response_length, 1009 function, 1010 BRW_MATH_INTEGER_UNSIGNED, 1011 precision, 1012 saturate, 1013 BRW_MATH_DATA_VECTOR); 1014 1015 brw_pop_insn_state(p); 1016} 1017 1018 1019/** 1020 * Write block of 16 dwords/floats to the data port Render Cache scratch buffer. 1021 * Scratch offset should be a multiple of 64. 1022 * Used for register spilling. 
1023 */ 1024void brw_dp_WRITE_16( struct brw_compile *p, 1025 struct brw_reg src, 1026 GLuint scratch_offset ) 1027{ 1028 GLuint msg_reg_nr = 1; 1029 { 1030 brw_push_insn_state(p); 1031 brw_set_mask_control(p, BRW_MASK_DISABLE); 1032 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1033 1034 /* set message header global offset field (reg 0, element 2) */ 1035 brw_MOV(p, 1036 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), 1037 brw_imm_d(scratch_offset)); 1038 1039 brw_pop_insn_state(p); 1040 } 1041 1042 { 1043 GLuint msg_length = 3; 1044 struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); 1045 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1046 1047 insn->header.predicate_control = 0; /* XXX */ 1048 insn->header.compression_control = BRW_COMPRESSION_NONE; 1049 insn->header.destreg__conditionalmod = msg_reg_nr; 1050 1051 brw_set_dest(insn, dest); 1052 brw_set_src0(insn, src); 1053 1054 brw_set_dp_write_message(p->brw, 1055 insn, 1056 255, /* binding table index (255=stateless) */ 1057 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */ 1058 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */ 1059 msg_length, 1060 0, /* pixel scoreboard */ 1061 0, /* response_length */ 1062 0); /* eot */ 1063 } 1064} 1065 1066 1067/** 1068 * Read block of 16 dwords/floats from the data port Render Cache scratch buffer. 1069 * Scratch offset should be a multiple of 64. 1070 * Used for register spilling. 
1071 */ 1072void brw_dp_READ_16( struct brw_compile *p, 1073 struct brw_reg dest, 1074 GLuint scratch_offset ) 1075{ 1076 GLuint msg_reg_nr = 1; 1077 { 1078 brw_push_insn_state(p); 1079 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1080 brw_set_mask_control(p, BRW_MASK_DISABLE); 1081 1082 /* set message header global offset field (reg 0, element 2) */ 1083 brw_MOV(p, 1084 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), 1085 brw_imm_d(scratch_offset)); 1086 1087 brw_pop_insn_state(p); 1088 } 1089 1090 { 1091 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1092 1093 insn->header.predicate_control = 0; /* XXX */ 1094 insn->header.compression_control = BRW_COMPRESSION_NONE; 1095 insn->header.destreg__conditionalmod = msg_reg_nr; 1096 1097 brw_set_dest(insn, dest); /* UW? */ 1098 brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); 1099 1100 brw_set_dp_read_message(p->brw, 1101 insn, 1102 255, /* binding table index (255=stateless) */ 1103 3, /* msg_control (3 means 4 Owords) */ 1104 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ 1105 1, /* target cache (render/scratch) */ 1106 1, /* msg_length */ 1107 2, /* response_length */ 1108 0); /* eot */ 1109 } 1110} 1111 1112 1113/** 1114 * Read a float[4] vector from the data port Data Cache (const buffer). 1115 * Location (in buffer) should be a multiple of 16. 1116 * Used for fetching shader constants. 1117 * If relAddr is true, we'll do an indirect fetch using the address register. 
1118 */ 1119void brw_dp_READ_4( struct brw_compile *p, 1120 struct brw_reg dest, 1121 GLboolean relAddr, 1122 GLuint location, 1123 GLuint bind_table_index ) 1124{ 1125 /* XXX: relAddr not implemented */ 1126 GLuint msg_reg_nr = 1; 1127 { 1128 struct brw_reg b; 1129 brw_push_insn_state(p); 1130 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1131 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1132 brw_set_mask_control(p, BRW_MASK_DISABLE); 1133 1134 /* Setup MRF[1] with location/offset into const buffer */ 1135 b = brw_message_reg(msg_reg_nr); 1136 b = retype(b, BRW_REGISTER_TYPE_UD); 1137 /* XXX I think we're setting all the dwords of MRF[1] to 'location'. 1138 * when the docs say only dword[2] should be set. Hmmm. But it works. 1139 */ 1140 brw_MOV(p, b, brw_imm_ud(location)); 1141 brw_pop_insn_state(p); 1142 } 1143 1144 { 1145 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1146 1147 insn->header.predicate_control = BRW_PREDICATE_NONE; 1148 insn->header.compression_control = BRW_COMPRESSION_NONE; 1149 insn->header.destreg__conditionalmod = msg_reg_nr; 1150 insn->header.mask_control = BRW_MASK_DISABLE; 1151 1152 /* cast dest to a uword[8] vector */ 1153 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); 1154 1155 brw_set_dest(insn, dest); 1156 brw_set_src0(insn, brw_null_reg()); 1157 1158 brw_set_dp_read_message(p->brw, 1159 insn, 1160 bind_table_index, 1161 0, /* msg_control (0 means 1 Oword) */ 1162 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ 1163 0, /* source cache = data cache */ 1164 1, /* msg_length */ 1165 1, /* response_length (1 Oword) */ 1166 0); /* eot */ 1167 } 1168} 1169 1170 1171/** 1172 * Read float[4] constant(s) from VS constant buffer. 1173 * For relative addressing, two float[4] constants will be read into 'dest'. 1174 * Otherwise, one float[4] constant will be read into the lower half of 'dest'. 
1175 */ 1176void brw_dp_READ_4_vs(struct brw_compile *p, 1177 struct brw_reg dest, 1178 GLuint oword, 1179 GLboolean relAddr, 1180 struct brw_reg addrReg, 1181 GLuint location, 1182 GLuint bind_table_index) 1183{ 1184 GLuint msg_reg_nr = 1; 1185 1186 assert(oword < 2); 1187 /* 1188 printf("vs const read msg, location %u, msg_reg_nr %d\n", 1189 location, msg_reg_nr); 1190 */ 1191 1192 /* Setup MRF[1] with location/offset into const buffer */ 1193 { 1194 struct brw_reg b; 1195 1196 brw_push_insn_state(p); 1197 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1198 brw_set_mask_control(p, BRW_MASK_DISABLE); 1199 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1200 /*brw_set_access_mode(p, BRW_ALIGN_16);*/ 1201 1202 /* XXX I think we're setting all the dwords of MRF[1] to 'location'. 1203 * when the docs say only dword[2] should be set. Hmmm. But it works. 1204 */ 1205 b = brw_message_reg(msg_reg_nr); 1206 b = retype(b, BRW_REGISTER_TYPE_UD); 1207 /*b = get_element_ud(b, 2);*/ 1208 if (relAddr) { 1209 brw_ADD(p, b, addrReg, brw_imm_ud(location)); 1210 } 1211 else { 1212 brw_MOV(p, b, brw_imm_ud(location)); 1213 } 1214 1215 brw_pop_insn_state(p); 1216 } 1217 1218 { 1219 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1220 1221 insn->header.predicate_control = BRW_PREDICATE_NONE; 1222 insn->header.compression_control = BRW_COMPRESSION_NONE; 1223 insn->header.destreg__conditionalmod = msg_reg_nr; 1224 insn->header.mask_control = BRW_MASK_DISABLE; 1225 /*insn->header.access_mode = BRW_ALIGN_16;*/ 1226 1227 brw_set_dest(insn, dest); 1228 brw_set_src0(insn, brw_null_reg()); 1229 1230 brw_set_dp_read_message(p->brw, 1231 insn, 1232 bind_table_index, 1233 oword, /* 0 = lower Oword, 1 = upper Oword */ 1234 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ 1235 0, /* source cache = data cache */ 1236 1, /* msg_length */ 1237 1, /* response_length (1 Oword) */ 1238 0); /* eot */ 1239 } 1240} 1241 1242 1243 1244void brw_fb_WRITE(struct brw_compile *p, 
		  struct brw_reg dest,
		  GLuint msg_reg_nr,
		  struct brw_reg src0,
		  GLuint binding_table_index,
		  GLuint msg_length,
		  GLuint response_length,
		  GLboolean eot)
{
   /* Emit a single SEND carrying a SIMD16 single-source render target
    * write; the pixel scoreboard bit is set (see the message setup below).
    */
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

   insn->header.predicate_control = 0; /* XXX */
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_dest(insn, dest);
   brw_set_src0(insn, src0);
   brw_set_dp_write_message(p->brw,
			    insn,
			    binding_table_index,
			    BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
			    BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
			    msg_length,
			    1, /* pixel scoreboard */
			    response_length,
			    eot);
}


/**
 * Texture sample instruction.
 * Note: the msg_type plus msg_length values determine exactly what kind
 * of sampling operation is performed.  See volume 4, page 161 of docs.
 */
void brw_SAMPLE(struct brw_compile *p,
		struct brw_reg dest,
		GLuint msg_reg_nr,
		struct brw_reg src0,
		GLuint binding_table_index,
		GLuint sampler,
		GLuint writemask,
		GLuint msg_type,
		GLuint response_length,
		GLuint msg_length,
		GLboolean eot,
		GLuint header_present,
		GLuint simd_mode)
{
   GLboolean need_stall = 0;

   if (writemask == 0) {
      /*printf("%s: zero writemask??\n", __FUNCTION__); */
      return;
   }

   /* Hardware doesn't do destination dependency checking on send
    * instructions properly.  Add a workaround which generates the
    * dependency by other means.  In practice it seems like this bug
    * only crops up for texture samples, and only where registers are
    * written by the send and then written again later without being
    * read in between.  Luckily for us, we already track that
    * information and use it to modify the writemask for the
    * instruction, so that is a guide for whether a workaround is
    * needed.
    */
   if (writemask != WRITEMASK_XYZW) {
      GLuint dst_offset = 0;
      GLuint i, newmask = 0, len = 0;

      /* Count leading disabled channels; each skipped channel advances
       * the destination by two registers.
       */
      for (i = 0; i < 4; i++) {
	 if (writemask & (1<<i))
	    break;
	 dst_offset += 2;
      }
      /* Collect the contiguous run of enabled channels that follows. */
      for (; i < 4; i++) {
	 if (!(writemask & (1<<i)))
	    break;
	 newmask |= 1<<i;
	 len++;
      }

      /* If the enabled channels were not one contiguous run, the mask
       * trick below can't express it — fall back to a stall (dummy MOV
       * emitted at the end of this function).
       */
      if (newmask != writemask) {
	 need_stall = 1;
         /* printf("need stall %x %x\n", newmask , writemask); */
      }
      else {
	 GLboolean dispatch_16 = GL_FALSE;

	 struct brw_reg m1 = brw_message_reg(msg_reg_nr);

	 guess_execution_size(p->current, dest);
	 if (p->current->header.execution_size == BRW_EXECUTE_16)
	    dispatch_16 = GL_TRUE;

	 /* Invert: channels to be masked OFF in the message header. */
	 newmask = ~newmask & WRITEMASK_XYZW;

	 brw_push_insn_state(p);

	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
	 brw_set_mask_control(p, BRW_MASK_DISABLE);

	 /* Copy r0 into the message header, then write the channel mask
	  * into header dword 2, bits 12..15 — presumably the sampler
	  * message header's channel-enable field; confirm against PRM.
	  */
	 brw_MOV(p, m1, brw_vec8_grf(0,0));
	 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));

	 brw_pop_insn_state(p);

	 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
	 dest = offset(dest, dst_offset);

	 /* For 16-wide dispatch, masked channels are skipped in the
	  * response. For 8-wide, masked channels still take up slots,
	  * and are just not written to.
	  */
	 if (dispatch_16)
	    response_length = len * 2;
      }
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      insn->header.predicate_control = 0; /* XXX */
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(insn, dest);
      brw_set_src0(insn, src0);
      brw_set_sampler_message(p->brw, insn,
			      binding_table_index,
			      sampler,
			      msg_type,
			      response_length,
			      msg_length,
			      eot,
			      header_present,
			      simd_mode);
   }

   if (need_stall) {
      /* Self-MOV of the last response register: reading what the send
       * wrote forces the dependency the hardware misses (see comment
       * at the top of this function).
       */
      struct brw_reg reg = vec8(offset(dest, response_length-1));

      /*  mov (8) r9.0<1>:f    r9.0<8;8,1>:f    { Align1 }
       */
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, reg, reg);
      brw_pop_insn_state(p);
   }

}

/* All these variables are pretty confusing - we might be better off
 * using bitmasks and macros for this, in the old style.  Or perhaps
 * just having the caller instantiate the fields in dword3 itself.
 */
void brw_urb_WRITE(struct brw_compile *p,
		   struct brw_reg dest,
		   GLuint msg_reg_nr,
		   struct brw_reg src0,
		   GLboolean allocate,
		   GLboolean used,
		   GLuint msg_length,
		   GLuint response_length,
		   GLboolean eot,
		   GLboolean writes_complete,
		   GLuint offset,
		   GLuint swizzle)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;

   /* Sandybridge doesn't have the implied move for SENDs,
    * and the first message register index comes from src0.
1417 */ 1418 if (intel->gen >= 6) { 1419 brw_MOV(p, brw_message_reg(msg_reg_nr), src0); 1420 src0 = brw_message_reg(msg_reg_nr); 1421 } 1422 1423 insn = next_insn(p, BRW_OPCODE_SEND); 1424 1425 assert(msg_length < BRW_MAX_MRF); 1426 1427 brw_set_dest(insn, dest); 1428 brw_set_src0(insn, src0); 1429 brw_set_src1(insn, brw_imm_d(0)); 1430 1431 if (intel->gen < 6) 1432 insn->header.destreg__conditionalmod = msg_reg_nr; 1433 1434 brw_set_urb_message(p->brw, 1435 insn, 1436 allocate, 1437 used, 1438 msg_length, 1439 response_length, 1440 eot, 1441 writes_complete, 1442 offset, 1443 swizzle); 1444} 1445 1446void brw_ff_sync(struct brw_compile *p, 1447 struct brw_reg dest, 1448 GLuint msg_reg_nr, 1449 struct brw_reg src0, 1450 GLboolean allocate, 1451 GLboolean used, 1452 GLuint msg_length, 1453 GLuint response_length, 1454 GLboolean eot, 1455 GLboolean writes_complete, 1456 GLuint offset, 1457 GLuint swizzle) 1458{ 1459 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1460 1461 assert(msg_length < 16); 1462 1463 brw_set_dest(insn, dest); 1464 brw_set_src0(insn, src0); 1465 brw_set_src1(insn, brw_imm_d(0)); 1466 1467 insn->header.destreg__conditionalmod = msg_reg_nr; 1468 1469 brw_set_ff_sync_message(p->brw, 1470 insn, 1471 allocate, 1472 used, 1473 msg_length, 1474 response_length, 1475 eot, 1476 writes_complete, 1477 offset, 1478 swizzle); 1479} 1480