brw_eu_emit.c revision 25024d948298a9f3f3210a0b91486f79a3917b0f
1/* 2 Copyright (C) Intel Corp. 2006. All Rights Reserved. 3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to 4 develop this 3D driver. 5 6 Permission is hereby granted, free of charge, to any person obtaining 7 a copy of this software and associated documentation files (the 8 "Software"), to deal in the Software without restriction, including 9 without limitation the rights to use, copy, modify, merge, publish, 10 distribute, sublicense, and/or sell copies of the Software, and to 11 permit persons to whom the Software is furnished to do so, subject to 12 the following conditions: 13 14 The above copyright notice and this permission notice (including the 15 next paragraph) shall be included in all copies or substantial 16 portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
25 26 **********************************************************************/ 27 /* 28 * Authors: 29 * Keith Whitwell <keith@tungstengraphics.com> 30 */ 31 32 33#include "brw_context.h" 34#include "brw_defines.h" 35#include "brw_eu.h" 36 37 38 39 40/*********************************************************************** 41 * Internal helper for constructing instructions 42 */ 43 44static void guess_execution_size( struct brw_instruction *insn, 45 struct brw_reg reg ) 46{ 47 if (reg.width == BRW_WIDTH_8 && 48 insn->header.compression_control == BRW_COMPRESSION_COMPRESSED) 49 insn->header.execution_size = BRW_EXECUTE_16; 50 else 51 insn->header.execution_size = reg.width; /* note - definitions are compatible */ 52} 53 54 55static void brw_set_dest( struct brw_instruction *insn, 56 struct brw_reg dest ) 57{ 58 if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && 59 dest.file != BRW_MESSAGE_REGISTER_FILE) 60 assert(dest.nr < 128); 61 62 insn->bits1.da1.dest_reg_file = dest.file; 63 insn->bits1.da1.dest_reg_type = dest.type; 64 insn->bits1.da1.dest_address_mode = dest.address_mode; 65 66 if (dest.address_mode == BRW_ADDRESS_DIRECT) { 67 insn->bits1.da1.dest_reg_nr = dest.nr; 68 69 if (insn->header.access_mode == BRW_ALIGN_1) { 70 insn->bits1.da1.dest_subreg_nr = dest.subnr; 71 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) 72 dest.hstride = BRW_HORIZONTAL_STRIDE_1; 73 insn->bits1.da1.dest_horiz_stride = dest.hstride; 74 } 75 else { 76 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; 77 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; 78 } 79 } 80 else { 81 insn->bits1.ia1.dest_subreg_nr = dest.subnr; 82 83 /* These are different sizes in align1 vs align16: 84 */ 85 if (insn->header.access_mode == BRW_ALIGN_1) { 86 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; 87 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) 88 dest.hstride = BRW_HORIZONTAL_STRIDE_1; 89 insn->bits1.ia1.dest_horiz_stride = dest.hstride; 90 } 91 else { 92 
insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; 93 } 94 } 95 96 /* NEW: Set the execution size based on dest.width and 97 * insn->compression_control: 98 */ 99 guess_execution_size(insn, dest); 100} 101 102static void brw_set_src0( struct brw_instruction *insn, 103 struct brw_reg reg ) 104{ 105 assert(reg.file != BRW_MESSAGE_REGISTER_FILE); 106 107 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) 108 assert(reg.nr < 128); 109 110 insn->bits1.da1.src0_reg_file = reg.file; 111 insn->bits1.da1.src0_reg_type = reg.type; 112 insn->bits2.da1.src0_abs = reg.abs; 113 insn->bits2.da1.src0_negate = reg.negate; 114 insn->bits2.da1.src0_address_mode = reg.address_mode; 115 116 if (reg.file == BRW_IMMEDIATE_VALUE) { 117 insn->bits3.ud = reg.dw1.ud; 118 119 /* Required to set some fields in src1 as well: 120 */ 121 insn->bits1.da1.src1_reg_file = 0; /* arf */ 122 insn->bits1.da1.src1_reg_type = reg.type; 123 } 124 else 125 { 126 if (reg.address_mode == BRW_ADDRESS_DIRECT) { 127 if (insn->header.access_mode == BRW_ALIGN_1) { 128 insn->bits2.da1.src0_subreg_nr = reg.subnr; 129 insn->bits2.da1.src0_reg_nr = reg.nr; 130 } 131 else { 132 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16; 133 insn->bits2.da16.src0_reg_nr = reg.nr; 134 } 135 } 136 else { 137 insn->bits2.ia1.src0_subreg_nr = reg.subnr; 138 139 if (insn->header.access_mode == BRW_ALIGN_1) { 140 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; 141 } 142 else { 143 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; 144 } 145 } 146 147 if (insn->header.access_mode == BRW_ALIGN_1) { 148 if (reg.width == BRW_WIDTH_1 && 149 insn->header.execution_size == BRW_EXECUTE_1) { 150 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0; 151 insn->bits2.da1.src0_width = BRW_WIDTH_1; 152 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0; 153 } 154 else { 155 insn->bits2.da1.src0_horiz_stride = reg.hstride; 156 insn->bits2.da1.src0_width = reg.width; 157 
insn->bits2.da1.src0_vert_stride = reg.vstride; 158 } 159 } 160 else { 161 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); 162 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); 163 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); 164 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); 165 166 /* This is an oddity of the fact we're using the same 167 * descriptions for registers in align_16 as align_1: 168 */ 169 if (reg.vstride == BRW_VERTICAL_STRIDE_8) 170 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4; 171 else 172 insn->bits2.da16.src0_vert_stride = reg.vstride; 173 } 174 } 175} 176 177 178void brw_set_src1( struct brw_instruction *insn, 179 struct brw_reg reg ) 180{ 181 assert(reg.file != BRW_MESSAGE_REGISTER_FILE); 182 183 assert(reg.nr < 128); 184 185 insn->bits1.da1.src1_reg_file = reg.file; 186 insn->bits1.da1.src1_reg_type = reg.type; 187 insn->bits3.da1.src1_abs = reg.abs; 188 insn->bits3.da1.src1_negate = reg.negate; 189 190 /* Only src1 can be immediate in two-argument instructions. 
191 */ 192 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE); 193 194 if (reg.file == BRW_IMMEDIATE_VALUE) { 195 insn->bits3.ud = reg.dw1.ud; 196 } 197 else { 198 /* This is a hardware restriction, which may or may not be lifted 199 * in the future: 200 */ 201 assert (reg.address_mode == BRW_ADDRESS_DIRECT); 202 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */ 203 204 if (insn->header.access_mode == BRW_ALIGN_1) { 205 insn->bits3.da1.src1_subreg_nr = reg.subnr; 206 insn->bits3.da1.src1_reg_nr = reg.nr; 207 } 208 else { 209 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16; 210 insn->bits3.da16.src1_reg_nr = reg.nr; 211 } 212 213 if (insn->header.access_mode == BRW_ALIGN_1) { 214 if (reg.width == BRW_WIDTH_1 && 215 insn->header.execution_size == BRW_EXECUTE_1) { 216 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0; 217 insn->bits3.da1.src1_width = BRW_WIDTH_1; 218 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0; 219 } 220 else { 221 insn->bits3.da1.src1_horiz_stride = reg.hstride; 222 insn->bits3.da1.src1_width = reg.width; 223 insn->bits3.da1.src1_vert_stride = reg.vstride; 224 } 225 } 226 else { 227 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); 228 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); 229 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); 230 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); 231 232 /* This is an oddity of the fact we're using the same 233 * descriptions for registers in align_16 as align_1: 234 */ 235 if (reg.vstride == BRW_VERTICAL_STRIDE_8) 236 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4; 237 else 238 insn->bits3.da16.src1_vert_stride = reg.vstride; 239 } 240 } 241} 242 243 244 245static void brw_set_math_message( struct brw_context *brw, 246 struct brw_instruction *insn, 247 GLuint msg_length, 248 GLuint response_length, 249 GLuint function, 250 GLuint 
integer_type, 251 GLboolean low_precision, 252 GLboolean saturate, 253 GLuint dataType ) 254{ 255 struct intel_context *intel = &brw->intel; 256 brw_set_src1(insn, brw_imm_d(0)); 257 258 if (intel->is_ironlake) { 259 insn->bits3.math_igdng.function = function; 260 insn->bits3.math_igdng.int_type = integer_type; 261 insn->bits3.math_igdng.precision = low_precision; 262 insn->bits3.math_igdng.saturate = saturate; 263 insn->bits3.math_igdng.data_type = dataType; 264 insn->bits3.math_igdng.snapshot = 0; 265 insn->bits3.math_igdng.header_present = 0; 266 insn->bits3.math_igdng.response_length = response_length; 267 insn->bits3.math_igdng.msg_length = msg_length; 268 insn->bits3.math_igdng.end_of_thread = 0; 269 insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_MATH; 270 insn->bits2.send_igdng.end_of_thread = 0; 271 } else { 272 insn->bits3.math.function = function; 273 insn->bits3.math.int_type = integer_type; 274 insn->bits3.math.precision = low_precision; 275 insn->bits3.math.saturate = saturate; 276 insn->bits3.math.data_type = dataType; 277 insn->bits3.math.response_length = response_length; 278 insn->bits3.math.msg_length = msg_length; 279 insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH; 280 insn->bits3.math.end_of_thread = 0; 281 } 282} 283 284 285static void brw_set_ff_sync_message( struct brw_context *brw, 286 struct brw_instruction *insn, 287 GLboolean allocate, 288 GLboolean used, 289 GLuint msg_length, 290 GLuint response_length, 291 GLboolean end_of_thread, 292 GLboolean complete, 293 GLuint offset, 294 GLuint swizzle_control ) 295{ 296 brw_set_src1(insn, brw_imm_d(0)); 297 298 insn->bits3.urb_igdng.opcode = 1; 299 insn->bits3.urb_igdng.offset = offset; 300 insn->bits3.urb_igdng.swizzle_control = swizzle_control; 301 insn->bits3.urb_igdng.allocate = allocate; 302 insn->bits3.urb_igdng.used = used; 303 insn->bits3.urb_igdng.complete = complete; 304 insn->bits3.urb_igdng.header_present = 1; 305 insn->bits3.urb_igdng.response_length = response_length; 306 
insn->bits3.urb_igdng.msg_length = msg_length; 307 insn->bits3.urb_igdng.end_of_thread = end_of_thread; 308 insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB; 309 insn->bits2.send_igdng.end_of_thread = end_of_thread; 310} 311 312static void brw_set_urb_message( struct brw_context *brw, 313 struct brw_instruction *insn, 314 GLboolean allocate, 315 GLboolean used, 316 GLuint msg_length, 317 GLuint response_length, 318 GLboolean end_of_thread, 319 GLboolean complete, 320 GLuint offset, 321 GLuint swizzle_control ) 322{ 323 struct intel_context *intel = &brw->intel; 324 brw_set_src1(insn, brw_imm_d(0)); 325 326 if (intel->is_ironlake) { 327 insn->bits3.urb_igdng.opcode = 0; /* ? */ 328 insn->bits3.urb_igdng.offset = offset; 329 insn->bits3.urb_igdng.swizzle_control = swizzle_control; 330 insn->bits3.urb_igdng.allocate = allocate; 331 insn->bits3.urb_igdng.used = used; /* ? */ 332 insn->bits3.urb_igdng.complete = complete; 333 insn->bits3.urb_igdng.header_present = 1; 334 insn->bits3.urb_igdng.response_length = response_length; 335 insn->bits3.urb_igdng.msg_length = msg_length; 336 insn->bits3.urb_igdng.end_of_thread = end_of_thread; 337 insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB; 338 insn->bits2.send_igdng.end_of_thread = end_of_thread; 339 } else { 340 insn->bits3.urb.opcode = 0; /* ? */ 341 insn->bits3.urb.offset = offset; 342 insn->bits3.urb.swizzle_control = swizzle_control; 343 insn->bits3.urb.allocate = allocate; 344 insn->bits3.urb.used = used; /* ? 
*/ 345 insn->bits3.urb.complete = complete; 346 insn->bits3.urb.response_length = response_length; 347 insn->bits3.urb.msg_length = msg_length; 348 insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB; 349 insn->bits3.urb.end_of_thread = end_of_thread; 350 } 351} 352 353static void brw_set_dp_write_message( struct brw_context *brw, 354 struct brw_instruction *insn, 355 GLuint binding_table_index, 356 GLuint msg_control, 357 GLuint msg_type, 358 GLuint msg_length, 359 GLuint pixel_scoreboard_clear, 360 GLuint response_length, 361 GLuint end_of_thread ) 362{ 363 struct intel_context *intel = &brw->intel; 364 brw_set_src1(insn, brw_imm_d(0)); 365 366 if (intel->is_ironlake) { 367 insn->bits3.dp_write_igdng.binding_table_index = binding_table_index; 368 insn->bits3.dp_write_igdng.msg_control = msg_control; 369 insn->bits3.dp_write_igdng.pixel_scoreboard_clear = pixel_scoreboard_clear; 370 insn->bits3.dp_write_igdng.msg_type = msg_type; 371 insn->bits3.dp_write_igdng.send_commit_msg = 0; 372 insn->bits3.dp_write_igdng.header_present = 1; 373 insn->bits3.dp_write_igdng.response_length = response_length; 374 insn->bits3.dp_write_igdng.msg_length = msg_length; 375 insn->bits3.dp_write_igdng.end_of_thread = end_of_thread; 376 insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE; 377 insn->bits2.send_igdng.end_of_thread = end_of_thread; 378 } else { 379 insn->bits3.dp_write.binding_table_index = binding_table_index; 380 insn->bits3.dp_write.msg_control = msg_control; 381 insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear; 382 insn->bits3.dp_write.msg_type = msg_type; 383 insn->bits3.dp_write.send_commit_msg = 0; 384 insn->bits3.dp_write.response_length = response_length; 385 insn->bits3.dp_write.msg_length = msg_length; 386 insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE; 387 insn->bits3.dp_write.end_of_thread = end_of_thread; 388 } 389} 390 391static void brw_set_dp_read_message( struct brw_context *brw, 392 struct 
brw_instruction *insn, 393 GLuint binding_table_index, 394 GLuint msg_control, 395 GLuint msg_type, 396 GLuint target_cache, 397 GLuint msg_length, 398 GLuint response_length, 399 GLuint end_of_thread ) 400{ 401 struct intel_context *intel = &brw->intel; 402 brw_set_src1(insn, brw_imm_d(0)); 403 404 if (intel->is_ironlake) { 405 insn->bits3.dp_read_igdng.binding_table_index = binding_table_index; 406 insn->bits3.dp_read_igdng.msg_control = msg_control; 407 insn->bits3.dp_read_igdng.msg_type = msg_type; 408 insn->bits3.dp_read_igdng.target_cache = target_cache; 409 insn->bits3.dp_read_igdng.header_present = 1; 410 insn->bits3.dp_read_igdng.response_length = response_length; 411 insn->bits3.dp_read_igdng.msg_length = msg_length; 412 insn->bits3.dp_read_igdng.pad1 = 0; 413 insn->bits3.dp_read_igdng.end_of_thread = end_of_thread; 414 insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ; 415 insn->bits2.send_igdng.end_of_thread = end_of_thread; 416 } else { 417 insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/ 418 insn->bits3.dp_read.msg_control = msg_control; /*8:11*/ 419 insn->bits3.dp_read.msg_type = msg_type; /*12:13*/ 420 insn->bits3.dp_read.target_cache = target_cache; /*14:15*/ 421 insn->bits3.dp_read.response_length = response_length; /*16:19*/ 422 insn->bits3.dp_read.msg_length = msg_length; /*20:23*/ 423 insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/ 424 insn->bits3.dp_read.pad1 = 0; /*28:30*/ 425 insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/ 426 } 427} 428 429static void brw_set_sampler_message(struct brw_context *brw, 430 struct brw_instruction *insn, 431 GLuint binding_table_index, 432 GLuint sampler, 433 GLuint msg_type, 434 GLuint response_length, 435 GLuint msg_length, 436 GLboolean eot, 437 GLuint header_present, 438 GLuint simd_mode) 439{ 440 struct intel_context *intel = &brw->intel; 441 assert(eot == 0); 442 brw_set_src1(insn, brw_imm_d(0)); 443 444 if (intel->is_ironlake) { 445 
insn->bits3.sampler_igdng.binding_table_index = binding_table_index; 446 insn->bits3.sampler_igdng.sampler = sampler; 447 insn->bits3.sampler_igdng.msg_type = msg_type; 448 insn->bits3.sampler_igdng.simd_mode = simd_mode; 449 insn->bits3.sampler_igdng.header_present = header_present; 450 insn->bits3.sampler_igdng.response_length = response_length; 451 insn->bits3.sampler_igdng.msg_length = msg_length; 452 insn->bits3.sampler_igdng.end_of_thread = eot; 453 insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_SAMPLER; 454 insn->bits2.send_igdng.end_of_thread = eot; 455 } else if (intel->is_g4x) { 456 insn->bits3.sampler_g4x.binding_table_index = binding_table_index; 457 insn->bits3.sampler_g4x.sampler = sampler; 458 insn->bits3.sampler_g4x.msg_type = msg_type; 459 insn->bits3.sampler_g4x.response_length = response_length; 460 insn->bits3.sampler_g4x.msg_length = msg_length; 461 insn->bits3.sampler_g4x.end_of_thread = eot; 462 insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER; 463 } else { 464 insn->bits3.sampler.binding_table_index = binding_table_index; 465 insn->bits3.sampler.sampler = sampler; 466 insn->bits3.sampler.msg_type = msg_type; 467 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; 468 insn->bits3.sampler.response_length = response_length; 469 insn->bits3.sampler.msg_length = msg_length; 470 insn->bits3.sampler.end_of_thread = eot; 471 insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER; 472 } 473} 474 475 476 477static struct brw_instruction *next_insn( struct brw_compile *p, 478 GLuint opcode ) 479{ 480 struct brw_instruction *insn; 481 482 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN); 483 484 insn = &p->store[p->nr_insn++]; 485 memcpy(insn, p->current, sizeof(*insn)); 486 487 /* Reset this one-shot flag: 488 */ 489 490 if (p->current->header.destreg__conditionalmod) { 491 p->current->header.destreg__conditionalmod = 0; 492 p->current->header.predicate_control = BRW_PREDICATE_NORMAL; 493 } 494 495 insn->header.opcode = 
opcode; 496 return insn; 497} 498 499 500static struct brw_instruction *brw_alu1( struct brw_compile *p, 501 GLuint opcode, 502 struct brw_reg dest, 503 struct brw_reg src ) 504{ 505 struct brw_instruction *insn = next_insn(p, opcode); 506 brw_set_dest(insn, dest); 507 brw_set_src0(insn, src); 508 return insn; 509} 510 511static struct brw_instruction *brw_alu2(struct brw_compile *p, 512 GLuint opcode, 513 struct brw_reg dest, 514 struct brw_reg src0, 515 struct brw_reg src1 ) 516{ 517 struct brw_instruction *insn = next_insn(p, opcode); 518 brw_set_dest(insn, dest); 519 brw_set_src0(insn, src0); 520 brw_set_src1(insn, src1); 521 return insn; 522} 523 524 525/*********************************************************************** 526 * Convenience routines. 527 */ 528#define ALU1(OP) \ 529struct brw_instruction *brw_##OP(struct brw_compile *p, \ 530 struct brw_reg dest, \ 531 struct brw_reg src0) \ 532{ \ 533 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \ 534} 535 536#define ALU2(OP) \ 537struct brw_instruction *brw_##OP(struct brw_compile *p, \ 538 struct brw_reg dest, \ 539 struct brw_reg src0, \ 540 struct brw_reg src1) \ 541{ \ 542 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \ 543} 544 545 546ALU1(MOV) 547ALU2(SEL) 548ALU1(NOT) 549ALU2(AND) 550ALU2(OR) 551ALU2(XOR) 552ALU2(SHR) 553ALU2(SHL) 554ALU2(RSR) 555ALU2(RSL) 556ALU2(ASR) 557ALU2(ADD) 558ALU2(MUL) 559ALU1(FRC) 560ALU1(RNDD) 561ALU1(RNDZ) 562ALU2(MAC) 563ALU2(MACH) 564ALU1(LZD) 565ALU2(DP4) 566ALU2(DPH) 567ALU2(DP3) 568ALU2(DP2) 569ALU2(LINE) 570 571 572 573 574void brw_NOP(struct brw_compile *p) 575{ 576 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP); 577 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 578 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 579 brw_set_src1(insn, brw_imm_ud(0x0)); 580} 581 582 583 584 585 586/*********************************************************************** 587 * Comparisons, if/else/endif 588 */ 589 
/* Emit a JMPI (jump-indexed) instruction.  The jump is always scalar
 * (execution size 1, uncompressed, mask disabled) and the default
 * predication state is cleared afterwards so following instructions
 * are not accidentally predicated.
 */
struct brw_instruction *brw_JMPI(struct brw_compile *p,
                                 struct brw_reg dest,
                                 struct brw_reg src0,
                                 struct brw_reg src1)
{
   struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);

   insn->header.execution_size = 1;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.mask_control = BRW_MASK_DISABLE;

   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}

/* EU takes the value from the flag register and pushes it onto some
 * sort of a stack (presumably merging with any flag value already on
 * the stack).   Within an if block, the flags at the top of the stack
 * control execution on each channel of the unit, eg. on each of the
 * 16 pixel values in our wm programs.
 *
 * When the matching 'else' instruction is reached (presumably by
 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
 *
 * When the matching 'endif' instruction is reached, the flags are
 * popped off.   If the stack is now empty, normal execution resumes.
 *
 * No attempt is made to deal with stack overflow (14 elements?).
 */
/* Open an IF block.  In single-program-flow mode an IF is emulated
 * with a predicated scalar ADD to the IP register (the jump distance
 * is patched in later by brw_ELSE/brw_ENDIF); otherwise a real IF
 * instruction is emitted.  Returns the instruction so it can be
 * patched.
 */
struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
{
   struct brw_instruction *insn;

   if (p->single_program_flow) {
      assert(execute_size == BRW_EXECUTE_1);

      insn = next_insn(p, BRW_OPCODE_ADD);
      insn->header.predicate_inverse = 1;
   } else {
      insn = next_insn(p, BRW_OPCODE_IF);
   }

   /* Override the defaults for this instruction:
    */
   brw_set_dest(insn, brw_ip_reg());
   brw_set_src0(insn, brw_ip_reg());
   brw_set_src1(insn, brw_imm_d(0x0));

   insn->header.execution_size = execute_size;
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   p->current->header.predicate_control = BRW_PREDICATE_NONE;

   return insn;
}


/* Emit the ELSE for an open IF block and patch the IF's jump count to
 * point here.  Jump counts are scaled by br: 2 on Ironlake (counts
 * half-instructions), 1 on earlier parts.
 */
struct brw_instruction *brw_ELSE(struct brw_compile *p,
				 struct brw_instruction *if_insn)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint br = 1;

   if (intel->is_ironlake)
      br = 2;

   if (p->single_program_flow) {
      insn = next_insn(p, BRW_OPCODE_ADD);
   } else {
      insn = next_insn(p, BRW_OPCODE_ELSE);
   }

   brw_set_dest(insn, brw_ip_reg());
   brw_set_src0(insn, brw_ip_reg());
   brw_set_src1(insn, brw_imm_d(0x0));

   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = if_insn->header.execution_size;
   insn->header.mask_control = BRW_MASK_ENABLE;
   if (!p->single_program_flow)
      insn->header.thread_control = BRW_THREAD_SWITCH;

   /* Patch the if instruction to point at this instruction.
    */
   if (p->single_program_flow) {
      assert(if_insn->header.opcode == BRW_OPCODE_ADD);

      /* IP offsets are in bytes; an instruction is 16 bytes. */
      if_insn->bits3.ud = (insn - if_insn + 1) * 16;
   } else {
      assert(if_insn->header.opcode == BRW_OPCODE_IF);

      if_insn->bits3.if_else.jump_count = br * (insn - if_insn);
      if_insn->bits3.if_else.pop_count = 0;
      if_insn->bits3.if_else.pad0 = 0;
   }

   return insn;
}

/* Close an IF (or IF/ELSE) block.  patch_insn is the most recent
 * unresolved IF or ELSE; its jump count is patched to land past the
 * ENDIF.  In single-program-flow mode no ENDIF is emitted at all.
 */
void brw_ENDIF(struct brw_compile *p,
	       struct brw_instruction *patch_insn)
{
   struct intel_context *intel = &p->brw->intel;
   GLuint br = 1;

   if (intel->is_ironlake)
      br = 2;

   if (p->single_program_flow) {
      /* In single program flow mode, there's no need to execute an ENDIF,
       * since we don't need to do any stack operations, and if we're executing
       * currently, we want to just continue executing.
       */
      struct brw_instruction *next = &p->store[p->nr_insn];

      assert(patch_insn->header.opcode == BRW_OPCODE_ADD);

      patch_insn->bits3.ud = (next - patch_insn) * 16;
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);

      brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src1(insn, brw_imm_d(0x0));

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = patch_insn->header.execution_size;
      insn->header.mask_control = BRW_MASK_ENABLE;
      insn->header.thread_control = BRW_THREAD_SWITCH;

      assert(patch_insn->bits3.if_else.jump_count == 0);

      /* Patch the if or else instructions to point at this or the next
       * instruction respectively.
       */
      if (patch_insn->header.opcode == BRW_OPCODE_IF) {
	 /* Automagically turn it into an IFF:
	  */
	 patch_insn->header.opcode = BRW_OPCODE_IFF;
	 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
	 patch_insn->bits3.if_else.pop_count = 0;
	 patch_insn->bits3.if_else.pad0 = 0;
      } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
	 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
	 patch_insn->bits3.if_else.pop_count = 1;
	 patch_insn->bits3.if_else.pad0 = 0;
      } else {
	 assert(0);
      }

      /* Also pop item off the stack in the endif instruction:
       */
      insn->bits3.if_else.jump_count = 0;
      insn->bits3.if_else.pop_count = 1;
      insn->bits3.if_else.pad0 = 0;
   }
}

/* Emit a BREAK out of the innermost loop. */
struct brw_instruction *brw_BREAK(struct brw_compile *p)
{
   struct brw_instruction *insn;
   insn = next_insn(p, BRW_OPCODE_BREAK);
   brw_set_dest(insn, brw_ip_reg());
   brw_set_src0(insn, brw_ip_reg());
   brw_set_src1(insn, brw_imm_d(0x0));
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;
   /* insn->header.mask_control = BRW_MASK_DISABLE; */
   insn->bits3.if_else.pad0 = 0;
   return insn;
}

/* Emit a CONTINUE to the innermost loop's WHILE. */
struct brw_instruction *brw_CONT(struct brw_compile *p)
{
   struct brw_instruction *insn;
   insn = next_insn(p, BRW_OPCODE_CONTINUE);
   brw_set_dest(insn, brw_ip_reg());
   brw_set_src0(insn, brw_ip_reg());
   brw_set_src1(insn, brw_imm_d(0x0));
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.execution_size = BRW_EXECUTE_8;
   /* insn->header.mask_control = BRW_MASK_DISABLE; */
   insn->bits3.if_else.pad0 = 0;
   return insn;
}

/* DO/WHILE loop:
 */
/* Open a DO/WHILE loop.  In single-program-flow mode no instruction
 * is emitted; the returned pointer is simply the location the WHILE
 * will jump back to.
 */
struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
{
   if (p->single_program_flow) {
      return &p->store[p->nr_insn];
   } else {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);

      /* Override the defaults for this instruction:
       */
      brw_set_dest(insn, brw_null_reg());
      brw_set_src0(insn, brw_null_reg());
      brw_set_src1(insn, brw_null_reg());

      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.execution_size = execute_size;
      insn->header.predicate_control = BRW_PREDICATE_NONE;
      /* insn->header.mask_control = BRW_MASK_ENABLE; */
      /* insn->header.mask_control = BRW_MASK_DISABLE; */

      return insn;
   }
}



/* Close a DO/WHILE loop, emitting the backward jump to do_insn.
 * In single-program-flow mode this is a scalar ADD to the IP register
 * with a negative byte offset; otherwise a WHILE with a jump count
 * scaled by br (2 on Ironlake, 1 earlier).
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p,
                                  struct brw_instruction *do_insn)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *insn;
   GLuint br = 1;

   if (intel->is_ironlake)
      br = 2;

   if (p->single_program_flow)
      insn = next_insn(p, BRW_OPCODE_ADD);
   else
      insn = next_insn(p, BRW_OPCODE_WHILE);

   brw_set_dest(insn, brw_ip_reg());
   brw_set_src0(insn, brw_ip_reg());
   brw_set_src1(insn, brw_imm_d(0x0));

   insn->header.compression_control = BRW_COMPRESSION_NONE;

   if (p->single_program_flow) {
      insn->header.execution_size = BRW_EXECUTE_1;

      /* Negative: jump backwards; 16 bytes per instruction. */
      insn->bits3.d = (do_insn - insn) * 16;
   } else {
      insn->header.execution_size = do_insn->header.execution_size;

      assert(do_insn->header.opcode == BRW_OPCODE_DO);
      insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
      insn->bits3.if_else.pop_count = 0;
      insn->bits3.if_else.pad0 = 0;
   }

/*    insn->header.mask_control = BRW_MASK_ENABLE; */

   /* insn->header.mask_control = BRW_MASK_DISABLE; */
   p->current->header.predicate_control = BRW_PREDICATE_NONE;
   return insn;
}


/* FORWARD JUMPS:
 */
/* Patch a previously emitted JMPI so it lands on the next instruction
 * to be emitted.  JMPI offsets are relative to the instruction after
 * the jump, hence the -1; jmpi is 2 on Ironlake (half-instruction
 * units), 1 earlier.
 */
void brw_land_fwd_jump(struct brw_compile *p,
		       struct brw_instruction *jmp_insn)
{
   struct intel_context *intel = &p->brw->intel;
   struct brw_instruction *landing = &p->store[p->nr_insn];
   GLuint jmpi = 1;

   if (intel->is_ironlake)
      jmpi = 2;

   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
   assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);

   jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
}



/* To integrate with the above, it makes sense that the comparison
 * instruction should populate the flag register.  It might be simpler
 * just to use the flag reg for most WM tasks?
 */
void brw_CMP(struct brw_compile *p,
	     struct brw_reg dest,
	     GLuint conditional,
	     struct brw_reg src0,
	     struct brw_reg src1)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);

   insn->header.destreg__conditionalmod = conditional;
   brw_set_dest(insn, dest);
   brw_set_src0(insn, src0);
   brw_set_src1(insn, src1);

/*    guess_execution_size(insn, src0); */


   /* Make it so that future instructions will use the computed flag
    * value until brw_set_predicate_control_flag_value() is called
    * again.
    */
   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.nr == 0) {
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
      p->flag_value = 0xff;
   }
}



/***********************************************************************
 * Helpers for the various SEND message types:
 */

/** Extended math function, float[8].
 */
void brw_math( struct brw_compile *p,
	       struct brw_reg dest,
	       GLuint function,
	       GLuint saturate,
	       GLuint msg_reg_nr,
	       struct brw_reg src,
	       GLuint data_type,
	       GLuint precision )
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
   /* POW takes two operands (base + exponent) => 2 message regs;
    * SINCOS produces two results => 2 response regs.
    */
   GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
   GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;

   /* Example code doesn't set predicate_control for send
    * instructions.
    */
   insn->header.predicate_control = 0;
   insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_dest(insn, dest);
   brw_set_src0(insn, src);
   brw_set_math_message(p->brw,
			insn,
			msg_length, response_length,
			function,
			BRW_MATH_INTEGER_UNSIGNED,
			precision,
			saturate,
			data_type);
}

/**
 * Extended math function, float[16].
 * Use 2 send instructions.
 */
void brw_math_16( struct brw_compile *p,
		  struct brw_reg dest,
		  GLuint function,
		  GLuint saturate,
		  GLuint msg_reg_nr,
		  struct brw_reg src,
		  GLuint precision )
{
   struct brw_instruction *insn;
   GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
   GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;

   /* First instruction:
    */
   brw_push_insn_state(p);
   brw_set_predicate_control_flag_value(p, 0xff);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_dest(insn, dest);
   brw_set_src0(insn, src);
   brw_set_math_message(p->brw,
			insn,
			msg_length, response_length,
			function,
			BRW_MATH_INTEGER_UNSIGNED,
			precision,
			saturate,
			BRW_MATH_DATA_VECTOR);

   /* Second instruction:
    */
   insn = next_insn(p, BRW_OPCODE_SEND);
   insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
   insn->header.destreg__conditionalmod = msg_reg_nr+1;

   brw_set_dest(insn, offset(dest,1));
   brw_set_src0(insn, src);
   brw_set_math_message(p->brw,
			insn,
			msg_length, response_length,
			function,
			BRW_MATH_INTEGER_UNSIGNED,
			precision,
			saturate,
			BRW_MATH_DATA_VECTOR);

   brw_pop_insn_state(p);
}


/**
 * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
 * Scratch offset should be a multiple of 64.
 * Used for register spilling.
 */
void brw_dp_WRITE_16( struct brw_compile *p,
		      struct brw_reg src,
		      GLuint scratch_offset )
{
   GLuint msg_reg_nr = 1;
   {
      /* Patch the message header (r0) with the scratch offset, with
       * masking/compression disabled so the write is unconditional.
       */
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
	      brw_imm_d(scratch_offset));

      brw_pop_insn_state(p);
   }

   {
      GLuint msg_length = 3;
      /* Writes produce no response; send results to the null register. */
      struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      insn->header.predicate_control = 0; /* XXX */
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(insn, dest);
      brw_set_src0(insn, src);

      brw_set_dp_write_message(p->brw,
			       insn,
			       255, /* binding table index (255=stateless) */
			       BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
			       BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
			       msg_length,
			       0, /* pixel scoreboard */
			       0, /* response_length */
			       0); /* eot */
   }
}


/**
 * Read block of 16 dwords/floats from the data port Render Cache scratch buffer.
 * Scratch offset should be a multiple of 64.
 * Used for register spilling.
 */
void brw_dp_READ_16( struct brw_compile *p,
		     struct brw_reg dest,
		     GLuint scratch_offset )
{
   GLuint msg_reg_nr = 1;
   {
      /* Patch the message header (r0) with the scratch offset, with
       * masking/compression disabled so the write is unconditional.
       */
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_mask_control(p, BRW_MASK_DISABLE);

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
	      retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
	      brw_imm_d(scratch_offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      insn->header.predicate_control = 0; /* XXX */
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(insn, dest); /* UW? */
      /* The message payload is the r0 header itself. */
      brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));

      brw_set_dp_read_message(p->brw,
			      insn,
			      255, /* binding table index (255=stateless) */
			      3,   /* msg_control (3 means 4 Owords) */
			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			      1, /* target cache (render/scratch) */
			      1, /* msg_length */
			      2, /* response_length */
			      0); /* eot */
   }
}


/**
 * Read a float[4] vector from the data port Data Cache (const buffer).
 * Location (in buffer) should be a multiple of 16.
 * Used for fetching shader constants.
 * If relAddr is true, we'll do an indirect fetch using the address register.
 */
void brw_dp_READ_4( struct brw_compile *p,
		    struct brw_reg dest,
		    GLboolean relAddr,
		    GLuint location,
		    GLuint bind_table_index )
{
   /* XXX: relAddr not implemented */
   GLuint msg_reg_nr = 1;
   {
      struct brw_reg b;
      brw_push_insn_state(p);
      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_mask_control(p, BRW_MASK_DISABLE);

      /* Setup MRF[1] with location/offset into const buffer */
      b = brw_message_reg(msg_reg_nr);
      b = retype(b, BRW_REGISTER_TYPE_UD);
      /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
       * when the docs say only dword[2] should be set.  Hmmm.  But it works.
       */
      brw_MOV(p, b, brw_imm_ud(location));
      brw_pop_insn_state(p);
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      insn->header.predicate_control = BRW_PREDICATE_NONE;
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.destreg__conditionalmod = msg_reg_nr;
      insn->header.mask_control = BRW_MASK_DISABLE;

      /* cast dest to a uword[8] vector */
      dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

      brw_set_dest(insn, dest);
      brw_set_src0(insn, brw_null_reg());

      brw_set_dp_read_message(p->brw,
			      insn,
			      bind_table_index,
			      0,  /* msg_control (0 means 1 Oword) */
			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			      0, /* source cache = data cache */
			      1, /* msg_length */
			      1, /* response_length (1 Oword) */
			      0); /* eot */
   }
}


/**
 * Read float[4] constant(s) from VS constant buffer.
 * For relative addressing, two float[4] constants will be read into 'dest'.
 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
 */
void brw_dp_READ_4_vs(struct brw_compile *p,
                      struct brw_reg dest,
                      GLuint oword,
                      GLboolean relAddr,
                      struct brw_reg addrReg,
                      GLuint location,
                      GLuint bind_table_index)
{
   GLuint msg_reg_nr = 1;

   /* 'oword' selects which half of the destination gets the constant. */
   assert(oword < 2);
   /*
   printf("vs const read msg, location %u, msg_reg_nr %d\n",
          location, msg_reg_nr);
   */

   /* Setup MRF[1] with location/offset into const buffer */
   {
      struct brw_reg b;

      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
      /*brw_set_access_mode(p, BRW_ALIGN_16);*/

      /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
       * when the docs say only dword[2] should be set.  Hmmm.  But it works.
       */
      b = brw_message_reg(msg_reg_nr);
      b = retype(b, BRW_REGISTER_TYPE_UD);
      /*b = get_element_ud(b, 2);*/
      if (relAddr) {
         /* Indirect: offset = address register + constant location. */
         brw_ADD(p, b, addrReg, brw_imm_ud(location));
      }
      else {
         brw_MOV(p, b, brw_imm_ud(location));
      }

      brw_pop_insn_state(p);
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      insn->header.predicate_control = BRW_PREDICATE_NONE;
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.destreg__conditionalmod = msg_reg_nr;
      insn->header.mask_control = BRW_MASK_DISABLE;
      /*insn->header.access_mode = BRW_ALIGN_16;*/

      brw_set_dest(insn, dest);
      brw_set_src0(insn, brw_null_reg());

      brw_set_dp_read_message(p->brw,
			      insn,
			      bind_table_index,
			      oword,  /* 0 = lower Oword, 1 = upper Oword */
			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
			      0, /* source cache = data cache */
			      1, /* msg_length */
			      1, /* response_length (1 Oword) */
			      0); /* eot */
   }
}



/**
 * Emit a render-target write message (SIMD16, single source) to the
 * framebuffer.  Signature continues on the following lines.
 */
void brw_fb_WRITE(struct brw_compile *p,
		  struct brw_reg dest,
		  GLuint msg_reg_nr,
		  struct brw_reg src0,
		  GLuint binding_table_index,
		  GLuint msg_length,
		  GLuint response_length,
		  GLboolean eot)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

   insn->header.predicate_control = 0; /* XXX */
   insn->header.compression_control = BRW_COMPRESSION_NONE;
   insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_dest(insn, dest);
   brw_set_src0(insn, src0);
   brw_set_dp_write_message(p->brw,
			    insn,
			    binding_table_index,
			    BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
			    BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
			    msg_length,
			    1,	/* pixel scoreboard */
			    response_length,
			    eot);
}


/**
 * Texture sample instruction.
 * Note: the msg_type plus msg_length values determine exactly what kind
 * of sampling operation is performed.  See volume 4, page 161 of docs.
 */
void brw_SAMPLE(struct brw_compile *p,
		struct brw_reg dest,
		GLuint msg_reg_nr,
		struct brw_reg src0,
		GLuint binding_table_index,
		GLuint sampler,
		GLuint writemask,
		GLuint msg_type,
		GLuint response_length,
		GLuint msg_length,
		GLboolean eot,
		GLuint header_present,
		GLuint simd_mode)
{
   GLboolean need_stall = 0;

   /* Nothing to write -- emit no instruction at all. */
   if (writemask == 0) {
      /*_mesa_printf("%s: zero writemask??\n", __FUNCTION__); */
      return;
   }

   /* Hardware doesn't do destination dependency checking on send
    * instructions properly.  Add a workaround which generates the
    * dependency by other means.  In practice it seems like this bug
    * only crops up for texture samples, and only where registers are
    * written by the send and then written again later without being
    * read in between.  Luckily for us, we already track that
    * information and use it to modify the writemask for the
    * instruction, so that is a guide for whether a workaround is
    * needed.
    */
   if (writemask != WRITEMASK_XYZW) {
      GLuint dst_offset = 0;
      GLuint i, newmask = 0, len = 0;

      /* Skip leading unwritten channels; each skipped channel moves the
       * destination down by 2 registers (response_length below is also
       * 2 regs per channel -- presumably the SIMD16 layout; confirm).
       */
      for (i = 0; i < 4; i++) {
	 if (writemask & (1<<i))
	    break;
	 dst_offset += 2;
      }
      /* Collect the contiguous run of written channels that follows. */
      for (; i < 4; i++) {
	 if (!(writemask & (1<<i)))
	    break;
	 newmask |= 1<<i;
	 len++;
      }

      if (newmask != writemask) {
	 /* Mask has a hole -- can't shrink the response; fall back to
	  * the explicit stall emitted after the send below.
	  */
	 need_stall = 1;
         /* _mesa_printf("need stall %x %x\n", newmask , writemask); */
      }
      else {
	 struct brw_reg m1 = brw_message_reg(msg_reg_nr);

	 /* Invert: the header field holds the channels to *disable*. */
	 newmask = ~newmask & WRITEMASK_XYZW;

	 brw_push_insn_state(p);

	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
	 brw_set_mask_control(p, BRW_MASK_DISABLE);

	 /* Copy r0 into the message header, then set the channel-mask
	  * field -- NOTE(review): the disable mask appears to live at
	  * bit 12 of m1.2; confirm against the sampler message docs.
	  */
	 brw_MOV(p, m1, brw_vec8_grf(0,0));
	 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));

	 brw_pop_insn_state(p);

	 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
	 dest = offset(dest, dst_offset);
	 response_length = len * 2;
      }
   }

   {
      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

      insn->header.predicate_control = 0; /* XXX */
      insn->header.compression_control = BRW_COMPRESSION_NONE;
      insn->header.destreg__conditionalmod = msg_reg_nr;

      brw_set_dest(insn, dest);
      brw_set_src0(insn, src0);
      brw_set_sampler_message(p->brw, insn,
			      binding_table_index,
			      sampler,
			      msg_type,
			      response_length,
			      msg_length,
			      eot,
			      header_present,
			      simd_mode);
   }

   if (need_stall) {
      /* Read the last register of the response to force the EU to wait
       * for the sample to complete before any later overwrite.
       */
      struct brw_reg reg = vec8(offset(dest, response_length-1));

      /*  mov (8) r9.0<1>:f    r9.0<8;8,1>:f { Align1 }
       */
      brw_push_insn_state(p);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, reg, reg);
      brw_pop_insn_state(p);
   }

}

/* All these variables are pretty confusing - we might be better off
 * using bitmasks and macros for this, in the old style.  Or perhaps
 * just having the caller instantiate the fields in dword3 itself.
 */
void brw_urb_WRITE(struct brw_compile *p,
		   struct brw_reg dest,
		   GLuint msg_reg_nr,
		   struct brw_reg src0,
		   GLboolean allocate,
		   GLboolean used,
		   GLuint msg_length,
		   GLuint response_length,
		   GLboolean eot,
		   GLboolean writes_complete,
		   GLuint offset,
		   GLuint swizzle)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

   assert(msg_length < BRW_MAX_MRF);

   brw_set_dest(insn, dest);
   brw_set_src0(insn, src0);
   brw_set_src1(insn, brw_imm_d(0));

   insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_urb_message(p->brw,
		       insn,
		       allocate,
		       used,
		       msg_length,
		       response_length,
		       eot,
		       writes_complete,
		       offset,
		       swizzle);
}

/**
 * Emit an FF_SYNC message send -- same parameter set as brw_urb_WRITE,
 * but encoded via brw_set_ff_sync_message().
 */
void brw_ff_sync(struct brw_compile *p,
		 struct brw_reg dest,
		 GLuint msg_reg_nr,
		 struct brw_reg src0,
		 GLboolean allocate,
		 GLboolean used,
		 GLuint msg_length,
		 GLuint response_length,
		 GLboolean eot,
		 GLboolean writes_complete,
		 GLuint offset,
		 GLuint swizzle)
{
   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);

   /* NOTE(review): literal 16 here vs BRW_MAX_MRF in brw_urb_WRITE --
    * presumably the same limit; confirm and unify.
    */
   assert(msg_length < 16);

   brw_set_dest(insn, dest);
   brw_set_src0(insn, src0);
   brw_set_src1(insn, brw_imm_d(0));

   insn->header.destreg__conditionalmod = msg_reg_nr;

   brw_set_ff_sync_message(p->brw,
			   insn,
			   allocate,
			   used,
			   msg_length,
			   response_length,
			   eot,
			   writes_complete,
			   offset,
			   swizzle);
}