brw_eu_emit.c revision e6ec500e19f455237828f4f3955f888ad0b56382
1/* 2 Copyright (C) Intel Corp. 2006. All Rights Reserved. 3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to 4 develop this 3D driver. 5 6 Permission is hereby granted, free of charge, to any person obtaining 7 a copy of this software and associated documentation files (the 8 "Software"), to deal in the Software without restriction, including 9 without limitation the rights to use, copy, modify, merge, publish, 10 distribute, sublicense, and/or sell copies of the Software, and to 11 permit persons to whom the Software is furnished to do so, subject to 12 the following conditions: 13 14 The above copyright notice and this permission notice (including the 15 next paragraph) shall be included in all copies or substantial 16 portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 26 **********************************************************************/ 27 /* 28 * Authors: 29 * Keith Whitwell <keith@tungstengraphics.com> 30 */ 31 32 33#include "brw_context.h" 34#include "brw_defines.h" 35#include "brw_eu.h" 36 37 38 39 40/*********************************************************************** 41 * Internal helper for constructing instructions 42 */ 43 44static void guess_execution_size( struct brw_instruction *insn, 45 struct brw_reg reg ) 46{ 47 if (reg.width == BRW_WIDTH_8 && 48 insn->header.compression_control == BRW_COMPRESSION_COMPRESSED) 49 insn->header.execution_size = BRW_EXECUTE_16; 50 else 51 insn->header.execution_size = reg.width; /* note - definitions are compatible */ 52} 53 54 55static void brw_set_dest( struct brw_instruction *insn, 56 struct brw_reg dest ) 57{ 58 if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && 59 dest.file != BRW_MESSAGE_REGISTER_FILE) 60 assert(dest.nr < 128); 61 62 insn->bits1.da1.dest_reg_file = dest.file; 63 insn->bits1.da1.dest_reg_type = dest.type; 64 insn->bits1.da1.dest_address_mode = dest.address_mode; 65 66 if (dest.address_mode == BRW_ADDRESS_DIRECT) { 67 insn->bits1.da1.dest_reg_nr = dest.nr; 68 69 if (insn->header.access_mode == BRW_ALIGN_1) { 70 insn->bits1.da1.dest_subreg_nr = dest.subnr; 71 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) 72 dest.hstride = BRW_HORIZONTAL_STRIDE_1; 73 insn->bits1.da1.dest_horiz_stride = dest.hstride; 74 } 75 else { 76 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; 77 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; 78 /* even ignored in da16, still need to set as '01' */ 79 insn->bits1.da16.dest_horiz_stride = 1; 80 } 81 } 82 else { 83 insn->bits1.ia1.dest_subreg_nr = dest.subnr; 84 85 /* These are different sizes in align1 vs align16: 86 */ 87 if (insn->header.access_mode == BRW_ALIGN_1) { 88 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; 89 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) 90 dest.hstride = BRW_HORIZONTAL_STRIDE_1; 91 insn->bits1.ia1.dest_horiz_stride = dest.hstride; 92 } 93 else { 94 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; 95 /* even ignored in da16, still need to set as '01' */ 96 insn->bits1.ia16.dest_horiz_stride = 1; 97 } 98 } 99 100 /* NEW: Set the execution size based on dest.width and 101 * insn->compression_control: 102 */ 103 guess_execution_size(insn, dest); 104} 105 106static void brw_set_src0( struct brw_instruction *insn, 107 struct brw_reg reg ) 108{ 109 if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) 110 assert(reg.nr < 128); 111 112 insn->bits1.da1.src0_reg_file = reg.file; 113 insn->bits1.da1.src0_reg_type = reg.type; 114 insn->bits2.da1.src0_abs = reg.abs; 115 insn->bits2.da1.src0_negate = reg.negate; 116 insn->bits2.da1.src0_address_mode = reg.address_mode; 117 118 if (reg.file == BRW_IMMEDIATE_VALUE) { 119 insn->bits3.ud = reg.dw1.ud; 120 121 /* Required to set some fields in src1 as well: 122 */ 123 insn->bits1.da1.src1_reg_file = 0; /* arf */ 124 insn->bits1.da1.src1_reg_type = reg.type; 125 } 126 else 127 { 128 if (reg.address_mode == BRW_ADDRESS_DIRECT) { 129 if (insn->header.access_mode == BRW_ALIGN_1) { 130 insn->bits2.da1.src0_subreg_nr = reg.subnr; 131 insn->bits2.da1.src0_reg_nr = reg.nr; 132 } 133 else { 134 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16; 135 insn->bits2.da16.src0_reg_nr = reg.nr; 136 } 137 } 138 else { 139 insn->bits2.ia1.src0_subreg_nr = reg.subnr; 140 141 if (insn->header.access_mode == BRW_ALIGN_1) { 142 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; 143 } 144 else { 145 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; 146 } 147 } 148 149 if (insn->header.access_mode == BRW_ALIGN_1) { 150 if (reg.width == BRW_WIDTH_1 && 151 insn->header.execution_size == BRW_EXECUTE_1) { 152 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0; 153 insn->bits2.da1.src0_width = BRW_WIDTH_1; 154 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0; 155 } 156 else { 157 insn->bits2.da1.src0_horiz_stride = reg.hstride; 158 insn->bits2.da1.src0_width = reg.width; 159 insn->bits2.da1.src0_vert_stride = reg.vstride; 160 } 161 } 162 else { 163 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); 164 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); 165 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); 166 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); 167 168 /* This is an oddity of the fact we're using the same 169 * descriptions for registers in align_16 as align_1: 170 */ 171 if (reg.vstride == BRW_VERTICAL_STRIDE_8) 172 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4; 173 else 174 insn->bits2.da16.src0_vert_stride = reg.vstride; 175 } 176 } 177} 178 179 180void brw_set_src1( struct brw_instruction *insn, 181 struct brw_reg reg ) 182{ 183 assert(reg.file != BRW_MESSAGE_REGISTER_FILE); 184 185 assert(reg.nr < 128); 186 187 insn->bits1.da1.src1_reg_file = reg.file; 188 insn->bits1.da1.src1_reg_type = reg.type; 189 insn->bits3.da1.src1_abs = reg.abs; 190 insn->bits3.da1.src1_negate = reg.negate; 191 192 /* Only src1 can be immediate in two-argument instructions. 193 */ 194 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE); 195 196 if (reg.file == BRW_IMMEDIATE_VALUE) { 197 insn->bits3.ud = reg.dw1.ud; 198 } 199 else { 200 /* This is a hardware restriction, which may or may not be lifted 201 * in the future: 202 */ 203 assert (reg.address_mode == BRW_ADDRESS_DIRECT); 204 /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */ 205 206 if (insn->header.access_mode == BRW_ALIGN_1) { 207 insn->bits3.da1.src1_subreg_nr = reg.subnr; 208 insn->bits3.da1.src1_reg_nr = reg.nr; 209 } 210 else { 211 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16; 212 insn->bits3.da16.src1_reg_nr = reg.nr; 213 } 214 215 if (insn->header.access_mode == BRW_ALIGN_1) { 216 if (reg.width == BRW_WIDTH_1 && 217 insn->header.execution_size == BRW_EXECUTE_1) { 218 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0; 219 insn->bits3.da1.src1_width = BRW_WIDTH_1; 220 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0; 221 } 222 else { 223 insn->bits3.da1.src1_horiz_stride = reg.hstride; 224 insn->bits3.da1.src1_width = reg.width; 225 insn->bits3.da1.src1_vert_stride = reg.vstride; 226 } 227 } 228 else { 229 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); 230 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); 231 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); 232 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); 233 234 /* This is an oddity of the fact we're using the same 235 * descriptions for registers in align_16 as align_1: 236 */ 237 if (reg.vstride == BRW_VERTICAL_STRIDE_8) 238 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4; 239 else 240 insn->bits3.da16.src1_vert_stride = reg.vstride; 241 } 242 } 243} 244 245 246 247static void brw_set_math_message( struct brw_context *brw, 248 struct brw_instruction *insn, 249 GLuint msg_length, 250 GLuint response_length, 251 GLuint function, 252 GLuint integer_type, 253 GLboolean low_precision, 254 GLboolean saturate, 255 GLuint dataType ) 256{ 257 struct intel_context *intel = &brw->intel; 258 brw_set_src1(insn, brw_imm_d(0)); 259 260 if (intel->gen == 5) { 261 insn->bits3.math_gen5.function = function; 262 insn->bits3.math_gen5.int_type = integer_type; 263 insn->bits3.math_gen5.precision = low_precision; 264 insn->bits3.math_gen5.saturate = saturate; 265 insn->bits3.math_gen5.data_type = dataType; 266 insn->bits3.math_gen5.snapshot = 0; 267 insn->bits3.math_gen5.header_present = 0; 268 insn->bits3.math_gen5.response_length = response_length; 269 insn->bits3.math_gen5.msg_length = msg_length; 270 insn->bits3.math_gen5.end_of_thread = 0; 271 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH; 272 insn->bits2.send_gen5.end_of_thread = 0; 273 } else { 274 insn->bits3.math.function = function; 275 insn->bits3.math.int_type = integer_type; 276 insn->bits3.math.precision = low_precision; 277 insn->bits3.math.saturate = saturate; 278 insn->bits3.math.data_type = dataType; 279 insn->bits3.math.response_length = response_length; 280 insn->bits3.math.msg_length = msg_length; 281 insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH; 282 insn->bits3.math.end_of_thread = 0; 283 } 284} 285 286 287static void brw_set_ff_sync_message(struct brw_context *brw, 288 struct brw_instruction *insn, 289 GLboolean allocate, 290 GLuint response_length, 291 GLboolean end_of_thread) 292{ 293 struct intel_context *intel = &brw->intel; 294 brw_set_src1(insn, brw_imm_d(0)); 295 296 insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */ 297 insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */ 298 insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */ 299 insn->bits3.urb_gen5.allocate = allocate; 300 insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */ 301 insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */ 302 insn->bits3.urb_gen5.header_present = 1; 303 insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */ 304 insn->bits3.urb_gen5.msg_length = 1; 305 insn->bits3.urb_gen5.end_of_thread = end_of_thread; 306 if (intel->gen >= 6) { 307 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB; 308 } else { 309 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB; 310 insn->bits2.send_gen5.end_of_thread = end_of_thread; 311 } 312} 313 314static void brw_set_urb_message( struct brw_context *brw, 315 struct brw_instruction *insn, 316 GLboolean allocate, 317 GLboolean used, 318 GLuint msg_length, 319 GLuint response_length, 320 GLboolean end_of_thread, 321 GLboolean complete, 322 GLuint offset, 323 GLuint swizzle_control ) 324{ 325 struct intel_context *intel = &brw->intel; 326 brw_set_src1(insn, brw_imm_d(0)); 327 328 if (intel->gen >= 5) { 329 insn->bits3.urb_gen5.opcode = 0; /* ? */ 330 insn->bits3.urb_gen5.offset = offset; 331 insn->bits3.urb_gen5.swizzle_control = swizzle_control; 332 insn->bits3.urb_gen5.allocate = allocate; 333 insn->bits3.urb_gen5.used = used; /* ? */ 334 insn->bits3.urb_gen5.complete = complete; 335 insn->bits3.urb_gen5.header_present = 1; 336 insn->bits3.urb_gen5.response_length = response_length; 337 insn->bits3.urb_gen5.msg_length = msg_length; 338 insn->bits3.urb_gen5.end_of_thread = end_of_thread; 339 if (intel->gen >= 6) { 340 /* For SNB, the SFID bits moved to the condmod bits, and 341 * EOT stayed in bits3 above. Does the EOT bit setting 342 * below on Ironlake even do anything? 343 */ 344 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB; 345 } else { 346 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB; 347 insn->bits2.send_gen5.end_of_thread = end_of_thread; 348 } 349 } else { 350 insn->bits3.urb.opcode = 0; /* ? */ 351 insn->bits3.urb.offset = offset; 352 insn->bits3.urb.swizzle_control = swizzle_control; 353 insn->bits3.urb.allocate = allocate; 354 insn->bits3.urb.used = used; /* ? */ 355 insn->bits3.urb.complete = complete; 356 insn->bits3.urb.response_length = response_length; 357 insn->bits3.urb.msg_length = msg_length; 358 insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB; 359 insn->bits3.urb.end_of_thread = end_of_thread; 360 } 361} 362 363static void brw_set_dp_write_message( struct brw_context *brw, 364 struct brw_instruction *insn, 365 GLuint binding_table_index, 366 GLuint msg_control, 367 GLuint msg_type, 368 GLuint msg_length, 369 GLuint pixel_scoreboard_clear, 370 GLuint response_length, 371 GLuint end_of_thread, 372 GLuint send_commit_msg) 373{ 374 struct intel_context *intel = &brw->intel; 375 brw_set_src1(insn, brw_imm_ud(0)); 376 377 if (intel->gen >= 6) { 378 insn->bits3.dp_render_cache.binding_table_index = binding_table_index; 379 insn->bits3.dp_render_cache.msg_control = msg_control; 380 insn->bits3.dp_render_cache.pixel_scoreboard_clear = pixel_scoreboard_clear; 381 insn->bits3.dp_render_cache.msg_type = msg_type; 382 insn->bits3.dp_render_cache.send_commit_msg = send_commit_msg; 383 insn->bits3.dp_render_cache.header_present = 0; /* XXX */ 384 insn->bits3.dp_render_cache.response_length = response_length; 385 insn->bits3.dp_render_cache.msg_length = msg_length; 386 insn->bits3.dp_render_cache.end_of_thread = end_of_thread; 387 insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_WRITE; 388 /* XXX really need below? */ 389 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE; 390 insn->bits2.send_gen5.end_of_thread = end_of_thread; 391 } else if (intel->gen == 5) { 392 insn->bits3.dp_write_gen5.binding_table_index = binding_table_index; 393 insn->bits3.dp_write_gen5.msg_control = msg_control; 394 insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear; 395 insn->bits3.dp_write_gen5.msg_type = msg_type; 396 insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg; 397 insn->bits3.dp_write_gen5.header_present = 1; 398 insn->bits3.dp_write_gen5.response_length = response_length; 399 insn->bits3.dp_write_gen5.msg_length = msg_length; 400 insn->bits3.dp_write_gen5.end_of_thread = end_of_thread; 401 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE; 402 insn->bits2.send_gen5.end_of_thread = end_of_thread; 403 } else { 404 insn->bits3.dp_write.binding_table_index = binding_table_index; 405 insn->bits3.dp_write.msg_control = msg_control; 406 insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear; 407 insn->bits3.dp_write.msg_type = msg_type; 408 insn->bits3.dp_write.send_commit_msg = send_commit_msg; 409 insn->bits3.dp_write.response_length = response_length; 410 insn->bits3.dp_write.msg_length = msg_length; 411 insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE; 412 insn->bits3.dp_write.end_of_thread = end_of_thread; 413 } 414} 415 416static void brw_set_dp_read_message( struct brw_context *brw, 417 struct brw_instruction *insn, 418 GLuint binding_table_index, 419 GLuint msg_control, 420 GLuint msg_type, 421 GLuint target_cache, 422 GLuint msg_length, 423 GLuint response_length, 424 GLuint end_of_thread ) 425{ 426 struct intel_context *intel = &brw->intel; 427 brw_set_src1(insn, brw_imm_d(0)); 428 429 if (intel->gen == 5) { 430 insn->bits3.dp_read_gen5.binding_table_index = binding_table_index; 431 insn->bits3.dp_read_gen5.msg_control = msg_control; 432 insn->bits3.dp_read_gen5.msg_type = msg_type; 433 insn->bits3.dp_read_gen5.target_cache = target_cache; 434 insn->bits3.dp_read_gen5.header_present = 1; 435 insn->bits3.dp_read_gen5.response_length = response_length; 436 insn->bits3.dp_read_gen5.msg_length = msg_length; 437 insn->bits3.dp_read_gen5.pad1 = 0; 438 insn->bits3.dp_read_gen5.end_of_thread = end_of_thread; 439 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ; 440 insn->bits2.send_gen5.end_of_thread = end_of_thread; 441 } else { 442 insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/ 443 insn->bits3.dp_read.msg_control = msg_control; /*8:11*/ 444 insn->bits3.dp_read.msg_type = msg_type; /*12:13*/ 445 insn->bits3.dp_read.target_cache = target_cache; /*14:15*/ 446 insn->bits3.dp_read.response_length = response_length; /*16:19*/ 447 insn->bits3.dp_read.msg_length = msg_length; /*20:23*/ 448 insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/ 449 insn->bits3.dp_read.pad1 = 0; /*28:30*/ 450 insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/ 451 } 452} 453 454static void brw_set_sampler_message(struct brw_context *brw, 455 struct brw_instruction *insn, 456 GLuint binding_table_index, 457 GLuint sampler, 458 GLuint msg_type, 459 GLuint response_length, 460 GLuint msg_length, 461 GLboolean eot, 462 GLuint header_present, 463 GLuint simd_mode) 464{ 465 struct intel_context *intel = &brw->intel; 466 assert(eot == 0); 467 brw_set_src1(insn, brw_imm_d(0)); 468 469 if (intel->gen == 5) { 470 insn->bits3.sampler_gen5.binding_table_index = binding_table_index; 471 insn->bits3.sampler_gen5.sampler = sampler; 472 insn->bits3.sampler_gen5.msg_type = msg_type; 473 insn->bits3.sampler_gen5.simd_mode = simd_mode; 474 insn->bits3.sampler_gen5.header_present = header_present; 475 insn->bits3.sampler_gen5.response_length = response_length; 476 insn->bits3.sampler_gen5.msg_length = msg_length; 477 insn->bits3.sampler_gen5.end_of_thread = eot; 478 insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER; 479 insn->bits2.send_gen5.end_of_thread = eot; 480 } else if (intel->is_g4x) { 481 insn->bits3.sampler_g4x.binding_table_index = binding_table_index; 482 insn->bits3.sampler_g4x.sampler = sampler; 483 insn->bits3.sampler_g4x.msg_type = msg_type; 484 insn->bits3.sampler_g4x.response_length = response_length; 485 insn->bits3.sampler_g4x.msg_length = msg_length; 486 insn->bits3.sampler_g4x.end_of_thread = eot; 487 insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER; 488 } else { 489 insn->bits3.sampler.binding_table_index = binding_table_index; 490 insn->bits3.sampler.sampler = sampler; 491 insn->bits3.sampler.msg_type = msg_type; 492 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; 493 insn->bits3.sampler.response_length = response_length; 494 insn->bits3.sampler.msg_length = msg_length; 495 insn->bits3.sampler.end_of_thread = eot; 496 insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER; 497 } 498} 499 500 501 502static struct brw_instruction *next_insn( struct brw_compile *p, 503 GLuint opcode ) 504{ 505 struct brw_instruction *insn; 506 507 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN); 508 509 insn = &p->store[p->nr_insn++]; 510 memcpy(insn, p->current, sizeof(*insn)); 511 512 /* Reset this one-shot flag: 513 */ 514 515 if (p->current->header.destreg__conditionalmod) { 516 p->current->header.destreg__conditionalmod = 0; 517 p->current->header.predicate_control = BRW_PREDICATE_NORMAL; 518 } 519 520 insn->header.opcode = opcode; 521 return insn; 522} 523 524 525static struct brw_instruction *brw_alu1( struct brw_compile *p, 526 GLuint opcode, 527 struct brw_reg dest, 528 struct brw_reg src ) 529{ 530 struct brw_instruction *insn = next_insn(p, opcode); 531 brw_set_dest(insn, dest); 532 brw_set_src0(insn, src); 533 return insn; 534} 535 536static struct brw_instruction *brw_alu2(struct brw_compile *p, 537 GLuint opcode, 538 struct brw_reg dest, 539 struct brw_reg src0, 540 struct brw_reg src1 ) 541{ 542 struct brw_instruction *insn = next_insn(p, opcode); 543 brw_set_dest(insn, dest); 544 brw_set_src0(insn, src0); 545 brw_set_src1(insn, src1); 546 return insn; 547} 548 549 550/*********************************************************************** 551 * Convenience routines. 552 */ 553#define ALU1(OP) \ 554struct brw_instruction *brw_##OP(struct brw_compile *p, \ 555 struct brw_reg dest, \ 556 struct brw_reg src0) \ 557{ \ 558 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \ 559} 560 561#define ALU2(OP) \ 562struct brw_instruction *brw_##OP(struct brw_compile *p, \ 563 struct brw_reg dest, \ 564 struct brw_reg src0, \ 565 struct brw_reg src1) \ 566{ \ 567 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \ 568} 569 570 571ALU1(MOV) 572ALU2(SEL) 573ALU1(NOT) 574ALU2(AND) 575ALU2(OR) 576ALU2(XOR) 577ALU2(SHR) 578ALU2(SHL) 579ALU2(RSR) 580ALU2(RSL) 581ALU2(ASR) 582ALU2(ADD) 583ALU2(MUL) 584ALU1(FRC) 585ALU1(RNDD) 586ALU1(RNDZ) 587ALU2(MAC) 588ALU2(MACH) 589ALU1(LZD) 590ALU2(DP4) 591ALU2(DPH) 592ALU2(DP3) 593ALU2(DP2) 594ALU2(LINE) 595ALU2(PLN) 596 597 598 599void brw_NOP(struct brw_compile *p) 600{ 601 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP); 602 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 603 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 604 brw_set_src1(insn, brw_imm_ud(0x0)); 605} 606 607 608 609 610 611/*********************************************************************** 612 * Comparisons, if/else/endif 613 */ 614 615struct brw_instruction *brw_JMPI(struct brw_compile *p, 616 struct brw_reg dest, 617 struct brw_reg src0, 618 struct brw_reg src1) 619{ 620 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); 621 622 insn->header.execution_size = 1; 623 insn->header.compression_control = BRW_COMPRESSION_NONE; 624 insn->header.mask_control = BRW_MASK_DISABLE; 625 626 p->current->header.predicate_control = BRW_PREDICATE_NONE; 627 628 return insn; 629} 630 631/* EU takes the value from the flag register and pushes it onto some 632 * sort of a stack (presumably merging with any flag value already on 633 * the stack). Within an if block, the flags at the top of the stack 634 * control execution on each channel of the unit, eg. on each of the 635 * 16 pixel values in our wm programs. 636 * 637 * When the matching 'else' instruction is reached (presumably by 638 * countdown of the instruction count patched in by our ELSE/ENDIF 639 * functions), the relevent flags are inverted. 640 * 641 * When the matching 'endif' instruction is reached, the flags are 642 * popped off. If the stack is now empty, normal execution resumes. 643 * 644 * No attempt is made to deal with stack overflow (14 elements?). 645 */ 646struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size) 647{ 648 struct brw_instruction *insn; 649 650 if (p->single_program_flow) { 651 assert(execute_size == BRW_EXECUTE_1); 652 653 insn = next_insn(p, BRW_OPCODE_ADD); 654 insn->header.predicate_inverse = 1; 655 } else { 656 insn = next_insn(p, BRW_OPCODE_IF); 657 } 658 659 /* Override the defaults for this instruction: 660 */ 661 brw_set_dest(insn, brw_ip_reg()); 662 brw_set_src0(insn, brw_ip_reg()); 663 brw_set_src1(insn, brw_imm_d(0x0)); 664 665 insn->header.execution_size = execute_size; 666 insn->header.compression_control = BRW_COMPRESSION_NONE; 667 insn->header.predicate_control = BRW_PREDICATE_NORMAL; 668 insn->header.mask_control = BRW_MASK_ENABLE; 669 if (!p->single_program_flow) 670 insn->header.thread_control = BRW_THREAD_SWITCH; 671 672 p->current->header.predicate_control = BRW_PREDICATE_NONE; 673 674 return insn; 675} 676 677 678struct brw_instruction *brw_ELSE(struct brw_compile *p, 679 struct brw_instruction *if_insn) 680{ 681 struct intel_context *intel = &p->brw->intel; 682 struct brw_instruction *insn; 683 GLuint br = 1; 684 685 if (intel->gen == 5) 686 br = 2; 687 688 if (p->single_program_flow) { 689 insn = next_insn(p, BRW_OPCODE_ADD); 690 } else { 691 insn = next_insn(p, BRW_OPCODE_ELSE); 692 } 693 694 brw_set_dest(insn, brw_ip_reg()); 695 brw_set_src0(insn, brw_ip_reg()); 696 brw_set_src1(insn, brw_imm_d(0x0)); 697 698 insn->header.compression_control = BRW_COMPRESSION_NONE; 699 insn->header.execution_size = if_insn->header.execution_size; 700 insn->header.mask_control = BRW_MASK_ENABLE; 701 if (!p->single_program_flow) 702 insn->header.thread_control = BRW_THREAD_SWITCH; 703 704 /* Patch the if instruction to point at this instruction. 705 */ 706 if (p->single_program_flow) { 707 assert(if_insn->header.opcode == BRW_OPCODE_ADD); 708 709 if_insn->bits3.ud = (insn - if_insn + 1) * 16; 710 } else { 711 assert(if_insn->header.opcode == BRW_OPCODE_IF); 712 713 if_insn->bits3.if_else.jump_count = br * (insn - if_insn); 714 if_insn->bits3.if_else.pop_count = 0; 715 if_insn->bits3.if_else.pad0 = 0; 716 } 717 718 return insn; 719} 720 721void brw_ENDIF(struct brw_compile *p, 722 struct brw_instruction *patch_insn) 723{ 724 struct intel_context *intel = &p->brw->intel; 725 GLuint br = 1; 726 727 if (intel->gen == 5) 728 br = 2; 729 730 if (p->single_program_flow) { 731 /* In single program flow mode, there's no need to execute an ENDIF, 732 * since we don't need to do any stack operations, and if we're executing 733 * currently, we want to just continue executing. 734 */ 735 struct brw_instruction *next = &p->store[p->nr_insn]; 736 737 assert(patch_insn->header.opcode == BRW_OPCODE_ADD); 738 739 patch_insn->bits3.ud = (next - patch_insn) * 16; 740 } else { 741 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF); 742 743 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 744 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 745 brw_set_src1(insn, brw_imm_d(0x0)); 746 747 insn->header.compression_control = BRW_COMPRESSION_NONE; 748 insn->header.execution_size = patch_insn->header.execution_size; 749 insn->header.mask_control = BRW_MASK_ENABLE; 750 insn->header.thread_control = BRW_THREAD_SWITCH; 751 752 assert(patch_insn->bits3.if_else.jump_count == 0); 753 754 /* Patch the if or else instructions to point at this or the next 755 * instruction respectively. 756 */ 757 if (patch_insn->header.opcode == BRW_OPCODE_IF) { 758 /* Automagically turn it into an IFF: 759 */ 760 patch_insn->header.opcode = BRW_OPCODE_IFF; 761 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); 762 patch_insn->bits3.if_else.pop_count = 0; 763 patch_insn->bits3.if_else.pad0 = 0; 764 } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) { 765 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); 766 patch_insn->bits3.if_else.pop_count = 1; 767 patch_insn->bits3.if_else.pad0 = 0; 768 } else { 769 assert(0); 770 } 771 772 /* Also pop item off the stack in the endif instruction: 773 */ 774 insn->bits3.if_else.jump_count = 0; 775 insn->bits3.if_else.pop_count = 1; 776 insn->bits3.if_else.pad0 = 0; 777 } 778} 779 780struct brw_instruction *brw_BREAK(struct brw_compile *p) 781{ 782 struct brw_instruction *insn; 783 insn = next_insn(p, BRW_OPCODE_BREAK); 784 brw_set_dest(insn, brw_ip_reg()); 785 brw_set_src0(insn, brw_ip_reg()); 786 brw_set_src1(insn, brw_imm_d(0x0)); 787 insn->header.compression_control = BRW_COMPRESSION_NONE; 788 insn->header.execution_size = BRW_EXECUTE_8; 789 /* insn->header.mask_control = BRW_MASK_DISABLE; */ 790 insn->bits3.if_else.pad0 = 0; 791 return insn; 792} 793 794struct brw_instruction *brw_CONT(struct brw_compile *p) 795{ 796 struct brw_instruction *insn; 797 insn = next_insn(p, BRW_OPCODE_CONTINUE); 798 brw_set_dest(insn, brw_ip_reg()); 799 brw_set_src0(insn, brw_ip_reg()); 800 brw_set_src1(insn, brw_imm_d(0x0)); 801 insn->header.compression_control = BRW_COMPRESSION_NONE; 802 insn->header.execution_size = BRW_EXECUTE_8; 803 /* insn->header.mask_control = BRW_MASK_DISABLE; */ 804 insn->bits3.if_else.pad0 = 0; 805 return insn; 806} 807 808/* DO/WHILE loop: 809 */ 810struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) 811{ 812 if (p->single_program_flow) { 813 return &p->store[p->nr_insn]; 814 } else { 815 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO); 816 817 /* Override the defaults for this instruction: 818 */ 819 brw_set_dest(insn, brw_null_reg()); 820 brw_set_src0(insn, brw_null_reg()); 821 brw_set_src1(insn, brw_null_reg()); 822 823 insn->header.compression_control = BRW_COMPRESSION_NONE; 824 insn->header.execution_size = execute_size; 825 insn->header.predicate_control = BRW_PREDICATE_NONE; 826 /* insn->header.mask_control = BRW_MASK_ENABLE; */ 827 /* insn->header.mask_control = BRW_MASK_DISABLE; */ 828 829 return insn; 830 } 831} 832 833 834 835struct brw_instruction *brw_WHILE(struct brw_compile *p, 836 struct brw_instruction *do_insn) 837{ 838 struct intel_context *intel = &p->brw->intel; 839 struct brw_instruction *insn; 840 GLuint br = 1; 841 842 if (intel->gen == 5) 843 br = 2; 844 845 if (p->single_program_flow) 846 insn = next_insn(p, BRW_OPCODE_ADD); 847 else 848 insn = next_insn(p, BRW_OPCODE_WHILE); 849 850 brw_set_dest(insn, brw_ip_reg()); 851 brw_set_src0(insn, brw_ip_reg()); 852 brw_set_src1(insn, brw_imm_d(0x0)); 853 854 insn->header.compression_control = BRW_COMPRESSION_NONE; 855 856 if (p->single_program_flow) { 857 insn->header.execution_size = BRW_EXECUTE_1; 858 859 insn->bits3.d = (do_insn - insn) * 16; 860 } else { 861 insn->header.execution_size = do_insn->header.execution_size; 862 863 assert(do_insn->header.opcode == BRW_OPCODE_DO); 864 insn->bits3.if_else.jump_count = br * (do_insn - insn + 1); 865 insn->bits3.if_else.pop_count = 0; 866 insn->bits3.if_else.pad0 = 0; 867 } 868 869/* insn->header.mask_control = BRW_MASK_ENABLE; */ 870 871 /* insn->header.mask_control = BRW_MASK_DISABLE; */ 872 p->current->header.predicate_control = BRW_PREDICATE_NONE; 873 return insn; 874} 875 876 877/* FORWARD JUMPS: 878 */ 879void brw_land_fwd_jump(struct brw_compile *p, 880 struct brw_instruction *jmp_insn) 881{ 882 struct intel_context *intel = &p->brw->intel; 883 struct brw_instruction *landing = &p->store[p->nr_insn]; 884 GLuint jmpi = 1; 885 886 if (intel->gen == 5) 887 jmpi = 2; 888 889 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); 890 assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE); 891 892 jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1); 893} 894 895 896 897/* To integrate with the above, it makes sense that the comparison 898 * instruction should populate the flag register. It might be simpler 899 * just to use the flag reg for most WM tasks? 900 */ 901void brw_CMP(struct brw_compile *p, 902 struct brw_reg dest, 903 GLuint conditional, 904 struct brw_reg src0, 905 struct brw_reg src1) 906{ 907 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP); 908 909 insn->header.destreg__conditionalmod = conditional; 910 brw_set_dest(insn, dest); 911 brw_set_src0(insn, src0); 912 brw_set_src1(insn, src1); 913 914/* guess_execution_size(insn, src0); */ 915 916 917 /* Make it so that future instructions will use the computed flag 918 * value until brw_set_predicate_control_flag_value() is called 919 * again. 920 */ 921 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE && 922 dest.nr == 0) { 923 p->current->header.predicate_control = BRW_PREDICATE_NORMAL; 924 p->flag_value = 0xff; 925 } 926} 927 928/* Issue 'wait' instruction for n1, host could program MMIO 929 to wake up thread. */ 930void brw_WAIT (struct brw_compile *p) 931{ 932 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT); 933 struct brw_reg src = brw_notification_1_reg(); 934 935 brw_set_dest(insn, src); 936 brw_set_src0(insn, src); 937 brw_set_src1(insn, brw_null_reg()); 938 insn->header.execution_size = 0; /* must */ 939 insn->header.predicate_control = 0; 940 insn->header.compression_control = 0; 941} 942 943 944/*********************************************************************** 945 * Helpers for the various SEND message types: 946 */ 947 948/** Extended math function, float[8]. 949 */ 950void brw_math( struct brw_compile *p, 951 struct brw_reg dest, 952 GLuint function, 953 GLuint saturate, 954 GLuint msg_reg_nr, 955 struct brw_reg src, 956 GLuint data_type, 957 GLuint precision ) 958{ 959 struct intel_context *intel = &p->brw->intel; 960 961 if (intel->gen >= 6) { 962 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH); 963 964 /* Math is the same ISA format as other opcodes, except that CondModifier 965 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. 966 */ 967 insn->header.destreg__conditionalmod = function; 968 969 brw_set_dest(insn, dest); 970 brw_set_src0(insn, src); 971 brw_set_src1(insn, brw_null_reg()); 972 } else { 973 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 974 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; 975 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; 976 /* Example code doesn't set predicate_control for send 977 * instructions. 978 */ 979 insn->header.predicate_control = 0; 980 insn->header.destreg__conditionalmod = msg_reg_nr; 981 982 brw_set_dest(insn, dest); 983 brw_set_src0(insn, src); 984 brw_set_math_message(p->brw, 985 insn, 986 msg_length, response_length, 987 function, 988 BRW_MATH_INTEGER_UNSIGNED, 989 precision, 990 saturate, 991 data_type); 992 } 993} 994 995/** 996 * Extended math function, float[16]. 997 * Use 2 send instructions. 998 */ 999void brw_math_16( struct brw_compile *p, 1000 struct brw_reg dest, 1001 GLuint function, 1002 GLuint saturate, 1003 GLuint msg_reg_nr, 1004 struct brw_reg src, 1005 GLuint precision ) 1006{ 1007 struct intel_context *intel = &p->brw->intel; 1008 struct brw_instruction *insn; 1009 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; 1010 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; 1011 1012 if (intel->gen >= 6) { 1013 insn = next_insn(p, BRW_OPCODE_MATH); 1014 1015 /* Math is the same ISA format as other opcodes, except that CondModifier 1016 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. 1017 */ 1018 insn->header.destreg__conditionalmod = function; 1019 1020 brw_set_dest(insn, dest); 1021 brw_set_src0(insn, src); 1022 brw_set_src1(insn, brw_null_reg()); 1023 return; 1024 } 1025 1026 /* First instruction: 1027 */ 1028 brw_push_insn_state(p); 1029 brw_set_predicate_control_flag_value(p, 0xff); 1030 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1031 1032 insn = next_insn(p, BRW_OPCODE_SEND); 1033 insn->header.destreg__conditionalmod = msg_reg_nr; 1034 1035 brw_set_dest(insn, dest); 1036 brw_set_src0(insn, src); 1037 brw_set_math_message(p->brw, 1038 insn, 1039 msg_length, response_length, 1040 function, 1041 BRW_MATH_INTEGER_UNSIGNED, 1042 precision, 1043 saturate, 1044 BRW_MATH_DATA_VECTOR); 1045 1046 /* Second instruction: 1047 */ 1048 insn = next_insn(p, BRW_OPCODE_SEND); 1049 insn->header.compression_control = BRW_COMPRESSION_2NDHALF; 1050 insn->header.destreg__conditionalmod = msg_reg_nr+1; 1051 1052 brw_set_dest(insn, offset(dest,1)); 1053 brw_set_src0(insn, src); 1054 brw_set_math_message(p->brw, 1055 insn, 1056 msg_length, response_length, 1057 function, 1058 BRW_MATH_INTEGER_UNSIGNED, 1059 precision, 1060 saturate, 1061 BRW_MATH_DATA_VECTOR); 1062 1063 brw_pop_insn_state(p); 1064} 1065 1066 1067/** 1068 * Write block of 16 dwords/floats to the data port Render Cache scratch buffer. 1069 * Scratch offset should be a multiple of 64. 1070 * Used for register spilling. 1071 */ 1072void brw_dp_WRITE_16( struct brw_compile *p, 1073 struct brw_reg src, 1074 GLuint scratch_offset ) 1075{ 1076 struct intel_context *intel = &p->brw->intel; 1077 GLuint msg_reg_nr = 1; 1078 { 1079 brw_push_insn_state(p); 1080 brw_set_mask_control(p, BRW_MASK_DISABLE); 1081 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1082 1083 /* set message header global offset field (reg 0, element 2) */ 1084 brw_MOV(p, 1085 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), 1086 brw_imm_d(scratch_offset)); 1087 1088 brw_pop_insn_state(p); 1089 } 1090 1091 { 1092 GLuint msg_length = 3; 1093 struct brw_reg dest; 1094 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1095 int send_commit_msg; 1096 1097 insn->header.predicate_control = 0; /* XXX */ 1098 insn->header.compression_control = BRW_COMPRESSION_NONE; 1099 insn->header.destreg__conditionalmod = msg_reg_nr; 1100 1101 /* Until gen6, writes followed by reads from the same location 1102 * are not guaranteed to be ordered unless write_commit is set. 1103 * If set, then a no-op write is issued to the destination 1104 * register to set a dependency, and a read from the destination 1105 * can be used to ensure the ordering. 1106 * 1107 * For gen6, only writes between different threads need ordering 1108 * protection. Our use of DP writes is all about register 1109 * spilling within a thread. 1110 */ 1111 if (intel->gen >= 6) { 1112 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW); 1113 send_commit_msg = 0; 1114 } else { 1115 dest = brw_uw16_grf(0, 0); 1116 send_commit_msg = 1; 1117 } 1118 1119 brw_set_dest(insn, dest); 1120 brw_set_src0(insn, src); 1121 1122 brw_set_dp_write_message(p->brw, 1123 insn, 1124 255, /* binding table index (255=stateless) */ 1125 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */ 1126 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */ 1127 msg_length, 1128 0, /* pixel scoreboard */ 1129 send_commit_msg, /* response_length */ 1130 0, /* eot */ 1131 send_commit_msg); 1132 } 1133} 1134 1135 1136/** 1137 * Read block of 16 dwords/floats from the data port Render Cache scratch buffer. 1138 * Scratch offset should be a multiple of 64. 1139 * Used for register spilling. 1140 */ 1141void brw_dp_READ_16( struct brw_compile *p, 1142 struct brw_reg dest, 1143 GLuint scratch_offset ) 1144{ 1145 GLuint msg_reg_nr = 1; 1146 { 1147 brw_push_insn_state(p); 1148 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1149 brw_set_mask_control(p, BRW_MASK_DISABLE); 1150 1151 /* set message header global offset field (reg 0, element 2) */ 1152 brw_MOV(p, 1153 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), 1154 brw_imm_d(scratch_offset)); 1155 1156 brw_pop_insn_state(p); 1157 } 1158 1159 { 1160 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1161 1162 insn->header.predicate_control = 0; /* XXX */ 1163 insn->header.compression_control = BRW_COMPRESSION_NONE; 1164 insn->header.destreg__conditionalmod = msg_reg_nr; 1165 1166 brw_set_dest(insn, dest); /* UW? */ 1167 brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); 1168 1169 brw_set_dp_read_message(p->brw, 1170 insn, 1171 255, /* binding table index (255=stateless) */ 1172 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, 1173 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ 1174 1, /* target cache (render/scratch) */ 1175 1, /* msg_length */ 1176 2, /* response_length */ 1177 0); /* eot */ 1178 } 1179} 1180 1181 1182/** 1183 * Read a float[4] vector from the data port Data Cache (const buffer). 1184 * Location (in buffer) should be a multiple of 16. 1185 * Used for fetching shader constants. 1186 * If relAddr is true, we'll do an indirect fetch using the address register. 1187 */ 1188void brw_dp_READ_4( struct brw_compile *p, 1189 struct brw_reg dest, 1190 GLboolean relAddr, 1191 GLuint location, 1192 GLuint bind_table_index ) 1193{ 1194 /* XXX: relAddr not implemented */ 1195 GLuint msg_reg_nr = 1; 1196 { 1197 struct brw_reg b; 1198 brw_push_insn_state(p); 1199 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1200 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1201 brw_set_mask_control(p, BRW_MASK_DISABLE); 1202 1203 /* Setup MRF[1] with location/offset into const buffer */ 1204 b = brw_message_reg(msg_reg_nr); 1205 b = retype(b, BRW_REGISTER_TYPE_UD); 1206 /* XXX I think we're setting all the dwords of MRF[1] to 'location'. 1207 * when the docs say only dword[2] should be set. Hmmm. But it works. 1208 */ 1209 brw_MOV(p, b, brw_imm_ud(location)); 1210 brw_pop_insn_state(p); 1211 } 1212 1213 { 1214 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1215 1216 insn->header.predicate_control = BRW_PREDICATE_NONE; 1217 insn->header.compression_control = BRW_COMPRESSION_NONE; 1218 insn->header.destreg__conditionalmod = msg_reg_nr; 1219 insn->header.mask_control = BRW_MASK_DISABLE; 1220 1221 /* cast dest to a uword[8] vector */ 1222 dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); 1223 1224 brw_set_dest(insn, dest); 1225 brw_set_src0(insn, brw_null_reg()); 1226 1227 brw_set_dp_read_message(p->brw, 1228 insn, 1229 bind_table_index, 1230 0, /* msg_control (0 means 1 Oword) */ 1231 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ 1232 0, /* source cache = data cache */ 1233 1, /* msg_length */ 1234 1, /* response_length (1 Oword) */ 1235 0); /* eot */ 1236 } 1237} 1238 1239 1240/** 1241 * Read float[4] constant(s) from VS constant buffer. 1242 * For relative addressing, two float[4] constants will be read into 'dest'. 1243 * Otherwise, one float[4] constant will be read into the lower half of 'dest'. 1244 */ 1245void brw_dp_READ_4_vs(struct brw_compile *p, 1246 struct brw_reg dest, 1247 GLuint location, 1248 GLuint bind_table_index) 1249{ 1250 struct brw_instruction *insn; 1251 GLuint msg_reg_nr = 1; 1252 struct brw_reg b; 1253 1254 /* 1255 printf("vs const read msg, location %u, msg_reg_nr %d\n", 1256 location, msg_reg_nr); 1257 */ 1258 1259 /* Setup MRF[1] with location/offset into const buffer */ 1260 brw_push_insn_state(p); 1261 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1262 brw_set_mask_control(p, BRW_MASK_DISABLE); 1263 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1264 1265 /* XXX I think we're setting all the dwords of MRF[1] to 'location'. 1266 * when the docs say only dword[2] should be set. Hmmm. But it works. 1267 */ 1268 b = brw_message_reg(msg_reg_nr); 1269 b = retype(b, BRW_REGISTER_TYPE_UD); 1270 /*b = get_element_ud(b, 2);*/ 1271 brw_MOV(p, b, brw_imm_ud(location)); 1272 1273 brw_pop_insn_state(p); 1274 1275 insn = next_insn(p, BRW_OPCODE_SEND); 1276 1277 insn->header.predicate_control = BRW_PREDICATE_NONE; 1278 insn->header.compression_control = BRW_COMPRESSION_NONE; 1279 insn->header.destreg__conditionalmod = msg_reg_nr; 1280 insn->header.mask_control = BRW_MASK_DISABLE; 1281 1282 brw_set_dest(insn, dest); 1283 brw_set_src0(insn, brw_null_reg()); 1284 1285 brw_set_dp_read_message(p->brw, 1286 insn, 1287 bind_table_index, 1288 0, 1289 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ 1290 0, /* source cache = data cache */ 1291 1, /* msg_length */ 1292 1, /* response_length (1 Oword) */ 1293 0); /* eot */ 1294} 1295 1296/** 1297 * Read a float[4] constant per vertex from VS constant buffer, with 1298 * relative addressing. 1299 */ 1300void brw_dp_READ_4_vs_relative(struct brw_compile *p, 1301 struct brw_reg dest, 1302 struct brw_reg addr_reg, 1303 GLuint offset, 1304 GLuint bind_table_index) 1305{ 1306 struct intel_context *intel = &p->brw->intel; 1307 int msg_type; 1308 1309 /* Setup MRF[1] with offset into const buffer */ 1310 brw_push_insn_state(p); 1311 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1312 brw_set_mask_control(p, BRW_MASK_DISABLE); 1313 brw_set_predicate_control(p, BRW_PREDICATE_NONE); 1314 1315 /* M1.0 is block offset 0, M1.4 is block offset 1, all other 1316 * fields ignored. 1317 */ 1318 brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), 1319 addr_reg, brw_imm_d(offset)); 1320 brw_pop_insn_state(p); 1321 1322 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1323 1324 insn->header.predicate_control = BRW_PREDICATE_NONE; 1325 insn->header.compression_control = BRW_COMPRESSION_NONE; 1326 insn->header.destreg__conditionalmod = 0; 1327 insn->header.mask_control = BRW_MASK_DISABLE; 1328 1329 brw_set_dest(insn, dest); 1330 brw_set_src0(insn, brw_vec8_grf(0, 0)); 1331 1332 if (intel->gen == 6) 1333 msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; 1334 else if (intel->gen == 5 || intel->is_g4x) 1335 msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; 1336 else 1337 msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; 1338 1339 brw_set_dp_read_message(p->brw, 1340 insn, 1341 bind_table_index, 1342 BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, 1343 msg_type, 1344 0, /* source cache = data cache */ 1345 2, /* msg_length */ 1346 1, /* response_length */ 1347 0); /* eot */ 1348} 1349 1350 1351 1352void brw_fb_WRITE(struct brw_compile *p, 1353 int dispatch_width, 1354 struct brw_reg dest, 1355 GLuint msg_reg_nr, 1356 struct brw_reg src0, 1357 GLuint binding_table_index, 1358 GLuint msg_length, 1359 GLuint response_length, 1360 GLboolean eot) 1361{ 1362 struct intel_context *intel = &p->brw->intel; 1363 struct brw_instruction *insn; 1364 GLuint msg_control, msg_type; 1365 1366 insn = next_insn(p, BRW_OPCODE_SEND); 1367 insn->header.predicate_control = 0; /* XXX */ 1368 insn->header.compression_control = BRW_COMPRESSION_NONE; 1369 1370 if (intel->gen >= 6) { 1371 /* headerless version, just submit color payload */ 1372 src0 = brw_message_reg(msg_reg_nr); 1373 1374 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE_GEN6; 1375 } else { 1376 insn->header.destreg__conditionalmod = msg_reg_nr; 1377 1378 msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; 1379 } 1380 1381 if (dispatch_width == 16) 1382 msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE; 1383 else 1384 msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01; 1385 1386 brw_set_dest(insn, dest); 1387 brw_set_src0(insn, src0); 1388 brw_set_dp_write_message(p->brw, 1389 insn, 1390 binding_table_index, 1391 msg_control, 1392 msg_type, 1393 msg_length, 1394 1, /* pixel scoreboard */ 1395 response_length, 1396 eot, 1397 0 /* send_commit_msg */); 1398} 1399 1400 1401/** 1402 * Texture sample instruction. 1403 * Note: the msg_type plus msg_length values determine exactly what kind 1404 * of sampling operation is performed. See volume 4, page 161 of docs. 1405 */ 1406void brw_SAMPLE(struct brw_compile *p, 1407 struct brw_reg dest, 1408 GLuint msg_reg_nr, 1409 struct brw_reg src0, 1410 GLuint binding_table_index, 1411 GLuint sampler, 1412 GLuint writemask, 1413 GLuint msg_type, 1414 GLuint response_length, 1415 GLuint msg_length, 1416 GLboolean eot, 1417 GLuint header_present, 1418 GLuint simd_mode) 1419{ 1420 GLboolean need_stall = 0; 1421 1422 if (writemask == 0) { 1423 /*printf("%s: zero writemask??\n", __FUNCTION__); */ 1424 return; 1425 } 1426 1427 /* Hardware doesn't do destination dependency checking on send 1428 * instructions properly. Add a workaround which generates the 1429 * dependency by other means. In practice it seems like this bug 1430 * only crops up for texture samples, and only where registers are 1431 * written by the send and then written again later without being 1432 * read in between. Luckily for us, we already track that 1433 * information and use it to modify the writemask for the 1434 * instruction, so that is a guide for whether a workaround is 1435 * needed. 1436 */ 1437 if (writemask != WRITEMASK_XYZW) { 1438 GLuint dst_offset = 0; 1439 GLuint i, newmask = 0, len = 0; 1440 1441 for (i = 0; i < 4; i++) { 1442 if (writemask & (1<<i)) 1443 break; 1444 dst_offset += 2; 1445 } 1446 for (; i < 4; i++) { 1447 if (!(writemask & (1<<i))) 1448 break; 1449 newmask |= 1<<i; 1450 len++; 1451 } 1452 1453 if (newmask != writemask) { 1454 need_stall = 1; 1455 /* printf("need stall %x %x\n", newmask , writemask); */ 1456 } 1457 else { 1458 GLboolean dispatch_16 = GL_FALSE; 1459 1460 struct brw_reg m1 = brw_message_reg(msg_reg_nr); 1461 1462 guess_execution_size(p->current, dest); 1463 if (p->current->header.execution_size == BRW_EXECUTE_16) 1464 dispatch_16 = GL_TRUE; 1465 1466 newmask = ~newmask & WRITEMASK_XYZW; 1467 1468 brw_push_insn_state(p); 1469 1470 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1471 brw_set_mask_control(p, BRW_MASK_DISABLE); 1472 1473 brw_MOV(p, m1, brw_vec8_grf(0,0)); 1474 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12)); 1475 1476 brw_pop_insn_state(p); 1477 1478 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); 1479 dest = offset(dest, dst_offset); 1480 1481 /* For 16-wide dispatch, masked channels are skipped in the 1482 * response. For 8-wide, masked channels still take up slots, 1483 * and are just not written to. 1484 */ 1485 if (dispatch_16) 1486 response_length = len * 2; 1487 } 1488 } 1489 1490 { 1491 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1492 1493 insn->header.predicate_control = 0; /* XXX */ 1494 insn->header.compression_control = BRW_COMPRESSION_NONE; 1495 insn->header.destreg__conditionalmod = msg_reg_nr; 1496 1497 brw_set_dest(insn, dest); 1498 brw_set_src0(insn, src0); 1499 brw_set_sampler_message(p->brw, insn, 1500 binding_table_index, 1501 sampler, 1502 msg_type, 1503 response_length, 1504 msg_length, 1505 eot, 1506 header_present, 1507 simd_mode); 1508 } 1509 1510 if (need_stall) { 1511 struct brw_reg reg = vec8(offset(dest, response_length-1)); 1512 1513 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 } 1514 */ 1515 brw_push_insn_state(p); 1516 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1517 brw_MOV(p, reg, reg); 1518 brw_pop_insn_state(p); 1519 } 1520 1521} 1522 1523/* All these variables are pretty confusing - we might be better off 1524 * using bitmasks and macros for this, in the old style. Or perhaps 1525 * just having the caller instantiate the fields in dword3 itself. 1526 */ 1527void brw_urb_WRITE(struct brw_compile *p, 1528 struct brw_reg dest, 1529 GLuint msg_reg_nr, 1530 struct brw_reg src0, 1531 GLboolean allocate, 1532 GLboolean used, 1533 GLuint msg_length, 1534 GLuint response_length, 1535 GLboolean eot, 1536 GLboolean writes_complete, 1537 GLuint offset, 1538 GLuint swizzle) 1539{ 1540 struct intel_context *intel = &p->brw->intel; 1541 struct brw_instruction *insn; 1542 1543 /* Sandybridge doesn't have the implied move for SENDs, 1544 * and the first message register index comes from src0. 1545 */ 1546 if (intel->gen >= 6) { 1547 brw_push_insn_state(p); 1548 brw_set_mask_control( p, BRW_MASK_DISABLE ); 1549 brw_MOV(p, brw_message_reg(msg_reg_nr), src0); 1550 brw_pop_insn_state(p); 1551 src0 = brw_message_reg(msg_reg_nr); 1552 } 1553 1554 insn = next_insn(p, BRW_OPCODE_SEND); 1555 1556 assert(msg_length < BRW_MAX_MRF); 1557 1558 brw_set_dest(insn, dest); 1559 brw_set_src0(insn, src0); 1560 brw_set_src1(insn, brw_imm_d(0)); 1561 1562 if (intel->gen < 6) 1563 insn->header.destreg__conditionalmod = msg_reg_nr; 1564 1565 brw_set_urb_message(p->brw, 1566 insn, 1567 allocate, 1568 used, 1569 msg_length, 1570 response_length, 1571 eot, 1572 writes_complete, 1573 offset, 1574 swizzle); 1575} 1576 1577void brw_ff_sync(struct brw_compile *p, 1578 struct brw_reg dest, 1579 GLuint msg_reg_nr, 1580 struct brw_reg src0, 1581 GLboolean allocate, 1582 GLuint response_length, 1583 GLboolean eot) 1584{ 1585 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1586 1587 brw_set_dest(insn, dest); 1588 brw_set_src0(insn, src0); 1589 brw_set_src1(insn, brw_imm_d(0)); 1590 1591 insn->header.destreg__conditionalmod = msg_reg_nr; 1592 1593 brw_set_ff_sync_message(p->brw, 1594 insn, 1595 allocate, 1596 response_length, 1597 eot); 1598} 1599