/* brw_eu_emit.c — revision 92c075eeb7c330ea420400d1c2bae57356b19f03 */
1/* 2 Copyright (C) Intel Corp. 2006. All Rights Reserved. 3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to 4 develop this 3D driver. 5 6 Permission is hereby granted, free of charge, to any person obtaining 7 a copy of this software and associated documentation files (the 8 "Software"), to deal in the Software without restriction, including 9 without limitation the rights to use, copy, modify, merge, publish, 10 distribute, sublicense, and/or sell copies of the Software, and to 11 permit persons to whom the Software is furnished to do so, subject to 12 the following conditions: 13 14 The above copyright notice and this permission notice (including the 15 next paragraph) shall be included in all copies or substantial 16 portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
25 26 **********************************************************************/ 27 /* 28 * Authors: 29 * Keith Whitwell <keith@tungstengraphics.com> 30 */ 31 32 33#include "brw_context.h" 34#include "brw_defines.h" 35#include "brw_eu.h" 36 37 38 39 40/*********************************************************************** 41 * Internal helper for constructing instructions 42 */ 43 44static void guess_execution_size( struct brw_instruction *insn, 45 struct brw_reg reg ) 46{ 47 if (reg.width == BRW_WIDTH_8 && 48 insn->header.compression_control == BRW_COMPRESSION_COMPRESSED) 49 insn->header.execution_size = BRW_EXECUTE_16; 50 else 51 insn->header.execution_size = reg.width; /* note - definitions are compatible */ 52} 53 54 55static void brw_set_dest( struct brw_instruction *insn, 56 struct brw_reg dest ) 57{ 58 insn->bits1.da1.dest_reg_file = dest.file; 59 insn->bits1.da1.dest_reg_type = dest.type; 60 insn->bits1.da1.dest_address_mode = dest.address_mode; 61 62 if (dest.address_mode == BRW_ADDRESS_DIRECT) { 63 insn->bits1.da1.dest_reg_nr = dest.nr; 64 65 if (insn->header.access_mode == BRW_ALIGN_1) { 66 insn->bits1.da1.dest_subreg_nr = dest.subnr; 67 insn->bits1.da1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1; 68 } 69 else { 70 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; 71 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; 72 } 73 } 74 else { 75 insn->bits1.ia1.dest_subreg_nr = dest.subnr; 76 77 /* These are different sizes in align1 vs align16: 78 */ 79 if (insn->header.access_mode == BRW_ALIGN_1) { 80 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; 81 insn->bits1.ia1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1; 82 } 83 else { 84 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; 85 } 86 } 87 88 /* NEW: Set the execution size based on dest.width and 89 * insn->compression_control: 90 */ 91 guess_execution_size(insn, dest); 92} 93 94static void brw_set_src0( struct brw_instruction *insn, 95 struct brw_reg reg ) 
96{ 97 assert(reg.file != BRW_MESSAGE_REGISTER_FILE); 98 99 insn->bits1.da1.src0_reg_file = reg.file; 100 insn->bits1.da1.src0_reg_type = reg.type; 101 insn->bits2.da1.src0_abs = reg.abs; 102 insn->bits2.da1.src0_negate = reg.negate; 103 insn->bits2.da1.src0_address_mode = reg.address_mode; 104 105 if (reg.file == BRW_IMMEDIATE_VALUE) { 106 insn->bits3.ud = reg.dw1.ud; 107 108 /* Required to set some fields in src1 as well: 109 */ 110 insn->bits1.da1.src1_reg_file = 0; /* arf */ 111 insn->bits1.da1.src1_reg_type = reg.type; 112 } 113 else 114 { 115 if (reg.address_mode == BRW_ADDRESS_DIRECT) { 116 if (insn->header.access_mode == BRW_ALIGN_1) { 117 insn->bits2.da1.src0_subreg_nr = reg.subnr; 118 insn->bits2.da1.src0_reg_nr = reg.nr; 119 } 120 else { 121 insn->bits2.da16.src0_subreg_nr = reg.subnr / 16; 122 insn->bits2.da16.src0_reg_nr = reg.nr; 123 } 124 } 125 else { 126 insn->bits2.ia1.src0_subreg_nr = reg.subnr; 127 128 if (insn->header.access_mode == BRW_ALIGN_1) { 129 insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; 130 } 131 else { 132 insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; 133 } 134 } 135 136 if (insn->header.access_mode == BRW_ALIGN_1) { 137 if (reg.width == BRW_WIDTH_1 && 138 insn->header.execution_size == BRW_EXECUTE_1) { 139 insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0; 140 insn->bits2.da1.src0_width = BRW_WIDTH_1; 141 insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0; 142 } 143 else { 144 insn->bits2.da1.src0_horiz_stride = reg.hstride; 145 insn->bits2.da1.src0_width = reg.width; 146 insn->bits2.da1.src0_vert_stride = reg.vstride; 147 } 148 } 149 else { 150 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); 151 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); 152 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); 153 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); 154 
155 /* This is an oddity of the fact we're using the same 156 * descriptions for registers in align_16 as align_1: 157 */ 158 if (reg.vstride == BRW_VERTICAL_STRIDE_8) 159 insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4; 160 else 161 insn->bits2.da16.src0_vert_stride = reg.vstride; 162 } 163 } 164} 165 166 167void brw_set_src1( struct brw_instruction *insn, 168 struct brw_reg reg ) 169{ 170 assert(reg.file != BRW_MESSAGE_REGISTER_FILE); 171 172 insn->bits1.da1.src1_reg_file = reg.file; 173 insn->bits1.da1.src1_reg_type = reg.type; 174 insn->bits3.da1.src1_abs = reg.abs; 175 insn->bits3.da1.src1_negate = reg.negate; 176 177 /* Only src1 can be immediate in two-argument instructions. 178 */ 179 assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE); 180 181 if (reg.file == BRW_IMMEDIATE_VALUE) { 182 insn->bits3.ud = reg.dw1.ud; 183 } 184 else { 185 /* This is a hardware restriction, which may or may not be lifted 186 * in the future: 187 */ 188 assert (reg.address_mode == BRW_ADDRESS_DIRECT); 189 //assert (reg.file == BRW_GENERAL_REGISTER_FILE); 190 191 if (insn->header.access_mode == BRW_ALIGN_1) { 192 insn->bits3.da1.src1_subreg_nr = reg.subnr; 193 insn->bits3.da1.src1_reg_nr = reg.nr; 194 } 195 else { 196 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16; 197 insn->bits3.da16.src1_reg_nr = reg.nr; 198 } 199 200 if (insn->header.access_mode == BRW_ALIGN_1) { 201 if (reg.width == BRW_WIDTH_1 && 202 insn->header.execution_size == BRW_EXECUTE_1) { 203 insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0; 204 insn->bits3.da1.src1_width = BRW_WIDTH_1; 205 insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0; 206 } 207 else { 208 insn->bits3.da1.src1_horiz_stride = reg.hstride; 209 insn->bits3.da1.src1_width = reg.width; 210 insn->bits3.da1.src1_vert_stride = reg.vstride; 211 } 212 } 213 else { 214 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); 215 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, 
BRW_CHANNEL_Y); 216 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); 217 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); 218 219 /* This is an oddity of the fact we're using the same 220 * descriptions for registers in align_16 as align_1: 221 */ 222 if (reg.vstride == BRW_VERTICAL_STRIDE_8) 223 insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4; 224 else 225 insn->bits3.da16.src1_vert_stride = reg.vstride; 226 } 227 } 228} 229 230 231 232static void brw_set_math_message( struct brw_instruction *insn, 233 GLuint msg_length, 234 GLuint response_length, 235 GLuint function, 236 GLuint integer_type, 237 GLboolean low_precision, 238 GLboolean saturate, 239 GLuint dataType ) 240{ 241 brw_set_src1(insn, brw_imm_d(0)); 242 243 insn->bits3.math.function = function; 244 insn->bits3.math.int_type = integer_type; 245 insn->bits3.math.precision = low_precision; 246 insn->bits3.math.saturate = saturate; 247 insn->bits3.math.data_type = dataType; 248 insn->bits3.math.response_length = response_length; 249 insn->bits3.math.msg_length = msg_length; 250 insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH; 251 insn->bits3.math.end_of_thread = 0; 252} 253 254static void brw_set_urb_message( struct brw_instruction *insn, 255 GLboolean allocate, 256 GLboolean used, 257 GLuint msg_length, 258 GLuint response_length, 259 GLboolean end_of_thread, 260 GLboolean complete, 261 GLuint offset, 262 GLuint swizzle_control ) 263{ 264 brw_set_src1(insn, brw_imm_d(0)); 265 266 insn->bits3.urb.opcode = 0; /* ? */ 267 insn->bits3.urb.offset = offset; 268 insn->bits3.urb.swizzle_control = swizzle_control; 269 insn->bits3.urb.allocate = allocate; 270 insn->bits3.urb.used = used; /* ? 
*/ 271 insn->bits3.urb.complete = complete; 272 insn->bits3.urb.response_length = response_length; 273 insn->bits3.urb.msg_length = msg_length; 274 insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB; 275 insn->bits3.urb.end_of_thread = end_of_thread; 276} 277 278static void brw_set_dp_write_message( struct brw_instruction *insn, 279 GLuint binding_table_index, 280 GLuint msg_control, 281 GLuint msg_type, 282 GLuint msg_length, 283 GLuint pixel_scoreboard_clear, 284 GLuint response_length, 285 GLuint end_of_thread ) 286{ 287 brw_set_src1(insn, brw_imm_d(0)); 288 289 insn->bits3.dp_write.binding_table_index = binding_table_index; 290 insn->bits3.dp_write.msg_control = msg_control; 291 insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear; 292 insn->bits3.dp_write.msg_type = msg_type; 293 insn->bits3.dp_write.send_commit_msg = 0; 294 insn->bits3.dp_write.response_length = response_length; 295 insn->bits3.dp_write.msg_length = msg_length; 296 insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE; 297 insn->bits3.urb.end_of_thread = end_of_thread; 298} 299 300static void brw_set_dp_read_message( struct brw_instruction *insn, 301 GLuint binding_table_index, 302 GLuint msg_control, 303 GLuint msg_type, 304 GLuint target_cache, 305 GLuint msg_length, 306 GLuint response_length, 307 GLuint end_of_thread ) 308{ 309 brw_set_src1(insn, brw_imm_d(0)); 310 311 insn->bits3.dp_read.binding_table_index = binding_table_index; 312 insn->bits3.dp_read.msg_control = msg_control; 313 insn->bits3.dp_read.msg_type = msg_type; 314 insn->bits3.dp_read.target_cache = target_cache; 315 insn->bits3.dp_read.response_length = response_length; 316 insn->bits3.dp_read.msg_length = msg_length; 317 insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; 318 insn->bits3.dp_read.end_of_thread = end_of_thread; 319} 320 321static void brw_set_sampler_message(struct brw_context *brw, 322 struct brw_instruction *insn, 323 GLuint binding_table_index, 324 GLuint 
sampler, 325 GLuint msg_type, 326 GLuint response_length, 327 GLuint msg_length, 328 GLboolean eot) 329{ 330 brw_set_src1(insn, brw_imm_d(0)); 331 332 if (BRW_IS_GM45(brw) || BRW_IS_G4X(brw)) { 333 insn->bits3.sampler_gm45_g4x.binding_table_index = binding_table_index; 334 insn->bits3.sampler_gm45_g4x.sampler = sampler; 335 insn->bits3.sampler_gm45_g4x.msg_type = msg_type; 336 insn->bits3.sampler_gm45_g4x.response_length = response_length; 337 insn->bits3.sampler_gm45_g4x.msg_length = msg_length; 338 insn->bits3.sampler_gm45_g4x.end_of_thread = eot; 339 insn->bits3.sampler_gm45_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER; 340 } else { 341 insn->bits3.sampler.binding_table_index = binding_table_index; 342 insn->bits3.sampler.sampler = sampler; 343 insn->bits3.sampler.msg_type = msg_type; 344 insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; 345 insn->bits3.sampler.response_length = response_length; 346 insn->bits3.sampler.msg_length = msg_length; 347 insn->bits3.sampler.end_of_thread = eot; 348 insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER; 349 } 350} 351 352 353 354static struct brw_instruction *next_insn( struct brw_compile *p, 355 GLuint opcode ) 356{ 357 struct brw_instruction *insn; 358 359 assert(p->nr_insn + 1 < BRW_EU_MAX_INSN); 360 361 insn = &p->store[p->nr_insn++]; 362 memcpy(insn, p->current, sizeof(*insn)); 363 364 /* Reset this one-shot flag: 365 */ 366 367 if (p->current->header.destreg__conditonalmod) { 368 p->current->header.destreg__conditonalmod = 0; 369 p->current->header.predicate_control = BRW_PREDICATE_NORMAL; 370 } 371 372 insn->header.opcode = opcode; 373 return insn; 374} 375 376 377static struct brw_instruction *brw_alu1( struct brw_compile *p, 378 GLuint opcode, 379 struct brw_reg dest, 380 struct brw_reg src ) 381{ 382 struct brw_instruction *insn = next_insn(p, opcode); 383 brw_set_dest(insn, dest); 384 brw_set_src0(insn, src); 385 return insn; 386} 387 388static struct brw_instruction *brw_alu2(struct 
brw_compile *p, 389 GLuint opcode, 390 struct brw_reg dest, 391 struct brw_reg src0, 392 struct brw_reg src1 ) 393{ 394 struct brw_instruction *insn = next_insn(p, opcode); 395 brw_set_dest(insn, dest); 396 brw_set_src0(insn, src0); 397 brw_set_src1(insn, src1); 398 return insn; 399} 400 401 402/*********************************************************************** 403 * Convenience routines. 404 */ 405#define ALU1(OP) \ 406struct brw_instruction *brw_##OP(struct brw_compile *p, \ 407 struct brw_reg dest, \ 408 struct brw_reg src0) \ 409{ \ 410 return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \ 411} 412 413#define ALU2(OP) \ 414struct brw_instruction *brw_##OP(struct brw_compile *p, \ 415 struct brw_reg dest, \ 416 struct brw_reg src0, \ 417 struct brw_reg src1) \ 418{ \ 419 return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \ 420} 421 422 423ALU1(MOV) 424ALU2(SEL) 425ALU1(NOT) 426ALU2(AND) 427ALU2(OR) 428ALU2(XOR) 429ALU2(SHR) 430ALU2(SHL) 431ALU2(RSR) 432ALU2(RSL) 433ALU2(ASR) 434ALU2(ADD) 435ALU2(MUL) 436ALU1(FRC) 437ALU1(RNDD) 438ALU2(MAC) 439ALU2(MACH) 440ALU1(LZD) 441ALU2(DP4) 442ALU2(DPH) 443ALU2(DP3) 444ALU2(DP2) 445ALU2(LINE) 446 447 448 449 450void brw_NOP(struct brw_compile *p) 451{ 452 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP); 453 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 454 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 455 brw_set_src1(insn, brw_imm_ud(0x0)); 456} 457 458 459 460 461 462/*********************************************************************** 463 * Comparisons, if/else/endif 464 */ 465 466struct brw_instruction *brw_JMPI(struct brw_compile *p, 467 struct brw_reg dest, 468 struct brw_reg src0, 469 struct brw_reg src1) 470{ 471 struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); 472 473 p->current->header.predicate_control = BRW_PREDICATE_NONE; 474 475 return insn; 476} 477 478/* EU takes the value from the flag register and pushes it 
onto some 479 * sort of a stack (presumably merging with any flag value already on 480 * the stack). Within an if block, the flags at the top of the stack 481 * control execution on each channel of the unit, eg. on each of the 482 * 16 pixel values in our wm programs. 483 * 484 * When the matching 'else' instruction is reached (presumably by 485 * countdown of the instruction count patched in by our ELSE/ENDIF 486 * functions), the relevent flags are inverted. 487 * 488 * When the matching 'endif' instruction is reached, the flags are 489 * popped off. If the stack is now empty, normal execution resumes. 490 * 491 * No attempt is made to deal with stack overflow (14 elements?). 492 */ 493struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size) 494{ 495 struct brw_instruction *insn; 496 497 if (p->single_program_flow) { 498 assert(execute_size == BRW_EXECUTE_1); 499 500 insn = next_insn(p, BRW_OPCODE_ADD); 501 insn->header.predicate_inverse = 1; 502 } else { 503 insn = next_insn(p, BRW_OPCODE_IF); 504 } 505 506 /* Override the defaults for this instruction: 507 */ 508 brw_set_dest(insn, brw_ip_reg()); 509 brw_set_src0(insn, brw_ip_reg()); 510 brw_set_src1(insn, brw_imm_d(0x0)); 511 512 insn->header.execution_size = execute_size; 513 insn->header.compression_control = BRW_COMPRESSION_NONE; 514 insn->header.predicate_control = BRW_PREDICATE_NORMAL; 515 insn->header.mask_control = BRW_MASK_ENABLE; 516 517 p->current->header.predicate_control = BRW_PREDICATE_NONE; 518 519 return insn; 520} 521 522 523struct brw_instruction *brw_ELSE(struct brw_compile *p, 524 struct brw_instruction *if_insn) 525{ 526 struct brw_instruction *insn; 527 528 if (p->single_program_flow) { 529 insn = next_insn(p, BRW_OPCODE_ADD); 530 } else { 531 insn = next_insn(p, BRW_OPCODE_ELSE); 532 } 533 534 brw_set_dest(insn, brw_ip_reg()); 535 brw_set_src0(insn, brw_ip_reg()); 536 brw_set_src1(insn, brw_imm_d(0x0)); 537 538 insn->header.compression_control = BRW_COMPRESSION_NONE; 539 
insn->header.execution_size = if_insn->header.execution_size; 540 insn->header.mask_control = BRW_MASK_ENABLE; 541 542 /* Patch the if instruction to point at this instruction. 543 */ 544 if (p->single_program_flow) { 545 assert(if_insn->header.opcode == BRW_OPCODE_ADD); 546 547 if_insn->bits3.ud = (insn - if_insn + 1) * 16; 548 } else { 549 assert(if_insn->header.opcode == BRW_OPCODE_IF); 550 551 if_insn->bits3.if_else.jump_count = insn - if_insn; 552 if_insn->bits3.if_else.pop_count = 1; 553 if_insn->bits3.if_else.pad0 = 0; 554 } 555 556 return insn; 557} 558 559void brw_ENDIF(struct brw_compile *p, 560 struct brw_instruction *patch_insn) 561{ 562 if (p->single_program_flow) { 563 /* In single program flow mode, there's no need to execute an ENDIF, 564 * since we don't need to do any stack operations, and if we're executing 565 * currently, we want to just continue executing. 566 */ 567 struct brw_instruction *next = &p->store[p->nr_insn]; 568 569 assert(patch_insn->header.opcode == BRW_OPCODE_ADD); 570 571 patch_insn->bits3.ud = (next - patch_insn) * 16; 572 } else { 573 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF); 574 575 brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 576 brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); 577 brw_set_src1(insn, brw_imm_d(0x0)); 578 579 insn->header.compression_control = BRW_COMPRESSION_NONE; 580 insn->header.execution_size = patch_insn->header.execution_size; 581 insn->header.mask_control = BRW_MASK_ENABLE; 582 583 assert(patch_insn->bits3.if_else.jump_count == 0); 584 585 /* Patch the if or else instructions to point at this or the next 586 * instruction respectively. 
587 */ 588 if (patch_insn->header.opcode == BRW_OPCODE_IF) { 589 /* Automagically turn it into an IFF: 590 */ 591 patch_insn->header.opcode = BRW_OPCODE_IFF; 592 patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1; 593 patch_insn->bits3.if_else.pop_count = 0; 594 patch_insn->bits3.if_else.pad0 = 0; 595 } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) { 596 patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1; 597 patch_insn->bits3.if_else.pop_count = 1; 598 patch_insn->bits3.if_else.pad0 = 0; 599 } else { 600 assert(0); 601 } 602 603 /* Also pop item off the stack in the endif instruction: 604 */ 605 insn->bits3.if_else.jump_count = 0; 606 insn->bits3.if_else.pop_count = 1; 607 insn->bits3.if_else.pad0 = 0; 608 } 609} 610 611struct brw_instruction *brw_BREAK(struct brw_compile *p) 612{ 613 struct brw_instruction *insn; 614 insn = next_insn(p, BRW_OPCODE_BREAK); 615 brw_set_dest(insn, brw_ip_reg()); 616 brw_set_src0(insn, brw_ip_reg()); 617 brw_set_src1(insn, brw_imm_d(0x0)); 618 insn->header.compression_control = BRW_COMPRESSION_NONE; 619 insn->header.execution_size = BRW_EXECUTE_8; 620 insn->header.mask_control = BRW_MASK_DISABLE; 621 insn->bits3.if_else.pad0 = 0; 622 return insn; 623} 624 625struct brw_instruction *brw_CONT(struct brw_compile *p) 626{ 627 struct brw_instruction *insn; 628 insn = next_insn(p, BRW_OPCODE_CONTINUE); 629 brw_set_dest(insn, brw_ip_reg()); 630 brw_set_src0(insn, brw_ip_reg()); 631 brw_set_src1(insn, brw_imm_d(0x0)); 632 insn->header.compression_control = BRW_COMPRESSION_NONE; 633 insn->header.execution_size = BRW_EXECUTE_8; 634 insn->header.mask_control = BRW_MASK_DISABLE; 635 insn->bits3.if_else.pad0 = 0; 636 return insn; 637} 638 639/* DO/WHILE loop: 640 */ 641struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) 642{ 643 if (p->single_program_flow) { 644 return &p->store[p->nr_insn]; 645 } else { 646 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO); 647 648 /* Override the 
defaults for this instruction: 649 */ 650 brw_set_dest(insn, brw_null_reg()); 651 brw_set_src0(insn, brw_null_reg()); 652 brw_set_src1(insn, brw_null_reg()); 653 654 insn->header.compression_control = BRW_COMPRESSION_NONE; 655 insn->header.execution_size = execute_size; 656 insn->header.predicate_control = BRW_PREDICATE_NONE; 657 /* insn->header.mask_control = BRW_MASK_ENABLE; */ 658 insn->header.mask_control = BRW_MASK_DISABLE; 659 660 return insn; 661 } 662} 663 664 665 666struct brw_instruction *brw_WHILE(struct brw_compile *p, 667 struct brw_instruction *do_insn) 668{ 669 struct brw_instruction *insn; 670 671 if (p->single_program_flow) 672 insn = next_insn(p, BRW_OPCODE_ADD); 673 else 674 insn = next_insn(p, BRW_OPCODE_WHILE); 675 676 brw_set_dest(insn, brw_ip_reg()); 677 brw_set_src0(insn, brw_ip_reg()); 678 brw_set_src1(insn, brw_imm_d(0x0)); 679 680 insn->header.compression_control = BRW_COMPRESSION_NONE; 681 682 if (p->single_program_flow) { 683 insn->header.execution_size = BRW_EXECUTE_1; 684 685 insn->bits3.d = (do_insn - insn) * 16; 686 } else { 687 insn->header.execution_size = do_insn->header.execution_size; 688 689 assert(do_insn->header.opcode == BRW_OPCODE_DO); 690 insn->bits3.if_else.jump_count = do_insn - insn + 1; 691 insn->bits3.if_else.pop_count = 0; 692 insn->bits3.if_else.pad0 = 0; 693 } 694 695/* insn->header.mask_control = BRW_MASK_ENABLE; */ 696 697 insn->header.mask_control = BRW_MASK_DISABLE; 698 p->current->header.predicate_control = BRW_PREDICATE_NONE; 699 return insn; 700} 701 702 703/* FORWARD JUMPS: 704 */ 705void brw_land_fwd_jump(struct brw_compile *p, 706 struct brw_instruction *jmp_insn) 707{ 708 struct brw_instruction *landing = &p->store[p->nr_insn]; 709 710 assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); 711 assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE); 712 713 jmp_insn->bits3.ud = (landing - jmp_insn) - 1; 714} 715 716 717 718/* To integrate with the above, it makes sense that the comparison 719 * 
instruction should populate the flag register. It might be simpler 720 * just to use the flag reg for most WM tasks? 721 */ 722void brw_CMP(struct brw_compile *p, 723 struct brw_reg dest, 724 GLuint conditional, 725 struct brw_reg src0, 726 struct brw_reg src1) 727{ 728 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP); 729 730 insn->header.destreg__conditonalmod = conditional; 731 brw_set_dest(insn, dest); 732 brw_set_src0(insn, src0); 733 brw_set_src1(insn, src1); 734 735/* guess_execution_size(insn, src0); */ 736 737 738 /* Make it so that future instructions will use the computed flag 739 * value until brw_set_predicate_control_flag_value() is called 740 * again. 741 */ 742 if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE && 743 dest.nr == 0) { 744 p->current->header.predicate_control = BRW_PREDICATE_NORMAL; 745 p->flag_value = 0xff; 746 } 747} 748 749 750 751/*********************************************************************** 752 * Helpers for the various SEND message types: 753 */ 754 755/* Invert 8 values 756 */ 757void brw_math( struct brw_compile *p, 758 struct brw_reg dest, 759 GLuint function, 760 GLuint saturate, 761 GLuint msg_reg_nr, 762 struct brw_reg src, 763 GLuint data_type, 764 GLuint precision ) 765{ 766 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 767 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; 768 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; 769 770 /* Example code doesn't set predicate_control for send 771 * instructions. 
772 */ 773 insn->header.predicate_control = 0; 774 insn->header.destreg__conditonalmod = msg_reg_nr; 775 776 brw_set_dest(insn, dest); 777 brw_set_src0(insn, src); 778 brw_set_math_message(insn, 779 msg_length, response_length, 780 function, 781 BRW_MATH_INTEGER_UNSIGNED, 782 precision, 783 saturate, 784 data_type); 785} 786 787/* Use 2 send instructions to invert 16 elements 788 */ 789void brw_math_16( struct brw_compile *p, 790 struct brw_reg dest, 791 GLuint function, 792 GLuint saturate, 793 GLuint msg_reg_nr, 794 struct brw_reg src, 795 GLuint precision ) 796{ 797 struct brw_instruction *insn; 798 GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; 799 GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; 800 801 /* First instruction: 802 */ 803 brw_push_insn_state(p); 804 brw_set_predicate_control_flag_value(p, 0xff); 805 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 806 807 insn = next_insn(p, BRW_OPCODE_SEND); 808 insn->header.destreg__conditonalmod = msg_reg_nr; 809 810 brw_set_dest(insn, dest); 811 brw_set_src0(insn, src); 812 brw_set_math_message(insn, 813 msg_length, response_length, 814 function, 815 BRW_MATH_INTEGER_UNSIGNED, 816 precision, 817 saturate, 818 BRW_MATH_DATA_VECTOR); 819 820 /* Second instruction: 821 */ 822 insn = next_insn(p, BRW_OPCODE_SEND); 823 insn->header.compression_control = BRW_COMPRESSION_2NDHALF; 824 insn->header.destreg__conditonalmod = msg_reg_nr+1; 825 826 brw_set_dest(insn, offset(dest,1)); 827 brw_set_src0(insn, src); 828 brw_set_math_message(insn, 829 msg_length, response_length, 830 function, 831 BRW_MATH_INTEGER_UNSIGNED, 832 precision, 833 saturate, 834 BRW_MATH_DATA_VECTOR); 835 836 brw_pop_insn_state(p); 837} 838 839 840 841 842void brw_dp_WRITE_16( struct brw_compile *p, 843 struct brw_reg src, 844 GLuint msg_reg_nr, 845 GLuint scratch_offset ) 846{ 847 { 848 brw_push_insn_state(p); 849 brw_set_mask_control(p, BRW_MASK_DISABLE); 850 brw_set_compression_control(p, 
BRW_COMPRESSION_NONE); 851 852 brw_MOV(p, 853 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), 854 brw_imm_d(scratch_offset)); 855 856 brw_pop_insn_state(p); 857 } 858 859 { 860 GLuint msg_length = 3; 861 struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); 862 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 863 864 insn->header.predicate_control = 0; /* XXX */ 865 insn->header.compression_control = BRW_COMPRESSION_NONE; 866 insn->header.destreg__conditonalmod = msg_reg_nr; 867 868 brw_set_dest(insn, dest); 869 brw_set_src0(insn, src); 870 871 brw_set_dp_write_message(insn, 872 255, /* bti */ 873 BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */ 874 BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */ 875 msg_length, 876 0, /* pixel scoreboard */ 877 0, /* response_length */ 878 0); /* eot */ 879 } 880 881} 882 883 884void brw_dp_READ_16( struct brw_compile *p, 885 struct brw_reg dest, 886 GLuint msg_reg_nr, 887 GLuint scratch_offset ) 888{ 889 { 890 brw_push_insn_state(p); 891 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 892 brw_set_mask_control(p, BRW_MASK_DISABLE); 893 894 brw_MOV(p, 895 retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), 896 brw_imm_d(scratch_offset)); 897 898 brw_pop_insn_state(p); 899 } 900 901 { 902 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 903 904 insn->header.predicate_control = 0; /* XXX */ 905 insn->header.compression_control = BRW_COMPRESSION_NONE; 906 insn->header.destreg__conditonalmod = msg_reg_nr; 907 908 brw_set_dest(insn, dest); /* UW? 
*/ 909 brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); 910 911 brw_set_dp_read_message(insn, 912 255, /* bti */ 913 3, /* msg_control */ 914 BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ 915 1, /* target cache */ 916 1, /* msg_length */ 917 2, /* response_length */ 918 0); /* eot */ 919 } 920} 921 922 923void brw_fb_WRITE(struct brw_compile *p, 924 struct brw_reg dest, 925 GLuint msg_reg_nr, 926 struct brw_reg src0, 927 GLuint binding_table_index, 928 GLuint msg_length, 929 GLuint response_length, 930 GLboolean eot) 931{ 932 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 933 934 insn->header.predicate_control = 0; /* XXX */ 935 insn->header.compression_control = BRW_COMPRESSION_NONE; 936 insn->header.destreg__conditonalmod = msg_reg_nr; 937 938 brw_set_dest(insn, dest); 939 brw_set_src0(insn, src0); 940 brw_set_dp_write_message(insn, 941 binding_table_index, 942 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */ 943 BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */ 944 msg_length, 945 1, /* pixel scoreboard */ 946 response_length, 947 eot); 948} 949 950 951 952void brw_SAMPLE(struct brw_compile *p, 953 struct brw_reg dest, 954 GLuint msg_reg_nr, 955 struct brw_reg src0, 956 GLuint binding_table_index, 957 GLuint sampler, 958 GLuint writemask, 959 GLuint msg_type, 960 GLuint response_length, 961 GLuint msg_length, 962 GLboolean eot) 963{ 964 GLboolean need_stall = 0; 965 966 if(writemask == 0) { 967/* _mesa_printf("%s: zero writemask??\n", __FUNCTION__); */ 968 return; 969 } 970 971 /* Hardware doesn't do destination dependency checking on send 972 * instructions properly. Add a workaround which generates the 973 * dependency by other means. In practice it seems like this bug 974 * only crops up for texture samples, and only where registers are 975 * written by the send and then written again later without being 976 * read in between. 
Luckily for us, we already track that 977 * information and use it to modify the writemask for the 978 * instruction, so that is a guide for whether a workaround is 979 * needed. 980 */ 981 if (writemask != WRITEMASK_XYZW) { 982 GLuint dst_offset = 0; 983 GLuint i, newmask = 0, len = 0; 984 985 for (i = 0; i < 4; i++) { 986 if (writemask & (1<<i)) 987 break; 988 dst_offset += 2; 989 } 990 for (; i < 4; i++) { 991 if (!(writemask & (1<<i))) 992 break; 993 newmask |= 1<<i; 994 len++; 995 } 996 997 if (newmask != writemask) { 998 need_stall = 1; 999/* _mesa_printf("need stall %x %x\n", newmask , writemask); */ 1000 } 1001 else { 1002 struct brw_reg m1 = brw_message_reg(msg_reg_nr); 1003 1004 newmask = ~newmask & WRITEMASK_XYZW; 1005 1006 brw_push_insn_state(p); 1007 1008 brw_set_compression_control(p, BRW_COMPRESSION_NONE); 1009 brw_set_mask_control(p, BRW_MASK_DISABLE); 1010 1011 brw_MOV(p, m1, brw_vec8_grf(0,0)); 1012 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12)); 1013 1014 brw_pop_insn_state(p); 1015 1016 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); 1017 dest = offset(dest, dst_offset); 1018 response_length = len * 2; 1019 } 1020 } 1021 1022 { 1023 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1024 1025 insn->header.predicate_control = 0; /* XXX */ 1026 insn->header.compression_control = BRW_COMPRESSION_NONE; 1027 insn->header.destreg__conditonalmod = msg_reg_nr; 1028 1029 brw_set_dest(insn, dest); 1030 brw_set_src0(insn, src0); 1031 brw_set_sampler_message(p->brw, insn, 1032 binding_table_index, 1033 sampler, 1034 msg_type, 1035 response_length, 1036 msg_length, 1037 eot); 1038 } 1039 1040 if (need_stall) 1041 { 1042 struct brw_reg reg = vec8(offset(dest, response_length-1)); 1043 1044 /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 } 1045 */ 1046 brw_push_insn_state(p); 1047 brw_set_compression_control(p, GL_FALSE); 1048 brw_MOV(p, reg, reg); 1049 brw_pop_insn_state(p); 1050 } 1051 1052} 1053 1054/* All these variables are 
pretty confusing - we might be better off 1055 * using bitmasks and macros for this, in the old style. Or perhaps 1056 * just having the caller instantiate the fields in dword3 itself. 1057 */ 1058void brw_urb_WRITE(struct brw_compile *p, 1059 struct brw_reg dest, 1060 GLuint msg_reg_nr, 1061 struct brw_reg src0, 1062 GLboolean allocate, 1063 GLboolean used, 1064 GLuint msg_length, 1065 GLuint response_length, 1066 GLboolean eot, 1067 GLboolean writes_complete, 1068 GLuint offset, 1069 GLuint swizzle) 1070{ 1071 struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); 1072 1073 assert(msg_length < 16); 1074 1075 brw_set_dest(insn, dest); 1076 brw_set_src0(insn, src0); 1077 brw_set_src1(insn, brw_imm_d(0)); 1078 1079 insn->header.destreg__conditonalmod = msg_reg_nr; 1080 1081 brw_set_urb_message(insn, 1082 allocate, 1083 used, 1084 msg_length, 1085 response_length, 1086 eot, 1087 writes_complete, 1088 offset, 1089 swizzle); 1090} 1091 1092