radeon_optimize.c revision e945fb04d04c33da5e77d22d739c5740a522a61e
1/* 2 * Copyright (C) 2009 Nicolai Haehnle. 3 * Copyright 2010 Tom Stellard <tstellar@gmail.com> 4 * 5 * All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining 8 * a copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sublicense, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial 17 * portions of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 22 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 23 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 24 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 25 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 */ 28 29#include "radeon_dataflow.h" 30 31#include "radeon_compiler.h" 32#include "radeon_compiler_util.h" 33#include "radeon_list.h" 34#include "radeon_swizzle.h" 35#include "radeon_variable.h" 36 37struct src_clobbered_reads_cb_data { 38 rc_register_file File; 39 unsigned int Index; 40 unsigned int Mask; 41 struct rc_reader_data * ReaderData; 42}; 43 44typedef void (*rc_presub_replace_fn)(struct rc_instruction *, 45 struct rc_instruction *, 46 unsigned int); 47 48static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner) 49{ 50 struct rc_src_register combine; 51 combine.File = inner.File; 52 combine.Index = inner.Index; 53 combine.RelAddr = inner.RelAddr; 54 if (outer.Abs) { 55 combine.Abs = 1; 56 combine.Negate = outer.Negate; 57 } else { 58 combine.Abs = inner.Abs; 59 combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate); 60 combine.Negate ^= outer.Negate; 61 } 62 combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle); 63 return combine; 64} 65 66static void copy_propagate_scan_read(void * data, struct rc_instruction * inst, 67 struct rc_src_register * src) 68{ 69 rc_register_file file = src->File; 70 struct rc_reader_data * reader_data = data; 71 72 if(!rc_inst_can_use_presub(inst, 73 reader_data->Writer->U.I.PreSub.Opcode, 74 rc_swizzle_to_writemask(src->Swizzle), 75 src, 76 &reader_data->Writer->U.I.PreSub.SrcReg[0], 77 &reader_data->Writer->U.I.PreSub.SrcReg[1])) { 78 reader_data->Abort = 1; 79 return; 80 } 81 82 /* XXX This could probably be handled better. */ 83 if (file == RC_FILE_ADDRESS) { 84 reader_data->Abort = 1; 85 return; 86 } 87 88 /* These instructions cannot read from the constants file. 89 * see radeonTransformTEX() 90 */ 91 if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && 92 reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT && 93 (inst->U.I.Opcode == RC_OPCODE_TEX || 94 inst->U.I.Opcode == RC_OPCODE_TXB || 95 inst->U.I.Opcode == RC_OPCODE_TXP || 96 inst->U.I.Opcode == RC_OPCODE_TXD || 97 inst->U.I.Opcode == RC_OPCODE_TXL || 98 inst->U.I.Opcode == RC_OPCODE_KIL)){ 99 reader_data->Abort = 1; 100 return; 101 } 102} 103 104static void src_clobbered_reads_cb( 105 void * data, 106 struct rc_instruction * inst, 107 struct rc_src_register * src) 108{ 109 struct src_clobbered_reads_cb_data * sc_data = data; 110 111 if (src->File == sc_data->File 112 && src->Index == sc_data->Index 113 && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) { 114 115 sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; 116 } 117 118 if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) { 119 sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; 120 } 121} 122 123static void is_src_clobbered_scan_write( 124 void * data, 125 struct rc_instruction * inst, 126 rc_register_file file, 127 unsigned int index, 128 unsigned int mask) 129{ 130 struct src_clobbered_reads_cb_data sc_data; 131 struct rc_reader_data * reader_data = data; 132 sc_data.File = file; 133 sc_data.Index = index; 134 sc_data.Mask = mask; 135 sc_data.ReaderData = reader_data; 136 rc_for_all_reads_src(reader_data->Writer, 137 src_clobbered_reads_cb, &sc_data); 138} 139 140static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov) 141{ 142 struct rc_reader_data reader_data; 143 unsigned int i; 144 145 if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || 146 inst_mov->U.I.WriteALUResult || 147 inst_mov->U.I.SaturateMode) 148 return; 149 150 /* Get a list of all the readers of this MOV instruction. */ 151 reader_data.ExitOnAbort = 1; 152 rc_get_readers(c, inst_mov, &reader_data, 153 copy_propagate_scan_read, NULL, 154 is_src_clobbered_scan_write); 155 156 if (reader_data.Abort || reader_data.ReaderCount == 0) 157 return; 158 159 /* Propagate the MOV instruction. */ 160 for (i = 0; i < reader_data.ReaderCount; i++) { 161 struct rc_instruction * inst = reader_data.Readers[i].Inst; 162 *reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]); 163 164 if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB) 165 inst->U.I.PreSub = inst_mov->U.I.PreSub; 166 } 167 168 /* Finally, remove the original MOV instruction */ 169 rc_remove_instruction(inst_mov); 170} 171 172/** 173 * Check if a source register is actually always the same 174 * swizzle constant. 175 */ 176static int is_src_uniform_constant(struct rc_src_register src, 177 rc_swizzle * pswz, unsigned int * pnegate) 178{ 179 int have_used = 0; 180 181 if (src.File != RC_FILE_NONE) { 182 *pswz = 0; 183 return 0; 184 } 185 186 for(unsigned int chan = 0; chan < 4; ++chan) { 187 unsigned int swz = GET_SWZ(src.Swizzle, chan); 188 if (swz < 4) { 189 *pswz = 0; 190 return 0; 191 } 192 if (swz == RC_SWIZZLE_UNUSED) 193 continue; 194 195 if (!have_used) { 196 *pswz = swz; 197 *pnegate = GET_BIT(src.Negate, chan); 198 have_used = 1; 199 } else { 200 if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) { 201 *pswz = 0; 202 return 0; 203 } 204 } 205 } 206 207 return 1; 208} 209 210static void constant_folding_mad(struct rc_instruction * inst) 211{ 212 rc_swizzle swz = 0; 213 unsigned int negate= 0; 214 215 if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) { 216 if (swz == RC_SWIZZLE_ZERO) { 217 inst->U.I.Opcode = RC_OPCODE_MUL; 218 return; 219 } 220 } 221 222 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { 223 if (swz == RC_SWIZZLE_ONE) { 224 inst->U.I.Opcode = RC_OPCODE_ADD; 225 if (negate) 226 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; 227 inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2]; 228 return; 229 } else if (swz == RC_SWIZZLE_ZERO) { 230 inst->U.I.Opcode = RC_OPCODE_MOV; 231 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; 232 return; 233 } 234 } 235 236 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { 237 if (swz == RC_SWIZZLE_ONE) { 238 inst->U.I.Opcode = RC_OPCODE_ADD; 239 if (negate) 240 inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; 241 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; 242 return; 243 } else if (swz == RC_SWIZZLE_ZERO) { 244 inst->U.I.Opcode = RC_OPCODE_MOV; 245 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; 246 return; 247 } 248 } 249} 250 251static void constant_folding_mul(struct rc_instruction * inst) 252{ 253 rc_swizzle swz = 0; 254 unsigned int negate = 0; 255 256 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { 257 if (swz == RC_SWIZZLE_ONE) { 258 inst->U.I.Opcode = RC_OPCODE_MOV; 259 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; 260 if (negate) 261 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; 262 return; 263 } else if (swz == RC_SWIZZLE_ZERO) { 264 inst->U.I.Opcode = RC_OPCODE_MOV; 265 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; 266 return; 267 } 268 } 269 270 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { 271 if (swz == RC_SWIZZLE_ONE) { 272 inst->U.I.Opcode = RC_OPCODE_MOV; 273 if (negate) 274 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; 275 return; 276 } else if (swz == RC_SWIZZLE_ZERO) { 277 inst->U.I.Opcode = RC_OPCODE_MOV; 278 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; 279 return; 280 } 281 } 282} 283 284static void constant_folding_add(struct rc_instruction * inst) 285{ 286 rc_swizzle swz = 0; 287 unsigned int negate = 0; 288 289 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { 290 if (swz == RC_SWIZZLE_ZERO) { 291 inst->U.I.Opcode = RC_OPCODE_MOV; 292 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; 293 return; 294 } 295 } 296 297 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { 298 if (swz == RC_SWIZZLE_ZERO) { 299 inst->U.I.Opcode = RC_OPCODE_MOV; 300 return; 301 } 302 } 303} 304 305/** 306 * Replace 0.0, 1.0 and 0.5 immediate constants by their 307 * respective swizzles. Simplify instructions like ADD dst, src, 0; 308 */ 309static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst) 310{ 311 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); 312 unsigned int i; 313 314 /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */ 315 for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { 316 struct rc_constant * constant; 317 struct rc_src_register newsrc; 318 int have_real_reference; 319 unsigned int chan; 320 321 /* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */ 322 for (chan = 0; chan < 4; ++chan) 323 if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3) 324 break; 325 if (chan == 4) { 326 inst->U.I.SrcReg[src].File = RC_FILE_NONE; 327 continue; 328 } 329 330 /* Convert immediates to swizzles. */ 331 if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT || 332 inst->U.I.SrcReg[src].RelAddr || 333 inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count) 334 continue; 335 336 constant = 337 &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index]; 338 339 if (constant->Type != RC_CONSTANT_IMMEDIATE) 340 continue; 341 342 newsrc = inst->U.I.SrcReg[src]; 343 have_real_reference = 0; 344 for (chan = 0; chan < 4; ++chan) { 345 unsigned int swz = GET_SWZ(newsrc.Swizzle, chan); 346 unsigned int newswz; 347 float imm; 348 float baseimm; 349 350 if (swz >= 4) 351 continue; 352 353 imm = constant->u.Immediate[swz]; 354 baseimm = imm; 355 if (imm < 0.0) 356 baseimm = -baseimm; 357 358 if (baseimm == 0.0) { 359 newswz = RC_SWIZZLE_ZERO; 360 } else if (baseimm == 1.0) { 361 newswz = RC_SWIZZLE_ONE; 362 } else if (baseimm == 0.5 && c->has_half_swizzles) { 363 newswz = RC_SWIZZLE_HALF; 364 } else { 365 have_real_reference = 1; 366 continue; 367 } 368 369 SET_SWZ(newsrc.Swizzle, chan, newswz); 370 if (imm < 0.0 && !newsrc.Abs) 371 newsrc.Negate ^= 1 << chan; 372 } 373 374 if (!have_real_reference) { 375 newsrc.File = RC_FILE_NONE; 376 newsrc.Index = 0; 377 } 378 379 /* don't make the swizzle worse */ 380 if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) && 381 c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src])) 382 continue; 383 384 inst->U.I.SrcReg[src] = newsrc; 385 } 386 387 /* Simplify instructions based on constants */ 388 if (inst->U.I.Opcode == RC_OPCODE_MAD) 389 constant_folding_mad(inst); 390 391 /* note: MAD can simplify to MUL or ADD */ 392 if (inst->U.I.Opcode == RC_OPCODE_MUL) 393 constant_folding_mul(inst); 394 else if (inst->U.I.Opcode == RC_OPCODE_ADD) 395 constant_folding_add(inst); 396 397 /* In case this instruction has been converted, make sure all of the 398 * registers that are no longer used are empty. */ 399 opcode = rc_get_opcode_info(inst->U.I.Opcode); 400 for(i = opcode->NumSrcRegs; i < 3; i++) { 401 memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register)); 402 } 403} 404 405/** 406 * If src and dst use the same register, this function returns a writemask that 407 * indicates wich components are read by src. Otherwise zero is returned. 408 */ 409static unsigned int src_reads_dst_mask(struct rc_src_register src, 410 struct rc_dst_register dst) 411{ 412 if (dst.File != src.File || dst.Index != src.Index) { 413 return 0; 414 } 415 return rc_swizzle_to_writemask(src.Swizzle); 416} 417 418/* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0) 419 * in any of its channels. Return 0 otherwise. */ 420static int src_has_const_swz(struct rc_src_register src) { 421 int chan; 422 for(chan = 0; chan < 4; chan++) { 423 unsigned int swz = GET_SWZ(src.Swizzle, chan); 424 if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF 425 || swz == RC_SWIZZLE_ONE) { 426 return 1; 427 } 428 } 429 return 0; 430} 431 432static void presub_scan_read( 433 void * data, 434 struct rc_instruction * inst, 435 struct rc_src_register * src) 436{ 437 struct rc_reader_data * reader_data = data; 438 rc_presubtract_op * presub_opcode = reader_data->CbData; 439 440 if (!rc_inst_can_use_presub(inst, *presub_opcode, 441 reader_data->Writer->U.I.DstReg.WriteMask, 442 src, 443 &reader_data->Writer->U.I.SrcReg[0], 444 &reader_data->Writer->U.I.SrcReg[1])) { 445 reader_data->Abort = 1; 446 return; 447 } 448} 449 450static int presub_helper( 451 struct radeon_compiler * c, 452 struct rc_instruction * inst_add, 453 rc_presubtract_op presub_opcode, 454 rc_presub_replace_fn presub_replace) 455{ 456 struct rc_reader_data reader_data; 457 unsigned int i; 458 rc_presubtract_op cb_op = presub_opcode; 459 460 reader_data.CbData = &cb_op; 461 reader_data.ExitOnAbort = 1; 462 rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL, 463 is_src_clobbered_scan_write); 464 465 if (reader_data.Abort || reader_data.ReaderCount == 0) 466 return 0; 467 468 for(i = 0; i < reader_data.ReaderCount; i++) { 469 unsigned int src_index; 470 struct rc_reader reader = reader_data.Readers[i]; 471 const struct rc_opcode_info * info = 472 rc_get_opcode_info(reader.Inst->U.I.Opcode); 473 474 for (src_index = 0; src_index < info->NumSrcRegs; src_index++) { 475 if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.I.Src) 476 presub_replace(inst_add, reader.Inst, src_index); 477 } 478 } 479 return 1; 480} 481 482/* This function assumes that inst_add->U.I.SrcReg[0] and 483 * inst_add->U.I.SrcReg[1] aren't both negative. */ 484static void presub_replace_add( 485 struct rc_instruction * inst_add, 486 struct rc_instruction * inst_reader, 487 unsigned int src_index) 488{ 489 rc_presubtract_op presub_opcode; 490 if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate) 491 presub_opcode = RC_PRESUB_SUB; 492 else 493 presub_opcode = RC_PRESUB_ADD; 494 495 if (inst_add->U.I.SrcReg[1].Negate) { 496 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; 497 inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0]; 498 } else { 499 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0]; 500 inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1]; 501 } 502 inst_reader->U.I.PreSub.SrcReg[0].Negate = 0; 503 inst_reader->U.I.PreSub.SrcReg[1].Negate = 0; 504 inst_reader->U.I.PreSub.Opcode = presub_opcode; 505 inst_reader->U.I.SrcReg[src_index] = 506 chain_srcregs(inst_reader->U.I.SrcReg[src_index], 507 inst_reader->U.I.PreSub.SrcReg[0]); 508 inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; 509 inst_reader->U.I.SrcReg[src_index].Index = presub_opcode; 510} 511 512static int is_presub_candidate( 513 struct radeon_compiler * c, 514 struct rc_instruction * inst) 515{ 516 const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode); 517 unsigned int i; 518 unsigned int is_constant[2] = {0, 0}; 519 520 assert(inst->U.I.Opcode == RC_OPCODE_ADD); 521 522 if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE 523 || inst->U.I.SaturateMode 524 || inst->U.I.WriteALUResult 525 || inst->U.I.Omod) { 526 return 0; 527 } 528 529 /* If both sources use a constant swizzle, then we can't convert it to 530 * a presubtract operation. In fact for the ADD and SUB presubtract 531 * operations neither source can contain a constant swizzle. This 532 * specific case is checked in peephole_add_presub_add() when 533 * we make sure the swizzles for both sources are equal, so we 534 * don't need to worry about it here. */ 535 for (i = 0; i < 2; i++) { 536 int chan; 537 for (chan = 0; chan < 4; chan++) { 538 rc_swizzle swz = 539 get_swz(inst->U.I.SrcReg[i].Swizzle, chan); 540 if (swz == RC_SWIZZLE_ONE 541 || swz == RC_SWIZZLE_ZERO 542 || swz == RC_SWIZZLE_HALF) { 543 is_constant[i] = 1; 544 } 545 } 546 } 547 if (is_constant[0] && is_constant[1]) 548 return 0; 549 550 for(i = 0; i < info->NumSrcRegs; i++) { 551 struct rc_src_register src = inst->U.I.SrcReg[i]; 552 if (src_reads_dst_mask(src, inst->U.I.DstReg)) 553 return 0; 554 555 src.File = RC_FILE_PRESUB; 556 if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src)) 557 return 0; 558 } 559 return 1; 560} 561 562static int peephole_add_presub_add( 563 struct radeon_compiler * c, 564 struct rc_instruction * inst_add) 565{ 566 unsigned dstmask = inst_add->U.I.DstReg.WriteMask; 567 unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask; 568 unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask; 569 570 if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle) 571 return 0; 572 573 /* src0 and src1 can't have absolute values */ 574 if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs) 575 return 0; 576 577 /* presub_replace_add() assumes only one is negative */ 578 if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate) 579 return 0; 580 581 /* if src0 is negative, at least all bits of dstmask have to be set */ 582 if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask) 583 return 0; 584 585 /* if src1 is negative, at least all bits of dstmask have to be set */ 586 if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask) 587 return 0; 588 589 if (!is_presub_candidate(c, inst_add)) 590 return 0; 591 592 if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) { 593 rc_remove_instruction(inst_add); 594 return 1; 595 } 596 return 0; 597} 598 599static void presub_replace_inv( 600 struct rc_instruction * inst_add, 601 struct rc_instruction * inst_reader, 602 unsigned int src_index) 603{ 604 /* We must be careful not to modify inst_add, since it 605 * is possible it will remain part of the program.*/ 606 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; 607 inst_reader->U.I.PreSub.SrcReg[0].Negate = 0; 608 inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV; 609 inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index], 610 inst_reader->U.I.PreSub.SrcReg[0]); 611 612 inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; 613 inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV; 614} 615 616/** 617 * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1] 618 * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source 619 * of the add instruction must have the constatnt 1 swizzle. This function 620 * does not check const registers to see if their value is 1.0, so it should 621 * be called after the constant_folding optimization. 622 * @return 623 * 0 if the ADD instruction is still part of the program. 624 * 1 if the ADD instruction is no longer part of the program. 625 */ 626static int peephole_add_presub_inv( 627 struct radeon_compiler * c, 628 struct rc_instruction * inst_add) 629{ 630 unsigned int i, swz; 631 632 if (!is_presub_candidate(c, inst_add)) 633 return 0; 634 635 /* Check if src0 is 1. */ 636 /* XXX It would be nice to use is_src_uniform_constant here, but that 637 * function only works if the register's file is RC_FILE_NONE */ 638 for(i = 0; i < 4; i++ ) { 639 swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i); 640 if(((1 << i) & inst_add->U.I.DstReg.WriteMask) 641 && swz != RC_SWIZZLE_ONE) { 642 return 0; 643 } 644 } 645 646 /* Check src1. */ 647 if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) != 648 inst_add->U.I.DstReg.WriteMask 649 || inst_add->U.I.SrcReg[1].Abs 650 || (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY 651 && inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT) 652 || src_has_const_swz(inst_add->U.I.SrcReg[1])) { 653 654 return 0; 655 } 656 657 if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) { 658 rc_remove_instruction(inst_add); 659 return 1; 660 } 661 return 0; 662} 663 664struct peephole_mul_cb_data { 665 struct rc_dst_register * Writer; 666 unsigned int Clobbered; 667}; 668 669static void omod_filter_reader_cb( 670 void * userdata, 671 struct rc_instruction * inst, 672 rc_register_file file, 673 unsigned int index, 674 unsigned int mask) 675{ 676 struct peephole_mul_cb_data * d = userdata; 677 if (rc_src_reads_dst_mask(file, mask, index, 678 d->Writer->File, d->Writer->Index, d->Writer->WriteMask)) { 679 680 d->Clobbered = 1; 681 } 682} 683 684static int peephole_mul_omod( 685 struct radeon_compiler * c, 686 struct rc_instruction * inst_mul, 687 struct rc_list * var_list) 688{ 689 unsigned int chan, swz, i; 690 int const_index = -1; 691 int temp_index = -1; 692 float const_value; 693 rc_omod_op omod_op = RC_OMOD_DISABLE; 694 struct rc_list * writer_list; 695 struct rc_variable * var; 696 struct peephole_mul_cb_data cb_data; 697 698 for (i = 0; i < 2; i++) { 699 unsigned int j; 700 if (inst_mul->U.I.SrcReg[i].File != RC_FILE_CONSTANT 701 && inst_mul->U.I.SrcReg[i].File != RC_FILE_TEMPORARY) { 702 return 0; 703 } 704 if (inst_mul->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { 705 if (temp_index != -1) { 706 /* The instruction has two temp sources */ 707 return 0; 708 } else { 709 temp_index = i; 710 continue; 711 } 712 } 713 /* If we get this far Src[i] must be a constant src */ 714 if (inst_mul->U.I.SrcReg[i].Negate) { 715 return 0; 716 } 717 /* The constant src needs to read from the same swizzle */ 718 swz = RC_SWIZZLE_UNUSED; 719 chan = 0; 720 for (j = 0; j < 4; j++) { 721 unsigned int j_swz = 722 GET_SWZ(inst_mul->U.I.SrcReg[i].Swizzle, j); 723 if (j_swz == RC_SWIZZLE_UNUSED) { 724 continue; 725 } 726 if (swz == RC_SWIZZLE_UNUSED) { 727 swz = j_swz; 728 chan = j; 729 } else if (j_swz != swz) { 730 return 0; 731 } 732 } 733 734 if (const_index != -1) { 735 /* The instruction has two constant sources */ 736 return 0; 737 } else { 738 const_index = i; 739 } 740 } 741 742 if (!rc_src_reg_is_immediate(c, inst_mul->U.I.SrcReg[const_index].File, 743 inst_mul->U.I.SrcReg[const_index].Index)) { 744 return 0; 745 } 746 const_value = rc_get_constant_value(c, 747 inst_mul->U.I.SrcReg[const_index].Index, 748 inst_mul->U.I.SrcReg[const_index].Swizzle, 749 inst_mul->U.I.SrcReg[const_index].Negate, 750 chan); 751 752 if (const_value == 2.0f) { 753 omod_op = RC_OMOD_MUL_2; 754 } else if (const_value == 4.0f) { 755 omod_op = RC_OMOD_MUL_4; 756 } else if (const_value == 8.0f) { 757 omod_op = RC_OMOD_MUL_8; 758 } else if (const_value == (1.0f / 2.0f)) { 759 omod_op = RC_OMOD_DIV_2; 760 } else if (const_value == (1.0f / 4.0f)) { 761 omod_op = RC_OMOD_DIV_4; 762 } else if (const_value == (1.0f / 8.0f)) { 763 omod_op = RC_OMOD_DIV_8; 764 } else { 765 return 0; 766 } 767 768 writer_list = rc_variable_list_get_writers_one_reader(var_list, 769 RC_INSTRUCTION_NORMAL, &inst_mul->U.I.SrcReg[temp_index]); 770 771 if (!writer_list) { 772 return 0; 773 } 774 775 cb_data.Clobbered = 0; 776 cb_data.Writer = &inst_mul->U.I.DstReg; 777 for (var = writer_list->Item; var; var = var->Friend) { 778 struct rc_instruction * inst; 779 const struct rc_opcode_info * info = rc_get_opcode_info( 780 var->Inst->U.I.Opcode); 781 if (info->HasTexture) { 782 return 0; 783 } 784 if (var->Inst->U.I.SaturateMode != RC_SATURATE_NONE) { 785 return 0; 786 } 787 for (inst = inst_mul->Prev; inst != var->Inst; 788 inst = inst->Prev) { 789 rc_for_all_reads_mask(inst, omod_filter_reader_cb, 790 &cb_data); 791 if (cb_data.Clobbered) { 792 break; 793 } 794 } 795 } 796 797 if (cb_data.Clobbered) { 798 return 0; 799 } 800 801 /* Rewrite the instructions */ 802 for (var = writer_list->Item; var; var = var->Friend) { 803 struct rc_variable * writer = writer_list->Item; 804 writer->Inst->U.I.Omod = omod_op; 805 writer->Inst->U.I.DstReg = inst_mul->U.I.DstReg; 806 writer->Inst->U.I.SaturateMode = inst_mul->U.I.SaturateMode; 807 } 808 809 rc_remove_instruction(inst_mul); 810 811 return 1; 812} 813 814/** 815 * @return 816 * 0 if inst is still part of the program. 817 * 1 if inst is no longer part of the program. 818 */ 819static int peephole(struct radeon_compiler * c, struct rc_instruction * inst) 820{ 821 switch(inst->U.I.Opcode){ 822 case RC_OPCODE_ADD: 823 if (c->has_presub) { 824 if(peephole_add_presub_inv(c, inst)) 825 return 1; 826 if(peephole_add_presub_add(c, inst)) 827 return 1; 828 } 829 break; 830 default: 831 break; 832 } 833 return 0; 834} 835 836void rc_optimize(struct radeon_compiler * c, void *user) 837{ 838 struct rc_instruction * inst = c->Program.Instructions.Next; 839 struct rc_list * var_list; 840 while(inst != &c->Program.Instructions) { 841 struct rc_instruction * cur = inst; 842 inst = inst->Next; 843 844 constant_folding(c, cur); 845 846 if(peephole(c, cur)) 847 continue; 848 849 if (cur->U.I.Opcode == RC_OPCODE_MOV) { 850 copy_propagate(c, cur); 851 /* cur may no longer be part of the program */ 852 } 853 } 854 855 if (!c->has_omod) { 856 return; 857 } 858 859 inst = c->Program.Instructions.Next; 860 while(inst != &c->Program.Instructions) { 861 struct rc_instruction * cur = inst; 862 inst = inst->Next; 863 if (cur->U.I.Opcode == RC_OPCODE_MUL) { 864 var_list = rc_get_variables(c); 865 peephole_mul_omod(c, cur, var_list); 866 } 867 } 868} 869