/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */

/*
 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "freedreno_util.h"

#include "ir3.h"
#include "ir3_shader.h"

/*
 * Copy Propagate:
 *
 * Walks the instruction graph depth-first from the shader outputs (and
 * other roots: keeps, block conditions, address registers), collapsing
 * eligible mov instructions by rewriting each consumer's src to point
 * directly at the mov's source.  Immediates that cannot be encoded
 * directly are lowered into the const file (see lower_immed()).
 */

/* Per-run state for the copy-propagation pass. */
struct ir3_cp_ctx {
	struct ir3 *shader;
	struct ir3_shader_variant *so;
	/* number of immediate uniform slots allocated so far (flat index,
	 * 4 slots per vec4 const):
	 */
	unsigned immediate_idx;
};

/* is it a type preserving mov, with ok flags?
 *
 * If allow_flags is false, a mov whose src carries any abs/neg/not
 * modifier is rejected (used for "output" movs, where the consumer
 * cannot absorb the modifier flags).
 */
static bool is_eligible_mov(struct ir3_instruction *instr, bool allow_flags)
{
	if (is_same_type_mov(instr)) {
		struct ir3_register *dst = instr->regs[0];
		struct ir3_register *src = instr->regs[1];
		struct ir3_instruction *src_instr = ssa(src);

		/* only if mov src is SSA (not const/immed): */
		if (!src_instr)
			return false;

		/* no indirect: */
		if (dst->flags & IR3_REG_RELATIV)
			return false;
		if (src->flags & IR3_REG_RELATIV)
			return false;

		if (!allow_flags)
			if (src->flags & (IR3_REG_FABS | IR3_REG_FNEG |
					IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT))
				return false;

		/* TODO: remove this hack: */
		if (src_instr->opc == OPC_META_FO)
			return false;
		/* TODO: we currently don't handle left/right neighbors
		 * very well when inserting parallel-copies into phi..
		 * to avoid problems don't eliminate a mov coming out
		 * of phi..
		 */
		if (src_instr->opc == OPC_META_PHI)
			return false;
		return true;
	}
	return false;
}

/* Mask down to the register flags that copy-propagation cares about. */
static unsigned cp_flags(unsigned flags)
{
	/* only considering these flags (at least for now): */
	flags &= (IR3_REG_CONST | IR3_REG_IMMED |
			IR3_REG_FNEG | IR3_REG_FABS |
			IR3_REG_SNEG | IR3_REG_SABS |
			IR3_REG_BNOT | IR3_REG_RELATIV);
	return flags;
}

/* Can the n'th src of instr legally carry the given register flags?
 * The legal combinations depend on the instruction category (each
 * category encodes const/immed/abs/neg differently), and in some
 * cases on what flags the *other* srcs already carry.
 */
static bool valid_flags(struct ir3_instruction *instr, unsigned n,
		unsigned flags)
{
	unsigned valid_flags;
	flags = cp_flags(flags);

	/* If destination is indirect, then source cannot be.. at least
	 * I don't think so..
	 */
	if ((instr->regs[0]->flags & IR3_REG_RELATIV) &&
			(flags & IR3_REG_RELATIV))
		return false;

	/* TODO it seems to *mostly* work to cp RELATIV, except we get some
	 * intermittent piglit variable-indexing fails.  Newer blob driver
	 * doesn't seem to cp these.  Possibly this is hw workaround?  Not
	 * sure, but until that is understood better, lets just switch off
	 * cp for indirect src's:
	 */
	if (flags & IR3_REG_RELATIV)
		return false;

	/* clear flags that are 'ok' */
	switch (opc_cat(instr->opc)) {
	case 1:
		valid_flags = IR3_REG_IMMED | IR3_REG_CONST | IR3_REG_RELATIV;
		if (flags & ~valid_flags)
			return false;
		break;
	case 5:
		/* no flags allowed */
		if (flags)
			return false;
		break;
	case 6:
		valid_flags = IR3_REG_IMMED;
		if (flags & ~valid_flags)
			return false;

		if (flags & IR3_REG_IMMED) {
			/* doesn't seem like we can have immediate src for store
			 * instructions:
			 *
			 * TODO this restriction could also apply to load instructions,
			 * but for load instructions this arg is the address (and not
			 * really sure any good way to test a hard-coded immed addr src)
			 */
			if (is_store(instr) && (n == 1))
				return false;
		}

		break;
	case 2:
		valid_flags = ir3_cat2_absneg(instr->opc) |
				IR3_REG_CONST | IR3_REG_RELATIV;

		if (ir3_cat2_int(instr->opc))
			valid_flags |= IR3_REG_IMMED;

		if (flags & ~valid_flags)
			return false;

		if (flags & (IR3_REG_CONST | IR3_REG_IMMED)) {
			/* m is the register index of the *other* src: */
			unsigned m = (n ^ 1) + 1;
			/* cannot deal w/ const in both srcs:
			 * (note that some cat2 actually only have a single src)
			 */
			if (m < instr->regs_count) {
				struct ir3_register *reg = instr->regs[m];
				if ((flags & IR3_REG_CONST) && (reg->flags & IR3_REG_CONST))
					return false;
				if ((flags & IR3_REG_IMMED) && (reg->flags & IR3_REG_IMMED))
					return false;
			}
			/* cannot be const + ABS|NEG: */
			if (flags & (IR3_REG_FABS | IR3_REG_FNEG |
					IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT))
				return false;
		}
		break;
	case 3:
		valid_flags = ir3_cat3_absneg(instr->opc) |
				IR3_REG_CONST | IR3_REG_RELATIV;

		if (flags & ~valid_flags)
			return false;

		if (flags & (IR3_REG_CONST | IR3_REG_RELATIV)) {
			/* cannot deal w/ const/relativ in 2nd src: */
			if (n == 1)
				return false;
		}

		if (flags & IR3_REG_CONST) {
			/* cannot be const + ABS|NEG: */
			if (flags & (IR3_REG_FABS | IR3_REG_FNEG |
					IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT))
				return false;
		}
		break;
	case 4:
		/* seems like blob compiler avoids const as src.. */
		/* TODO double check if this is still the case on a4xx */
		if (flags & (IR3_REG_CONST | IR3_REG_IMMED))
			return false;
		if (flags & (IR3_REG_SABS | IR3_REG_SNEG))
			return false;
		break;
	}

	return true;
}

/* propagate register flags from src to dst.. negates need special
 * handling to cancel each other out.
 */
static void combine_flags(unsigned *dstflags, struct ir3_instruction *src)
{
	unsigned srcflags = src->regs[1]->flags;

	/* if what we are combining into already has (abs) flags,
	 * we can drop (neg) from src:
	 */
	if (*dstflags & IR3_REG_FABS)
		srcflags &= ~IR3_REG_FNEG;
	if (*dstflags & IR3_REG_SABS)
		srcflags &= ~IR3_REG_SNEG;

	if (srcflags & IR3_REG_FABS)
		*dstflags |= IR3_REG_FABS;
	if (srcflags & IR3_REG_SABS)
		*dstflags |= IR3_REG_SABS;
	/* (neg) and (not) flags toggle (XOR) so two negates cancel: */
	if (srcflags & IR3_REG_FNEG)
		*dstflags ^= IR3_REG_FNEG;
	if (srcflags & IR3_REG_SNEG)
		*dstflags ^= IR3_REG_SNEG;
	if (srcflags & IR3_REG_BNOT)
		*dstflags ^= IR3_REG_BNOT;

	*dstflags &= ~IR3_REG_SSA;
	*dstflags |= srcflags & IR3_REG_SSA;
	*dstflags |= srcflags & IR3_REG_CONST;
	*dstflags |= srcflags & IR3_REG_IMMED;
	*dstflags |= srcflags & IR3_REG_RELATIV;
	*dstflags |= srcflags & IR3_REG_ARRAY;

	/* if src of the src is boolean we can drop the (abs) since we know
	 * the source value is already a positive integer.  This cleans
	 * up the absnegs that get inserted when converting between nir and
	 * native boolean (see ir3_b2n/n2b)
	 */
	struct ir3_instruction *srcsrc = ssa(src->regs[1]);
	if (srcsrc && is_bool(srcsrc))
		*dstflags &= ~IR3_REG_SABS;
}

/* Lower an immediate into the shader variant's immediate const area,
 * reusing an existing slot if the same value is already present, and
 * return a (cloned) register referencing that const slot.  Any
 * abs/neg modifiers in new_flags are evaluated into the stored value
 * rather than kept as flags, since const + abs/neg combinations are
 * restricted (see valid_flags()).
 */
static struct ir3_register *
lower_immed(struct ir3_cp_ctx *ctx, struct ir3_register *reg, unsigned new_flags)
{
	unsigned swiz, idx, i;

	reg = ir3_reg_clone(ctx->shader, reg);

	/* in some cases, there are restrictions on (abs)/(neg) plus const..
	 * so just evaluate those and clear the flags:
	 */
	if (new_flags & IR3_REG_SABS) {
		reg->iim_val = abs(reg->iim_val);
		new_flags &= ~IR3_REG_SABS;
	}

	if (new_flags & IR3_REG_FABS) {
		reg->fim_val = fabs(reg->fim_val);
		new_flags &= ~IR3_REG_FABS;
	}

	if (new_flags & IR3_REG_SNEG) {
		reg->iim_val = -reg->iim_val;
		new_flags &= ~IR3_REG_SNEG;
	}

	if (new_flags & IR3_REG_FNEG) {
		reg->fim_val = -reg->fim_val;
		new_flags &= ~IR3_REG_FNEG;
	}

	/* Search for an existing immediate with the same bit pattern: */
	for (i = 0; i < ctx->immediate_idx; i++) {
		swiz = i % 4;
		idx  = i / 4;

		if (ctx->so->immediates[idx].val[swiz] == reg->uim_val) {
			break;
		}
	}

	if (i == ctx->immediate_idx) {
		/* need to generate a new immediate: */
		swiz = i % 4;
		idx  = i / 4;
		ctx->so->immediates[idx].val[swiz] = reg->uim_val;
		ctx->so->immediates_count = idx + 1;
		ctx->immediate_idx++;
	}

	new_flags &= ~IR3_REG_IMMED;
	new_flags |= IR3_REG_CONST;
	reg->flags = new_flags;
	/* const-file register number = flat slot + base of immediate area: */
	reg->num = i + (4 * ctx->so->constbase.immediate);

	return reg;
}

/**
 * Handle cp for a given src register.  This additionally handles
 * the cases of collapsing immediate/const (which replace the src
 * register with a non-ssa src) or collapsing mov's from relative
 * src (which needs to also fixup the address src reference by the
 * instruction).
 */
static void
reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
		struct ir3_register *reg, unsigned n)
{
	struct ir3_instruction *src = ssa(reg);

	/* don't propagate copies into a PHI, since we don't know if the
	 * src block executed:
	 */
	if (instr->opc == OPC_META_PHI)
		return;

	if (is_eligible_mov(src, true)) {
		/* simple case, no immed/const/relativ, only mov's w/ ssa src: */
		struct ir3_register *src_reg = src->regs[1];
		unsigned new_flags = reg->flags;

		combine_flags(&new_flags, src);

		if (valid_flags(instr, n, new_flags)) {
			if (new_flags & IR3_REG_ARRAY) {
				debug_assert(!(reg->flags & IR3_REG_ARRAY));
				reg->array = src_reg->array;
			}
			reg->flags = new_flags;
			reg->instr = ssa(src_reg);
		}

		src = ssa(reg);      /* could be null for IR3_REG_ARRAY case */
		if (!src)
			return;
	} else if (is_same_type_mov(src) &&
			/* cannot collapse const/immed/etc into meta instrs: */
			!is_meta(instr)) {
		/* immed/const/etc cases, which require some special handling: */
		struct ir3_register *src_reg = src->regs[1];
		unsigned new_flags = reg->flags;

		combine_flags(&new_flags, src);

		if (!valid_flags(instr, n, new_flags)) {
			/* See if lowering an immediate to const would help. */
			if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) {
				debug_assert(new_flags & IR3_REG_IMMED);
				instr->regs[n + 1] = lower_immed(ctx, src_reg, new_flags);
				return;
			}

			/* special case for "normal" mad instructions, we can
			 * try swapping the first two args if that fits better.
			 *
			 * the "plain" MAD's (ie. the ones that don't shift first
			 * src prior to multiply) can swap their first two srcs if
			 * src[0] is !CONST and src[1] is CONST:
			 */
			if ((n == 1) && is_mad(instr->opc) &&
					!(instr->regs[0 + 1]->flags & (IR3_REG_CONST | IR3_REG_RELATIV)) &&
					valid_flags(instr, 0, new_flags)) {
				/* swap src[0] and src[1]: */
				struct ir3_register *tmp;
				tmp = instr->regs[0 + 1];
				instr->regs[0 + 1] = instr->regs[1 + 1];
				instr->regs[1 + 1] = tmp;
				/* fall through below, now operating on src[0]: */
				n = 0;
			} else {
				return;
			}
		}

		/* Here we handle the special case of mov from
		 * CONST and/or RELATIV.  These need to be handled
		 * specially, because in the case of move from CONST
		 * there is no src ir3_instruction so we need to
		 * replace the ir3_register.  And in the case of
		 * RELATIV we need to handle the address register
		 * dependency.
		 */
		if (src_reg->flags & IR3_REG_CONST) {
			/* an instruction cannot reference two different
			 * address registers:
			 */
			if ((src_reg->flags & IR3_REG_RELATIV) &&
					conflicts(instr->address, reg->instr->address))
				return;

			/* This seems to be a hw bug, or something where the timings
			 * just somehow don't work out.  This restriction may only
			 * apply if the first src is also CONST.
			 */
			if ((opc_cat(instr->opc) == 3) && (n == 2) &&
					(src_reg->flags & IR3_REG_RELATIV) &&
					(src_reg->array.offset == 0))
				return;

			src_reg = ir3_reg_clone(instr->block->shader, src_reg);
			src_reg->flags = new_flags;
			instr->regs[n+1] = src_reg;

			if (src_reg->flags & IR3_REG_RELATIV)
				ir3_instr_set_address(instr, reg->instr->address);

			return;
		}

		if ((src_reg->flags & IR3_REG_RELATIV) &&
				!conflicts(instr->address, reg->instr->address)) {
			src_reg = ir3_reg_clone(instr->block->shader, src_reg);
			src_reg->flags = new_flags;
			instr->regs[n+1] = src_reg;
			ir3_instr_set_address(instr, reg->instr->address);

			return;
		}

		/* NOTE: seems we can only do immed integers, so don't
		 * need to care about float.  But we do need to handle
		 * abs/neg *before* checking that the immediate requires
		 * few enough bits to encode:
		 *
		 * TODO: do we need to do something to avoid accidentally
		 * catching a float immed?
		 */
		if (src_reg->flags & IR3_REG_IMMED) {
			int32_t iim_val = src_reg->iim_val;

			debug_assert((opc_cat(instr->opc) == 1) ||
					(opc_cat(instr->opc) == 6) ||
					ir3_cat2_int(instr->opc));

			if (new_flags & IR3_REG_SABS)
				iim_val = abs(iim_val);

			if (new_flags & IR3_REG_SNEG)
				iim_val = -iim_val;

			if (new_flags & IR3_REG_BNOT)
				iim_val = ~iim_val;

			/* other than category 1 (mov) we can only encode up to 10 bits: */
			if ((instr->opc == OPC_MOV) ||
					!((iim_val & ~0x3ff) && (-iim_val & ~0x3ff))) {
				/* modifiers were folded into iim_val above, so drop them: */
				new_flags &= ~(IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT);
				src_reg = ir3_reg_clone(instr->block->shader, src_reg);
				src_reg->flags = new_flags;
				src_reg->iim_val = iim_val;
				instr->regs[n+1] = src_reg;
			} else if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) {
				/* See if lowering an immediate to const would help. */
				instr->regs[n+1] = lower_immed(ctx, src_reg, new_flags);
			}

			return;
		}
	}
}

/* Handle special case of eliminating output mov, and similar cases where
 * there isn't a normal "consuming" instruction.  In this case we cannot
 * collapse flags (ie. output mov from const, or w/ abs/neg flags, cannot
 * be eliminated)
 */
static struct ir3_instruction *
eliminate_output_mov(struct ir3_instruction *instr)
{
	if (is_eligible_mov(instr, false)) {
		struct ir3_register *reg = instr->regs[1];
		if (!(reg->flags & IR3_REG_ARRAY)) {
			struct ir3_instruction *src_instr = ssa(reg);
			debug_assert(src_instr);
			return src_instr;
		}
	}
	return instr;
}

/**
 * Find instruction src's which are mov's that can be collapsed, replacing
 * the mov dst with the mov src
 */
static void
instr_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr)
{
	struct ir3_register *reg;

	if (instr->regs_count == 0)
		return;

	/* only visit each instruction once: */
	if (ir3_instr_check_mark(instr))
		return;

	/* walk down the graph from each src: */
	foreach_src_n(reg, n, instr) {
		struct ir3_instruction *src = ssa(reg);

		if (!src)
			continue;

		instr_cp(ctx, src);

		/* TODO non-indirect access we could figure out which register
		 * we actually want and allow cp..
		 */
		if (reg->flags & IR3_REG_ARRAY)
			continue;

		reg_cp(ctx, instr, reg, n);
	}

	if (instr->regs[0]->flags & IR3_REG_ARRAY) {
		struct ir3_instruction *src = ssa(instr->regs[0]);
		if (src)
			instr_cp(ctx, src);
	}

	if (instr->address) {
		instr_cp(ctx, instr->address);
		ir3_instr_set_address(instr, eliminate_output_mov(instr->address));
	}

	/* we can end up with extra cmps.s from frontend, which uses a
	 *
	 *    cmps.s p0.x, cond, 0
	 *
	 * as a way to mov into the predicate register.  But frequently 'cond'
	 * is itself a cmps.s/cmps.f/cmps.u.  So detect this special case and
	 * just re-write the instruction writing predicate register to get rid
	 * of the double cmps.
	 */
	if ((instr->opc == OPC_CMPS_S) &&
			(instr->regs[0]->num == regid(REG_P0, 0)) &&
			ssa(instr->regs[1]) &&
			(instr->regs[2]->flags & IR3_REG_IMMED) &&
			(instr->regs[2]->iim_val == 0)) {
		struct ir3_instruction *cond = ssa(instr->regs[1]);
		switch (cond->opc) {
		case OPC_CMPS_S:
		case OPC_CMPS_F:
		case OPC_CMPS_U:
			/* steal the comparison directly, dropping the extra cmps.s: */
			instr->opc   = cond->opc;
			instr->flags = cond->flags;
			instr->cat2  = cond->cat2;
			instr->address = cond->address;
			instr->regs[1] = cond->regs[1];
			instr->regs[2] = cond->regs[2];
			break;
		default:
			break;
		}
	}
}

/* Pass entry point: run copy-propagation over the shader, starting from
 * each root (outputs, keeps, block conditions) and eliminating output
 * movs where possible.
 */
void
ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so)
{
	struct ir3_cp_ctx ctx = {
			.shader = ir,
			.so = so,
	};

	ir3_clear_mark(ir);

	for (unsigned i = 0; i < ir->noutputs; i++) {
		if (ir->outputs[i]) {
			instr_cp(&ctx, ir->outputs[i]);
			ir->outputs[i] = eliminate_output_mov(ir->outputs[i]);
		}
	}

	for (unsigned i = 0; i < ir->keeps_count; i++) {
		instr_cp(&ctx, ir->keeps[i]);
		ir->keeps[i] = eliminate_output_mov(ir->keeps[i]);
	}

	list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
		if (block->condition) {
			instr_cp(&ctx, block->condition);
			block->condition = eliminate_output_mov(block->condition);
		}
	}
}