1d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller/* 2d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * Copyright 2011 Christoph Bumiller 3d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * 4d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * Permission is hereby granted, free of charge, to any person obtaining a 5d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * copy of this software and associated documentation files (the "Software"), 6d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * to deal in the Software without restriction, including without limitation 7d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * and/or sell copies of the Software, and to permit persons to whom the 9d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * Software is furnished to do so, subject to the following conditions: 10d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * 11d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * The above copyright notice and this permission notice shall be included in 12d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * all copies or substantial portions of the Software. 13d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * 14d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 18d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF 19d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * SOFTWARE. 21d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller */ 2257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 2357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller#include "nv50_ir.h" 2457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller#include "nv50_ir_target.h" 2557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller#include "nv50_ir_build_util.h" 2657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 2757594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerextern "C" { 2857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller#include "util/u_math.h" 2957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 3057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 3157594065c30feec9376be9b2132659f7d87362eeChristoph Bumillernamespace nv50_ir { 3257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 3357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 3457594065c30feec9376be9b2132659f7d87362eeChristoph BumillerInstruction::isNop() const 3557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 3600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (op == OP_PHI || op == OP_SPLIT || op == OP_MERGE || op == OP_CONSTRAINT) 3757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 3857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (terminator || join) // XXX: should terminator imply flow ? 3957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 4057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!fixed && op == OP_NOP) 4157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 4257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 439362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller if (defExists(0) && def(0).rep()->reg.data.id < 0) { 4457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (int d = 1; defExists(d); ++d) 459362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller if (def(d).rep()->reg.data.id >= 0) 4657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller WARN("part of vector result is unused !\n"); 4757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 4857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 4957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 5057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (op == OP_MOV || op == OP_UNION) { 51e43a3a66a9d8a99021d76ff4d07dec7b8cfd62caChristoph Bumiller if (!getDef(0)->equals(getSrc(0))) 5257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 5357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (op == OP_UNION) 549362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller if (!def(0).rep()->equals(getSrc(1))) 5557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 5657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 5757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 5857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 5957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 6057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 6157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 6257594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool Instruction::isDead() const 6357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 6457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (op == OP_STORE || 652fc014f8c0d9339b1652f4e037aee5697142304aChristoph Bumiller op == OP_EXPORT || 662fc014f8c0d9339b1652f4e037aee5697142304aChristoph Bumiller op == OP_WRSV) 6757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 6857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 6957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (int d = 0; defExists(d); ++d) 7057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (getDef(d)->refCount() || getDef(d)->reg.data.id >= 0) 7157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 7257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 7357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (terminator || asFlow()) 7457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 7557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (fixed) 7657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 7757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 7857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 7957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}; 8057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 8157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// ============================================================================= 8257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 8357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerclass CopyPropagation : public Pass 8457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 8557594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerprivate: 8657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller virtual bool visit(BasicBlock *); 8757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}; 8857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 8957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// Propagate all MOVs forward to make subsequent optimization easier, except if 9057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// the sources stem from a phi, in which case we don't want to mess up potential 9157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// swaps $rX <-> $rY, i.e. do not create live range overlaps of phi src and def. 9257594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 9357594065c30feec9376be9b2132659f7d87362eeChristoph BumillerCopyPropagation::visit(BasicBlock *bb) 9457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 9557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *mov, *si, *next; 9657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 9757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (mov = bb->getEntry(); mov; mov = next) { 9857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next = mov->next; 9957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (mov->op != OP_MOV || mov->fixed || !mov->getSrc(0)->asLValue()) 10057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller continue; 10144e84d6f161e95d44d847440b3bc6d670c242cd7Christoph Bumiller if (mov->getPredicate()) 10244e84d6f161e95d44d847440b3bc6d670c242cd7Christoph Bumiller continue; 10344e84d6f161e95d44d847440b3bc6d670c242cd7Christoph Bumiller if (mov->def(0).getFile() != mov->src(0).getFile()) 10444e84d6f161e95d44d847440b3bc6d670c242cd7Christoph Bumiller continue; 10557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller si = mov->getSrc(0)->getInsn(); 10657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (mov->getDef(0)->reg.data.id < 0 && si && si->op != OP_PHI) { 10757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // propagate 1089362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller mov->def(0).replace(mov->getSrc(0), false); 10957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, mov); 11057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 11157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 11257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 11357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 11457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 11557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// ============================================================================= 11657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 11757594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerclass LoadPropagation : public Pass 11857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 11957594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerprivate: 12057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller virtual bool visit(BasicBlock *); 12157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 12257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller void checkSwapSrc01(Instruction *); 12357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 12457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bool isCSpaceLoad(Instruction *); 12557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bool isImmd32Load(Instruction *); 12690b5301ceab8fd86fccf76efe7ebb039c0e4a28fChristoph Bumiller bool isAttribOrSharedLoad(Instruction *); 12757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}; 12857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 12957594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 13057594065c30feec9376be9b2132659f7d87362eeChristoph BumillerLoadPropagation::isCSpaceLoad(Instruction *ld) 13157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 1329362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller return ld && ld->op == OP_LOAD && ld->src(0).getFile() == FILE_MEMORY_CONST; 13357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 13457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 13557594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 13657594065c30feec9376be9b2132659f7d87362eeChristoph BumillerLoadPropagation::isImmd32Load(Instruction *ld) 13757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 13857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!ld || (ld->op != OP_MOV) || (typeSizeof(ld->dType) != 4)) 13957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 1409362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller return ld->src(0).getFile() == FILE_IMMEDIATE; 14157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 14257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 14390b5301ceab8fd86fccf76efe7ebb039c0e4a28fChristoph Bumillerbool 14490b5301ceab8fd86fccf76efe7ebb039c0e4a28fChristoph BumillerLoadPropagation::isAttribOrSharedLoad(Instruction *ld) 14590b5301ceab8fd86fccf76efe7ebb039c0e4a28fChristoph Bumiller{ 14690b5301ceab8fd86fccf76efe7ebb039c0e4a28fChristoph Bumiller return ld && 14790b5301ceab8fd86fccf76efe7ebb039c0e4a28fChristoph Bumiller (ld->op == OP_VFETCH || 14890b5301ceab8fd86fccf76efe7ebb039c0e4a28fChristoph Bumiller (ld->op == OP_LOAD && 14990b5301ceab8fd86fccf76efe7ebb039c0e4a28fChristoph Bumiller (ld->src(0).getFile() == FILE_SHADER_INPUT || 15090b5301ceab8fd86fccf76efe7ebb039c0e4a28fChristoph Bumiller ld->src(0).getFile() == FILE_MEMORY_SHARED))); 15190b5301ceab8fd86fccf76efe7ebb039c0e4a28fChristoph Bumiller} 15290b5301ceab8fd86fccf76efe7ebb039c0e4a28fChristoph Bumiller 15357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 15457594065c30feec9376be9b2132659f7d87362eeChristoph BumillerLoadPropagation::checkSwapSrc01(Instruction *insn) 15557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 15657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!prog->getTarget()->getOpInfo(insn).commutative) 15757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (insn->op != OP_SET && insn->op != OP_SLCT) 15857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 1599362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller if (insn->src(1).getFile() != FILE_GPR) 16057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 16157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 16257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *i0 = insn->getSrc(0)->getInsn(); 16357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *i1 = insn->getSrc(1)->getInsn(); 16457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 16557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (isCSpaceLoad(i0)) { 16657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!isCSpaceLoad(i1)) 16757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller insn->swapSources(0, 1); 16857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller else 16957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 17057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 17157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (isImmd32Load(i0)) { 17257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!isCSpaceLoad(i1) && !isImmd32Load(i1)) 17357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller insn->swapSources(0, 1); 17457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller else 17557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 17690b5301ceab8fd86fccf76efe7ebb039c0e4a28fChristoph Bumiller } else 17790b5301ceab8fd86fccf76efe7ebb039c0e4a28fChristoph Bumiller if (isAttribOrSharedLoad(i1)) { 17890b5301ceab8fd86fccf76efe7ebb039c0e4a28fChristoph Bumiller if (!isAttribOrSharedLoad(i0)) 17990b5301ceab8fd86fccf76efe7ebb039c0e4a28fChristoph Bumiller insn->swapSources(0, 1); 18090b5301ceab8fd86fccf76efe7ebb039c0e4a28fChristoph Bumiller else 18190b5301ceab8fd86fccf76efe7ebb039c0e4a28fChristoph Bumiller return; 18257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 18357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 18457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 18557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 18657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (insn->op == OP_SET) 18757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller insn->asCmp()->setCond = reverseCondCode(insn->asCmp()->setCond); 18857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller else 18957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (insn->op == OP_SLCT) 19057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller insn->asCmp()->setCond = inverseCondCode(insn->asCmp()->setCond); 19157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 19257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 19357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 19457594065c30feec9376be9b2132659f7d87362eeChristoph BumillerLoadPropagation::visit(BasicBlock *bb) 19557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 19657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller const Target *targ = prog->getTarget(); 19757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *next; 19857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 19957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (Instruction *i = bb->getEntry(); i; i = next) { 20057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next = i->next; 20157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 20257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->srcExists(1)) 20357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller checkSwapSrc01(i); 20457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 20557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (int s = 0; i->srcExists(s); ++s) { 20657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *ld = i->getSrc(s)->getInsn(); 20757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 20857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!ld || ld->fixed || (ld->op != OP_LOAD && ld->op != OP_MOV)) 20957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller continue; 21057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!targ->insnCanLoad(i, s, ld)) 21157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller continue; 21257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 21357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // propagate ! 21457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(s, ld->getSrc(0)); 2159362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller if (ld->src(0).isIndirect(0)) 21657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setIndirect(s, 0, ld->getIndirect(0, 0)); 21757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 21857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ld->getDef(0)->refCount() == 0) 21957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, ld); 22057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 22157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 22257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 22357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 22457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 22557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// ============================================================================= 22657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 22757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// Evaluate constant expressions. 22857594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerclass ConstantFolding : public Pass 22957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 23057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerpublic: 23157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bool foldAll(Program *); 23257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 23357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerprivate: 23457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller virtual bool visit(BasicBlock *); 23557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 236d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez void expr(Instruction *, ImmediateValue&, ImmediateValue&); 237d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez void opnd(Instruction *, ImmediateValue&, int s); 23857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 23957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller void unary(Instruction *, const ImmediateValue&); 24057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 24155f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller void tryCollapseChainedMULs(Instruction *, const int s, ImmediateValue&); 24255f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller 24357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // TGSI 'true' is converted to -1 by F2I(NEG(SET)), track back to SET 24457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller CmpInstruction *findOriginForTestWithZero(Value *); 24557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 24657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller unsigned int foldCount; 24757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 24857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller BuildUtil bld; 24957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}; 25057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 25157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// TODO: remember generated immediates and only revisit these 25257594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 25357594065c30feec9376be9b2132659f7d87362eeChristoph BumillerConstantFolding::foldAll(Program *prog) 25457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 25557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller unsigned int iterCount = 0; 25657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller do { 25757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller foldCount = 0; 25857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!run(prog)) 25957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 26057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } while (foldCount && ++iterCount < 2); 26157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 26257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 26357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 26457594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 26557594065c30feec9376be9b2132659f7d87362eeChristoph BumillerConstantFolding::visit(BasicBlock *bb) 26657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 26757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *i, *next; 26857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 26957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (i = bb->getEntry(); i; i = next) { 27057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next = i->next; 2711e957941735fae514de658c836b8bdaf6c66bc06Francisco Jerez if (i->op == OP_MOV || i->op == OP_CALL) 27257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller continue; 27357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 274d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez ImmediateValue src0, src1; 27557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 276d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (i->srcExists(1) && 277d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez i->src(0).getImmediate(src0) && i->src(1).getImmediate(src1)) 27857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller expr(i, src0, src1); 27957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller else 280d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (i->srcExists(0) && i->src(0).getImmediate(src0)) 28157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller opnd(i, src0, 0); 28257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller else 283d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (i->srcExists(1) && i->src(1).getImmediate(src1)) 28457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller opnd(i, src1, 1); 28557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 28657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 28757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 28857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 28957594065c30feec9376be9b2132659f7d87362eeChristoph BumillerCmpInstruction * 29057594065c30feec9376be9b2132659f7d87362eeChristoph BumillerConstantFolding::findOriginForTestWithZero(Value *value) 29157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 29257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!value) 29357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return NULL; 29457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *insn = value->getInsn(); 29557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 29657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller while (insn && insn->op != OP_SET) { 29757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *next = NULL; 29857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (insn->op) { 29957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_NEG: 30057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_ABS: 30157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_CVT: 30257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next = insn->getSrc(0)->getInsn(); 30357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (insn->sType != next->dType) 30457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return NULL; 30557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 30657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_MOV: 30757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next = insn->getSrc(0)->getInsn(); 30857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 30957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 31057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return NULL; 31157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 31257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller insn = next; 31357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 31457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return insn ? insn->asCmp() : NULL; 31557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 31657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 31757594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 31857594065c30feec9376be9b2132659f7d87362eeChristoph BumillerModifier::applyTo(ImmediateValue& imm) const 31957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 32057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (imm.reg.type) { 32157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_F32: 32257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (bits & NV50_IR_MOD_ABS) 32357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller imm.reg.data.f32 = fabsf(imm.reg.data.f32); 32457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (bits & NV50_IR_MOD_NEG) 32557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller imm.reg.data.f32 = -imm.reg.data.f32; 32657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (bits & NV50_IR_MOD_SAT) { 32757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (imm.reg.data.f32 < 0.0f) 32857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller imm.reg.data.f32 = 0.0f; 32957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller else 33057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (imm.reg.data.f32 > 1.0f) 33157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller imm.reg.data.f32 = 1.0f; 33257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 33357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(!(bits & NV50_IR_MOD_NOT)); 33457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 33557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 33657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_S8: // NOTE: will be extended 33757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_S16: 33857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_S32: 33957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_U8: // NOTE: treated as signed 34057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_U16: 34157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_U32: 34257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (bits & NV50_IR_MOD_ABS) 34357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller imm.reg.data.s32 = (imm.reg.data.s32 >= 0) ? 34457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller imm.reg.data.s32 : -imm.reg.data.s32; 34557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (bits & NV50_IR_MOD_NEG) 34657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller imm.reg.data.s32 = -imm.reg.data.s32; 34757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (bits & NV50_IR_MOD_NOT) 34857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller imm.reg.data.s32 = ~imm.reg.data.s32; 34957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 35057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 35157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_F64: 35257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (bits & NV50_IR_MOD_ABS) 35357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller imm.reg.data.f64 = fabs(imm.reg.data.f64); 35457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (bits & NV50_IR_MOD_NEG) 35557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller imm.reg.data.f64 = -imm.reg.data.f64; 35657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (bits & NV50_IR_MOD_SAT) { 35757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (imm.reg.data.f64 < 0.0) 35857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller imm.reg.data.f64 = 0.0; 35957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller else 36057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (imm.reg.data.f64 > 1.0) 36157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller imm.reg.data.f64 = 1.0; 36257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 36357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(!(bits & NV50_IR_MOD_NOT)); 36457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 36557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 36657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 36757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(!"invalid/unhandled type"); 36857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller imm.reg.data.u64 = 0; 36957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 37057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 37157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 37257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 37357594065c30feec9376be9b2132659f7d87362eeChristoph Bumilleroperation 37457594065c30feec9376be9b2132659f7d87362eeChristoph BumillerModifier::getOp() const 37557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 37657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (bits) { 37757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case NV50_IR_MOD_ABS: return OP_ABS; 37857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case NV50_IR_MOD_NEG: return OP_NEG; 37957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case NV50_IR_MOD_SAT: return OP_SAT; 38057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case NV50_IR_MOD_NOT: return OP_NOT; 38157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case 0: 38257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return OP_MOV; 38357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 38457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return OP_CVT; 38557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 38657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 38757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 38857594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 38957594065c30feec9376be9b2132659f7d87362eeChristoph BumillerConstantFolding::expr(Instruction *i, 390d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez ImmediateValue &imm0, ImmediateValue &imm1) 39157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 39257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller struct Storage *const a = &imm0.reg, *const b = &imm1.reg; 393d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez struct Storage res; 39457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 395d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez memset(&res.data, 0, sizeof(res.data)); 39657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 39757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (i->op) { 39857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_MAD: 39957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_FMA: 40057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_MUL: 40157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->dnz && i->dType == TYPE_F32) { 40257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!isfinite(a->data.f32)) 40357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller a->data.f32 = 0.0f; 40457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!isfinite(b->data.f32)) 40557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller b->data.f32 = 0.0f; 40657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 40757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (i->dType) { 40857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_F32: res.data.f32 = a->data.f32 * b->data.f32; break; 40957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_F64: res.data.f64 = a->data.f64 * b->data.f64; break; 41057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_S32: 41157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_U32: res.data.u32 = a->data.u32 * b->data.u32; break; 41257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 41357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 41457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 41557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 41657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_DIV: 41757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (b->data.u32 == 0) 41857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 41957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (i->dType) { 42057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_F32: res.data.f32 = a->data.f32 / b->data.f32; break; 42157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_F64: res.data.f64 = a->data.f64 / b->data.f64; break; 42257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_S32: res.data.s32 = a->data.s32 / b->data.s32; break; 42357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_U32: res.data.u32 = a->data.u32 / b->data.u32; break; 42457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 42557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 42657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 42757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 42857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_ADD: 42957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (i->dType) { 43057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_F32: res.data.f32 = a->data.f32 + b->data.f32; break; 43157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_F64: res.data.f64 = a->data.f64 + b->data.f64; break; 43257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_S32: 43357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_U32: res.data.u32 = a->data.u32 + b->data.u32; break; 43457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 43557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 43657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 43757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 43857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_POW: 43957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (i->dType) { 44057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_F32: res.data.f32 = pow(a->data.f32, b->data.f32); break; 44157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_F64: res.data.f64 = pow(a->data.f64, b->data.f64); break; 44257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 44357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 44457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 44557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 44657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_MAX: 44757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (i->dType) { 44857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_F32: res.data.f32 = MAX2(a->data.f32, b->data.f32); break; 44957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_F64: res.data.f64 = MAX2(a->data.f64, b->data.f64); break; 45057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_S32: res.data.s32 = MAX2(a->data.s32, b->data.s32); break; 45157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_U32: res.data.u32 = MAX2(a->data.u32, b->data.u32); break; 45257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 45357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 45457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 45557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 45657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_MIN: 45757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (i->dType) { 45857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_F32: res.data.f32 = MIN2(a->data.f32, b->data.f32); break; 45957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_F64: res.data.f64 = MIN2(a->data.f64, b->data.f64); break; 46057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_S32: res.data.s32 = MIN2(a->data.s32, b->data.s32); break; 46157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_U32: res.data.u32 = MIN2(a->data.u32, b->data.u32); break; 46257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 46357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 46457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 46557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 46657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_AND: 46757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller res.data.u64 = a->data.u64 & b->data.u64; 46857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 46957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_OR: 47057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller res.data.u64 = a->data.u64 | b->data.u64; 47157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 47257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_XOR: 47357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller res.data.u64 = a->data.u64 ^ b->data.u64; 47457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 47557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_SHL: 47657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller res.data.u32 = a->data.u32 << b->data.u32; 47757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 47857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_SHR: 47957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (i->dType) { 48057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_S32: res.data.s32 = a->data.s32 >> b->data.u32; break; 48157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_U32: res.data.u32 = a->data.u32 >> b->data.u32; break; 48257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 48357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 48457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 48557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 48657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_SLCT: 48757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (a->data.u32 != b->data.u32) 48857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 48957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller res.data.u32 = a->data.u32; 49057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 49157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 49257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 49357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 49457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ++foldCount; 49557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 4969362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller i->src(0).mod = Modifier(0); 4979362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller i->src(1).mod = Modifier(0); 49857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 49957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res.data.u32)); 50057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(1, NULL); 50157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 50257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->getSrc(0)->reg.data = res.data; 50357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 50457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->op == OP_MAD || i->op == OP_FMA) { 50557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_ADD; 50657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 50757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(1, i->getSrc(0)); 5089362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller i->src(1).mod = i->src(2).mod; 50957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(0, i->getSrc(2)); 51057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(2, NULL); 51157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 512d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez ImmediateValue src0; 513d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (i->src(0).getImmediate(src0)) 514d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez expr(i, src0, *i->getSrc(1)->asImm()); 51557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 51657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_MOV; 51757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 51857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 51957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 52057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 52157594065c30feec9376be9b2132659f7d87362eeChristoph BumillerConstantFolding::unary(Instruction *i, const ImmediateValue &imm) 52257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 52357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Storage res; 52457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 52557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->dType != TYPE_F32) 52657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 52757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (i->op) { 52857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_NEG: res.data.f32 = -imm.reg.data.f32; break; 52957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_ABS: res.data.f32 = fabsf(imm.reg.data.f32); break; 53057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_RCP: res.data.f32 = 1.0f / imm.reg.data.f32; break; 53157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_RSQ: res.data.f32 = 1.0f / sqrtf(imm.reg.data.f32); break; 53257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_LG2: res.data.f32 = log2f(imm.reg.data.f32); break; 53357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_EX2: res.data.f32 = exp2f(imm.reg.data.f32); break; 53457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_SIN: res.data.f32 = sinf(imm.reg.data.f32); break; 53557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_COS: res.data.f32 = cosf(imm.reg.data.f32); break; 53657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_SQRT: res.data.f32 = sqrtf(imm.reg.data.f32); break; 53757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_PRESIN: 53857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_PREEX2: 53957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // these should be handled in subsequent OP_SIN/COS/EX2 54057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller res.data.f32 = imm.reg.data.f32; 54157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 54257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 54357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 54457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 54557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_MOV; 54657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res.data.f32)); 5479362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller i->src(0).mod = Modifier(0); 54857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 54957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 55057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 55155f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph BumillerConstantFolding::tryCollapseChainedMULs(Instruction *mul2, 55255f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller const int s, ImmediateValue& imm2) 55355f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller{ 55455f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller const int t = s ? 0 : 1; 55555f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller Instruction *insn; 55655f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller Instruction *mul1 = NULL; // mul1 before mul2 55755f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller int e = 0; 55855f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller float f = imm2.reg.data.f32; 559d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez ImmediateValue imm1; 56055f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller 56155f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller assert(mul2->op == OP_MUL && mul2->dType == TYPE_F32); 56255f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller 56355f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller if (mul2->getSrc(t)->refCount() == 1) { 56455f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller insn = mul2->getSrc(t)->getInsn(); 565d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (!mul2->src(t).mod && insn->op == OP_MUL && insn->dType == TYPE_F32) 56655f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller mul1 = insn; 56728d6a268af3587cedb6a0e9deee7a98ecc8f82baChristoph Bumiller if (mul1 && !mul1->saturate) { 568d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez int s1; 569d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez 570d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (mul1->src(s1 = 0).getImmediate(imm1) || 571d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez mul1->src(s1 = 1).getImmediate(imm1)) { 57255f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller bld.setPosition(mul1, false); 57355f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller // a = mul r, imm1 57455f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller // d = mul a, imm2 -> d = mul r, (imm1 * imm2) 57555f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller mul1->setSrc(s1, bld.loadImm(NULL, f * imm1.reg.data.f32)); 576d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez mul1->src(s1).mod = Modifier(0); 5779362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller mul2->def(0).replace(mul1->getDef(0), false); 57855f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller } else 57955f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller if (prog->getTarget()->isPostMultiplySupported(OP_MUL, f, e)) { 58055f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller // c = mul a, b 58155f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller // d = mul c, imm -> d = mul_x_imm a, b 58255f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller mul1->postFactor = e; 5839362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller mul2->def(0).replace(mul1->getDef(0), false); 58455f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller if (f < 0) 585d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez mul1->src(0).mod *= Modifier(NV50_IR_MOD_NEG); 58655f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller } 58728d6a268af3587cedb6a0e9deee7a98ecc8f82baChristoph Bumiller mul1->saturate = mul2->saturate; 58855f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller return; 58955f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller } 59055f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller } 59128d6a268af3587cedb6a0e9deee7a98ecc8f82baChristoph Bumiller if (mul2->getDef(0)->refCount() == 1 && !mul2->saturate) { 59255f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller // b = mul a, imm 59355f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller // d = mul b, c -> d = mul_x_imm a, c 59455f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller int s2, t2; 5958cc2eca5df0116aa7fb8233a9ab6ad1c9e4203cdFrancisco Jerez insn = mul2->getDef(0)->uses.front()->getInsn(); 59655f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller if (!insn) 59755f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller return; 59855f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller mul1 = mul2; 59955f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller mul2 = NULL; 60055f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller s2 = insn->getSrc(0) == mul1->getDef(0) ? 0 : 1; 60155f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller t2 = s2 ? 0 : 1; 60255f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller if (insn->op == OP_MUL && insn->dType == TYPE_F32) 603d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (!insn->src(s2).mod && !insn->src(t2).getImmediate(imm1)) 60455f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller mul2 = insn; 60555f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller if (mul2 && prog->getTarget()->isPostMultiplySupported(OP_MUL, f, e)) { 60655f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller mul2->postFactor = e; 6079362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller mul2->setSrc(s2, mul1->src(t)); 60855f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller if (f < 0) 609d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez mul2->src(s2).mod *= Modifier(NV50_IR_MOD_NEG); 61055f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller } 61155f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller } 61255f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller} 61355f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller 61455f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumillervoid 615d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco JerezConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) 61657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 61757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller const int t = !s; 61857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller const operation op = i->op; 61957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 62057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (i->op) { 62157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_MUL: 62255f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller if (i->dType == TYPE_F32) 623d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez tryCollapseChainedMULs(i, s, imm0); 62455f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller 625d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (imm0.isInteger(0)) { 62657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_MOV; 627d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez i->setSrc(0, new_ImmediateValue(prog, 0u)); 628d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez i->src(0).mod = Modifier(0); 62957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(1, NULL); 63057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 631d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (imm0.isInteger(1) || imm0.isInteger(-1)) { 632d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (imm0.isNegative()) 6339362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller i->src(t).mod = i->src(t).mod ^ Modifier(NV50_IR_MOD_NEG); 6349362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller i->op = i->src(t).mod.getOp(); 63557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (s == 0) { 63657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(0, i->getSrc(1)); 6379362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller i->src(0).mod = i->src(1).mod; 6389362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller i->src(1).mod = 0; 63957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 64057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->op != OP_CVT) 6419362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller i->src(0).mod = 0; 64257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(1, NULL); 64357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 644d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (imm0.isInteger(2) || imm0.isInteger(-2)) { 645d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (imm0.isNegative()) 6469362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller i->src(t).mod = i->src(t).mod ^ Modifier(NV50_IR_MOD_NEG); 64757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_ADD; 64857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(s, i->getSrc(t)); 6499362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller i->src(s).mod = i->src(t).mod; 65057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 651d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (!isFloatType(i->sType) && !imm0.isNegative() && imm0.isPow2()) { 65257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_SHL; 653d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez imm0.applyLog2(); 654d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez i->setSrc(0, i->getSrc(t)); 655d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez i->src(0).mod = i->src(t).mod; 656d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez i->setSrc(1, new_ImmediateValue(prog, imm0.reg.data.u32)); 657d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez i->src(1).mod = 0; 65857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 65957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 66057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_ADD: 661d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (imm0.isInteger(0)) { 66257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (s == 0) { 66357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(0, i->getSrc(1)); 6649362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller i->src(0).mod = i->src(1).mod; 66557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 66657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(1, NULL); 6679362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller i->op = i->src(0).mod.getOp(); 66857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->op != OP_CVT) 6699362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller i->src(0).mod = Modifier(0); 67057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 67157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 67257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 67357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_DIV: 67457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (s != 1 || (i->dType != TYPE_S32 && i->dType != TYPE_U32)) 67557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 67657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.setPosition(i, false); 677d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (imm0.reg.data.u32 == 0) { 67857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 67957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 680d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (imm0.reg.data.u32 == 1) { 68157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_MOV; 68257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(1, NULL); 68357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 684d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (i->dType == TYPE_U32 && imm0.isPow2()) { 6856ab6110133c2d316d98f78bbc38bca0c5b6184a7Christoph Bumiller i->op = OP_SHR; 686d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez i->setSrc(1, bld.mkImm(util_logbase2(imm0.reg.data.u32))); 68757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 68857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->dType == TYPE_U32) { 68957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *mul; 69057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *tA, *tB; 691d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez const uint32_t d = imm0.reg.data.u32; 69257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller uint32_t m; 69357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int r, s; 69457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller uint32_t l = util_logbase2(d); 69557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (((uint32_t)1 << l) < d) 69657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ++l; 69757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller m = (((uint64_t)1 << 32) * (((uint64_t)1 << l) - d)) / d + 1; 69857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller r = l ? 1 : 0; 69957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller s = l ? (l - 1) : 0; 70057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 70157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller tA = bld.getSSA(); 70257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller tB = bld.getSSA(); 70357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mul = bld.mkOp2(OP_MUL, TYPE_U32, tA, i->getSrc(0), 70457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.loadImm(NULL, m)); 70557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mul->subOp = NV50_IR_SUBOP_MUL_HIGH; 70657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp2(OP_SUB, TYPE_U32, tB, i->getSrc(0), tA); 70757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller tA = bld.getSSA(); 70857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (r) 70957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp2(OP_SHR, TYPE_U32, tA, tB, bld.mkImm(r)); 71057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller else 71157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller tA = tB; 71257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller tB = s ? bld.getSSA() : i->getDef(0); 71357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp2(OP_ADD, TYPE_U32, tB, mul->getDef(0), tA); 71457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (s) 71557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), tB, bld.mkImm(s)); 71657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 71757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, i); 71857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 719d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (imm0.reg.data.s32 == -1) { 72057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_NEG; 72157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(1, NULL); 72257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 72357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller LValue *tA, *tB; 72457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller LValue *tD; 725d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez const int32_t d = imm0.reg.data.s32; 72657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int32_t m; 72757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int32_t l = util_logbase2(static_cast<unsigned>(abs(d))); 72857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if ((1 << l) < abs(d)) 72957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ++l; 73057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!l) 73157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller l = 1; 73257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller m = ((uint64_t)1 << (32 + l - 1)) / abs(d) + 1 - ((uint64_t)1 << 32); 73357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 73457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller tA = bld.getSSA(); 73557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller tB = bld.getSSA(); 73657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp3(OP_MAD, TYPE_S32, tA, i->getSrc(0), bld.loadImm(NULL, m), 73757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->getSrc(0))->subOp = NV50_IR_SUBOP_MUL_HIGH; 73857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (l > 1) 73957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp2(OP_SHR, TYPE_S32, tB, tA, bld.mkImm(l - 1)); 74057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller else 74157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller tB = tA; 74257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller tA = bld.getSSA(); 74357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkCmp(OP_SET, CC_LT, TYPE_S32, tA, i->getSrc(0), bld.mkImm(0)); 74457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller tD = (d < 0) ? bld.getSSA() : i->getDef(0)->asLValue(); 74557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp2(OP_SUB, TYPE_U32, tD, tB, tA); 74657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (d < 0) 74757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp1(OP_NEG, TYPE_S32, i->getDef(0), tB); 74857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 74957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, i); 75057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 75157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 75257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 753ae828413c4a98ba3546f5586f2e20d9da718ab0cChristoph Bumiller case OP_MOD: 754d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (i->sType == TYPE_U32 && imm0.isPow2()) { 755ae828413c4a98ba3546f5586f2e20d9da718ab0cChristoph Bumiller bld.setPosition(i, false); 756ae828413c4a98ba3546f5586f2e20d9da718ab0cChristoph Bumiller i->op = OP_AND; 757d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez i->setSrc(1, bld.loadImm(NULL, imm0.reg.data.u32 - 1)); 758ae828413c4a98ba3546f5586f2e20d9da718ab0cChristoph Bumiller } 759ae828413c4a98ba3546f5586f2e20d9da718ab0cChristoph Bumiller break; 760ae828413c4a98ba3546f5586f2e20d9da718ab0cChristoph Bumiller 76157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_SET: // TODO: SET_AND,OR,XOR 76257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller { 76357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller CmpInstruction *si = findOriginForTestWithZero(i->getSrc(t)); 76457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller CondCode cc, ccZ; 7659362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller if (i->src(t).mod != Modifier(0)) 76657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 767d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (imm0.reg.data.u32 != 0 || !si || si->op != OP_SET) 76857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 76957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller cc = si->setCond; 77057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ccZ = (CondCode)((unsigned int)i->asCmp()->setCond & ~CC_U); 77157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (s == 0) 77257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ccZ = reverseCondCode(ccZ); 77357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (ccZ) { 77457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case CC_LT: cc = CC_FL; break; 77557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case CC_GE: cc = CC_TR; break; 77657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case CC_EQ: cc = inverseCondCode(cc); break; 77757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case CC_LE: cc = inverseCondCode(cc); break; 77857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case CC_GT: break; 77957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case CC_NE: break; 78057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 78157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 78257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 78357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->asCmp()->setCond = cc; 7849362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller i->setSrc(0, si->src(0)); 7859362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller i->setSrc(1, si->src(1)); 78657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->sType = si->sType; 78757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 78857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 78957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 79057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_SHL: 79157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller { 7929362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller if (s != 1 || i->src(0).mod != Modifier(0)) 79357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 79457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // try to concatenate shifts 79557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *si = i->getSrc(0)->getInsn(); 796d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (!si || si->op != OP_SHL) 79757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 798d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez ImmediateValue imm1; 799d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (si->src(1).getImmediate(imm1)) { 80057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.setPosition(i, false); 80157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(0, si->getSrc(0)); 802d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez i->setSrc(1, bld.loadImm(NULL, imm0.reg.data.u32 + imm1.reg.data.u32)); 80357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 80457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 80557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 80657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 80757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_ABS: 80857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_NEG: 80957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_LG2: 81057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_RCP: 81157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_SQRT: 81257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_RSQ: 81357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_PRESIN: 81457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_SIN: 81557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_COS: 81657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_PREEX2: 81757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_EX2: 818d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez unary(i, imm0); 81957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 82057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 82157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 82257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 82357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->op != op) 82457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller foldCount++; 82557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 82657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 82757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// ============================================================================= 82857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 82957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// Merge modifier operations (ABS, NEG, NOT) into ValueRefs where allowed. 83057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerclass ModifierFolding : public Pass 83157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 83257594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerprivate: 83357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller virtual bool visit(BasicBlock *); 83457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}; 83557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 83657594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 83757594065c30feec9376be9b2132659f7d87362eeChristoph BumillerModifierFolding::visit(BasicBlock *bb) 83857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 83957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller const Target *target = prog->getTarget(); 84057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 84157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *i, *next, *mi; 84257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Modifier mod; 84357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 84457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (i = bb->getEntry(); i; i = next) { 84557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next = i->next; 84657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 84757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (0 && i->op == OP_SUB) { 84857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // turn "sub" into "add neg" (do we really want this ?) 84957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_ADD; 8509362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller i->src(0).mod = i->src(0).mod ^ Modifier(NV50_IR_MOD_NEG); 85157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 85257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 85357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (int s = 0; s < 3 && i->srcExists(s); ++s) { 85457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mi = i->getSrc(s)->getInsn(); 85557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!mi || 85657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mi->predSrc >= 0 || mi->getDef(0)->refCount() > 8) 85757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller continue; 85857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->sType == TYPE_U32 && mi->dType == TYPE_S32) { 85957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if ((i->op != OP_ADD && 86057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op != OP_MUL) || 86157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller (mi->op != OP_ABS && 86257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mi->op != OP_NEG)) 86357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller continue; 86457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 86557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->sType != mi->dType) { 86657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller continue; 86757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 86857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if ((mod = Modifier(mi->op)) == Modifier(0)) 86957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller continue; 87014d5f975a65c57830077dabf2f95261afbc51773Francisco Jerez mod *= mi->src(0).mod; 87157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 8729362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller if ((i->op == OP_ABS) || i->src(s).mod.abs()) { 87357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // abs neg [abs] = abs 87457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mod = mod & Modifier(~(NV50_IR_MOD_NEG | NV50_IR_MOD_ABS)); 87557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 87657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if ((i->op == OP_NEG) && mod.neg()) { 87757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(s == 0); 87857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // neg as both opcode and modifier on same insn is prohibited 87957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // neg neg abs = abs, neg neg = identity 88057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mod = mod & Modifier(~NV50_IR_MOD_NEG); 88157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = mod.getOp(); 88257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mod = mod & Modifier(~NV50_IR_MOD_ABS); 88357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (mod == Modifier(0)) 88457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_MOV; 88557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 88657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 88757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (target->isModSupported(i, s, mod)) { 88857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(s, mi->getSrc(0)); 88914d5f975a65c57830077dabf2f95261afbc51773Francisco Jerez i->src(s).mod *= mod; 89057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 89157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 89257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 89357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->op == OP_SAT) { 89457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mi = i->getSrc(0)->getInsn(); 89557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (mi && 89657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mi->getDef(0)->refCount() <= 1 && target->isSatSupported(mi)) { 89757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mi->saturate = 1; 89857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mi->setDef(0, i->getDef(0)); 89957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, i); 90057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 90157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 90257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 90357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 90457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 90557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 90657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 90757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// ============================================================================= 90857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 90957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// MUL + ADD -> MAD/FMA 91057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// MIN/MAX(a, a) -> a, etc. 91157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// SLCT(a, b, const) -> cc(const) ? a : b 91257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// RCP(RCP(a)) -> a 91357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// MUL(MUL(a, b), const) -> MUL_Xconst(a, b) 91457594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerclass AlgebraicOpt : public Pass 91557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 91657594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerprivate: 91757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller virtual bool visit(BasicBlock *); 91857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 9191f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller void handleABS(Instruction *); 9201f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller bool handleADD(Instruction *); 9211f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller bool tryADDToMADOrSAD(Instruction *, operation toOp); 92257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller void handleMINMAX(Instruction *); 92357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller void handleRCP(Instruction *); 92457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller void handleSLCT(Instruction *); 92557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller void handleLOGOP(Instruction *); 92657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller void handleCVT(Instruction *); 9271f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller 9281f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller BuildUtil bld; 92957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}; 93057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 93157594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 9321f4c154f0253ed8fb448402532cfa670f74e69cdChristoph BumillerAlgebraicOpt::handleABS(Instruction *abs) 9331f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller{ 9341f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller Instruction *sub = abs->getSrc(0)->getInsn(); 9351f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller DataType ty; 9361f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller if (!sub || 9371f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller !prog->getTarget()->isOpSupported(OP_SAD, abs->dType)) 9381f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller return; 9391f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller // expect not to have mods yet, if we do, bail 9401f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller if (sub->src(0).mod || sub->src(1).mod) 9411f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller return; 9421f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller // hidden conversion ? 9431f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller ty = intTypeToSigned(sub->dType); 9441f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller if (abs->dType != abs->sType || ty != abs->sType) 9451f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller return; 9461f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller 9471f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller if ((sub->op != OP_ADD && sub->op != OP_SUB) || 9481f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller sub->src(0).getFile() != FILE_GPR || sub->src(0).mod || 9491f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller sub->src(1).getFile() != FILE_GPR || sub->src(1).mod) 9501f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller return; 9511f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller 9521f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller Value *src0 = sub->getSrc(0); 9531f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller Value *src1 = sub->getSrc(1); 9541f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller 9551f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller if (sub->op == OP_ADD) { 9561f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller Instruction *neg = sub->getSrc(1)->getInsn(); 9571f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller if (neg && neg->op != OP_NEG) { 9581f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller neg = sub->getSrc(0)->getInsn(); 9591f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller src0 = sub->getSrc(1); 9601f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller } 9611f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller if (!neg || neg->op != OP_NEG || 9621f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller neg->dType != neg->sType || neg->sType != ty) 9631f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller return; 9641f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller src1 = neg->getSrc(0); 9651f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller } 9661f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller 9671f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller // found ABS(SUB)) 9681f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller abs->moveSources(1, 2); // move sources >=1 up by 2 9691f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller abs->op = OP_SAD; 9701f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller abs->setType(sub->dType); 9711f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller abs->setSrc(0, src0); 9721f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller abs->setSrc(1, src1); 9731f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller bld.setPosition(abs, false); 9741f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller abs->setSrc(2, bld.loadImm(bld.getSSA(typeSizeof(ty)), 0)); 9751f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller} 9761f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller 9771f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumillerbool 97857594065c30feec9376be9b2132659f7d87362eeChristoph BumillerAlgebraicOpt::handleADD(Instruction *add) 97957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 98057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *src0 = add->getSrc(0); 98157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *src1 = add->getSrc(1); 9821f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller 9831f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller if (src0->reg.file != FILE_GPR || src1->reg.file != FILE_GPR) 9841f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller return false; 9851f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller 9861f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller bool changed = false; 9871f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller if (!changed && prog->getTarget()->isOpSupported(OP_MAD, add->dType)) 9881f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller changed = tryADDToMADOrSAD(add, OP_MAD); 9891f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller if (!changed && prog->getTarget()->isOpSupported(OP_SAD, add->dType)) 9901f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller changed = tryADDToMADOrSAD(add, OP_SAD); 9911f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller return changed; 9921f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller} 9931f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller 9941f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller// ADD(SAD(a,b,0), c) -> SAD(a,b,c) 9951f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller// ADD(MUL(a,b), c) -> MAD(a,b,c) 9961f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumillerbool 9971f4c154f0253ed8fb448402532cfa670f74e69cdChristoph BumillerAlgebraicOpt::tryADDToMADOrSAD(Instruction *add, operation toOp) 9981f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller{ 9991f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller Value *src0 = add->getSrc(0); 10001f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller Value *src1 = add->getSrc(1); 100157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *src; 100257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int s; 10031f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller const operation srcOp = toOp == OP_SAD ? OP_SAD : OP_MUL; 10041f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller const Modifier modBad = Modifier(~((toOp == OP_MAD) ? NV50_IR_MOD_NEG : 0)); 100557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Modifier mod[4]; 100657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 100757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (src0->refCount() == 1 && 10081f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller src0->getUniqueInsn() && src0->getUniqueInsn()->op == srcOp) 100957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller s = 0; 101057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller else 101157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (src1->refCount() == 1 && 10121f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller src1->getUniqueInsn() && src1->getUniqueInsn()->op == srcOp) 101357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller s = 1; 101457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller else 10151f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller return false; 101657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 101757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if ((src0->getUniqueInsn() && src0->getUniqueInsn()->bb != add->bb) || 101857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller (src1->getUniqueInsn() && src1->getUniqueInsn()->bb != add->bb)) 10191f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller return false; 102057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 102157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller src = add->getSrc(s); 102257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 102355f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller if (src->getInsn()->postFactor) 10241f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller return false; 10251f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller if (toOp == OP_SAD) { 10261f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller ImmediateValue imm; 10271f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller if (!src->getInsn()->src(2).getImmediate(imm)) 10281f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller return false; 10291f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller if (!imm.isInteger(0)) 10301f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller return false; 10311f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller } 103255f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller 10339362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller mod[0] = add->src(0).mod; 10349362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller mod[1] = add->src(1).mod; 10359362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller mod[2] = src->getUniqueInsn()->src(0).mod; 10369362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller mod[3] = src->getUniqueInsn()->src(1).mod; 103757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 10381f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller if (((mod[0] | mod[1]) | (mod[2] | mod[3])) & modBad) 10391f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller return false; 104057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 10411f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller add->op = toOp; 104257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller add->subOp = src->getInsn()->subOp; // potentially mul-high 104357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 10449362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller add->setSrc(2, add->src(s ? 0 : 1)); 104557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 104657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller add->setSrc(0, src->getInsn()->getSrc(0)); 10479362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller add->src(0).mod = mod[2] ^ mod[s]; 104857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller add->setSrc(1, src->getInsn()->getSrc(1)); 10499362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller add->src(1).mod = mod[3]; 10501f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller 10511f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller return true; 105257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 105357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 105457594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 105557594065c30feec9376be9b2132659f7d87362eeChristoph BumillerAlgebraicOpt::handleMINMAX(Instruction *minmax) 105657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 105757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *src0 = minmax->getSrc(0); 105857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *src1 = minmax->getSrc(1); 105957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 106057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (src0 != src1 || src0->reg.file != FILE_GPR) 106157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 10629362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller if (minmax->src(0).mod == minmax->src(1).mod) { 106314d5f975a65c57830077dabf2f95261afbc51773Francisco Jerez if (minmax->def(0).mayReplace(minmax->src(0))) { 106414d5f975a65c57830077dabf2f95261afbc51773Francisco Jerez minmax->def(0).replace(minmax->src(0), false); 106514d5f975a65c57830077dabf2f95261afbc51773Francisco Jerez minmax->bb->remove(minmax); 106614d5f975a65c57830077dabf2f95261afbc51773Francisco Jerez } else { 106757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller minmax->op = OP_CVT; 106857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller minmax->setSrc(1, NULL); 106957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 107057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 107157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // TODO: 107257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // min(x, -x) = -abs(x) 107357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // min(x, -abs(x)) = -abs(x) 107457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // min(x, abs(x)) = x 107557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // max(x, -abs(x)) = x 107657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // max(x, abs(x)) = abs(x) 107757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // max(x, -x) = abs(x) 107857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 107957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 108057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 108157594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 108257594065c30feec9376be9b2132659f7d87362eeChristoph BumillerAlgebraicOpt::handleRCP(Instruction *rcp) 108357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 108457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *si = rcp->getSrc(0)->getUniqueInsn(); 108557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 108657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (si && si->op == OP_RCP) { 10879362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller Modifier mod = rcp->src(0).mod * si->src(0).mod; 108857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rcp->op = mod.getOp(); 108957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rcp->setSrc(0, si->getSrc(0)); 109057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 109157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 109257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 109357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 109457594065c30feec9376be9b2132659f7d87362eeChristoph BumillerAlgebraicOpt::handleSLCT(Instruction *slct) 109557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 109657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (slct->getSrc(2)->reg.file == FILE_IMMEDIATE) { 109757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (slct->getSrc(2)->asImm()->compare(slct->asCmp()->setCond, 0.0f)) 109857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller slct->setSrc(0, slct->getSrc(1)); 109957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 110057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (slct->getSrc(0) != slct->getSrc(1)) { 110157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 110257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 110357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller slct->op = OP_MOV; 110457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller slct->setSrc(1, NULL); 110557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller slct->setSrc(2, NULL); 110657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 110757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 110857594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 110957594065c30feec9376be9b2132659f7d87362eeChristoph BumillerAlgebraicOpt::handleLOGOP(Instruction *logop) 111057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 111157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *src0 = logop->getSrc(0); 111257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *src1 = logop->getSrc(1); 111357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 111457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (src0->reg.file != FILE_GPR || src1->reg.file != FILE_GPR) 111557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 111657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 111757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (src0 == src1) { 111814d5f975a65c57830077dabf2f95261afbc51773Francisco Jerez if ((logop->op == OP_AND || logop->op == OP_OR) && 111914d5f975a65c57830077dabf2f95261afbc51773Francisco Jerez logop->def(0).mayReplace(logop->src(0))) { 112014d5f975a65c57830077dabf2f95261afbc51773Francisco Jerez logop->def(0).replace(logop->src(0), false); 112157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, logop); 112257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 112357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 112457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // try AND(SET, SET) -> SET_AND(SET) 112557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *set0 = src0->getInsn(); 112657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *set1 = src1->getInsn(); 112757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 112857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!set0 || set0->fixed || !set1 || set1->fixed) 112957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 113057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (set1->op != OP_SET) { 113157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *xchg = set0; 113257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller set0 = set1; 113357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller set1 = xchg; 113457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (set1->op != OP_SET) 113557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 113657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 113793508b5b0d0a2b1e966973f1d0119b32d2ccf729Francisco Jerez operation redOp = (logop->op == OP_AND ? OP_SET_AND : 113893508b5b0d0a2b1e966973f1d0119b32d2ccf729Francisco Jerez logop->op == OP_XOR ? OP_SET_XOR : OP_SET_OR); 113993508b5b0d0a2b1e966973f1d0119b32d2ccf729Francisco Jerez if (!prog->getTarget()->isOpSupported(redOp, set1->sType)) 114093508b5b0d0a2b1e966973f1d0119b32d2ccf729Francisco Jerez return; 114157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (set0->op != OP_SET && 114257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller set0->op != OP_SET_AND && 114357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller set0->op != OP_SET_OR && 114457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller set0->op != OP_SET_XOR) 114557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 114657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (set0->getDef(0)->refCount() > 1 && 114757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller set1->getDef(0)->refCount() > 1) 114857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 114957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (set0->getPredicate() || set1->getPredicate()) 115057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 115157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // check that they don't source each other 115257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (int s = 0; s < 2; ++s) 115357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (set0->getSrc(s) == set1->getDef(0) || 115457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller set1->getSrc(s) == set0->getDef(0)) 115557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 115657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 1157a05e6a3fa28168d58a13cfb07f7a664e84b925aeFrancisco Jerez set0 = cloneForward(func, set0); 1158a05e6a3fa28168d58a13cfb07f7a664e84b925aeFrancisco Jerez set1 = cloneShallow(func, set1); 115957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller logop->bb->insertAfter(logop, set1); 116057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller logop->bb->insertAfter(logop, set0); 116157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 116257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller set0->dType = TYPE_U8; 116357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller set0->getDef(0)->reg.file = FILE_PREDICATE; 116457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller set0->getDef(0)->reg.size = 1; 116557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller set1->setSrc(2, set0->getDef(0)); 116693508b5b0d0a2b1e966973f1d0119b32d2ccf729Francisco Jerez set1->op = redOp; 116757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller set1->setDef(0, logop->getDef(0)); 116857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, logop); 116957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 117057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 117157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 117257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// F2I(NEG(SET with result 1.0f/0.0f)) -> SET with result -1/0 1173a6fcf14c239fa4c1542559f8c938fb574e48104fChristoph Bumiller// nv50: 1174a6fcf14c239fa4c1542559f8c938fb574e48104fChristoph Bumiller// F2I(NEG(I2F(ABS(SET)))) 117557594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 117657594065c30feec9376be9b2132659f7d87362eeChristoph BumillerAlgebraicOpt::handleCVT(Instruction *cvt) 117757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 117857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (cvt->sType != TYPE_F32 || 11799362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller cvt->dType != TYPE_S32 || cvt->src(0).mod != Modifier(0)) 118057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 118157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *insn = cvt->getSrc(0)->getInsn(); 118257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!insn || insn->op != OP_NEG || insn->dType != TYPE_F32) 118357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 11849362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller if (insn->src(0).mod != Modifier(0)) 118557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 118657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller insn = insn->getSrc(0)->getInsn(); 1187a6fcf14c239fa4c1542559f8c938fb574e48104fChristoph Bumiller 1188a6fcf14c239fa4c1542559f8c938fb574e48104fChristoph Bumiller // check for nv50 SET(-1,0) -> SET(1.0f/0.0f) chain and nvc0's f32 SET 1189a6fcf14c239fa4c1542559f8c938fb574e48104fChristoph Bumiller if (insn && insn->op == OP_CVT && 1190a6fcf14c239fa4c1542559f8c938fb574e48104fChristoph Bumiller insn->dType == TYPE_F32 && 1191a6fcf14c239fa4c1542559f8c938fb574e48104fChristoph Bumiller insn->sType == TYPE_S32) { 1192a6fcf14c239fa4c1542559f8c938fb574e48104fChristoph Bumiller insn = insn->getSrc(0)->getInsn(); 1193a6fcf14c239fa4c1542559f8c938fb574e48104fChristoph Bumiller if (!insn || insn->op != OP_ABS || insn->sType != TYPE_S32 || 1194a6fcf14c239fa4c1542559f8c938fb574e48104fChristoph Bumiller insn->src(0).mod) 1195a6fcf14c239fa4c1542559f8c938fb574e48104fChristoph Bumiller return; 1196a6fcf14c239fa4c1542559f8c938fb574e48104fChristoph Bumiller insn = insn->getSrc(0)->getInsn(); 1197a6fcf14c239fa4c1542559f8c938fb574e48104fChristoph Bumiller if (!insn || insn->op != OP_SET || insn->dType != TYPE_U32) 1198a6fcf14c239fa4c1542559f8c938fb574e48104fChristoph Bumiller return; 1199a6fcf14c239fa4c1542559f8c938fb574e48104fChristoph Bumiller } else 1200a6fcf14c239fa4c1542559f8c938fb574e48104fChristoph Bumiller if (!insn || insn->op != OP_SET || insn->dType != TYPE_F32) { 120157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 1202a6fcf14c239fa4c1542559f8c938fb574e48104fChristoph Bumiller } 120357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 1204a05e6a3fa28168d58a13cfb07f7a664e84b925aeFrancisco Jerez Instruction *bset = cloneShallow(func, insn); 120557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bset->dType = TYPE_U32; 120657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bset->setDef(0, cvt->getDef(0)); 120757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller cvt->bb->insertAfter(cvt, bset); 120857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, cvt); 120957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 121057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 121157594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 121257594065c30feec9376be9b2132659f7d87362eeChristoph BumillerAlgebraicOpt::visit(BasicBlock *bb) 121357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 121457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *next; 121557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (Instruction *i = bb->getEntry(); i; i = next) { 121657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next = i->next; 121757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (i->op) { 12181f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller case OP_ABS: 12191f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller handleABS(i); 12201f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller break; 122157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_ADD: 122257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller handleADD(i); 122357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 122457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_RCP: 122557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller handleRCP(i); 122657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 122757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_MIN: 122857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_MAX: 122957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller handleMINMAX(i); 123057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 123157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_SLCT: 123257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller handleSLCT(i); 123357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 123457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_AND: 123557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_OR: 123657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_XOR: 123757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller handleLOGOP(i); 123857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 123957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_CVT: 124057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller handleCVT(i); 124157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 124257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 124357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 124457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 124557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 124657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 124757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 124857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 124957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 125057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// ============================================================================= 125157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 125257594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerstatic inline void 125357594065c30feec9376be9b2132659f7d87362eeChristoph BumillerupdateLdStOffset(Instruction *ldst, int32_t offset, Function *fn) 125457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 125557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (offset != ldst->getSrc(0)->reg.data.offset) { 125657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ldst->getSrc(0)->refCount() > 1) 1257a05e6a3fa28168d58a13cfb07f7a664e84b925aeFrancisco Jerez ldst->setSrc(0, cloneShallow(fn, ldst->getSrc(0))); 125857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ldst->getSrc(0)->reg.data.offset = offset; 125957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 126057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 126157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 126257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// Combine loads and stores, forward stores to loads where possible. 126357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerclass MemoryOpt : public Pass 126457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 126557594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerprivate: 126657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller class Record 126757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller { 126857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller public: 126957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Record *next; 127057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *insn; 127157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller const Value *rel[2]; 127257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller const Value *base; 127357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int32_t offset; 127457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int8_t fileIndex; 127557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller uint8_t size; 127657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bool locked; 127757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Record *prev; 127857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 127957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bool overlaps(const Instruction *ldst) const; 128057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 128157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller inline void link(Record **); 128257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller inline void unlink(Record **); 128357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller inline void set(const Instruction *ldst); 128457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller }; 128557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 128657594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerpublic: 128757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller MemoryOpt(); 128857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 128957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Record *loads[DATA_FILE_COUNT]; 129057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Record *stores[DATA_FILE_COUNT]; 129157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 129257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller MemoryPool recordPool; 129357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 129457594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerprivate: 129557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller virtual bool visit(BasicBlock *); 129657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bool runOpt(BasicBlock *); 129757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 129857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Record **getList(const Instruction *); 129957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 130057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Record *findRecord(const Instruction *, bool load, bool& isAdjacent) const; 130157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 130257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // merge @insn into load/store instruction from @rec 130357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bool combineLd(Record *rec, Instruction *ld); 130457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bool combineSt(Record *rec, Instruction *st); 130557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 130657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bool replaceLdFromLd(Instruction *ld, Record *ldRec); 130757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bool replaceLdFromSt(Instruction *ld, Record *stRec); 130857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bool replaceStFromSt(Instruction *restrict st, Record *stRec); 130957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 131057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller void addRecord(Instruction *ldst); 131157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller void purgeRecords(Instruction *const st, DataFile); 131257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller void lockStores(Instruction *const ld); 131357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller void reset(); 131457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 131557594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerprivate: 131657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Record *prevRecord; 131757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}; 131857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 131957594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::MemoryOpt() : recordPool(sizeof(MemoryOpt::Record), 6) 132057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 132157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (int i = 0; i < DATA_FILE_COUNT; ++i) { 132257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller loads[i] = NULL; 132357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller stores[i] = NULL; 132457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 132557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller prevRecord = NULL; 132657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 132757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 132857594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 132957594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::reset() 133057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 133157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (unsigned int i = 0; i < DATA_FILE_COUNT; ++i) { 133257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Record *it, *next; 133357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (it = loads[i]; it; it = next) { 133457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next = it->next; 133557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller recordPool.release(it); 133657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 133757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller loads[i] = NULL; 133857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (it = stores[i]; it; it = next) { 133957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next = it->next; 134057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller recordPool.release(it); 134157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 134257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller stores[i] = NULL; 134357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 134457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 134557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 134657594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 134757594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::combineLd(Record *rec, Instruction *ld) 134857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 134957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int32_t offRc = rec->offset; 135057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int32_t offLd = ld->getSrc(0)->reg.data.offset; 135157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int sizeRc = rec->size; 135257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int sizeLd = typeSizeof(ld->dType); 135357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int size = sizeRc + sizeLd; 135457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int d, j; 135557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 1356286abcb51ec2c27970e901ed815a814b3f0bebf6Christoph Bumiller if (!prog->getTarget()-> 1357286abcb51ec2c27970e901ed815a814b3f0bebf6Christoph Bumiller isAccessSupported(ld->getSrc(0)->reg.file, typeOfSize(size))) 135857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 135957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // no unaligned loads 136057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (((size == 0x8) && (MIN2(offLd, offRc) & 0x7)) || 136157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ((size == 0xc) && (MIN2(offLd, offRc) & 0xf))) 136257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 136357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 136457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(sizeRc + sizeLd <= 16 && offRc != offLd); 136557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 136657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (j = 0; sizeRc; sizeRc -= rec->insn->getDef(j)->reg.size, ++j); 136757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 136857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (offLd < offRc) { 136957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int sz; 137057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (sz = 0, d = 0; sz < sizeLd; sz += ld->getDef(d)->reg.size, ++d); 137157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // d: nr of definitions in ld 137257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // j: nr of definitions in rec->insn, move: 137357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (d = d + j - 1; j > 0; --j, --d) 137457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rec->insn->setDef(d, rec->insn->getDef(j - 1)); 137557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 137657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (rec->insn->getSrc(0)->refCount() > 1) 1377a05e6a3fa28168d58a13cfb07f7a664e84b925aeFrancisco Jerez rec->insn->setSrc(0, cloneShallow(func, rec->insn->getSrc(0))); 137857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rec->offset = rec->insn->getSrc(0)->reg.data.offset = offLd; 137957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 138057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller d = 0; 138157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 138257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller d = j; 138357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 138457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // move definitions of @ld to @rec->insn 138557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (j = 0; sizeLd; ++j, ++d) { 138657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller sizeLd -= ld->getDef(j)->reg.size; 138757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rec->insn->setDef(d, ld->getDef(j)); 138857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 138957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 139057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rec->size = size; 139190f0fac65524fbc4e2f2d396d20d9808e4a0a95cFrancisco Jerez rec->insn->getSrc(0)->reg.size = size; 139257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rec->insn->setType(typeOfSize(size)); 139357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 139457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, ld); 139557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 139657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 139757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 139857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 139957594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 140057594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::combineSt(Record *rec, Instruction *st) 140157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 140257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int32_t offRc = rec->offset; 140357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int32_t offSt = st->getSrc(0)->reg.data.offset; 140457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int sizeRc = rec->size; 140557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int sizeSt = typeSizeof(st->dType); 140657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int s = sizeSt / 4; 140757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int size = sizeRc + sizeSt; 140857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int j, k; 140957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *src[4]; // no modifiers in ValueRef allowed for st 141057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *extra[3]; 141157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 1412286abcb51ec2c27970e901ed815a814b3f0bebf6Christoph Bumiller if (!prog->getTarget()-> 1413286abcb51ec2c27970e901ed815a814b3f0bebf6Christoph Bumiller isAccessSupported(st->getSrc(0)->reg.file, typeOfSize(size))) 141457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 141557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (size == 8 && MIN2(offRc, offSt) & 0x7) 141657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 141757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 141857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller st->takeExtraSources(0, extra); // save predicate and indirect address 141957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 142057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (offRc < offSt) { 142157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // save values from @st 142257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (s = 0; sizeSt; ++s) { 142357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller sizeSt -= st->getSrc(s + 1)->reg.size; 142457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller src[s] = st->getSrc(s + 1); 142557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 142657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // set record's values as low sources of @st 142757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (j = 1; sizeRc; ++j) { 1428ef7f9f68cfe71b1f812e59abc644a54a0b80dd06Christoph Bumiller sizeRc -= rec->insn->getSrc(j)->reg.size; 142957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller st->setSrc(j, rec->insn->getSrc(j)); 143057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 143157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // set saved values as high sources of @st 143257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (k = j, j = 0; j < s; ++j) 143357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller st->setSrc(k++, src[j]); 143457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 143557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller updateLdStOffset(st, offRc, func); 143657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 143757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (j = 1; sizeSt; ++j) 143857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller sizeSt -= st->getSrc(j)->reg.size; 143957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (s = 1; sizeRc; ++j, ++s) { 144057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller sizeRc -= rec->insn->getSrc(s)->reg.size; 144157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller st->setSrc(j, rec->insn->getSrc(s)); 144257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 144357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rec->offset = offSt; 144457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 144557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller st->putExtraSources(0, extra); // restore pointer and predicate 144657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 144757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, rec->insn); 144857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rec->insn = st; 144957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rec->size = size; 145090f0fac65524fbc4e2f2d396d20d9808e4a0a95cFrancisco Jerez rec->insn->getSrc(0)->reg.size = size; 145157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rec->insn->setType(typeOfSize(size)); 145257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 145357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 145457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 145557594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 145657594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::Record::set(const Instruction *ldst) 145757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 145857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller const Symbol *mem = ldst->getSrc(0)->asSym(); 145957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller fileIndex = mem->reg.fileIndex; 146057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rel[0] = ldst->getIndirect(0, 0); 146157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rel[1] = ldst->getIndirect(0, 1); 146257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller offset = mem->reg.data.offset; 146357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller base = mem->getBase(); 146457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller size = typeSizeof(ldst->sType); 146557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 146657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 146757594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 146857594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::Record::link(Record **list) 146957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 147057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next = *list; 147157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (next) 147257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next->prev = this; 147357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller prev = NULL; 147457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller *list = this; 147557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 147657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 147757594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 147857594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::Record::unlink(Record **list) 147957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 148057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (next) 148157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next->prev = prev; 148257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (prev) 148357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller prev->next = next; 148457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller else 148557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller *list = next; 148657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 148757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 148857594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::Record ** 148957594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::getList(const Instruction *insn) 149057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 149157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (insn->op == OP_LOAD || insn->op == OP_VFETCH) 14929362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller return &loads[insn->src(0).getFile()]; 14939362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller return &stores[insn->src(0).getFile()]; 149457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 149557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 149657594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 149757594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::addRecord(Instruction *i) 149857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 149957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Record **list = getList(i); 150057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Record *it = reinterpret_cast<Record *>(recordPool.allocate()); 150157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 150257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller it->link(list); 150357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller it->set(i); 150457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller it->insn = i; 150557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller it->locked = false; 150657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 150757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 150857594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::Record * 150957594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::findRecord(const Instruction *insn, bool load, bool& isAdj) const 151057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 151157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller const Symbol *sym = insn->getSrc(0)->asSym(); 151257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller const int size = typeSizeof(insn->sType); 151357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Record *rec = NULL; 151457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Record *it = load ? loads[sym->reg.file] : stores[sym->reg.file]; 151557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 151657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (; it; it = it->next) { 151757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (it->locked && insn->op != OP_LOAD) 151857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller continue; 151957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if ((it->offset >> 4) != (sym->reg.data.offset >> 4) || 152057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller it->rel[0] != insn->getIndirect(0, 0) || 152157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller it->fileIndex != sym->reg.fileIndex || 152257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller it->rel[1] != insn->getIndirect(0, 1)) 152357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller continue; 152457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 152557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (it->offset < sym->reg.data.offset) { 152657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (it->offset + it->size >= sym->reg.data.offset) { 152757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller isAdj = (it->offset + it->size == sym->reg.data.offset); 152857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!isAdj) 152957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return it; 153057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!(it->offset & 0x7)) 153157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rec = it; 153257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 153357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 153457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller isAdj = it->offset != sym->reg.data.offset; 153557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (size <= it->size && !isAdj) 153657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return it; 153757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller else 153857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!(sym->reg.data.offset & 0x7)) 153957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (it->offset - size <= sym->reg.data.offset) 154057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rec = it; 154157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 154257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 154357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return rec; 154457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 154557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 154657594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 154757594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::replaceLdFromSt(Instruction *ld, Record *rec) 154857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 154957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *st = rec->insn; 155057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int32_t offSt = rec->offset; 155157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int32_t offLd = ld->getSrc(0)->reg.data.offset; 155257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int d, s; 155357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 155457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (s = 1; offSt != offLd && st->srcExists(s); ++s) 155557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller offSt += st->getSrc(s)->reg.size; 155657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (offSt != offLd) 155757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 155857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 155957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (d = 0; ld->defExists(d) && st->srcExists(s); ++d, ++s) { 156057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ld->getDef(d)->reg.size != st->getSrc(s)->reg.size) 156157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 156257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (st->getSrc(s)->reg.file != FILE_GPR) 156357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 156414d5f975a65c57830077dabf2f95261afbc51773Francisco Jerez ld->def(d).replace(st->src(s), false); 156557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 156657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ld->bb->remove(ld); 156757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 156857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 156957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 157057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 157157594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::replaceLdFromLd(Instruction *ldE, Record *rec) 157257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 157357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *ldR = rec->insn; 157457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int32_t offR = rec->offset; 157557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int32_t offE = ldE->getSrc(0)->reg.data.offset; 157657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int dR, dE; 157757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 157857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(offR <= offE); 157957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (dR = 0; offR < offE && ldR->defExists(dR); ++dR) 158057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller offR += ldR->getDef(dR)->reg.size; 158157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (offR != offE) 158257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 158357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 158457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (dE = 0; ldE->defExists(dE) && ldR->defExists(dR); ++dE, ++dR) { 158557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ldE->getDef(dE)->reg.size != ldR->getDef(dR)->reg.size) 158657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 15879362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller ldE->def(dE).replace(ldR->getDef(dR), false); 158857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 158957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 159057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, ldE); 159157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 159257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 159357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 159457594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 159557594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::replaceStFromSt(Instruction *restrict st, Record *rec) 159657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 159757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller const Instruction *const ri = rec->insn; 159857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *extra[3]; 159957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 160057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int32_t offS = st->getSrc(0)->reg.data.offset; 160157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int32_t offR = rec->offset; 160257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int32_t endS = offS + typeSizeof(st->dType); 160357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int32_t endR = offR + typeSizeof(ri->dType); 160457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 160557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rec->size = MAX2(endS, endR) - MIN2(offS, offR); 160657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 160757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller st->takeExtraSources(0, extra); 160857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 160957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (offR < offS) { 1610541bb2e33f89b07bcbea2e27275df858760c8ec8Brian Paul Value *vals[10]; 161157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int s, n; 161257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int k = 0; 161357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // get non-replaced sources of ri 161457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (s = 1; offR < offS; offR += ri->getSrc(s)->reg.size, ++s) 161557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller vals[k++] = ri->getSrc(s); 161657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller n = s; 161757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // get replaced sources of st 161857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (s = 1; st->srcExists(s); offS += st->getSrc(s)->reg.size, ++s) 161957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller vals[k++] = st->getSrc(s); 162057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // skip replaced sources of ri 162157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (s = n; offR < endS; offR += ri->getSrc(s)->reg.size, ++s); 162257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // get non-replaced sources after values covered by st 162357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (; offR < endR; offR += ri->getSrc(s)->reg.size, ++s) 162457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller vals[k++] = ri->getSrc(s); 162551327a2df283da9a77c6e537751c6a45baed6951Christoph Bumiller assert((unsigned int)k <= Elements(vals)); 162657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (s = 0; s < k; ++s) 162757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller st->setSrc(s + 1, vals[s]); 162857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller st->setSrc(0, ri->getSrc(0)); 162957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 163057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (endR > endS) { 163157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int j, s; 163257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (j = 1; offR < endS; offR += ri->getSrc(j++)->reg.size); 163357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (s = 1; offS < endS; offS += st->getSrc(s++)->reg.size); 163457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (; offR < endR; offR += ri->getSrc(j++)->reg.size) 163557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller st->setSrc(s++, ri->getSrc(j)); 163657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 163757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller st->putExtraSources(0, extra); 163857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 163957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, rec->insn); 164057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 164157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rec->insn = st; 164257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rec->offset = st->getSrc(0)->reg.data.offset; 164357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 164457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller st->setType(typeOfSize(rec->size)); 164557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 164657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 164757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 164857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 164957594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 165057594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::Record::overlaps(const Instruction *ldst) const 165157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 165257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Record that; 165357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller that.set(ldst); 165457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 165557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (this->fileIndex != that.fileIndex) 165657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 165757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 165857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (this->rel[0] || that.rel[0]) 165957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return this->base == that.base; 166057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return 166157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller (this->offset < that.offset + that.size) && 166257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller (this->offset + this->size > that.offset); 166357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 166457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 166557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// We must not eliminate stores that affect the result of @ld if 166657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// we find later stores to the same location, and we may no longer 166757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// merge them with later stores. 166857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// The stored value can, however, still be used to determine the value 166957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// returned by future loads. 167057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 167157594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::lockStores(Instruction *const ld) 167257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 16739362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller for (Record *r = stores[ld->src(0).getFile()]; r; r = r->next) 167457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!r->locked && r->overlaps(ld)) 167557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller r->locked = true; 167657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 167757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 167857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// Prior loads from the location of @st are no longer valid. 167957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// Stores to the location of @st may no longer be used to derive 168057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// the value at it nor be coalesced into later stores. 168157594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 168257594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::purgeRecords(Instruction *const st, DataFile f) 168357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 168457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (st) 16859362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller f = st->src(0).getFile(); 168657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 168757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (Record *r = loads[f]; r; r = r->next) 168857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!st || r->overlaps(st)) 168957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller r->unlink(&loads[f]); 169057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 169157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (Record *r = stores[f]; r; r = r->next) 169257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!st || r->overlaps(st)) 169357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller r->unlink(&stores[f]); 169457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 169557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 169657594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 169757594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::visit(BasicBlock *bb) 169857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 169957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bool ret = runOpt(bb); 170057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // Run again, one pass won't combine 4 32 bit ld/st to a single 128 bit ld/st 170157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // where 96 bit memory operations are forbidden. 170257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ret) 170357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ret = runOpt(bb); 170457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return ret; 170557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 170657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 170757594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 170857594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::runOpt(BasicBlock *bb) 170957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 171057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *ldst, *next; 171157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Record *rec; 171257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bool isAdjacent = true; 171357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 171457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (ldst = bb->getEntry(); ldst; ldst = next) { 171557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bool keep = true; 171657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bool isLoad = true; 171757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next = ldst->next; 171857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 171957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ldst->op == OP_LOAD || ldst->op == OP_VFETCH) { 172057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ldst->isDead()) { 172157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // might have been produced by earlier optimization 172257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, ldst); 172357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller continue; 172457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 172557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 172657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ldst->op == OP_STORE || ldst->op == OP_EXPORT) { 172757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller isLoad = false; 172857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 172957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // TODO: maybe have all fixed ops act as barrier ? 173057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ldst->op == OP_CALL) { 173157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller purgeRecords(NULL, FILE_MEMORY_LOCAL); 173257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller purgeRecords(NULL, FILE_MEMORY_GLOBAL); 173357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller purgeRecords(NULL, FILE_MEMORY_SHARED); 173457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller purgeRecords(NULL, FILE_SHADER_OUTPUT); 173557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 173657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ldst->op == OP_EMIT || ldst->op == OP_RESTART) { 173757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller purgeRecords(NULL, FILE_SHADER_OUTPUT); 173857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 173957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller continue; 174057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 174157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ldst->getPredicate()) // TODO: handle predicated ld/st 174257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller continue; 174357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 174457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (isLoad) { 17459362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller DataFile file = ldst->src(0).getFile(); 174657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 174757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // if ld l[]/g[] look for previous store to eliminate the reload 174857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (file == FILE_MEMORY_GLOBAL || file == FILE_MEMORY_LOCAL) { 174957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // TODO: shared memory ? 175057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rec = findRecord(ldst, false, isAdjacent); 175157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (rec && !isAdjacent) 175257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller keep = !replaceLdFromSt(ldst, rec); 175357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 175457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 175557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // or look for ld from the same location and replace this one 175657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rec = keep ? findRecord(ldst, true, isAdjacent) : NULL; 175757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (rec) { 175857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!isAdjacent) 175957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller keep = !replaceLdFromLd(ldst, rec); 176057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller else 176157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // or combine a previous load with this one 176257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller keep = !combineLd(rec, ldst); 176357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 176457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (keep) 176557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller lockStores(ldst); 176657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 176757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rec = findRecord(ldst, false, isAdjacent); 176857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (rec) { 176957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!isAdjacent) 177057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller keep = !replaceStFromSt(ldst, rec); 177157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller else 177257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller keep = !combineSt(rec, ldst); 177357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 177457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (keep) 177557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller purgeRecords(ldst, DATA_FILE_COUNT); 177657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 177757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (keep) 177857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller addRecord(ldst); 177957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 178057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller reset(); 178157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 178257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 178357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 178457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 178557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// ============================================================================= 178657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 178757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// Turn control flow into predicated instructions (after register allocation !). 178857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// TODO: 178957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// Could move this to before register allocation on NVC0 and also handle nested 179057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// constructs. 179157594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerclass FlatteningPass : public Pass 179257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 179357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerprivate: 179457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller virtual bool visit(BasicBlock *); 179557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 179657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bool tryPredicateConditional(BasicBlock *); 179757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller void predicateInstructions(BasicBlock *, Value *pred, CondCode cc); 179857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller void tryPropagateBranch(BasicBlock *); 179957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller inline bool isConstantCondition(Value *pred); 180057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller inline bool mayPredicate(const Instruction *, const Value *pred) const; 180157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller inline void removeFlow(Instruction *); 180257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}; 180357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 180457594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 180557594065c30feec9376be9b2132659f7d87362eeChristoph BumillerFlatteningPass::isConstantCondition(Value *pred) 180657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 180757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *insn = pred->getUniqueInsn(); 180857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(insn); 180957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (insn->op != OP_SET || insn->srcExists(2)) 181057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 181157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 181257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (int s = 0; s < 2 && insn->srcExists(s); ++s) { 181357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *ld = insn->getSrc(s)->getUniqueInsn(); 181457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller DataFile file; 181557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ld) { 181657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ld->op != OP_MOV && ld->op != OP_LOAD) 181757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 18189362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller if (ld->src(0).isIndirect(0)) 181957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 18209362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller file = ld->src(0).getFile(); 182157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 18229362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller file = insn->src(s).getFile(); 182357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // catch $r63 on NVC0 182457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (file == FILE_GPR && insn->getSrc(s)->reg.data.id > prog->maxGPR) 182557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller file = FILE_IMMEDIATE; 182657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 182757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (file != FILE_IMMEDIATE && file != FILE_MEMORY_CONST) 182857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 182957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 183057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 183157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 183257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 183357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 183457594065c30feec9376be9b2132659f7d87362eeChristoph BumillerFlatteningPass::removeFlow(Instruction *insn) 183557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 183657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller FlowInstruction *term = insn ? insn->asFlow() : NULL; 183757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!term) 183857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 183957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Graph::Edge::Type ty = term->bb->cfg.outgoing().getType(); 184057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 184157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (term->op == OP_BRA) { 184257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // TODO: this might get more difficult when we get arbitrary BRAs 184357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ty == Graph::Edge::CROSS || ty == Graph::Edge::BACK) 184457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 184557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 184657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (term->op != OP_JOIN) 184757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 184857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 184957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *pred = term->getPredicate(); 185057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 1851d41f293bf014e08df3df4324cdc02de5ce49d5edChristoph Bumiller delete_Instruction(prog, term); 1852d41f293bf014e08df3df4324cdc02de5ce49d5edChristoph Bumiller 185357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (pred && pred->refCount() == 0) { 185457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *pSet = pred->getUniqueInsn(); 185557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller pred->join->reg.data.id = -1; // deallocate 185657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (pSet->isDead()) 185757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, pSet); 185857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 185957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 186057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 186157594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 186257594065c30feec9376be9b2132659f7d87362eeChristoph BumillerFlatteningPass::predicateInstructions(BasicBlock *bb, Value *pred, CondCode cc) 186357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 186457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (Instruction *i = bb->getEntry(); i; i = i->next) { 186557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->isNop()) 186657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller continue; 186757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(!i->getPredicate()); 186857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setPredicate(cc, pred); 186957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 187057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller removeFlow(bb->getExit()); 187157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 187257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 187357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 187457594065c30feec9376be9b2132659f7d87362eeChristoph BumillerFlatteningPass::mayPredicate(const Instruction *insn, const Value *pred) const 187557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 187657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (insn->isPseudo()) 187757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 187857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // TODO: calls where we don't know which registers are modified 187957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 188057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!prog->getTarget()->mayPredicate(insn, pred)) 188157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 188257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (int d = 0; insn->defExists(d); ++d) 188357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (insn->getDef(d)->equals(pred)) 188457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 188557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 188657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 188757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 188857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// If we conditionally skip over or to a branch instruction, replace it. 188957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// NOTE: We do not update the CFG anymore here ! 189057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 189157594065c30feec9376be9b2132659f7d87362eeChristoph BumillerFlatteningPass::tryPropagateBranch(BasicBlock *bb) 189257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 189357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller BasicBlock *bf = NULL; 189457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller unsigned int i; 189557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 189657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (bb->cfg.outgoingCount() != 2) 189757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 189857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!bb->getExit() || bb->getExit()->op != OP_BRA) 189957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 190057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Graph::EdgeIterator ei = bb->cfg.outgoing(); 190157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 190257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (i = 0; !ei.end(); ++i, ei.next()) { 190357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bf = BasicBlock::get(ei.getNode()); 190457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (bf->getInsnCount() == 1) 190557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 190657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 190757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ei.end() || !bf->getExit()) 190857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 190957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller FlowInstruction *bra = bb->getExit()->asFlow(); 191057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller FlowInstruction *rep = bf->getExit()->asFlow(); 191157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 191257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (rep->getPredicate()) 191357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 191457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (rep->op != OP_BRA && 191557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rep->op != OP_JOIN && 191657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rep->op != OP_EXIT) 191757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 191857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 191957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bra->op = rep->op; 192057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bra->target.bb = rep->target.bb; 192157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i) // 2nd out block means branch not taken 192257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bra->cc = inverseCondCode(bra->cc); 192357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bf->remove(rep); 192457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 192557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 192657594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 192757594065c30feec9376be9b2132659f7d87362eeChristoph BumillerFlatteningPass::visit(BasicBlock *bb) 192857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 192957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (tryPredicateConditional(bb)) 193057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 193157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 193257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // try to attach join to previous instruction 193357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *insn = bb->getExit(); 193457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (insn && insn->op == OP_JOIN && !insn->getPredicate()) { 193557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller insn = insn->prev; 193600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller if (insn && !insn->getPredicate() && 193700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller !insn->asFlow() && 193800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller insn->op != OP_TEXBAR && 19390d818cdacce0299fabe4ac2aa735247c651fdcfaChristoph Bumiller !isTextureOp(insn->op) && // probably just nve4 1940d46f969b84a405dff6bbc647a7addd0902adc1e4Christoph Bumiller insn->op != OP_LINTERP && // probably just nve4 1941d46f969b84a405dff6bbc647a7addd0902adc1e4Christoph Bumiller insn->op != OP_PINTERP && // probably just nve4 194279eed0d2246e8e7be505784af0078507c712a02cChristoph Bumiller ((insn->op != OP_LOAD && insn->op != OP_STORE) || 194379eed0d2246e8e7be505784af0078507c712a02cChristoph Bumiller typeSizeof(insn->dType) <= 4) && 194400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller !insn->isNop()) { 194557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller insn->join = 1; 194657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bb->remove(bb->getExit()); 194757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 194857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 194957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 195057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 195157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller tryPropagateBranch(bb); 195257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 195357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 195457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 195557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 195657594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 195757594065c30feec9376be9b2132659f7d87362eeChristoph BumillerFlatteningPass::tryPredicateConditional(BasicBlock *bb) 195857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 195957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller BasicBlock *bL = NULL, *bR = NULL; 196057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller unsigned int nL = 0, nR = 0, limit = 12; 196157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *insn; 196257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller unsigned int mask; 196357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 196457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mask = bb->initiatesSimpleConditional(); 196557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!mask) 196657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 196757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 196857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(bb->getExit()); 196957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *pred = bb->getExit()->getPredicate(); 197057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(pred); 197157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 197257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (isConstantCondition(pred)) 197357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller limit = 4; 197457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 197557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Graph::EdgeIterator ei = bb->cfg.outgoing(); 197657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 197757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (mask & 1) { 197857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bL = BasicBlock::get(ei.getNode()); 197957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (insn = bL->getEntry(); insn; insn = insn->next, ++nL) 198057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!mayPredicate(insn, pred)) 198157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 198257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (nL > limit) 198357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; // too long, do a real branch 198457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 198557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ei.next(); 198657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 198757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (mask & 2) { 198857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bR = BasicBlock::get(ei.getNode()); 198957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (insn = bR->getEntry(); insn; insn = insn->next, ++nR) 199057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!mayPredicate(insn, pred)) 199157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 199257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (nR > limit) 199357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; // too long, do a real branch 199457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 199557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 199657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (bL) 199757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller predicateInstructions(bL, pred, bb->getExit()->cc); 199857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (bR) 199957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller predicateInstructions(bR, pred, inverseCondCode(bb->getExit()->cc)); 200057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 200157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (bb->joinAt) { 200257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bb->remove(bb->joinAt); 200357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bb->joinAt = NULL; 200457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 200557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller removeFlow(bb->getExit()); // delete the branch/join at the fork point 200657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 200757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // remove potential join operations at the end of the conditional 200857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (prog->getTarget()->joinAnterior) { 200957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bb = BasicBlock::get((bL ? bL : bR)->cfg.outgoing().getNode()); 201057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (bb->getEntry() && bb->getEntry()->op == OP_JOIN) 201157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller removeFlow(bb->getEntry()); 201257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 201357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 201457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 201557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 201657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 201757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// ============================================================================= 201857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 201957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// Common subexpression elimination. Stupid O^2 implementation. 202057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerclass LocalCSE : public Pass 202157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 202257594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerprivate: 202357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller virtual bool visit(BasicBlock *); 202457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 202557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller inline bool tryReplace(Instruction **, Instruction *); 202657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 202757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller DLList ops[OP_LAST + 1]; 202857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}; 202957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 203057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerclass GlobalCSE : public Pass 203157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 203257594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerprivate: 203357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller virtual bool visit(BasicBlock *); 203457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}; 203557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 203657594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 203757594065c30feec9376be9b2132659f7d87362eeChristoph BumillerInstruction::isActionEqual(const Instruction *that) const 203857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 203957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (this->op != that->op || 204057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller this->dType != that->dType || 204157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller this->sType != that->sType) 204257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 204357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (this->cc != that->cc) 204457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 204557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 204657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (this->asTex()) { 204757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (memcmp(&this->asTex()->tex, 204857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller &that->asTex()->tex, 204957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller sizeof(this->asTex()->tex))) 205057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 205157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 205257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (this->asCmp()) { 205357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (this->asCmp()->setCond != that->asCmp()->setCond) 205457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 205557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 205657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (this->asFlow()) { 205757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 205857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 205957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (this->atomic != that->atomic || 206057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller this->ipa != that->ipa || 206157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller this->lanes != that->lanes || 206257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller this->perPatch != that->perPatch) 206357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 206457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (this->postFactor != that->postFactor) 206557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 206657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 206757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 206857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (this->subOp != that->subOp || 206957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller this->saturate != that->saturate || 207057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller this->rnd != that->rnd || 207157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller this->ftz != that->ftz || 207257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller this->dnz != that->dnz || 207357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller this->cache != that->cache) 207457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 207557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 207657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 207757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 207857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 207957594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 208057594065c30feec9376be9b2132659f7d87362eeChristoph BumillerInstruction::isResultEqual(const Instruction *that) const 208157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 208257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller unsigned int d, s; 208357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 208457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // NOTE: location of discard only affects tex with liveOnly and quadops 208557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!this->defExists(0) && this->op != OP_DISCARD) 208657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 208757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 208857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!isActionEqual(that)) 208957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 209057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 209157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (this->predSrc != that->predSrc) 209257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 209357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 209457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (d = 0; this->defExists(d); ++d) { 209557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!that->defExists(d) || 209657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller !this->getDef(d)->equals(that->getDef(d), false)) 209757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 209857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 209957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (that->defExists(d)) 210057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 210157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 210257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (s = 0; this->srcExists(s); ++s) { 210357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!that->srcExists(s)) 210457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 21059362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller if (this->src(s).mod != that->src(s).mod) 210657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 210757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!this->getSrc(s)->equals(that->getSrc(s), true)) 210857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 210957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 211057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (that->srcExists(s)) 211157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 211257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 211357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (op == OP_LOAD || op == OP_VFETCH) { 21149362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller switch (src(0).getFile()) { 211557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case FILE_MEMORY_CONST: 211657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case FILE_SHADER_INPUT: 211757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 211857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 211957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 212057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 212157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 212257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 212357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 212457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 212557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 212657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// pull through common expressions from different in-blocks 212757594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 212857594065c30feec9376be9b2132659f7d87362eeChristoph BumillerGlobalCSE::visit(BasicBlock *bb) 212957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 213057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *phi, *next, *ik; 213157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int s; 213257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 2133ca1fc2b86400e3fc9dd0517863e22721b5e91c77Christoph Bumiller // TODO: maybe do this with OP_UNION, too 2134ca1fc2b86400e3fc9dd0517863e22721b5e91c77Christoph Bumiller 213557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = next) { 213657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next = phi->next; 213757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (phi->getSrc(0)->refCount() > 1) 213857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller continue; 213957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ik = phi->getSrc(0)->getInsn(); 2140bb9c15bac42cf323ef267095b33031ffc1d4fba4Christoph Bumiller if (!ik) 2141bb9c15bac42cf323ef267095b33031ffc1d4fba4Christoph Bumiller continue; // probably a function input 214257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (s = 1; phi->srcExists(s); ++s) { 214357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (phi->getSrc(s)->refCount() > 1) 214457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 21451e957941735fae514de658c836b8bdaf6c66bc06Francisco Jerez if (!phi->getSrc(s)->getInsn() || 21461e957941735fae514de658c836b8bdaf6c66bc06Francisco Jerez !phi->getSrc(s)->getInsn()->isResultEqual(ik)) 214757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 214857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 214957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!phi->srcExists(s)) { 215057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *entry = bb->getEntry(); 215157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ik->bb->remove(ik); 215257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!entry || entry->op != OP_JOIN) 215357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bb->insertHead(ik); 215457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller else 215557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bb->insertAfter(entry, ik); 215657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ik->setDef(0, phi->getDef(0)); 215757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, phi); 215857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 215957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 216057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 216157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 216257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 216357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 216457594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 216557594065c30feec9376be9b2132659f7d87362eeChristoph BumillerLocalCSE::tryReplace(Instruction **ptr, Instruction *i) 216657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 216757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *old = *ptr; 2168ca1fc2b86400e3fc9dd0517863e22721b5e91c77Christoph Bumiller 2169ca1fc2b86400e3fc9dd0517863e22721b5e91c77Christoph Bumiller // TODO: maybe relax this later (causes trouble with OP_UNION) 2170ca1fc2b86400e3fc9dd0517863e22721b5e91c77Christoph Bumiller if (i->isPredicated()) 2171ca1fc2b86400e3fc9dd0517863e22721b5e91c77Christoph Bumiller return false; 2172ca1fc2b86400e3fc9dd0517863e22721b5e91c77Christoph Bumiller 217357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!old->isResultEqual(i)) 217457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 2175ca1fc2b86400e3fc9dd0517863e22721b5e91c77Christoph Bumiller 217657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (int d = 0; old->defExists(d); ++d) 21779362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller old->def(d).replace(i->getDef(d), false); 217857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, old); 217957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller *ptr = NULL; 218057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 218157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 218257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 218357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 218457594065c30feec9376be9b2132659f7d87362eeChristoph BumillerLocalCSE::visit(BasicBlock *bb) 218557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 218657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller unsigned int replaced; 218757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 218857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller do { 218957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *ir, *next; 219057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 219157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller replaced = 0; 219257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 219357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // will need to know the order of instructions 219457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int serial = 0; 219515ce0f76e2e014374a292550505f58da88333fb7Christoph Bumiller for (ir = bb->getFirst(); ir; ir = ir->next) 219657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ir->serial = serial++; 219757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 219857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (ir = bb->getEntry(); ir; ir = next) { 219957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int s; 220057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *src = NULL; 220157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 220257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next = ir->next; 220357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 220457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ir->fixed) { 220557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ops[ir->op].insert(ir); 220657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller continue; 220757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 220857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 220957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (s = 0; ir->srcExists(s); ++s) 221057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ir->getSrc(s)->asLValue()) 221157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!src || ir->getSrc(s)->refCount() < src->refCount()) 221257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller src = ir->getSrc(s); 221357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 221457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (src) { 22158cc2eca5df0116aa7fb8233a9ab6ad1c9e4203cdFrancisco Jerez for (Value::UseIterator it = src->uses.begin(); 22168cc2eca5df0116aa7fb8233a9ab6ad1c9e4203cdFrancisco Jerez it != src->uses.end(); ++it) { 22178cc2eca5df0116aa7fb8233a9ab6ad1c9e4203cdFrancisco Jerez Instruction *ik = (*it)->getInsn(); 2218be161e66d6108e56d40c116a4ee12668d6b8d960Christoph Bumiller if (ik && ik->bb == ir->bb && ik->serial < ir->serial) 221957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (tryReplace(&ir, ik)) 222057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 222157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 222257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 222357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller DLLIST_FOR_EACH(&ops[ir->op], iter) 222457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller { 222557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *ik = reinterpret_cast<Instruction *>(iter.get()); 222657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (tryReplace(&ir, ik)) 222757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 222857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 222957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 223057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 223157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ir) 223257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ops[ir->op].insert(ir); 223357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller else 223457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ++replaced; 223557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 223657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (unsigned int i = 0; i <= OP_LAST; ++i) 223757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ops[i].clear(); 223857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 223957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } while (replaced); 224057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 224157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 224257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 224357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 224457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// ============================================================================= 224557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 224657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// Remove computations of unused values. 224757594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerclass DeadCodeElim : public Pass 224857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 224957594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerpublic: 225057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bool buryAll(Program *); 225157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 225257594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerprivate: 225357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller virtual bool visit(BasicBlock *); 225457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 225557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller void checkSplitLoad(Instruction *ld); // for partially dead loads 225657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 225757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller unsigned int deadCount; 225857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}; 225957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 226057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 226157594065c30feec9376be9b2132659f7d87362eeChristoph BumillerDeadCodeElim::buryAll(Program *prog) 226257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 226357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller do { 226457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller deadCount = 0; 226557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!this->run(prog, false, false)) 226657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 226757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } while (deadCount); 226857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 226957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 227057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 227157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 227257594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 227357594065c30feec9376be9b2132659f7d87362eeChristoph BumillerDeadCodeElim::visit(BasicBlock *bb) 227457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 227557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *next; 227657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 227757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (Instruction *i = bb->getFirst(); i; i = next) { 227857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next = i->next; 227957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->isDead()) { 228057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ++deadCount; 228157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, i); 228257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 228357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->defExists(1) && (i->op == OP_VFETCH || i->op == OP_LOAD)) { 228457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller checkSplitLoad(i); 228557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 228657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 228757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 228857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 228957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 229057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 229157594065c30feec9376be9b2132659f7d87362eeChristoph BumillerDeadCodeElim::checkSplitLoad(Instruction *ld1) 229257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 229357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *ld2 = NULL; // can get at most 2 loads 229457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *def1[4]; 229557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *def2[4]; 229657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int32_t addr1, addr2; 229757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int32_t size1, size2; 229857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int d, n1, n2; 229957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller uint32_t mask = 0xffffffff; 230057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 230157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (d = 0; ld1->defExists(d); ++d) 230257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!ld1->getDef(d)->refCount() && ld1->getDef(d)->reg.data.id < 0) 230357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mask &= ~(1 << d); 230457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (mask == 0xffffffff) 230557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 230657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 230757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller addr1 = ld1->getSrc(0)->reg.data.offset; 230857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller n1 = n2 = 0; 230957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller size1 = size2 = 0; 231057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (d = 0; ld1->defExists(d); ++d) { 231157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (mask & (1 << d)) { 231257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (size1 && (addr1 & 0x7)) 231357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 231457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller def1[n1] = ld1->getDef(d); 231557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller size1 += def1[n1++]->reg.size; 231657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 231757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!n1) { 231857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller addr1 += ld1->getDef(d)->reg.size; 231957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 232057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 232157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 232257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 232357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (addr2 = addr1 + size1; ld1->defExists(d); ++d) { 232457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (mask & (1 << d)) { 232557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller def2[n2] = ld1->getDef(d); 232657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller size2 += def2[n2++]->reg.size; 232757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 232857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(!n2); 232957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller addr2 += ld1->getDef(d)->reg.size; 233057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 233157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 233257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 233357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller updateLdStOffset(ld1, addr1, func); 233457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ld1->setType(typeOfSize(size1)); 233557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (d = 0; d < 4; ++d) 233657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ld1->setDef(d, (d < n1) ? def1[d] : NULL); 233757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 233857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!n2) 233957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 234057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 2341a05e6a3fa28168d58a13cfb07f7a664e84b925aeFrancisco Jerez ld2 = cloneShallow(func, ld1); 234257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller updateLdStOffset(ld2, addr2, func); 234357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ld2->setType(typeOfSize(size2)); 234457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (d = 0; d < 4; ++d) 234557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ld2->setDef(d, (d < n2) ? def2[d] : NULL); 234657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 234757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ld1->bb->insertAfter(ld1, ld2); 234857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 234957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 235057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// ============================================================================= 235157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 235257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller#define RUN_PASS(l, n, f) \ 235357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (level >= (l)) { \ 235457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (dbgFlags & NV50_IR_DEBUG_VERBOSE) \ 235557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller INFO("PEEPHOLE: %s\n", #n); \ 235657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller n pass; \ 235757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!pass.f(this)) \ 235857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; \ 235957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 236057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 236157594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 236257594065c30feec9376be9b2132659f7d87362eeChristoph BumillerProgram::optimizeSSA(int level) 236357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 236457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller RUN_PASS(1, DeadCodeElim, buryAll); 236557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller RUN_PASS(1, CopyPropagation, run); 236657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller RUN_PASS(2, GlobalCSE, run); 236757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller RUN_PASS(1, LocalCSE, run); 236857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller RUN_PASS(2, AlgebraicOpt, run); 236957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller RUN_PASS(2, ModifierFolding, run); // before load propagation -> less checks 237057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller RUN_PASS(1, ConstantFolding, foldAll); 237157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller RUN_PASS(1, LoadPropagation, run); 237257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller RUN_PASS(2, MemoryOpt, run); 237357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller RUN_PASS(2, LocalCSE, run); 237457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller RUN_PASS(0, DeadCodeElim, buryAll); 2375ca1fc2b86400e3fc9dd0517863e22721b5e91c77Christoph Bumiller 237657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 237757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 237857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 237957594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 238057594065c30feec9376be9b2132659f7d87362eeChristoph BumillerProgram::optimizePostRA(int level) 238157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 238257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller RUN_PASS(2, FlatteningPass, run); 238357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 238457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 238557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 238657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 2387