nv50_ir_peephole.cpp revision 44e84d6f161e95d44d847440b3bc6d670c242cd7
1d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller/* 2d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * Copyright 2011 Christoph Bumiller 3d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * 4d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * Permission is hereby granted, free of charge, to any person obtaining a 5d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * copy of this software and associated documentation files (the "Software"), 6d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * to deal in the Software without restriction, including without limitation 7d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * and/or sell copies of the Software, and to permit persons to whom the 9d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * Software is furnished to do so, subject to the following conditions: 10d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * 11d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * The above copyright notice and this permission notice shall be included in 12d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * all copies or substantial portions of the Software. 13d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * 14d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 17d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 18d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF 19d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * SOFTWARE. 21d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller */ 2257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 2357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller#include "nv50_ir.h" 2457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller#include "nv50_ir_target.h" 2557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller#include "nv50_ir_build_util.h" 2657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 2757594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerextern "C" { 2857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller#include "util/u_math.h" 2957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 3057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 3157594065c30feec9376be9b2132659f7d87362eeChristoph Bumillernamespace nv50_ir { 3257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 3357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 3457594065c30feec9376be9b2132659f7d87362eeChristoph BumillerInstruction::isNop() const 3557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 3657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (op == OP_CONSTRAINT || op == OP_PHI) 3757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 3857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (terminator || join) // XXX: should terminator imply flow ? 
3957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 4057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!fixed && op == OP_NOP) 4157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 4257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 439362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller if (defExists(0) && def(0).rep()->reg.data.id < 0) { 4457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (int d = 1; defExists(d); ++d) 459362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller if (def(d).rep()->reg.data.id >= 0) 4657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller WARN("part of vector result is unused !\n"); 4757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 4857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 4957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 5057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (op == OP_MOV || op == OP_UNION) { 519362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller if (!def(0).rep()->equals(getSrc(0))) 5257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 5357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (op == OP_UNION) 549362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller if (!def(0).rep()->equals(getSrc(1))) 5557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 5657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 5757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 5857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 5957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 6057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 6157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 6257594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool Instruction::isDead() const 6357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 
6457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (op == OP_STORE || 6557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller op == OP_EXPORT) 6657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 6757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 6857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (int d = 0; defExists(d); ++d) 6957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (getDef(d)->refCount() || getDef(d)->reg.data.id >= 0) 7057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 7157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 7257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (terminator || asFlow()) 7357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 7457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (fixed) 7557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 7657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 7757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 7857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}; 7957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 8057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// ============================================================================= 8157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 8257594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerclass CopyPropagation : public Pass 8357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 8457594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerprivate: 8557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller virtual bool visit(BasicBlock *); 8657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}; 8757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 8857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// Propagate all MOVs forward to make subsequent optimization easier, 
except if 8957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// the sources stem from a phi, in which case we don't want to mess up potential 9057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// swaps $rX <-> $rY, i.e. do not create live range overlaps of phi src and def. 9157594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 9257594065c30feec9376be9b2132659f7d87362eeChristoph BumillerCopyPropagation::visit(BasicBlock *bb) 9357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 9457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *mov, *si, *next; 9557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 9657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (mov = bb->getEntry(); mov; mov = next) { 9757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next = mov->next; 9857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (mov->op != OP_MOV || mov->fixed || !mov->getSrc(0)->asLValue()) 9957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller continue; 10044e84d6f161e95d44d847440b3bc6d670c242cd7Christoph Bumiller if (mov->getPredicate()) 10144e84d6f161e95d44d847440b3bc6d670c242cd7Christoph Bumiller continue; 10244e84d6f161e95d44d847440b3bc6d670c242cd7Christoph Bumiller if (mov->def(0).getFile() != mov->src(0).getFile()) 10344e84d6f161e95d44d847440b3bc6d670c242cd7Christoph Bumiller continue; 10457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller si = mov->getSrc(0)->getInsn(); 10557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (mov->getDef(0)->reg.data.id < 0 && si && si->op != OP_PHI) { 10657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // propagate 1079362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller mov->def(0).replace(mov->getSrc(0), false); 10857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, mov); 10957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 
11057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 11157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 11257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 11357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 11457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// ============================================================================= 11557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 11657594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerclass LoadPropagation : public Pass 11757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 11857594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerprivate: 11957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller virtual bool visit(BasicBlock *); 12057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 12157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller void checkSwapSrc01(Instruction *); 12257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 12357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bool isCSpaceLoad(Instruction *); 12457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bool isImmd32Load(Instruction *); 12557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}; 12657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 12757594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 12857594065c30feec9376be9b2132659f7d87362eeChristoph BumillerLoadPropagation::isCSpaceLoad(Instruction *ld) 12957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 1309362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller return ld && ld->op == OP_LOAD && ld->src(0).getFile() == FILE_MEMORY_CONST; 13157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 13257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 13357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 13457594065c30feec9376be9b2132659f7d87362eeChristoph 
BumillerLoadPropagation::isImmd32Load(Instruction *ld) 13557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 13657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!ld || (ld->op != OP_MOV) || (typeSizeof(ld->dType) != 4)) 13757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 1389362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller return ld->src(0).getFile() == FILE_IMMEDIATE; 13957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 14057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 14157594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 14257594065c30feec9376be9b2132659f7d87362eeChristoph BumillerLoadPropagation::checkSwapSrc01(Instruction *insn) 14357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 14457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!prog->getTarget()->getOpInfo(insn).commutative) 14557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (insn->op != OP_SET && insn->op != OP_SLCT) 14657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 1479362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller if (insn->src(1).getFile() != FILE_GPR) 14857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 14957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 15057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *i0 = insn->getSrc(0)->getInsn(); 15157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *i1 = insn->getSrc(1)->getInsn(); 15257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 15357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (isCSpaceLoad(i0)) { 15457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!isCSpaceLoad(i1)) 15557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller insn->swapSources(0, 1); 15657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller else 15757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 
15857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 15957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (isImmd32Load(i0)) { 16057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!isCSpaceLoad(i1) && !isImmd32Load(i1)) 16157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller insn->swapSources(0, 1); 16257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller else 16357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 16457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 16557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 16657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 16757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 16857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (insn->op == OP_SET) 16957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller insn->asCmp()->setCond = reverseCondCode(insn->asCmp()->setCond); 17057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller else 17157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (insn->op == OP_SLCT) 17257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller insn->asCmp()->setCond = inverseCondCode(insn->asCmp()->setCond); 17357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 17457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 17557594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 17657594065c30feec9376be9b2132659f7d87362eeChristoph BumillerLoadPropagation::visit(BasicBlock *bb) 17757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 17857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller const Target *targ = prog->getTarget(); 17957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *next; 18057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 18157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (Instruction *i = bb->getEntry(); i; i = next) { 
18257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next = i->next; 18357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 18457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->srcExists(1)) 18557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller checkSwapSrc01(i); 18657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 18757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (int s = 0; i->srcExists(s); ++s) { 18857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *ld = i->getSrc(s)->getInsn(); 18957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 19057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!ld || ld->fixed || (ld->op != OP_LOAD && ld->op != OP_MOV)) 19157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller continue; 19257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!targ->insnCanLoad(i, s, ld)) 19357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller continue; 19457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 19557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // propagate ! 
19657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(s, ld->getSrc(0)); 1979362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller if (ld->src(0).isIndirect(0)) 19857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setIndirect(s, 0, ld->getIndirect(0, 0)); 19957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 20057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ld->getDef(0)->refCount() == 0) 20157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, ld); 20257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 20357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 20457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 20557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 20657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 20757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// ============================================================================= 20857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 20957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// Evaluate constant expressions. 
21057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerclass ConstantFolding : public Pass 21157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 21257594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerpublic: 21357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bool foldAll(Program *); 21457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 21557594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerprivate: 21657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller virtual bool visit(BasicBlock *); 21757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 218d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez void expr(Instruction *, ImmediateValue&, ImmediateValue&); 219d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez void opnd(Instruction *, ImmediateValue&, int s); 22057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 22157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller void unary(Instruction *, const ImmediateValue&); 22257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 22355f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller void tryCollapseChainedMULs(Instruction *, const int s, ImmediateValue&); 22455f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller 22557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // TGSI 'true' is converted to -1 by F2I(NEG(SET)), track back to SET 22657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller CmpInstruction *findOriginForTestWithZero(Value *); 22757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 22857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller unsigned int foldCount; 22957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 23057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller BuildUtil bld; 23157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}; 23257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 23357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// TODO: 
remember generated immediates and only revisit these 23457594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 23557594065c30feec9376be9b2132659f7d87362eeChristoph BumillerConstantFolding::foldAll(Program *prog) 23657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 23757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller unsigned int iterCount = 0; 23857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller do { 23957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller foldCount = 0; 24057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!run(prog)) 24157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 24257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } while (foldCount && ++iterCount < 2); 24357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 24457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 24557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 24657594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 24757594065c30feec9376be9b2132659f7d87362eeChristoph BumillerConstantFolding::visit(BasicBlock *bb) 24857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 24957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *i, *next; 25057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 25157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (i = bb->getEntry(); i; i = next) { 25257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next = i->next; 2531e957941735fae514de658c836b8bdaf6c66bc06Francisco Jerez if (i->op == OP_MOV || i->op == OP_CALL) 25457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller continue; 25557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 256d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez ImmediateValue src0, src1; 25757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 258d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (i->srcExists(1) && 
259d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez i->src(0).getImmediate(src0) && i->src(1).getImmediate(src1)) 26057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller expr(i, src0, src1); 26157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller else 262d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (i->srcExists(0) && i->src(0).getImmediate(src0)) 26357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller opnd(i, src0, 0); 26457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller else 265d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (i->srcExists(1) && i->src(1).getImmediate(src1)) 26657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller opnd(i, src1, 1); 26757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 26857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 26957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 27057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 27157594065c30feec9376be9b2132659f7d87362eeChristoph BumillerCmpInstruction * 27257594065c30feec9376be9b2132659f7d87362eeChristoph BumillerConstantFolding::findOriginForTestWithZero(Value *value) 27357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 27457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!value) 27557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return NULL; 27657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *insn = value->getInsn(); 27757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 27857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller while (insn && insn->op != OP_SET) { 27957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *next = NULL; 28057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (insn->op) { 28157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_NEG: 28257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_ABS: 
28357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_CVT: 28457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next = insn->getSrc(0)->getInsn(); 28557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (insn->sType != next->dType) 28657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return NULL; 28757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 28857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_MOV: 28957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next = insn->getSrc(0)->getInsn(); 29057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 29157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 29257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return NULL; 29357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 29457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller insn = next; 29557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 29657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return insn ? 
insn->asCmp() : NULL; 29757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 29857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 29957594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 30057594065c30feec9376be9b2132659f7d87362eeChristoph BumillerModifier::applyTo(ImmediateValue& imm) const 30157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 30257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (imm.reg.type) { 30357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_F32: 30457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (bits & NV50_IR_MOD_ABS) 30557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller imm.reg.data.f32 = fabsf(imm.reg.data.f32); 30657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (bits & NV50_IR_MOD_NEG) 30757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller imm.reg.data.f32 = -imm.reg.data.f32; 30857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (bits & NV50_IR_MOD_SAT) { 30957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (imm.reg.data.f32 < 0.0f) 31057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller imm.reg.data.f32 = 0.0f; 31157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller else 31257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (imm.reg.data.f32 > 1.0f) 31357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller imm.reg.data.f32 = 1.0f; 31457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 31557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(!(bits & NV50_IR_MOD_NOT)); 31657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 31757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 31857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_S8: // NOTE: will be extended 31957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_S16: 32057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_S32: 
32157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_U8: // NOTE: treated as signed 32257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_U16: 32357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_U32: 32457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (bits & NV50_IR_MOD_ABS) 32557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller imm.reg.data.s32 = (imm.reg.data.s32 >= 0) ? 32657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller imm.reg.data.s32 : -imm.reg.data.s32; 32757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (bits & NV50_IR_MOD_NEG) 32857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller imm.reg.data.s32 = -imm.reg.data.s32; 32957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (bits & NV50_IR_MOD_NOT) 33057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller imm.reg.data.s32 = ~imm.reg.data.s32; 33157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 33257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 33357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_F64: 33457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (bits & NV50_IR_MOD_ABS) 33557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller imm.reg.data.f64 = fabs(imm.reg.data.f64); 33657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (bits & NV50_IR_MOD_NEG) 33757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller imm.reg.data.f64 = -imm.reg.data.f64; 33857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (bits & NV50_IR_MOD_SAT) { 33957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (imm.reg.data.f64 < 0.0) 34057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller imm.reg.data.f64 = 0.0; 34157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller else 34257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (imm.reg.data.f64 > 1.0) 
34357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller imm.reg.data.f64 = 1.0; 34457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 34557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(!(bits & NV50_IR_MOD_NOT)); 34657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 34757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 34857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 34957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(!"invalid/unhandled type"); 35057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller imm.reg.data.u64 = 0; 35157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 35257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 35357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 35457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 35557594065c30feec9376be9b2132659f7d87362eeChristoph Bumilleroperation 35657594065c30feec9376be9b2132659f7d87362eeChristoph BumillerModifier::getOp() const 35757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 35857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (bits) { 35957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case NV50_IR_MOD_ABS: return OP_ABS; 36057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case NV50_IR_MOD_NEG: return OP_NEG; 36157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case NV50_IR_MOD_SAT: return OP_SAT; 36257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case NV50_IR_MOD_NOT: return OP_NOT; 36357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case 0: 36457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return OP_MOV; 36557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 36657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return OP_CVT; 36757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 36857594065c30feec9376be9b2132659f7d87362eeChristoph 
Bumiller} 36957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 37057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 37157594065c30feec9376be9b2132659f7d87362eeChristoph BumillerConstantFolding::expr(Instruction *i, 372d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez ImmediateValue &imm0, ImmediateValue &imm1) 37357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 37457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller struct Storage *const a = &imm0.reg, *const b = &imm1.reg; 375d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez struct Storage res; 37657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 377d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez memset(&res.data, 0, sizeof(res.data)); 37857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 37957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (i->op) { 38057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_MAD: 38157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_FMA: 38257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_MUL: 38357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->dnz && i->dType == TYPE_F32) { 38457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!isfinite(a->data.f32)) 38557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller a->data.f32 = 0.0f; 38657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!isfinite(b->data.f32)) 38757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller b->data.f32 = 0.0f; 38857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 38957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (i->dType) { 39057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_F32: res.data.f32 = a->data.f32 * b->data.f32; break; 39157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_F64: res.data.f64 = a->data.f64 * b->data.f64; break; 
39257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_S32: 39357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_U32: res.data.u32 = a->data.u32 * b->data.u32; break; 39457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 39557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 39657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 39757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 39857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_DIV: 39957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (b->data.u32 == 0) 40057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 40157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (i->dType) { 40257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_F32: res.data.f32 = a->data.f32 / b->data.f32; break; 40357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_F64: res.data.f64 = a->data.f64 / b->data.f64; break; 40457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_S32: res.data.s32 = a->data.s32 / b->data.s32; break; 40557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_U32: res.data.u32 = a->data.u32 / b->data.u32; break; 40657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 40757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 40857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 40957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 41057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_ADD: 41157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (i->dType) { 41257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_F32: res.data.f32 = a->data.f32 + b->data.f32; break; 41357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_F64: res.data.f64 = a->data.f64 + b->data.f64; break; 
41457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_S32: 41557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_U32: res.data.u32 = a->data.u32 + b->data.u32; break; 41657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 41757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 41857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 41957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 42057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_POW: 42157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (i->dType) { 42257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_F32: res.data.f32 = pow(a->data.f32, b->data.f32); break; 42357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_F64: res.data.f64 = pow(a->data.f64, b->data.f64); break; 42457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 42557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 42657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 42757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 42857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_MAX: 42957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (i->dType) { 43057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_F32: res.data.f32 = MAX2(a->data.f32, b->data.f32); break; 43157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_F64: res.data.f64 = MAX2(a->data.f64, b->data.f64); break; 43257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_S32: res.data.s32 = MAX2(a->data.s32, b->data.s32); break; 43357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_U32: res.data.u32 = MAX2(a->data.u32, b->data.u32); break; 43457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 43557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 
43657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 43757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 43857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_MIN: 43957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (i->dType) { 44057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_F32: res.data.f32 = MIN2(a->data.f32, b->data.f32); break; 44157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_F64: res.data.f64 = MIN2(a->data.f64, b->data.f64); break; 44257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_S32: res.data.s32 = MIN2(a->data.s32, b->data.s32); break; 44357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_U32: res.data.u32 = MIN2(a->data.u32, b->data.u32); break; 44457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 44557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 44657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 44757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 44857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_AND: 44957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller res.data.u64 = a->data.u64 & b->data.u64; 45057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 45157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_OR: 45257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller res.data.u64 = a->data.u64 | b->data.u64; 45357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 45457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_XOR: 45557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller res.data.u64 = a->data.u64 ^ b->data.u64; 45657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 45757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_SHL: 45857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller res.data.u32 = a->data.u32 << 
b->data.u32; 45957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 46057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_SHR: 46157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (i->dType) { 46257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_S32: res.data.s32 = a->data.s32 >> b->data.u32; break; 46357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case TYPE_U32: res.data.u32 = a->data.u32 >> b->data.u32; break; 46457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 46557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 46657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 46757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 46857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_SLCT: 46957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (a->data.u32 != b->data.u32) 47057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 47157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller res.data.u32 = a->data.u32; 47257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 47357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 47457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 47557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 47657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ++foldCount; 47757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 4789362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller i->src(0).mod = Modifier(0); 4799362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller i->src(1).mod = Modifier(0); 48057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 48157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res.data.u32)); 48257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(1, NULL); 
48357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 48457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->getSrc(0)->reg.data = res.data; 48557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 48657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->op == OP_MAD || i->op == OP_FMA) { 48757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_ADD; 48857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 48957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(1, i->getSrc(0)); 4909362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller i->src(1).mod = i->src(2).mod; 49157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(0, i->getSrc(2)); 49257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(2, NULL); 49357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 494d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez ImmediateValue src0; 495d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (i->src(0).getImmediate(src0)) 496d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez expr(i, src0, *i->getSrc(1)->asImm()); 49757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 49857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_MOV; 49957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 50057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 50157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 50257594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 50357594065c30feec9376be9b2132659f7d87362eeChristoph BumillerConstantFolding::unary(Instruction *i, const ImmediateValue &imm) 50457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 50557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Storage res; 50657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 50757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->dType != TYPE_F32) 
50857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 50957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (i->op) { 51057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_NEG: res.data.f32 = -imm.reg.data.f32; break; 51157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_ABS: res.data.f32 = fabsf(imm.reg.data.f32); break; 51257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_RCP: res.data.f32 = 1.0f / imm.reg.data.f32; break; 51357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_RSQ: res.data.f32 = 1.0f / sqrtf(imm.reg.data.f32); break; 51457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_LG2: res.data.f32 = log2f(imm.reg.data.f32); break; 51557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_EX2: res.data.f32 = exp2f(imm.reg.data.f32); break; 51657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_SIN: res.data.f32 = sinf(imm.reg.data.f32); break; 51757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_COS: res.data.f32 = cosf(imm.reg.data.f32); break; 51857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_SQRT: res.data.f32 = sqrtf(imm.reg.data.f32); break; 51957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_PRESIN: 52057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_PREEX2: 52157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // these should be handled in subsequent OP_SIN/COS/EX2 52257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller res.data.f32 = imm.reg.data.f32; 52357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 52457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 52557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 52657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 52757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_MOV; 
52857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res.data.f32)); 5299362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller i->src(0).mod = Modifier(0); 53057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 53157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 53257594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 53355f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph BumillerConstantFolding::tryCollapseChainedMULs(Instruction *mul2, 53455f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller const int s, ImmediateValue& imm2) 53555f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller{ 53655f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller const int t = s ? 0 : 1; 53755f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller Instruction *insn; 53855f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller Instruction *mul1 = NULL; // mul1 before mul2 53955f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller int e = 0; 54055f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller float f = imm2.reg.data.f32; 541d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez ImmediateValue imm1; 54255f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller 54355f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller assert(mul2->op == OP_MUL && mul2->dType == TYPE_F32); 54455f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller 54555f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller if (mul2->getSrc(t)->refCount() == 1) { 54655f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller insn = mul2->getSrc(t)->getInsn(); 547d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (!mul2->src(t).mod && insn->op == OP_MUL && insn->dType == TYPE_F32) 54855f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller mul1 = insn; 54955f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller if (mul1) { 550d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez int s1; 
551d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez 552d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (mul1->src(s1 = 0).getImmediate(imm1) || 553d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez mul1->src(s1 = 1).getImmediate(imm1)) { 55455f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller bld.setPosition(mul1, false); 55555f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller // a = mul r, imm1 55655f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller // d = mul a, imm2 -> d = mul r, (imm1 * imm2) 55755f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller mul1->setSrc(s1, bld.loadImm(NULL, f * imm1.reg.data.f32)); 558d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez mul1->src(s1).mod = Modifier(0); 5599362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller mul2->def(0).replace(mul1->getDef(0), false); 56055f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller } else 56155f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller if (prog->getTarget()->isPostMultiplySupported(OP_MUL, f, e)) { 56255f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller // c = mul a, b 56355f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller // d = mul c, imm -> d = mul_x_imm a, b 56455f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller mul1->postFactor = e; 5659362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller mul2->def(0).replace(mul1->getDef(0), false); 56655f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller if (f < 0) 567d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez mul1->src(0).mod *= Modifier(NV50_IR_MOD_NEG); 56855f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller } 56955f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller return; 57055f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller } 57155f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller } 57255f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller if (mul2->getDef(0)->refCount() == 1) { 
57355f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller // b = mul a, imm 57455f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller // d = mul b, c -> d = mul_x_imm a, c 57555f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller int s2, t2; 5768cc2eca5df0116aa7fb8233a9ab6ad1c9e4203cdFrancisco Jerez insn = mul2->getDef(0)->uses.front()->getInsn(); 57755f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller if (!insn) 57855f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller return; 57955f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller mul1 = mul2; 58055f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller mul2 = NULL; 58155f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller s2 = insn->getSrc(0) == mul1->getDef(0) ? 0 : 1; 58255f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller t2 = s2 ? 0 : 1; 58355f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller if (insn->op == OP_MUL && insn->dType == TYPE_F32) 584d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (!insn->src(s2).mod && !insn->src(t2).getImmediate(imm1)) 58555f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller mul2 = insn; 58655f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller if (mul2 && prog->getTarget()->isPostMultiplySupported(OP_MUL, f, e)) { 58755f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller mul2->postFactor = e; 5889362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller mul2->setSrc(s2, mul1->src(t)); 58955f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller if (f < 0) 590d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez mul2->src(s2).mod *= Modifier(NV50_IR_MOD_NEG); 59155f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller } 59255f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller } 59355f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller} 59455f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller 59555f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumillervoid 
596d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco JerezConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) 59757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 59857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller const int t = !s; 59957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller const operation op = i->op; 60057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 60157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (i->op) { 60257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_MUL: 60355f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller if (i->dType == TYPE_F32) 604d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez tryCollapseChainedMULs(i, s, imm0); 60555f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller 606d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (imm0.isInteger(0)) { 60757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_MOV; 608d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez i->setSrc(0, new_ImmediateValue(prog, 0u)); 609d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez i->src(0).mod = Modifier(0); 61057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(1, NULL); 61157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 612d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (imm0.isInteger(1) || imm0.isInteger(-1)) { 613d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (imm0.isNegative()) 6149362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller i->src(t).mod = i->src(t).mod ^ Modifier(NV50_IR_MOD_NEG); 6159362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller i->op = i->src(t).mod.getOp(); 61657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (s == 0) { 61757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(0, i->getSrc(1)); 6189362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller i->src(0).mod = i->src(1).mod; 
6199362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller i->src(1).mod = 0; 62057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 62157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->op != OP_CVT) 6229362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller i->src(0).mod = 0; 62357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(1, NULL); 62457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 625d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (imm0.isInteger(2) || imm0.isInteger(-2)) { 626d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (imm0.isNegative()) 6279362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller i->src(t).mod = i->src(t).mod ^ Modifier(NV50_IR_MOD_NEG); 62857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_ADD; 62957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(s, i->getSrc(t)); 6309362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller i->src(s).mod = i->src(t).mod; 63157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 632d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (!isFloatType(i->sType) && !imm0.isNegative() && imm0.isPow2()) { 63357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_SHL; 634d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez imm0.applyLog2(); 635d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez i->setSrc(0, i->getSrc(t)); 636d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez i->src(0).mod = i->src(t).mod; 637d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez i->setSrc(1, new_ImmediateValue(prog, imm0.reg.data.u32)); 638d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez i->src(1).mod = 0; 63957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 64057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 64157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_ADD: 
642d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (imm0.isInteger(0)) { 64357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (s == 0) { 64457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(0, i->getSrc(1)); 6459362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller i->src(0).mod = i->src(1).mod; 64657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 64757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(1, NULL); 6489362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller i->op = i->src(0).mod.getOp(); 64957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->op != OP_CVT) 6509362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller i->src(0).mod = Modifier(0); 65157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 65257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 65357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 65457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_DIV: 65557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (s != 1 || (i->dType != TYPE_S32 && i->dType != TYPE_U32)) 65657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 65757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.setPosition(i, false); 658d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (imm0.reg.data.u32 == 0) { 65957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 66057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 661d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (imm0.reg.data.u32 == 1) { 66257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_MOV; 66357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(1, NULL); 66457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 665d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (i->dType == TYPE_U32 && imm0.isPow2()) { 
6666ab6110133c2d316d98f78bbc38bca0c5b6184a7Christoph Bumiller i->op = OP_SHR; 667d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez i->setSrc(1, bld.mkImm(util_logbase2(imm0.reg.data.u32))); 66857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 66957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->dType == TYPE_U32) { 67057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *mul; 67157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *tA, *tB; 672d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez const uint32_t d = imm0.reg.data.u32; 67357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller uint32_t m; 67457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int r, s; 67557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller uint32_t l = util_logbase2(d); 67657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (((uint32_t)1 << l) < d) 67757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ++l; 67857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller m = (((uint64_t)1 << 32) * (((uint64_t)1 << l) - d)) / d + 1; 67957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller r = l ? 1 : 0; 68057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller s = l ? 
(l - 1) : 0; 68157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 68257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller tA = bld.getSSA(); 68357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller tB = bld.getSSA(); 68457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mul = bld.mkOp2(OP_MUL, TYPE_U32, tA, i->getSrc(0), 68557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.loadImm(NULL, m)); 68657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mul->subOp = NV50_IR_SUBOP_MUL_HIGH; 68757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp2(OP_SUB, TYPE_U32, tB, i->getSrc(0), tA); 68857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller tA = bld.getSSA(); 68957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (r) 69057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp2(OP_SHR, TYPE_U32, tA, tB, bld.mkImm(r)); 69157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller else 69257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller tA = tB; 69357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller tB = s ? 
bld.getSSA() : i->getDef(0); 69457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp2(OP_ADD, TYPE_U32, tB, mul->getDef(0), tA); 69557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (s) 69657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), tB, bld.mkImm(s)); 69757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 69857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, i); 69957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 700d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (imm0.reg.data.s32 == -1) { 70157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_NEG; 70257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(1, NULL); 70357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 70457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller LValue *tA, *tB; 70557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller LValue *tD; 706d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez const int32_t d = imm0.reg.data.s32; 70757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int32_t m; 70857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int32_t l = util_logbase2(static_cast<unsigned>(abs(d))); 70957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if ((1 << l) < abs(d)) 71057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ++l; 71157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!l) 71257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller l = 1; 71357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller m = ((uint64_t)1 << (32 + l - 1)) / abs(d) + 1 - ((uint64_t)1 << 32); 71457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 71557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller tA = bld.getSSA(); 71657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller tB = bld.getSSA(); 
71757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp3(OP_MAD, TYPE_S32, tA, i->getSrc(0), bld.loadImm(NULL, m), 71857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->getSrc(0))->subOp = NV50_IR_SUBOP_MUL_HIGH; 71957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (l > 1) 72057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp2(OP_SHR, TYPE_S32, tB, tA, bld.mkImm(l - 1)); 72157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller else 72257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller tB = tA; 72357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller tA = bld.getSSA(); 72457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkCmp(OP_SET, CC_LT, TYPE_S32, tA, i->getSrc(0), bld.mkImm(0)); 72557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller tD = (d < 0) ? bld.getSSA() : i->getDef(0)->asLValue(); 72657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp2(OP_SUB, TYPE_U32, tD, tB, tA); 72757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (d < 0) 72857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.mkOp1(OP_NEG, TYPE_S32, i->getDef(0), tB); 72957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 73057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, i); 73157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 73257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 73357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 734ae828413c4a98ba3546f5586f2e20d9da718ab0cChristoph Bumiller case OP_MOD: 735d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (i->sType == TYPE_U32 && imm0.isPow2()) { 736ae828413c4a98ba3546f5586f2e20d9da718ab0cChristoph Bumiller bld.setPosition(i, false); 737ae828413c4a98ba3546f5586f2e20d9da718ab0cChristoph Bumiller i->op = OP_AND; 738d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez i->setSrc(1, bld.loadImm(NULL, imm0.reg.data.u32 - 1)); 
739ae828413c4a98ba3546f5586f2e20d9da718ab0cChristoph Bumiller } 740ae828413c4a98ba3546f5586f2e20d9da718ab0cChristoph Bumiller break; 741ae828413c4a98ba3546f5586f2e20d9da718ab0cChristoph Bumiller 74257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_SET: // TODO: SET_AND,OR,XOR 74357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller { 74457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller CmpInstruction *si = findOriginForTestWithZero(i->getSrc(t)); 74557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller CondCode cc, ccZ; 7469362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller if (i->src(t).mod != Modifier(0)) 74757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 748d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (imm0.reg.data.u32 != 0 || !si || si->op != OP_SET) 74957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 75057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller cc = si->setCond; 75157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ccZ = (CondCode)((unsigned int)i->asCmp()->setCond & ~CC_U); 75257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (s == 0) 75357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ccZ = reverseCondCode(ccZ); 75457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (ccZ) { 75557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case CC_LT: cc = CC_FL; break; 75657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case CC_GE: cc = CC_TR; break; 75757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case CC_EQ: cc = inverseCondCode(cc); break; 75857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case CC_LE: cc = inverseCondCode(cc); break; 75957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case CC_GT: break; 76057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case CC_NE: break; 76157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 
76257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 76357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 76457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->asCmp()->setCond = cc; 7659362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller i->setSrc(0, si->src(0)); 7669362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller i->setSrc(1, si->src(1)); 76757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->sType = si->sType; 76857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 76957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 77057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 77157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_SHL: 77257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller { 7739362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller if (s != 1 || i->src(0).mod != Modifier(0)) 77457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 77557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // try to concatenate shifts 77657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *si = i->getSrc(0)->getInsn(); 777d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (!si || si->op != OP_SHL) 77857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 779d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez ImmediateValue imm1; 780d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez if (si->src(1).getImmediate(imm1)) { 78157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bld.setPosition(i, false); 78257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(0, si->getSrc(0)); 783d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez i->setSrc(1, bld.loadImm(NULL, imm0.reg.data.u32 + imm1.reg.data.u32)); 78457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 78557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 
78657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 78757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 78857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_ABS: 78957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_NEG: 79057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_LG2: 79157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_RCP: 79257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_SQRT: 79357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_RSQ: 79457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_PRESIN: 79557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_SIN: 79657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_COS: 79757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_PREEX2: 79857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_EX2: 799d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez unary(i, imm0); 80057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 80157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 80257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 80357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 80457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->op != op) 80557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller foldCount++; 80657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 80757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 80857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// ============================================================================= 80957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 81057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// Merge modifier operations (ABS, NEG, NOT) into ValueRefs where allowed. 
81157594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerclass ModifierFolding : public Pass 81257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 81357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerprivate: 81457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller virtual bool visit(BasicBlock *); 81557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}; 81657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 81757594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 81857594065c30feec9376be9b2132659f7d87362eeChristoph BumillerModifierFolding::visit(BasicBlock *bb) 81957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 82057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller const Target *target = prog->getTarget(); 82157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 82257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *i, *next, *mi; 82357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Modifier mod; 82457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 82557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (i = bb->getEntry(); i; i = next) { 82657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next = i->next; 82757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 82857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (0 && i->op == OP_SUB) { 82957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // turn "sub" into "add neg" (do we really want this ?) 
83057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_ADD; 8319362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller i->src(0).mod = i->src(0).mod ^ Modifier(NV50_IR_MOD_NEG); 83257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 83357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 83457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (int s = 0; s < 3 && i->srcExists(s); ++s) { 83557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mi = i->getSrc(s)->getInsn(); 83657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!mi || 83757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mi->predSrc >= 0 || mi->getDef(0)->refCount() > 8) 83857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller continue; 83957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->sType == TYPE_U32 && mi->dType == TYPE_S32) { 84057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if ((i->op != OP_ADD && 84157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op != OP_MUL) || 84257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller (mi->op != OP_ABS && 84357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mi->op != OP_NEG)) 84457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller continue; 84557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 84657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->sType != mi->dType) { 84757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller continue; 84857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 84957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if ((mod = Modifier(mi->op)) == Modifier(0)) 85057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller continue; 85114d5f975a65c57830077dabf2f95261afbc51773Francisco Jerez mod *= mi->src(0).mod; 85257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 8539362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller if ((i->op == 
OP_ABS) || i->src(s).mod.abs()) { 85457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // abs neg [abs] = abs 85557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mod = mod & Modifier(~(NV50_IR_MOD_NEG | NV50_IR_MOD_ABS)); 85657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 85757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if ((i->op == OP_NEG) && mod.neg()) { 85857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(s == 0); 85957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // neg as both opcode and modifier on same insn is prohibited 86057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // neg neg abs = abs, neg neg = identity 86157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mod = mod & Modifier(~NV50_IR_MOD_NEG); 86257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = mod.getOp(); 86357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mod = mod & Modifier(~NV50_IR_MOD_ABS); 86457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (mod == Modifier(0)) 86557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->op = OP_MOV; 86657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 86757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 86857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (target->isModSupported(i, s, mod)) { 86957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setSrc(s, mi->getSrc(0)); 87014d5f975a65c57830077dabf2f95261afbc51773Francisco Jerez i->src(s).mod *= mod; 87157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 87257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 87357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 87457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->op == OP_SAT) { 87557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mi = i->getSrc(0)->getInsn(); 87657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if 
(mi && 87757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mi->getDef(0)->refCount() <= 1 && target->isSatSupported(mi)) { 87857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mi->saturate = 1; 87957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mi->setDef(0, i->getDef(0)); 88057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, i); 88157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 88257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 88357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 88457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 88557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 88657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 88757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 88857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// ============================================================================= 88957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 89057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// MUL + ADD -> MAD/FMA 89157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// MIN/MAX(a, a) -> a, etc. 89257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// SLCT(a, b, const) -> cc(const) ? 
a : b 89357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// RCP(RCP(a)) -> a 89457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// MUL(MUL(a, b), const) -> MUL_Xconst(a, b) 89557594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerclass AlgebraicOpt : public Pass 89657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 89757594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerprivate: 89857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller virtual bool visit(BasicBlock *); 89957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 90057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller void handleADD(Instruction *); 90157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller void handleMINMAX(Instruction *); 90257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller void handleRCP(Instruction *); 90357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller void handleSLCT(Instruction *); 90457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller void handleLOGOP(Instruction *); 90557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller void handleCVT(Instruction *); 90657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}; 90757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 90857594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 90957594065c30feec9376be9b2132659f7d87362eeChristoph BumillerAlgebraicOpt::handleADD(Instruction *add) 91057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 91157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *src0 = add->getSrc(0); 91257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *src1 = add->getSrc(1); 91357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *src; 91457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int s; 91557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Modifier mod[4]; 91657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 
91757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!prog->getTarget()->isOpSupported(OP_MAD, add->dType)) 91857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 91957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 92057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (src0->reg.file != FILE_GPR || src1->reg.file != FILE_GPR) 92157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 92257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 92357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (src0->refCount() == 1 && 92457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller src0->getUniqueInsn() && src0->getUniqueInsn()->op == OP_MUL) 92557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller s = 0; 92657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller else 92757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (src1->refCount() == 1 && 92857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller src1->getUniqueInsn() && src1->getUniqueInsn()->op == OP_MUL) 92957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller s = 1; 93057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller else 93157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 93257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 93357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if ((src0->getUniqueInsn() && src0->getUniqueInsn()->bb != add->bb) || 93457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller (src1->getUniqueInsn() && src1->getUniqueInsn()->bb != add->bb)) 93557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 93657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 93757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller src = add->getSrc(s); 93857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 93955f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller if (src->getInsn()->postFactor) 
94055f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller return; 94155f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller 9429362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller mod[0] = add->src(0).mod; 9439362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller mod[1] = add->src(1).mod; 9449362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller mod[2] = src->getUniqueInsn()->src(0).mod; 9459362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller mod[3] = src->getUniqueInsn()->src(1).mod; 94657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 94757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (((mod[0] | mod[1]) | (mod[2] | mod[3])) & Modifier(~NV50_IR_MOD_NEG)) 94857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 94957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 95057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller add->op = OP_MAD; 95157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller add->subOp = src->getInsn()->subOp; // potentially mul-high 95257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 9539362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller add->setSrc(2, add->src(s ? 
0 : 1)); 95457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 95557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller add->setSrc(0, src->getInsn()->getSrc(0)); 9569362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller add->src(0).mod = mod[2] ^ mod[s]; 95757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller add->setSrc(1, src->getInsn()->getSrc(1)); 9589362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller add->src(1).mod = mod[3]; 95957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 96057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 96157594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 96257594065c30feec9376be9b2132659f7d87362eeChristoph BumillerAlgebraicOpt::handleMINMAX(Instruction *minmax) 96357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 96457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *src0 = minmax->getSrc(0); 96557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *src1 = minmax->getSrc(1); 96657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 96757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (src0 != src1 || src0->reg.file != FILE_GPR) 96857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 9699362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller if (minmax->src(0).mod == minmax->src(1).mod) { 97014d5f975a65c57830077dabf2f95261afbc51773Francisco Jerez if (minmax->def(0).mayReplace(minmax->src(0))) { 97114d5f975a65c57830077dabf2f95261afbc51773Francisco Jerez minmax->def(0).replace(minmax->src(0), false); 97214d5f975a65c57830077dabf2f95261afbc51773Francisco Jerez minmax->bb->remove(minmax); 97314d5f975a65c57830077dabf2f95261afbc51773Francisco Jerez } else { 97457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller minmax->op = OP_CVT; 97557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller minmax->setSrc(1, NULL); 97657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 
97757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 97857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // TODO: 97957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // min(x, -x) = -abs(x) 98057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // min(x, -abs(x)) = -abs(x) 98157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // min(x, abs(x)) = x 98257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // max(x, -abs(x)) = x 98357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // max(x, abs(x)) = abs(x) 98457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // max(x, -x) = abs(x) 98557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 98657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 98757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 98857594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 98957594065c30feec9376be9b2132659f7d87362eeChristoph BumillerAlgebraicOpt::handleRCP(Instruction *rcp) 99057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 99157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *si = rcp->getSrc(0)->getUniqueInsn(); 99257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 99357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (si && si->op == OP_RCP) { 9949362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller Modifier mod = rcp->src(0).mod * si->src(0).mod; 99557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rcp->op = mod.getOp(); 99657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rcp->setSrc(0, si->getSrc(0)); 99757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 99857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 99957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 100057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 100157594065c30feec9376be9b2132659f7d87362eeChristoph BumillerAlgebraicOpt::handleSLCT(Instruction 
*slct) 100257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 100357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (slct->getSrc(2)->reg.file == FILE_IMMEDIATE) { 100457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (slct->getSrc(2)->asImm()->compare(slct->asCmp()->setCond, 0.0f)) 100557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller slct->setSrc(0, slct->getSrc(1)); 100657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 100757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (slct->getSrc(0) != slct->getSrc(1)) { 100857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 100957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 101057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller slct->op = OP_MOV; 101157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller slct->setSrc(1, NULL); 101257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller slct->setSrc(2, NULL); 101357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 101457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 101557594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 101657594065c30feec9376be9b2132659f7d87362eeChristoph BumillerAlgebraicOpt::handleLOGOP(Instruction *logop) 101757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 101857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *src0 = logop->getSrc(0); 101957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *src1 = logop->getSrc(1); 102057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 102157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (src0->reg.file != FILE_GPR || src1->reg.file != FILE_GPR) 102257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 102357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 102457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (src0 == src1) { 102514d5f975a65c57830077dabf2f95261afbc51773Francisco Jerez 
if ((logop->op == OP_AND || logop->op == OP_OR) && 102614d5f975a65c57830077dabf2f95261afbc51773Francisco Jerez logop->def(0).mayReplace(logop->src(0))) { 102714d5f975a65c57830077dabf2f95261afbc51773Francisco Jerez logop->def(0).replace(logop->src(0), false); 102857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, logop); 102957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 103057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 103157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // try AND(SET, SET) -> SET_AND(SET) 103257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *set0 = src0->getInsn(); 103357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *set1 = src1->getInsn(); 103457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 103557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!set0 || set0->fixed || !set1 || set1->fixed) 103657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 103757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (set1->op != OP_SET) { 103857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *xchg = set0; 103957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller set0 = set1; 104057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller set1 = xchg; 104157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (set1->op != OP_SET) 104257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 104357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 104457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (set0->op != OP_SET && 104557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller set0->op != OP_SET_AND && 104657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller set0->op != OP_SET_OR && 104757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller set0->op != OP_SET_XOR) 104857594065c30feec9376be9b2132659f7d87362eeChristoph 
Bumiller return; 104957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (set0->getDef(0)->refCount() > 1 && 105057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller set1->getDef(0)->refCount() > 1) 105157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 105257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (set0->getPredicate() || set1->getPredicate()) 105357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 105457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // check that they don't source each other 105557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (int s = 0; s < 2; ++s) 105657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (set0->getSrc(s) == set1->getDef(0) || 105757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller set1->getSrc(s) == set0->getDef(0)) 105857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 105957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 1060a05e6a3fa28168d58a13cfb07f7a664e84b925aeFrancisco Jerez set0 = cloneForward(func, set0); 1061a05e6a3fa28168d58a13cfb07f7a664e84b925aeFrancisco Jerez set1 = cloneShallow(func, set1); 106257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller logop->bb->insertAfter(logop, set1); 106357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller logop->bb->insertAfter(logop, set0); 106457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 106557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller set0->dType = TYPE_U8; 106657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller set0->getDef(0)->reg.file = FILE_PREDICATE; 106757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller set0->getDef(0)->reg.size = 1; 106857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller set1->setSrc(2, set0->getDef(0)); 106957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (logop->op) { 107057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_AND: 
set1->op = OP_SET_AND; break; 107157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_OR: set1->op = OP_SET_OR; break; 107257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_XOR: set1->op = OP_SET_XOR; break; 107357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 107457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(0); 107557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 107657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 107757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller set1->setDef(0, logop->getDef(0)); 107857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, logop); 107957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 108057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 108157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 108257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// F2I(NEG(SET with result 1.0f/0.0f)) -> SET with result -1/0 108357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 108457594065c30feec9376be9b2132659f7d87362eeChristoph BumillerAlgebraicOpt::handleCVT(Instruction *cvt) 108557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 108657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (cvt->sType != TYPE_F32 || 10879362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller cvt->dType != TYPE_S32 || cvt->src(0).mod != Modifier(0)) 108857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 108957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *insn = cvt->getSrc(0)->getInsn(); 109057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!insn || insn->op != OP_NEG || insn->dType != TYPE_F32) 109157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 10929362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller if (insn->src(0).mod != Modifier(0)) 
109357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 109457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller insn = insn->getSrc(0)->getInsn(); 109557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!insn || insn->op != OP_SET || insn->dType != TYPE_F32) 109657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 109757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 1098a05e6a3fa28168d58a13cfb07f7a664e84b925aeFrancisco Jerez Instruction *bset = cloneShallow(func, insn); 109957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bset->dType = TYPE_U32; 110057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bset->setDef(0, cvt->getDef(0)); 110157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller cvt->bb->insertAfter(cvt, bset); 110257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, cvt); 110357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 110457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 110557594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 110657594065c30feec9376be9b2132659f7d87362eeChristoph BumillerAlgebraicOpt::visit(BasicBlock *bb) 110757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 110857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *next; 110957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (Instruction *i = bb->getEntry(); i; i = next) { 111057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next = i->next; 111157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller switch (i->op) { 111257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_ADD: 111357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller handleADD(i); 111457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 111557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_RCP: 111657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller handleRCP(i); 
111757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 111857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_MIN: 111957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_MAX: 112057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller handleMINMAX(i); 112157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 112257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_SLCT: 112357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller handleSLCT(i); 112457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 112557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_AND: 112657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_OR: 112757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_XOR: 112857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller handleLOGOP(i); 112957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 113057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case OP_CVT: 113157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller handleCVT(i); 113257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 113357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 113457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 113557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 113657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 113757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 113857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 113957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 114057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 114157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// ============================================================================= 114257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 114357594065c30feec9376be9b2132659f7d87362eeChristoph 
// Make the first source of @ldst carry @offset.
// Nothing happens if the stored offset already matches.  If the source value
// has more than one reference it is first replaced by a shallow clone created
// in @fn, so the offset is written to the clone and not to the value that the
// other references still use.
static inline void
updateLdStOffset(Instruction *ldst, int32_t offset, Function *fn)
{
   if (offset != ldst->getSrc(0)->reg.data.offset) {
      if (ldst->getSrc(0)->refCount() > 1)
         ldst->setSrc(0, cloneShallow(fn, ldst->getSrc(0)));
      ldst->getSrc(0)->reg.data.offset = offset;
   }
}

// Combine loads and stores, forward stores to loads where possible.
class MemoryOpt : public Pass
{
private:
   // Bookkeeping entry for one load or store instruction.
   // NOTE(review): the member functions are defined outside this excerpt;
   // the field descriptions below are inferred from names and types only.
   class Record
   {
   public:
      Record *next;        // list link (presumably towards the next entry)
      Instruction *insn;   // the recorded load/store instruction
      const Value *rel[2]; // presumably the indirect address operands - confirm
      const Value *base;   // presumably the base symbol of the access - confirm
      int32_t offset;      // presumably the byte offset of the access - confirm
      int8_t fileIndex;
      uint8_t size;        // presumably the access size - confirm
      bool locked;         // see lockStores()
      Record *prev;        // list link (presumably towards the previous entry)

      bool overlaps(const Instruction *ldst) const;

      inline void link(Record **);
      inline void unlink(Record **);
      inline void set(const Instruction *ldst);
   };

public:
   MemoryOpt();

   // One Record list head per data file, for loads and stores respectively.
   Record *loads[DATA_FILE_COUNT];
   Record *stores[DATA_FILE_COUNT];

   // Pool constructed with sizeof(Record) as element size (see constructor).
   MemoryPool recordPool;

private:
   virtual bool visit(BasicBlock *);
   bool runOpt(BasicBlock *);

   Record **getList(const Instruction *);

   Record *findRecord(const Instruction *, bool load, bool& isAdjacent) const;

   // merge @insn into load/store instruction from @rec
   bool combineLd(Record *rec, Instruction *ld);
   bool combineSt(Record *rec, Instruction *st);

   bool replaceLdFromLd(Instruction *ld, Record *ldRec);
   bool replaceLdFromSt(Instruction *ld, Record *stRec);
   bool replaceStFromSt(Instruction *restrict st, Record *stRec);

   void addRecord(Instruction *ldst);
   void purgeRecords(Instruction *const st, DataFile);
   void lockStores(Instruction *const ld);
   void reset();

private:
   Record *prevRecord;
};

MemoryOpt::MemoryOpt() : recordPool(sizeof(MemoryOpt::Record), 6)
{
   for (int i = 0; i < DATA_FILE_COUNT; ++i) {
121357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller loads[i] = NULL; 121457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller stores[i] = NULL; 121557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 121657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller prevRecord = NULL; 121757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 121857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 121957594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 122057594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::reset() 122157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 122257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (unsigned int i = 0; i < DATA_FILE_COUNT; ++i) { 122357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Record *it, *next; 122457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (it = loads[i]; it; it = next) { 122557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next = it->next; 122657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller recordPool.release(it); 122757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 122857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller loads[i] = NULL; 122957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (it = stores[i]; it; it = next) { 123057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next = it->next; 123157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller recordPool.release(it); 123257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 123357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller stores[i] = NULL; 123457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 123557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 123657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 123757594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 123857594065c30feec9376be9b2132659f7d87362eeChristoph 
BumillerMemoryOpt::combineLd(Record *rec, Instruction *ld) 123957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 124057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int32_t offRc = rec->offset; 124157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int32_t offLd = ld->getSrc(0)->reg.data.offset; 124257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int sizeRc = rec->size; 124357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int sizeLd = typeSizeof(ld->dType); 124457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int size = sizeRc + sizeLd; 124557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int d, j; 124657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 1247286abcb51ec2c27970e901ed815a814b3f0bebf6Christoph Bumiller if (!prog->getTarget()-> 1248286abcb51ec2c27970e901ed815a814b3f0bebf6Christoph Bumiller isAccessSupported(ld->getSrc(0)->reg.file, typeOfSize(size))) 124957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 125057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // no unaligned loads 125157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (((size == 0x8) && (MIN2(offLd, offRc) & 0x7)) || 125257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ((size == 0xc) && (MIN2(offLd, offRc) & 0xf))) 125357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 125457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 125557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(sizeRc + sizeLd <= 16 && offRc != offLd); 125657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 125757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (j = 0; sizeRc; sizeRc -= rec->insn->getDef(j)->reg.size, ++j); 125857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 125957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (offLd < offRc) { 126057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int 
sz; 126157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (sz = 0, d = 0; sz < sizeLd; sz += ld->getDef(d)->reg.size, ++d); 126257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // d: nr of definitions in ld 126357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // j: nr of definitions in rec->insn, move: 126457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (d = d + j - 1; j > 0; --j, --d) 126557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rec->insn->setDef(d, rec->insn->getDef(j - 1)); 126657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 126757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (rec->insn->getSrc(0)->refCount() > 1) 1268a05e6a3fa28168d58a13cfb07f7a664e84b925aeFrancisco Jerez rec->insn->setSrc(0, cloneShallow(func, rec->insn->getSrc(0))); 126957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rec->offset = rec->insn->getSrc(0)->reg.data.offset = offLd; 127057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 127157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller d = 0; 127257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 127357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller d = j; 127457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 127557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // move definitions of @ld to @rec->insn 127657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (j = 0; sizeLd; ++j, ++d) { 127757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller sizeLd -= ld->getDef(j)->reg.size; 127857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rec->insn->setDef(d, ld->getDef(j)); 127957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 128057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 128157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rec->size = size; 128290f0fac65524fbc4e2f2d396d20d9808e4a0a95cFrancisco Jerez 
rec->insn->getSrc(0)->reg.size = size; 128357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rec->insn->setType(typeOfSize(size)); 128457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 128557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, ld); 128657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 128757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 128857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 128957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 129057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 129157594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::combineSt(Record *rec, Instruction *st) 129257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 129357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int32_t offRc = rec->offset; 129457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int32_t offSt = st->getSrc(0)->reg.data.offset; 129557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int sizeRc = rec->size; 129657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int sizeSt = typeSizeof(st->dType); 129757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int s = sizeSt / 4; 129857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int size = sizeRc + sizeSt; 129957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int j, k; 130057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *src[4]; // no modifiers in ValueRef allowed for st 130157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *extra[3]; 130257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 1303286abcb51ec2c27970e901ed815a814b3f0bebf6Christoph Bumiller if (!prog->getTarget()-> 1304286abcb51ec2c27970e901ed815a814b3f0bebf6Christoph Bumiller isAccessSupported(st->getSrc(0)->reg.file, typeOfSize(size))) 130557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 
130657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (size == 8 && MIN2(offRc, offSt) & 0x7) 130757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 130857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 130957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller st->takeExtraSources(0, extra); // save predicate and indirect address 131057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 131157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (offRc < offSt) { 131257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // save values from @st 131357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (s = 0; sizeSt; ++s) { 131457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller sizeSt -= st->getSrc(s + 1)->reg.size; 131557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller src[s] = st->getSrc(s + 1); 131657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 131757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // set record's values as low sources of @st 131857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (j = 1; sizeRc; ++j) { 131957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller sizeRc -= st->getSrc(j)->reg.size; 132057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller st->setSrc(j, rec->insn->getSrc(j)); 132157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 132257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // set saved values as high sources of @st 132357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (k = j, j = 0; j < s; ++j) 132457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller st->setSrc(k++, src[j]); 132557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 132657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller updateLdStOffset(st, offRc, func); 132757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 132857594065c30feec9376be9b2132659f7d87362eeChristoph 
Bumiller for (j = 1; sizeSt; ++j) 132957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller sizeSt -= st->getSrc(j)->reg.size; 133057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (s = 1; sizeRc; ++j, ++s) { 133157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller sizeRc -= rec->insn->getSrc(s)->reg.size; 133257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller st->setSrc(j, rec->insn->getSrc(s)); 133357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 133457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rec->offset = offSt; 133557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 133657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller st->putExtraSources(0, extra); // restore pointer and predicate 133757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 133857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, rec->insn); 133957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rec->insn = st; 134057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rec->size = size; 134190f0fac65524fbc4e2f2d396d20d9808e4a0a95cFrancisco Jerez rec->insn->getSrc(0)->reg.size = size; 134257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rec->insn->setType(typeOfSize(size)); 134357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 134457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 134557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 134657594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 134757594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::Record::set(const Instruction *ldst) 134857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 134957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller const Symbol *mem = ldst->getSrc(0)->asSym(); 135057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller fileIndex = mem->reg.fileIndex; 135157594065c30feec9376be9b2132659f7d87362eeChristoph 
Bumiller rel[0] = ldst->getIndirect(0, 0); 135257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rel[1] = ldst->getIndirect(0, 1); 135357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller offset = mem->reg.data.offset; 135457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller base = mem->getBase(); 135557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller size = typeSizeof(ldst->sType); 135657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 135757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 135857594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 135957594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::Record::link(Record **list) 136057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 136157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next = *list; 136257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (next) 136357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next->prev = this; 136457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller prev = NULL; 136557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller *list = this; 136657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 136757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 136857594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 136957594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::Record::unlink(Record **list) 137057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 137157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (next) 137257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next->prev = prev; 137357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (prev) 137457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller prev->next = next; 137557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller else 137657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller *list = next; 
137757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 137857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 137957594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::Record ** 138057594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::getList(const Instruction *insn) 138157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 138257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (insn->op == OP_LOAD || insn->op == OP_VFETCH) 13839362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller return &loads[insn->src(0).getFile()]; 13849362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller return &stores[insn->src(0).getFile()]; 138557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 138657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 138757594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 138857594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::addRecord(Instruction *i) 138957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 139057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Record **list = getList(i); 139157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Record *it = reinterpret_cast<Record *>(recordPool.allocate()); 139257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 139357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller it->link(list); 139457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller it->set(i); 139557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller it->insn = i; 139657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller it->locked = false; 139757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 139857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 139957594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::Record * 140057594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::findRecord(const Instruction *insn, bool load, bool& isAdj) const 
140157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 140257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller const Symbol *sym = insn->getSrc(0)->asSym(); 140357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller const int size = typeSizeof(insn->sType); 140457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Record *rec = NULL; 140557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Record *it = load ? loads[sym->reg.file] : stores[sym->reg.file]; 140657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 140757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (; it; it = it->next) { 140857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (it->locked && insn->op != OP_LOAD) 140957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller continue; 141057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if ((it->offset >> 4) != (sym->reg.data.offset >> 4) || 141157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller it->rel[0] != insn->getIndirect(0, 0) || 141257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller it->fileIndex != sym->reg.fileIndex || 141357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller it->rel[1] != insn->getIndirect(0, 1)) 141457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller continue; 141557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 141657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (it->offset < sym->reg.data.offset) { 141757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (it->offset + it->size >= sym->reg.data.offset) { 141857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller isAdj = (it->offset + it->size == sym->reg.data.offset); 141957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!isAdj) 142057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return it; 142157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!(it->offset & 0x7)) 
142257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rec = it; 142357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 142457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 142557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller isAdj = it->offset != sym->reg.data.offset; 142657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (size <= it->size && !isAdj) 142757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return it; 142857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller else 142957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!(sym->reg.data.offset & 0x7)) 143057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (it->offset - size <= sym->reg.data.offset) 143157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rec = it; 143257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 143357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 143457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return rec; 143557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 143657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 143757594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 143857594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::replaceLdFromSt(Instruction *ld, Record *rec) 143957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 144057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *st = rec->insn; 144157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int32_t offSt = rec->offset; 144257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int32_t offLd = ld->getSrc(0)->reg.data.offset; 144357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int d, s; 144457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 144557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (s = 1; offSt != offLd && st->srcExists(s); ++s) 
144657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller offSt += st->getSrc(s)->reg.size; 144757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (offSt != offLd) 144857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 144957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 145057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (d = 0; ld->defExists(d) && st->srcExists(s); ++d, ++s) { 145157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ld->getDef(d)->reg.size != st->getSrc(s)->reg.size) 145257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 145357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (st->getSrc(s)->reg.file != FILE_GPR) 145457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 145514d5f975a65c57830077dabf2f95261afbc51773Francisco Jerez ld->def(d).replace(st->src(s), false); 145657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 145757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ld->bb->remove(ld); 145857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 145957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 146057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 146157594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 146257594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::replaceLdFromLd(Instruction *ldE, Record *rec) 146357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 146457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *ldR = rec->insn; 146557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int32_t offR = rec->offset; 146657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int32_t offE = ldE->getSrc(0)->reg.data.offset; 146757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int dR, dE; 146857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 
146957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(offR <= offE); 147057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (dR = 0; offR < offE && ldR->defExists(dR); ++dR) 147157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller offR += ldR->getDef(dR)->reg.size; 147257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (offR != offE) 147357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 147457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 147557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (dE = 0; ldE->defExists(dE) && ldR->defExists(dR); ++dE, ++dR) { 147657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ldE->getDef(dE)->reg.size != ldR->getDef(dR)->reg.size) 147757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 14789362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller ldE->def(dE).replace(ldR->getDef(dR), false); 147957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 148057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 148157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, ldE); 148257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 148357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 148457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 148557594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 148657594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::replaceStFromSt(Instruction *restrict st, Record *rec) 148757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 148857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller const Instruction *const ri = rec->insn; 148957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *extra[3]; 149057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 149157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int32_t offS = 
st->getSrc(0)->reg.data.offset; 149257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int32_t offR = rec->offset; 149357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int32_t endS = offS + typeSizeof(st->dType); 149457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int32_t endR = offR + typeSizeof(ri->dType); 149557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 149657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rec->size = MAX2(endS, endR) - MIN2(offS, offR); 149757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 149857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller st->takeExtraSources(0, extra); 149957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 150057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (offR < offS) { 1501541bb2e33f89b07bcbea2e27275df858760c8ec8Brian Paul Value *vals[10]; 150257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int s, n; 150357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int k = 0; 150457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // get non-replaced sources of ri 150557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (s = 1; offR < offS; offR += ri->getSrc(s)->reg.size, ++s) 150657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller vals[k++] = ri->getSrc(s); 150757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller n = s; 150857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // get replaced sources of st 150957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (s = 1; st->srcExists(s); offS += st->getSrc(s)->reg.size, ++s) 151057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller vals[k++] = st->getSrc(s); 151157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // skip replaced sources of ri 151257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (s = n; offR < endS; offR += ri->getSrc(s)->reg.size, ++s); 
151357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // get non-replaced sources after values covered by st 151457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (; offR < endR; offR += ri->getSrc(s)->reg.size, ++s) 151557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller vals[k++] = ri->getSrc(s); 1516f09910f399d747e524731953bb11b64c1f4821d0Brian Paul assert(k <= Elements(vals)); 151757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (s = 0; s < k; ++s) 151857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller st->setSrc(s + 1, vals[s]); 151957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller st->setSrc(0, ri->getSrc(0)); 152057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 152157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (endR > endS) { 152257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int j, s; 152357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (j = 1; offR < endS; offR += ri->getSrc(j++)->reg.size); 152457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (s = 1; offS < endS; offS += st->getSrc(s++)->reg.size); 152557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (; offR < endR; offR += ri->getSrc(j++)->reg.size) 152657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller st->setSrc(s++, ri->getSrc(j)); 152757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 152857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller st->putExtraSources(0, extra); 152957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 153057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, rec->insn); 153157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 153257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rec->insn = st; 153357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rec->offset = st->getSrc(0)->reg.data.offset; 153457594065c30feec9376be9b2132659f7d87362eeChristoph 
Bumiller 153557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller st->setType(typeOfSize(rec->size)); 153657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 153757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 153857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 153957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 154057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 154157594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::Record::overlaps(const Instruction *ldst) const 154257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 154357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Record that; 154457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller that.set(ldst); 154557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 154657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (this->fileIndex != that.fileIndex) 154757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 154857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 154957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (this->rel[0] || that.rel[0]) 155057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return this->base == that.base; 155157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return 155257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller (this->offset < that.offset + that.size) && 155357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller (this->offset + this->size > that.offset); 155457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 155557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 155657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// We must not eliminate stores that affect the result of @ld if 155757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// we find later stores to the same location, and we may no longer 155857594065c30feec9376be9b2132659f7d87362eeChristoph 
Bumiller// merge them with later stores. 155957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// The stored value can, however, still be used to determine the value 156057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// returned by future loads. 156157594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 156257594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::lockStores(Instruction *const ld) 156357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 15649362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller for (Record *r = stores[ld->src(0).getFile()]; r; r = r->next) 156557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!r->locked && r->overlaps(ld)) 156657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller r->locked = true; 156757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 156857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 156957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// Prior loads from the location of @st are no longer valid. 157057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// Stores to the location of @st may no longer be used to derive 157157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// the value at it nor be coalesced into later stores. 
157257594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 157357594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::purgeRecords(Instruction *const st, DataFile f) 157457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 157557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (st) 15769362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller f = st->src(0).getFile(); 157757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 157857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (Record *r = loads[f]; r; r = r->next) 157957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!st || r->overlaps(st)) 158057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller r->unlink(&loads[f]); 158157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 158257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (Record *r = stores[f]; r; r = r->next) 158357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!st || r->overlaps(st)) 158457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller r->unlink(&stores[f]); 158557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 158657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 158757594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 158857594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::visit(BasicBlock *bb) 158957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 159057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bool ret = runOpt(bb); 159157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // Run again, one pass won't combine 4 32 bit ld/st to a single 128 bit ld/st 159257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // where 96 bit memory operations are forbidden. 
159357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ret) 159457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ret = runOpt(bb); 159557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return ret; 159657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 159757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 159857594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 159957594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::runOpt(BasicBlock *bb) 160057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 160157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *ldst, *next; 160257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Record *rec; 160357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bool isAdjacent = true; 160457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 160557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (ldst = bb->getEntry(); ldst; ldst = next) { 160657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bool keep = true; 160757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bool isLoad = true; 160857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next = ldst->next; 160957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 161057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ldst->op == OP_LOAD || ldst->op == OP_VFETCH) { 161157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ldst->isDead()) { 161257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // might have been produced by earlier optimization 161357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, ldst); 161457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller continue; 161557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 161657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 161757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 
if (ldst->op == OP_STORE || ldst->op == OP_EXPORT) { 161857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller isLoad = false; 161957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 162057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // TODO: maybe have all fixed ops act as barrier ? 162157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ldst->op == OP_CALL) { 162257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller purgeRecords(NULL, FILE_MEMORY_LOCAL); 162357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller purgeRecords(NULL, FILE_MEMORY_GLOBAL); 162457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller purgeRecords(NULL, FILE_MEMORY_SHARED); 162557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller purgeRecords(NULL, FILE_SHADER_OUTPUT); 162657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 162757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ldst->op == OP_EMIT || ldst->op == OP_RESTART) { 162857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller purgeRecords(NULL, FILE_SHADER_OUTPUT); 162957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 163057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller continue; 163157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 163257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ldst->getPredicate()) // TODO: handle predicated ld/st 163357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller continue; 163457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 163557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (isLoad) { 16369362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller DataFile file = ldst->src(0).getFile(); 163757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 163857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // if ld l[]/g[] look for previous store to eliminate the reload 163957594065c30feec9376be9b2132659f7d87362eeChristoph 
Bumiller if (file == FILE_MEMORY_GLOBAL || file == FILE_MEMORY_LOCAL) { 164057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // TODO: shared memory ? 164157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rec = findRecord(ldst, false, isAdjacent); 164257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (rec && !isAdjacent) 164357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller keep = !replaceLdFromSt(ldst, rec); 164457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 164557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 164657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // or look for ld from the same location and replace this one 164757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rec = keep ? findRecord(ldst, true, isAdjacent) : NULL; 164857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (rec) { 164957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!isAdjacent) 165057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller keep = !replaceLdFromLd(ldst, rec); 165157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller else 165257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // or combine a previous load with this one 165357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller keep = !combineLd(rec, ldst); 165457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 165557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (keep) 165657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller lockStores(ldst); 165757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 165857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rec = findRecord(ldst, false, isAdjacent); 165957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (rec) { 166057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!isAdjacent) 166157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller keep = !replaceStFromSt(ldst, rec); 
166257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller else 166357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller keep = !combineSt(rec, ldst); 166457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 166557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (keep) 166657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller purgeRecords(ldst, DATA_FILE_COUNT); 166757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 166857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (keep) 166957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller addRecord(ldst); 167057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 167157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller reset(); 167257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 167357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 167457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 167557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 167657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// ============================================================================= 167757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 167857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// Turn control flow into predicated instructions (after register allocation !). 167957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// TODO: 168057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// Could move this to before register allocation on NVC0 and also handle nested 168157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// constructs. 
168257594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerclass FlatteningPass : public Pass 168357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 168457594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerprivate: 168557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller virtual bool visit(BasicBlock *); 168657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 168757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bool tryPredicateConditional(BasicBlock *); 168857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller void predicateInstructions(BasicBlock *, Value *pred, CondCode cc); 168957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller void tryPropagateBranch(BasicBlock *); 169057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller inline bool isConstantCondition(Value *pred); 169157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller inline bool mayPredicate(const Instruction *, const Value *pred) const; 169257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller inline void removeFlow(Instruction *); 169357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}; 169457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 169557594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 169657594065c30feec9376be9b2132659f7d87362eeChristoph BumillerFlatteningPass::isConstantCondition(Value *pred) 169757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 169857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *insn = pred->getUniqueInsn(); 169957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(insn); 170057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (insn->op != OP_SET || insn->srcExists(2)) 170157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 170257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 170357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (int s = 0; s < 2 && insn->srcExists(s); ++s) { 
170457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *ld = insn->getSrc(s)->getUniqueInsn(); 170557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller DataFile file; 170657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ld) { 170757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ld->op != OP_MOV && ld->op != OP_LOAD) 170857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 17099362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller if (ld->src(0).isIndirect(0)) 171057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 17119362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller file = ld->src(0).getFile(); 171257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 17139362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller file = insn->src(s).getFile(); 171457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // catch $r63 on NVC0 171557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (file == FILE_GPR && insn->getSrc(s)->reg.data.id > prog->maxGPR) 171657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller file = FILE_IMMEDIATE; 171757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 171857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (file != FILE_IMMEDIATE && file != FILE_MEMORY_CONST) 171957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 172057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 172157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 172257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 172357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 172457594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 172557594065c30feec9376be9b2132659f7d87362eeChristoph BumillerFlatteningPass::removeFlow(Instruction *insn) 172657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 
172757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller FlowInstruction *term = insn ? insn->asFlow() : NULL; 172857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!term) 172957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 173057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Graph::Edge::Type ty = term->bb->cfg.outgoing().getType(); 173157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 173257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (term->op == OP_BRA) { 173357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // TODO: this might get more difficult when we get arbitrary BRAs 173457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ty == Graph::Edge::CROSS || ty == Graph::Edge::BACK) 173557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 173657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 173757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (term->op != OP_JOIN) 173857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 173957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 174057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, term); 174157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 174257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *pred = term->getPredicate(); 174357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 174457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (pred && pred->refCount() == 0) { 174557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *pSet = pred->getUniqueInsn(); 174657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller pred->join->reg.data.id = -1; // deallocate 174757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (pSet->isDead()) 174857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, pSet); 
174957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 175057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 175157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 175257594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 175357594065c30feec9376be9b2132659f7d87362eeChristoph BumillerFlatteningPass::predicateInstructions(BasicBlock *bb, Value *pred, CondCode cc) 175457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 175557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (Instruction *i = bb->getEntry(); i; i = i->next) { 175657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->isNop()) 175757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller continue; 175857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(!i->getPredicate()); 175957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller i->setPredicate(cc, pred); 176057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 176157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller removeFlow(bb->getExit()); 176257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 176357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 176457594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 176557594065c30feec9376be9b2132659f7d87362eeChristoph BumillerFlatteningPass::mayPredicate(const Instruction *insn, const Value *pred) const 176657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 176757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (insn->isPseudo()) 176857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 176957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // TODO: calls where we don't know which registers are modified 177057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 177157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!prog->getTarget()->mayPredicate(insn, pred)) 177257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 
return false; 177357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (int d = 0; insn->defExists(d); ++d) 177457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (insn->getDef(d)->equals(pred)) 177557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 177657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 177757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 177857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 177957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// If we conditionally skip over or to a branch instruction, replace it. 178057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// NOTE: We do not update the CFG anymore here ! 178157594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 178257594065c30feec9376be9b2132659f7d87362eeChristoph BumillerFlatteningPass::tryPropagateBranch(BasicBlock *bb) 178357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 178457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller BasicBlock *bf = NULL; 178557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller unsigned int i; 178657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 178757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (bb->cfg.outgoingCount() != 2) 178857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 178957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!bb->getExit() || bb->getExit()->op != OP_BRA) 179057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 179157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Graph::EdgeIterator ei = bb->cfg.outgoing(); 179257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 179357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (i = 0; !ei.end(); ++i, ei.next()) { 179457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bf = BasicBlock::get(ei.getNode()); 179557594065c30feec9376be9b2132659f7d87362eeChristoph 
Bumiller if (bf->getInsnCount() == 1) 179657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 179757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 179857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ei.end() || !bf->getExit()) 179957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 180057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller FlowInstruction *bra = bb->getExit()->asFlow(); 180157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller FlowInstruction *rep = bf->getExit()->asFlow(); 180257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 180357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (rep->getPredicate()) 180457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 180557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (rep->op != OP_BRA && 180657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rep->op != OP_JOIN && 180757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller rep->op != OP_EXIT) 180857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 180957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 181057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bra->op = rep->op; 181157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bra->target.bb = rep->target.bb; 181257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i) // 2nd out block means branch not taken 181357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bra->cc = inverseCondCode(bra->cc); 181457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bf->remove(rep); 181557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 181657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 181757594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 181857594065c30feec9376be9b2132659f7d87362eeChristoph BumillerFlatteningPass::visit(BasicBlock *bb) 181957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 
182057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (tryPredicateConditional(bb)) 182157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 182257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 182357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // try to attach join to previous instruction 182457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *insn = bb->getExit(); 182557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (insn && insn->op == OP_JOIN && !insn->getPredicate()) { 182657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller insn = insn->prev; 182757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (insn && !insn->getPredicate() && !insn->asFlow() && !insn->isNop()) { 182857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller insn->join = 1; 182957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bb->remove(bb->getExit()); 183057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 183157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 183257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 183357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 183457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller tryPropagateBranch(bb); 183557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 183657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 183757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 183857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 183957594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 184057594065c30feec9376be9b2132659f7d87362eeChristoph BumillerFlatteningPass::tryPredicateConditional(BasicBlock *bb) 184157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 184257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller BasicBlock *bL = NULL, *bR = NULL; 184357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller unsigned int 
nL = 0, nR = 0, limit = 12; 184457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *insn; 184557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller unsigned int mask; 184657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 184757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mask = bb->initiatesSimpleConditional(); 184857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!mask) 184957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 185057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 185157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(bb->getExit()); 185257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *pred = bb->getExit()->getPredicate(); 185357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(pred); 185457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 185557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (isConstantCondition(pred)) 185657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller limit = 4; 185757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 185857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Graph::EdgeIterator ei = bb->cfg.outgoing(); 185957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 186057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (mask & 1) { 186157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bL = BasicBlock::get(ei.getNode()); 186257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (insn = bL->getEntry(); insn; insn = insn->next, ++nL) 186357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!mayPredicate(insn, pred)) 186457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 186557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (nL > limit) 186657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; // too long, do a real branch 
186757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 186857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ei.next(); 186957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 187057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (mask & 2) { 187157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bR = BasicBlock::get(ei.getNode()); 187257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (insn = bR->getEntry(); insn; insn = insn->next, ++nR) 187357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!mayPredicate(insn, pred)) 187457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 187557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (nR > limit) 187657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; // too long, do a real branch 187757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 187857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 187957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (bL) 188057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller predicateInstructions(bL, pred, bb->getExit()->cc); 188157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (bR) 188257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller predicateInstructions(bR, pred, inverseCondCode(bb->getExit()->cc)); 188357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 188457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (bb->joinAt) { 188557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bb->remove(bb->joinAt); 188657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bb->joinAt = NULL; 188757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 188857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller removeFlow(bb->getExit()); // delete the branch/join at the fork point 188957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 
189057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // remove potential join operations at the end of the conditional 189157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (prog->getTarget()->joinAnterior) { 189257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bb = BasicBlock::get((bL ? bL : bR)->cfg.outgoing().getNode()); 189357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (bb->getEntry() && bb->getEntry()->op == OP_JOIN) 189457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller removeFlow(bb->getEntry()); 189557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 189657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 189757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 189857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 189957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 190057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// ============================================================================= 190157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 190257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// Common subexpression elimination. Stupid O^2 implementation. 
190357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerclass LocalCSE : public Pass 190457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 190557594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerprivate: 190657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller virtual bool visit(BasicBlock *); 190757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 190857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller inline bool tryReplace(Instruction **, Instruction *); 190957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 191057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller DLList ops[OP_LAST + 1]; 191157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}; 191257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 191357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerclass GlobalCSE : public Pass 191457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 191557594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerprivate: 191657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller virtual bool visit(BasicBlock *); 191757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}; 191857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 191957594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 192057594065c30feec9376be9b2132659f7d87362eeChristoph BumillerInstruction::isActionEqual(const Instruction *that) const 192157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 192257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (this->op != that->op || 192357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller this->dType != that->dType || 192457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller this->sType != that->sType) 192557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 192657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (this->cc != that->cc) 192757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return 
false; 192857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 192957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (this->asTex()) { 193057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (memcmp(&this->asTex()->tex, 193157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller &that->asTex()->tex, 193257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller sizeof(this->asTex()->tex))) 193357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 193457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 193557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (this->asCmp()) { 193657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (this->asCmp()->setCond != that->asCmp()->setCond) 193757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 193857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 193957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (this->asFlow()) { 194057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 194157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 194257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (this->atomic != that->atomic || 194357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller this->ipa != that->ipa || 194457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller this->lanes != that->lanes || 194557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller this->perPatch != that->perPatch) 194657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 194757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (this->postFactor != that->postFactor) 194857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 194957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 195057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 195157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (this->subOp 
!= that->subOp || 195257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller this->saturate != that->saturate || 195357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller this->rnd != that->rnd || 195457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller this->ftz != that->ftz || 195557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller this->dnz != that->dnz || 195657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller this->cache != that->cache) 195757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 195857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 195957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 196057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 196157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 196257594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 196357594065c30feec9376be9b2132659f7d87362eeChristoph BumillerInstruction::isResultEqual(const Instruction *that) const 196457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 196557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller unsigned int d, s; 196657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 196757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // NOTE: location of discard only affects tex with liveOnly and quadops 196857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!this->defExists(0) && this->op != OP_DISCARD) 196957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 197057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 197157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!isActionEqual(that)) 197257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 197357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 197457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (this->predSrc != that->predSrc) 197557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 
return false; 197657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 197757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (d = 0; this->defExists(d); ++d) { 197857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!that->defExists(d) || 197957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller !this->getDef(d)->equals(that->getDef(d), false)) 198057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 198157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 198257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (that->defExists(d)) 198357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 198457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 198557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (s = 0; this->srcExists(s); ++s) { 198657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!that->srcExists(s)) 198757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 19889362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller if (this->src(s).mod != that->src(s).mod) 198957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 199057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!this->getSrc(s)->equals(that->getSrc(s), true)) 199157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 199257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 199357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (that->srcExists(s)) 199457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 199557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 199657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (op == OP_LOAD || op == OP_VFETCH) { 19979362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller switch (src(0).getFile()) { 199857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case FILE_MEMORY_CONST: 
199957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller case FILE_SHADER_INPUT: 200057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 200157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller default: 200257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 200357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 200457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 200557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 200657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 200757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 200857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 200957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// pull through common expressions from different in-blocks 201057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 201157594065c30feec9376be9b2132659f7d87362eeChristoph BumillerGlobalCSE::visit(BasicBlock *bb) 201257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 201357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *phi, *next, *ik; 201457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int s; 201557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 2016ca1fc2b86400e3fc9dd0517863e22721b5e91c77Christoph Bumiller // TODO: maybe do this with OP_UNION, too 2017ca1fc2b86400e3fc9dd0517863e22721b5e91c77Christoph Bumiller 201857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = next) { 201957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next = phi->next; 202057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (phi->getSrc(0)->refCount() > 1) 202157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller continue; 202257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ik = phi->getSrc(0)->getInsn(); 202357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (s = 
1; phi->srcExists(s); ++s) { 202457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (phi->getSrc(s)->refCount() > 1) 202557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 20261e957941735fae514de658c836b8bdaf6c66bc06Francisco Jerez if (!phi->getSrc(s)->getInsn() || 20271e957941735fae514de658c836b8bdaf6c66bc06Francisco Jerez !phi->getSrc(s)->getInsn()->isResultEqual(ik)) 202857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 202957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 203057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!phi->srcExists(s)) { 203157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *entry = bb->getEntry(); 203257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ik->bb->remove(ik); 203357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!entry || entry->op != OP_JOIN) 203457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bb->insertHead(ik); 203557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller else 203657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bb->insertAfter(entry, ik); 203757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ik->setDef(0, phi->getDef(0)); 203857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, phi); 203957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 204057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 204157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 204257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 204357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 204457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 204557594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 204657594065c30feec9376be9b2132659f7d87362eeChristoph BumillerLocalCSE::tryReplace(Instruction **ptr, Instruction *i) 204757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 
204857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *old = *ptr; 2049ca1fc2b86400e3fc9dd0517863e22721b5e91c77Christoph Bumiller 2050ca1fc2b86400e3fc9dd0517863e22721b5e91c77Christoph Bumiller // TODO: maybe relax this later (causes trouble with OP_UNION) 2051ca1fc2b86400e3fc9dd0517863e22721b5e91c77Christoph Bumiller if (i->isPredicated()) 2052ca1fc2b86400e3fc9dd0517863e22721b5e91c77Christoph Bumiller return false; 2053ca1fc2b86400e3fc9dd0517863e22721b5e91c77Christoph Bumiller 205457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!old->isResultEqual(i)) 205557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 2056ca1fc2b86400e3fc9dd0517863e22721b5e91c77Christoph Bumiller 205757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (int d = 0; old->defExists(d); ++d) 20589362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller old->def(d).replace(i->getDef(d), false); 205957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, old); 206057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller *ptr = NULL; 206157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 206257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 206357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 206457594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 206557594065c30feec9376be9b2132659f7d87362eeChristoph BumillerLocalCSE::visit(BasicBlock *bb) 206657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 206757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller unsigned int replaced; 206857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 206957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller do { 207057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *ir, *next; 207157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 207257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller replaced = 0; 
207357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 207457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller // will need to know the order of instructions 207557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int serial = 0; 207657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (ir = bb->getEntry(); ir; ir = ir->next) 207757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ir->serial = serial++; 207857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 207957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (ir = bb->getEntry(); ir; ir = next) { 208057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int s; 208157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *src = NULL; 208257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 208357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next = ir->next; 208457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 208557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ir->fixed) { 208657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ops[ir->op].insert(ir); 208757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller continue; 208857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 208957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 209057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (s = 0; ir->srcExists(s); ++s) 209157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ir->getSrc(s)->asLValue()) 209257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!src || ir->getSrc(s)->refCount() < src->refCount()) 209357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller src = ir->getSrc(s); 209457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 209557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (src) { 20968cc2eca5df0116aa7fb8233a9ab6ad1c9e4203cdFrancisco Jerez for (Value::UseIterator it = src->uses.begin(); 
20978cc2eca5df0116aa7fb8233a9ab6ad1c9e4203cdFrancisco Jerez it != src->uses.end(); ++it) { 20988cc2eca5df0116aa7fb8233a9ab6ad1c9e4203cdFrancisco Jerez Instruction *ik = (*it)->getInsn(); 20998cc2eca5df0116aa7fb8233a9ab6ad1c9e4203cdFrancisco Jerez if (ik && ik->serial < ir->serial && ik->bb == ir->bb) 210057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (tryReplace(&ir, ik)) 210157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 210257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 210357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 210457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller DLLIST_FOR_EACH(&ops[ir->op], iter) 210557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller { 210657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *ik = reinterpret_cast<Instruction *>(iter.get()); 210757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (tryReplace(&ir, ik)) 210857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 210957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 211057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 211157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 211257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (ir) 211357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ops[ir->op].insert(ir); 211457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller else 211557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ++replaced; 211657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 211757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (unsigned int i = 0; i <= OP_LAST; ++i) 211857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ops[i].clear(); 211957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 212057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } while (replaced); 212157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 
212257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 212357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 212457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 212557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// ============================================================================= 212657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 212757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// Remove computations of unused values. 212857594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerclass DeadCodeElim : public Pass 212957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 213057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerpublic: 213157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller bool buryAll(Program *); 213257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 213357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerprivate: 213457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller virtual bool visit(BasicBlock *); 213557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 213657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller void checkSplitLoad(Instruction *ld); // for partially dead loads 213757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 213857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller unsigned int deadCount; 213957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}; 214057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 214157594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 214257594065c30feec9376be9b2132659f7d87362eeChristoph BumillerDeadCodeElim::buryAll(Program *prog) 214357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 214457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller do { 214557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller deadCount = 0; 214657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!this->run(prog, false, 
false)) 214757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; 214857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } while (deadCount); 214957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 215057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 215157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 215257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 215357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 215457594065c30feec9376be9b2132659f7d87362eeChristoph BumillerDeadCodeElim::visit(BasicBlock *bb) 215557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 215657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *next; 215757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 215857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (Instruction *i = bb->getFirst(); i; i = next) { 215957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller next = i->next; 216057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->isDead()) { 216157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ++deadCount; 216257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller delete_Instruction(prog, i); 216357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 216457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (i->defExists(1) && (i->op == OP_VFETCH || i->op == OP_LOAD)) { 216557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller checkSplitLoad(i); 216657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 216757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 216857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 216957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 217057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 217157594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid 217257594065c30feec9376be9b2132659f7d87362eeChristoph 
BumillerDeadCodeElim::checkSplitLoad(Instruction *ld1) 217357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 217457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Instruction *ld2 = NULL; // can get at most 2 loads 217557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *def1[4]; 217657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller Value *def2[4]; 217757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int32_t addr1, addr2; 217857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int32_t size1, size2; 217957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller int d, n1, n2; 218057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller uint32_t mask = 0xffffffff; 218157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 218257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (d = 0; ld1->defExists(d); ++d) 218357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!ld1->getDef(d)->refCount() && ld1->getDef(d)->reg.data.id < 0) 218457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller mask &= ~(1 << d); 218557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (mask == 0xffffffff) 218657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 218757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 218857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller addr1 = ld1->getSrc(0)->reg.data.offset; 218957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller n1 = n2 = 0; 219057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller size1 = size2 = 0; 219157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (d = 0; ld1->defExists(d); ++d) { 219257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (mask & (1 << d)) { 219357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (size1 && (addr1 & 0x7)) 219457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 219557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 
def1[n1] = ld1->getDef(d); 219657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller size1 += def1[n1++]->reg.size; 219757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else 219857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!n1) { 219957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller addr1 += ld1->getDef(d)->reg.size; 220057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 220157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller break; 220257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 220357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 220457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (addr2 = addr1 + size1; ld1->defExists(d); ++d) { 220557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (mask & (1 << d)) { 220657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller def2[n2] = ld1->getDef(d); 220757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller size2 += def2[n2++]->reg.size; 220857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } else { 220957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller assert(!n2); 221057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller addr2 += ld1->getDef(d)->reg.size; 221157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 221257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 221357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 221457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller updateLdStOffset(ld1, addr1, func); 221557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ld1->setType(typeOfSize(size1)); 221657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (d = 0; d < 4; ++d) 221757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ld1->setDef(d, (d < n1) ? 
def1[d] : NULL); 221857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 221957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!n2) 222057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return; 222157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 2222a05e6a3fa28168d58a13cfb07f7a664e84b925aeFrancisco Jerez ld2 = cloneShallow(func, ld1); 222357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller updateLdStOffset(ld2, addr2, func); 222457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ld2->setType(typeOfSize(size2)); 222557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller for (d = 0; d < 4; ++d) 222657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ld2->setDef(d, (d < n2) ? def2[d] : NULL); 222757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 222857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller ld1->bb->insertAfter(ld1, ld2); 222957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 223057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 223157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// ============================================================================= 223257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 223357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller#define RUN_PASS(l, n, f) \ 223457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (level >= (l)) { \ 223557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (dbgFlags & NV50_IR_DEBUG_VERBOSE) \ 223657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller INFO("PEEPHOLE: %s\n", #n); \ 223757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller n pass; \ 223857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller if (!pass.f(this)) \ 223957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return false; \ 224057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller } 224157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 
224257594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 224357594065c30feec9376be9b2132659f7d87362eeChristoph BumillerProgram::optimizeSSA(int level) 224457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 224557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller RUN_PASS(1, DeadCodeElim, buryAll); 224657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller RUN_PASS(1, CopyPropagation, run); 224757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller RUN_PASS(2, GlobalCSE, run); 224857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller RUN_PASS(1, LocalCSE, run); 224957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller RUN_PASS(2, AlgebraicOpt, run); 225057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller RUN_PASS(2, ModifierFolding, run); // before load propagation -> less checks 225157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller RUN_PASS(1, ConstantFolding, foldAll); 225257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller RUN_PASS(1, LoadPropagation, run); 225357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller RUN_PASS(2, MemoryOpt, run); 225457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller RUN_PASS(2, LocalCSE, run); 225557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller RUN_PASS(0, DeadCodeElim, buryAll); 2256ca1fc2b86400e3fc9dd0517863e22721b5e91c77Christoph Bumiller 225757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 225857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 225957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 226057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool 226157594065c30feec9376be9b2132659f7d87362eeChristoph BumillerProgram::optimizePostRA(int level) 226257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{ 226357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller RUN_PASS(2, FlatteningPass, run); 226457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller return true; 
226557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 226657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller 226757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller} 2268