1d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller/*
2d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * Copyright 2011 Christoph Bumiller
3d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller *
4d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * Permission is hereby granted, free of charge, to any person obtaining a
5d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * copy of this software and associated documentation files (the "Software"),
6d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * to deal in the Software without restriction, including without limitation
7d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * and/or sell copies of the Software, and to permit persons to whom the
9d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * Software is furnished to do so, subject to the following conditions:
10d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller *
11d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * The above copyright notice and this permission notice shall be included in
12d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * all copies or substantial portions of the Software.
13d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller *
14d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller * SOFTWARE.
21d2d19ea51fa3575a8d014a69a9b835c335728817Christoph Bumiller */
2257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
2357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller#include "nv50_ir.h"
2457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller#include "nv50_ir_target.h"
2557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller#include "nv50_ir_build_util.h"
2657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
2757594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerextern "C" {
2857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller#include "util/u_math.h"
2957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
3057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
3157594065c30feec9376be9b2132659f7d87362eeChristoph Bumillernamespace nv50_ir {
3257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
3357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool
3457594065c30feec9376be9b2132659f7d87362eeChristoph BumillerInstruction::isNop() const
3557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
3600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller   if (op == OP_PHI || op == OP_SPLIT || op == OP_MERGE || op == OP_CONSTRAINT)
3757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return true;
3857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (terminator || join) // XXX: should terminator imply flow ?
3957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return false;
4057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (!fixed && op == OP_NOP)
4157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return true;
4257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
439362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller   if (defExists(0) && def(0).rep()->reg.data.id < 0) {
4457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      for (int d = 1; defExists(d); ++d)
459362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller         if (def(d).rep()->reg.data.id >= 0)
4657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            WARN("part of vector result is unused !\n");
4757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return true;
4857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
4957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
5057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (op == OP_MOV || op == OP_UNION) {
51e43a3a66a9d8a99021d76ff4d07dec7b8cfd62caChristoph Bumiller      if (!getDef(0)->equals(getSrc(0)))
5257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return false;
5357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (op == OP_UNION)
549362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller         if (!def(0).rep()->equals(getSrc(1)))
5557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            return false;
5657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return true;
5757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
5857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
5957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   return false;
6057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
6157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
6257594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool Instruction::isDead() const
6357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
6457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (op == OP_STORE ||
652fc014f8c0d9339b1652f4e037aee5697142304aChristoph Bumiller       op == OP_EXPORT ||
662fc014f8c0d9339b1652f4e037aee5697142304aChristoph Bumiller       op == OP_WRSV)
6757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return false;
6857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
6957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   for (int d = 0; defExists(d); ++d)
7057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (getDef(d)->refCount() || getDef(d)->reg.data.id >= 0)
7157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return false;
7257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
7357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (terminator || asFlow())
7457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return false;
7557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (fixed)
7657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return false;
7757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
7857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   return true;
7957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller};
8057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
8157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// =============================================================================
8257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
8357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerclass CopyPropagation : public Pass
8457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
8557594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerprivate:
8657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   virtual bool visit(BasicBlock *);
8757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller};
8857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
8957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// Propagate all MOVs forward to make subsequent optimization easier, except if
9057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// the sources stem from a phi, in which case we don't want to mess up potential
9157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// swaps $rX <-> $rY, i.e. do not create live range overlaps of phi src and def.
9257594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool
9357594065c30feec9376be9b2132659f7d87362eeChristoph BumillerCopyPropagation::visit(BasicBlock *bb)
9457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
9557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Instruction *mov, *si, *next;
9657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
9757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   for (mov = bb->getEntry(); mov; mov = next) {
9857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      next = mov->next;
9957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (mov->op != OP_MOV || mov->fixed || !mov->getSrc(0)->asLValue())
10057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         continue;
10144e84d6f161e95d44d847440b3bc6d670c242cd7Christoph Bumiller      if (mov->getPredicate())
10244e84d6f161e95d44d847440b3bc6d670c242cd7Christoph Bumiller         continue;
10344e84d6f161e95d44d847440b3bc6d670c242cd7Christoph Bumiller      if (mov->def(0).getFile() != mov->src(0).getFile())
10444e84d6f161e95d44d847440b3bc6d670c242cd7Christoph Bumiller         continue;
10557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      si = mov->getSrc(0)->getInsn();
10657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (mov->getDef(0)->reg.data.id < 0 && si && si->op != OP_PHI) {
10757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         // propagate
1089362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller         mov->def(0).replace(mov->getSrc(0), false);
10957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         delete_Instruction(prog, mov);
11057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
11157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
11257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   return true;
11357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
11457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
11557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// =============================================================================
11657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
11757594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerclass LoadPropagation : public Pass
11857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
11957594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerprivate:
12057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   virtual bool visit(BasicBlock *);
12157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
12257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   void checkSwapSrc01(Instruction *);
12357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
12457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   bool isCSpaceLoad(Instruction *);
12557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   bool isImmd32Load(Instruction *);
12690b5301ceab8fd86fccf76efe7ebb039c0e4a28fChristoph Bumiller   bool isAttribOrSharedLoad(Instruction *);
12757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller};
12857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
12957594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool
13057594065c30feec9376be9b2132659f7d87362eeChristoph BumillerLoadPropagation::isCSpaceLoad(Instruction *ld)
13157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
1329362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller   return ld && ld->op == OP_LOAD && ld->src(0).getFile() == FILE_MEMORY_CONST;
13357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
13457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
13557594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool
13657594065c30feec9376be9b2132659f7d87362eeChristoph BumillerLoadPropagation::isImmd32Load(Instruction *ld)
13757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
13857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (!ld || (ld->op != OP_MOV) || (typeSizeof(ld->dType) != 4))
13957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return false;
1409362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller   return ld->src(0).getFile() == FILE_IMMEDIATE;
14157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
14257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
14390b5301ceab8fd86fccf76efe7ebb039c0e4a28fChristoph Bumillerbool
14490b5301ceab8fd86fccf76efe7ebb039c0e4a28fChristoph BumillerLoadPropagation::isAttribOrSharedLoad(Instruction *ld)
14590b5301ceab8fd86fccf76efe7ebb039c0e4a28fChristoph Bumiller{
14690b5301ceab8fd86fccf76efe7ebb039c0e4a28fChristoph Bumiller   return ld &&
14790b5301ceab8fd86fccf76efe7ebb039c0e4a28fChristoph Bumiller      (ld->op == OP_VFETCH ||
14890b5301ceab8fd86fccf76efe7ebb039c0e4a28fChristoph Bumiller       (ld->op == OP_LOAD &&
14990b5301ceab8fd86fccf76efe7ebb039c0e4a28fChristoph Bumiller        (ld->src(0).getFile() == FILE_SHADER_INPUT ||
15090b5301ceab8fd86fccf76efe7ebb039c0e4a28fChristoph Bumiller         ld->src(0).getFile() == FILE_MEMORY_SHARED)));
15190b5301ceab8fd86fccf76efe7ebb039c0e4a28fChristoph Bumiller}
15290b5301ceab8fd86fccf76efe7ebb039c0e4a28fChristoph Bumiller
15357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid
15457594065c30feec9376be9b2132659f7d87362eeChristoph BumillerLoadPropagation::checkSwapSrc01(Instruction *insn)
15557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
15657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (!prog->getTarget()->getOpInfo(insn).commutative)
15757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (insn->op != OP_SET && insn->op != OP_SLCT)
15857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return;
1599362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller   if (insn->src(1).getFile() != FILE_GPR)
16057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return;
16157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
16257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Instruction *i0 = insn->getSrc(0)->getInsn();
16357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Instruction *i1 = insn->getSrc(1)->getInsn();
16457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
16557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (isCSpaceLoad(i0)) {
16657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (!isCSpaceLoad(i1))
16757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         insn->swapSources(0, 1);
16857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      else
16957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return;
17057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   } else
17157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (isImmd32Load(i0)) {
17257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (!isCSpaceLoad(i1) && !isImmd32Load(i1))
17357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         insn->swapSources(0, 1);
17457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      else
17557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return;
17690b5301ceab8fd86fccf76efe7ebb039c0e4a28fChristoph Bumiller   } else
17790b5301ceab8fd86fccf76efe7ebb039c0e4a28fChristoph Bumiller   if (isAttribOrSharedLoad(i1)) {
17890b5301ceab8fd86fccf76efe7ebb039c0e4a28fChristoph Bumiller      if (!isAttribOrSharedLoad(i0))
17990b5301ceab8fd86fccf76efe7ebb039c0e4a28fChristoph Bumiller         insn->swapSources(0, 1);
18090b5301ceab8fd86fccf76efe7ebb039c0e4a28fChristoph Bumiller      else
18190b5301ceab8fd86fccf76efe7ebb039c0e4a28fChristoph Bumiller         return;
18257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   } else {
18357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return;
18457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
18557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
18657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (insn->op == OP_SET)
18757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      insn->asCmp()->setCond = reverseCondCode(insn->asCmp()->setCond);
18857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   else
18957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (insn->op == OP_SLCT)
19057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      insn->asCmp()->setCond = inverseCondCode(insn->asCmp()->setCond);
19157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
19257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
19357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool
19457594065c30feec9376be9b2132659f7d87362eeChristoph BumillerLoadPropagation::visit(BasicBlock *bb)
19557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
19657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   const Target *targ = prog->getTarget();
19757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Instruction *next;
19857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
19957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   for (Instruction *i = bb->getEntry(); i; i = next) {
20057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      next = i->next;
20157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
20257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (i->srcExists(1))
20357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         checkSwapSrc01(i);
20457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
20557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      for (int s = 0; i->srcExists(s); ++s) {
20657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         Instruction *ld = i->getSrc(s)->getInsn();
20757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
20857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (!ld || ld->fixed || (ld->op != OP_LOAD && ld->op != OP_MOV))
20957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            continue;
21057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (!targ->insnCanLoad(i, s, ld))
21157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            continue;
21257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
21357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         // propagate !
21457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         i->setSrc(s, ld->getSrc(0));
2159362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller         if (ld->src(0).isIndirect(0))
21657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            i->setIndirect(s, 0, ld->getIndirect(0, 0));
21757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
21857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (ld->getDef(0)->refCount() == 0)
21957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            delete_Instruction(prog, ld);
22057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
22157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
22257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   return true;
22357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
22457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
22557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// =============================================================================
22657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
22757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// Evaluate constant expressions.
22857594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerclass ConstantFolding : public Pass
22957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
23057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerpublic:
23157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   bool foldAll(Program *);
23257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
23357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerprivate:
23457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   virtual bool visit(BasicBlock *);
23557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
236d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez   void expr(Instruction *, ImmediateValue&, ImmediateValue&);
237d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez   void opnd(Instruction *, ImmediateValue&, int s);
23857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
23957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   void unary(Instruction *, const ImmediateValue&);
24057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
24155f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller   void tryCollapseChainedMULs(Instruction *, const int s, ImmediateValue&);
24255f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller
24357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   // TGSI 'true' is converted to -1 by F2I(NEG(SET)), track back to SET
24457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   CmpInstruction *findOriginForTestWithZero(Value *);
24557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
24657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   unsigned int foldCount;
24757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
24857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   BuildUtil bld;
24957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller};
25057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
25157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// TODO: remember generated immediates and only revisit these
25257594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool
25357594065c30feec9376be9b2132659f7d87362eeChristoph BumillerConstantFolding::foldAll(Program *prog)
25457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
25557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   unsigned int iterCount = 0;
25657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   do {
25757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      foldCount = 0;
25857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (!run(prog))
25957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return false;
26057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   } while (foldCount && ++iterCount < 2);
26157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   return true;
26257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
26357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
26457594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool
26557594065c30feec9376be9b2132659f7d87362eeChristoph BumillerConstantFolding::visit(BasicBlock *bb)
26657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
26757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Instruction *i, *next;
26857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
26957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   for (i = bb->getEntry(); i; i = next) {
27057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      next = i->next;
2711e957941735fae514de658c836b8bdaf6c66bc06Francisco Jerez      if (i->op == OP_MOV || i->op == OP_CALL)
27257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         continue;
27357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
274d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez      ImmediateValue src0, src1;
27557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
276d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez      if (i->srcExists(1) &&
277d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez          i->src(0).getImmediate(src0) && i->src(1).getImmediate(src1))
27857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         expr(i, src0, src1);
27957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      else
280d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez      if (i->srcExists(0) && i->src(0).getImmediate(src0))
28157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         opnd(i, src0, 0);
28257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      else
283d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez      if (i->srcExists(1) && i->src(1).getImmediate(src1))
28457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         opnd(i, src1, 1);
28557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
28657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   return true;
28757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
28857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
28957594065c30feec9376be9b2132659f7d87362eeChristoph BumillerCmpInstruction *
29057594065c30feec9376be9b2132659f7d87362eeChristoph BumillerConstantFolding::findOriginForTestWithZero(Value *value)
29157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
29257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (!value)
29357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return NULL;
29457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Instruction *insn = value->getInsn();
29557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
29657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   while (insn && insn->op != OP_SET) {
29757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      Instruction *next = NULL;
29857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      switch (insn->op) {
29957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case OP_NEG:
30057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case OP_ABS:
30157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case OP_CVT:
30257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         next = insn->getSrc(0)->getInsn();
30357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (insn->sType != next->dType)
30457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            return NULL;
30557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         break;
30657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case OP_MOV:
30757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         next = insn->getSrc(0)->getInsn();
30857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         break;
30957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      default:
31057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return NULL;
31157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
31257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      insn = next;
31357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
31457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   return insn ? insn->asCmp() : NULL;
31557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
31657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
31757594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid
31857594065c30feec9376be9b2132659f7d87362eeChristoph BumillerModifier::applyTo(ImmediateValue& imm) const
31957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
32057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   switch (imm.reg.type) {
32157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case TYPE_F32:
32257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (bits & NV50_IR_MOD_ABS)
32357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         imm.reg.data.f32 = fabsf(imm.reg.data.f32);
32457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (bits & NV50_IR_MOD_NEG)
32557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         imm.reg.data.f32 = -imm.reg.data.f32;
32657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (bits & NV50_IR_MOD_SAT) {
32757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (imm.reg.data.f32 < 0.0f)
32857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            imm.reg.data.f32 = 0.0f;
32957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         else
33057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (imm.reg.data.f32 > 1.0f)
33157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            imm.reg.data.f32 = 1.0f;
33257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
33357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      assert(!(bits & NV50_IR_MOD_NOT));
33457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      break;
33557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
33657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case TYPE_S8: // NOTE: will be extended
33757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case TYPE_S16:
33857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case TYPE_S32:
33957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case TYPE_U8: // NOTE: treated as signed
34057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case TYPE_U16:
34157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case TYPE_U32:
34257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (bits & NV50_IR_MOD_ABS)
34357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         imm.reg.data.s32 = (imm.reg.data.s32 >= 0) ?
34457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            imm.reg.data.s32 : -imm.reg.data.s32;
34557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (bits & NV50_IR_MOD_NEG)
34657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         imm.reg.data.s32 = -imm.reg.data.s32;
34757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (bits & NV50_IR_MOD_NOT)
34857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         imm.reg.data.s32 = ~imm.reg.data.s32;
34957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      break;
35057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
35157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case TYPE_F64:
35257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (bits & NV50_IR_MOD_ABS)
35357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         imm.reg.data.f64 = fabs(imm.reg.data.f64);
35457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (bits & NV50_IR_MOD_NEG)
35557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         imm.reg.data.f64 = -imm.reg.data.f64;
35657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (bits & NV50_IR_MOD_SAT) {
35757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (imm.reg.data.f64 < 0.0)
35857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            imm.reg.data.f64 = 0.0;
35957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         else
36057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (imm.reg.data.f64 > 1.0)
36157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            imm.reg.data.f64 = 1.0;
36257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
36357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      assert(!(bits & NV50_IR_MOD_NOT));
36457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      break;
36557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
36657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   default:
36757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      assert(!"invalid/unhandled type");
36857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      imm.reg.data.u64 = 0;
36957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      break;
37057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
37157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
37257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
37357594065c30feec9376be9b2132659f7d87362eeChristoph Bumilleroperation
37457594065c30feec9376be9b2132659f7d87362eeChristoph BumillerModifier::getOp() const
37557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
37657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   switch (bits) {
37757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case NV50_IR_MOD_ABS: return OP_ABS;
37857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case NV50_IR_MOD_NEG: return OP_NEG;
37957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case NV50_IR_MOD_SAT: return OP_SAT;
38057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case NV50_IR_MOD_NOT: return OP_NOT;
38157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case 0:
38257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return OP_MOV;
38357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   default:
38457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return OP_CVT;
38557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
38657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
38757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
38857594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid
38957594065c30feec9376be9b2132659f7d87362eeChristoph BumillerConstantFolding::expr(Instruction *i,
390d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez                      ImmediateValue &imm0, ImmediateValue &imm1)
39157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
39257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   struct Storage *const a = &imm0.reg, *const b = &imm1.reg;
393d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez   struct Storage res;
39457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
395d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez   memset(&res.data, 0, sizeof(res.data));
39657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
39757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   switch (i->op) {
39857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_MAD:
39957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_FMA:
40057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_MUL:
40157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (i->dnz && i->dType == TYPE_F32) {
40257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (!isfinite(a->data.f32))
40357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            a->data.f32 = 0.0f;
40457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (!isfinite(b->data.f32))
40557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            b->data.f32 = 0.0f;
40657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
40757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      switch (i->dType) {
40857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case TYPE_F32: res.data.f32 = a->data.f32 * b->data.f32; break;
40957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case TYPE_F64: res.data.f64 = a->data.f64 * b->data.f64; break;
41057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case TYPE_S32:
41157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case TYPE_U32: res.data.u32 = a->data.u32 * b->data.u32; break;
41257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      default:
41357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return;
41457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
41557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      break;
41657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_DIV:
41757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (b->data.u32 == 0)
41857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         break;
41957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      switch (i->dType) {
42057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case TYPE_F32: res.data.f32 = a->data.f32 / b->data.f32; break;
42157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case TYPE_F64: res.data.f64 = a->data.f64 / b->data.f64; break;
42257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case TYPE_S32: res.data.s32 = a->data.s32 / b->data.s32; break;
42357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case TYPE_U32: res.data.u32 = a->data.u32 / b->data.u32; break;
42457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      default:
42557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return;
42657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
42757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      break;
42857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_ADD:
42957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      switch (i->dType) {
43057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case TYPE_F32: res.data.f32 = a->data.f32 + b->data.f32; break;
43157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case TYPE_F64: res.data.f64 = a->data.f64 + b->data.f64; break;
43257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case TYPE_S32:
43357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case TYPE_U32: res.data.u32 = a->data.u32 + b->data.u32; break;
43457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      default:
43557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return;
43657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
43757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      break;
43857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_POW:
43957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      switch (i->dType) {
44057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case TYPE_F32: res.data.f32 = pow(a->data.f32, b->data.f32); break;
44157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case TYPE_F64: res.data.f64 = pow(a->data.f64, b->data.f64); break;
44257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      default:
44357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return;
44457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
44557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      break;
44657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_MAX:
44757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      switch (i->dType) {
44857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case TYPE_F32: res.data.f32 = MAX2(a->data.f32, b->data.f32); break;
44957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case TYPE_F64: res.data.f64 = MAX2(a->data.f64, b->data.f64); break;
45057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case TYPE_S32: res.data.s32 = MAX2(a->data.s32, b->data.s32); break;
45157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case TYPE_U32: res.data.u32 = MAX2(a->data.u32, b->data.u32); break;
45257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      default:
45357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return;
45457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
45557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      break;
45657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_MIN:
45757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      switch (i->dType) {
45857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case TYPE_F32: res.data.f32 = MIN2(a->data.f32, b->data.f32); break;
45957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case TYPE_F64: res.data.f64 = MIN2(a->data.f64, b->data.f64); break;
46057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case TYPE_S32: res.data.s32 = MIN2(a->data.s32, b->data.s32); break;
46157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case TYPE_U32: res.data.u32 = MIN2(a->data.u32, b->data.u32); break;
46257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      default:
46357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return;
46457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
46557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      break;
46657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_AND:
46757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      res.data.u64 = a->data.u64 & b->data.u64;
46857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      break;
46957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_OR:
47057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      res.data.u64 = a->data.u64 | b->data.u64;
47157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      break;
47257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_XOR:
47357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      res.data.u64 = a->data.u64 ^ b->data.u64;
47457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      break;
47557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_SHL:
47657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      res.data.u32 = a->data.u32 << b->data.u32;
47757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      break;
47857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_SHR:
47957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      switch (i->dType) {
48057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case TYPE_S32: res.data.s32 = a->data.s32 >> b->data.u32; break;
48157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case TYPE_U32: res.data.u32 = a->data.u32 >> b->data.u32; break;
48257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      default:
48357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return;
48457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
48557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      break;
48657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_SLCT:
48757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (a->data.u32 != b->data.u32)
48857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return;
48957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      res.data.u32 = a->data.u32;
49057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      break;
49157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   default:
49257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return;
49357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
49457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   ++foldCount;
49557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
4969362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller   i->src(0).mod = Modifier(0);
4979362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller   i->src(1).mod = Modifier(0);
49857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
49957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res.data.u32));
50057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   i->setSrc(1, NULL);
50157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
50257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   i->getSrc(0)->reg.data = res.data;
50357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
50457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (i->op == OP_MAD || i->op == OP_FMA) {
50557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      i->op = OP_ADD;
50657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
50757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      i->setSrc(1, i->getSrc(0));
5089362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller      i->src(1).mod = i->src(2).mod;
50957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      i->setSrc(0, i->getSrc(2));
51057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      i->setSrc(2, NULL);
51157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
512d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez      ImmediateValue src0;
513d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez      if (i->src(0).getImmediate(src0))
514d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez         expr(i, src0, *i->getSrc(1)->asImm());
51557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   } else {
51657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      i->op = OP_MOV;
51757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
51857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
51957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
52057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid
52157594065c30feec9376be9b2132659f7d87362eeChristoph BumillerConstantFolding::unary(Instruction *i, const ImmediateValue &imm)
52257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
52357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Storage res;
52457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
52557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (i->dType != TYPE_F32)
52657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return;
52757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   switch (i->op) {
52857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_NEG: res.data.f32 = -imm.reg.data.f32; break;
52957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_ABS: res.data.f32 = fabsf(imm.reg.data.f32); break;
53057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_RCP: res.data.f32 = 1.0f / imm.reg.data.f32; break;
53157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_RSQ: res.data.f32 = 1.0f / sqrtf(imm.reg.data.f32); break;
53257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_LG2: res.data.f32 = log2f(imm.reg.data.f32); break;
53357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_EX2: res.data.f32 = exp2f(imm.reg.data.f32); break;
53457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_SIN: res.data.f32 = sinf(imm.reg.data.f32); break;
53557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_COS: res.data.f32 = cosf(imm.reg.data.f32); break;
53657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_SQRT: res.data.f32 = sqrtf(imm.reg.data.f32); break;
53757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_PRESIN:
53857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_PREEX2:
53957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      // these should be handled in subsequent OP_SIN/COS/EX2
54057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      res.data.f32 = imm.reg.data.f32;
54157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      break;
54257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   default:
54357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return;
54457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
54557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   i->op = OP_MOV;
54657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res.data.f32));
5479362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller   i->src(0).mod = Modifier(0);
54857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
54957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
55057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid
55155f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph BumillerConstantFolding::tryCollapseChainedMULs(Instruction *mul2,
55255f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller                                        const int s, ImmediateValue& imm2)
55355f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller{
55455f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller   const int t = s ? 0 : 1;
55555f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller   Instruction *insn;
55655f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller   Instruction *mul1 = NULL; // mul1 before mul2
55755f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller   int e = 0;
55855f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller   float f = imm2.reg.data.f32;
559d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez   ImmediateValue imm1;
56055f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller
56155f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller   assert(mul2->op == OP_MUL && mul2->dType == TYPE_F32);
56255f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller
56355f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller   if (mul2->getSrc(t)->refCount() == 1) {
56455f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller      insn = mul2->getSrc(t)->getInsn();
565d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez      if (!mul2->src(t).mod && insn->op == OP_MUL && insn->dType == TYPE_F32)
56655f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller         mul1 = insn;
56728d6a268af3587cedb6a0e9deee7a98ecc8f82baChristoph Bumiller      if (mul1 && !mul1->saturate) {
568d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez         int s1;
569d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez
570d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez         if (mul1->src(s1 = 0).getImmediate(imm1) ||
571d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez             mul1->src(s1 = 1).getImmediate(imm1)) {
57255f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller            bld.setPosition(mul1, false);
57355f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller            // a = mul r, imm1
57455f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller            // d = mul a, imm2 -> d = mul r, (imm1 * imm2)
57555f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller            mul1->setSrc(s1, bld.loadImm(NULL, f * imm1.reg.data.f32));
576d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez            mul1->src(s1).mod = Modifier(0);
5779362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller            mul2->def(0).replace(mul1->getDef(0), false);
57855f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller         } else
57955f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller         if (prog->getTarget()->isPostMultiplySupported(OP_MUL, f, e)) {
58055f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller            // c = mul a, b
58155f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller            // d = mul c, imm   -> d = mul_x_imm a, b
58255f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller            mul1->postFactor = e;
5839362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller            mul2->def(0).replace(mul1->getDef(0), false);
58455f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller            if (f < 0)
585d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez               mul1->src(0).mod *= Modifier(NV50_IR_MOD_NEG);
58655f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller         }
58728d6a268af3587cedb6a0e9deee7a98ecc8f82baChristoph Bumiller         mul1->saturate = mul2->saturate;
58855f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller         return;
58955f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller      }
59055f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller   }
59128d6a268af3587cedb6a0e9deee7a98ecc8f82baChristoph Bumiller   if (mul2->getDef(0)->refCount() == 1 && !mul2->saturate) {
59255f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller      // b = mul a, imm
59355f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller      // d = mul b, c   -> d = mul_x_imm a, c
59455f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller      int s2, t2;
5958cc2eca5df0116aa7fb8233a9ab6ad1c9e4203cdFrancisco Jerez      insn = mul2->getDef(0)->uses.front()->getInsn();
59655f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller      if (!insn)
59755f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller         return;
59855f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller      mul1 = mul2;
59955f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller      mul2 = NULL;
60055f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller      s2 = insn->getSrc(0) == mul1->getDef(0) ? 0 : 1;
60155f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller      t2 = s2 ? 0 : 1;
60255f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller      if (insn->op == OP_MUL && insn->dType == TYPE_F32)
603d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez         if (!insn->src(s2).mod && !insn->src(t2).getImmediate(imm1))
60455f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller            mul2 = insn;
60555f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller      if (mul2 && prog->getTarget()->isPostMultiplySupported(OP_MUL, f, e)) {
60655f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller         mul2->postFactor = e;
6079362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller         mul2->setSrc(s2, mul1->src(t));
60855f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller         if (f < 0)
609d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez            mul2->src(s2).mod *= Modifier(NV50_IR_MOD_NEG);
61055f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller      }
61155f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller   }
61255f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller}
61355f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller
61455f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumillervoid
615d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco JerezConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
61657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
61757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   const int t = !s;
61857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   const operation op = i->op;
61957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
62057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   switch (i->op) {
62157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_MUL:
62255f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller      if (i->dType == TYPE_F32)
623d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez         tryCollapseChainedMULs(i, s, imm0);
62455f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller
625d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez      if (imm0.isInteger(0)) {
62657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         i->op = OP_MOV;
627d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez         i->setSrc(0, new_ImmediateValue(prog, 0u));
628d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez         i->src(0).mod = Modifier(0);
62957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         i->setSrc(1, NULL);
63057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      } else
631d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez      if (imm0.isInteger(1) || imm0.isInteger(-1)) {
632d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez         if (imm0.isNegative())
6339362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller            i->src(t).mod = i->src(t).mod ^ Modifier(NV50_IR_MOD_NEG);
6349362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller         i->op = i->src(t).mod.getOp();
63557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (s == 0) {
63657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            i->setSrc(0, i->getSrc(1));
6379362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller            i->src(0).mod = i->src(1).mod;
6389362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller            i->src(1).mod = 0;
63957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         }
64057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (i->op != OP_CVT)
6419362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller            i->src(0).mod = 0;
64257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         i->setSrc(1, NULL);
64357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      } else
644d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez      if (imm0.isInteger(2) || imm0.isInteger(-2)) {
645d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez         if (imm0.isNegative())
6469362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller            i->src(t).mod = i->src(t).mod ^ Modifier(NV50_IR_MOD_NEG);
64757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         i->op = OP_ADD;
64857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         i->setSrc(s, i->getSrc(t));
6499362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller         i->src(s).mod = i->src(t).mod;
65057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      } else
651d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez      if (!isFloatType(i->sType) && !imm0.isNegative() && imm0.isPow2()) {
65257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         i->op = OP_SHL;
653d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez         imm0.applyLog2();
654d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez         i->setSrc(0, i->getSrc(t));
655d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez         i->src(0).mod = i->src(t).mod;
656d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez         i->setSrc(1, new_ImmediateValue(prog, imm0.reg.data.u32));
657d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez         i->src(1).mod = 0;
65857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
65957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      break;
66057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_ADD:
661d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez      if (imm0.isInteger(0)) {
66257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (s == 0) {
66357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            i->setSrc(0, i->getSrc(1));
6649362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller            i->src(0).mod = i->src(1).mod;
66557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         }
66657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         i->setSrc(1, NULL);
6679362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller         i->op = i->src(0).mod.getOp();
66857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (i->op != OP_CVT)
6699362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller            i->src(0).mod = Modifier(0);
67057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
67157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      break;
67257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
67357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_DIV:
67457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (s != 1 || (i->dType != TYPE_S32 && i->dType != TYPE_U32))
67557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         break;
67657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      bld.setPosition(i, false);
677d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez      if (imm0.reg.data.u32 == 0) {
67857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         break;
67957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      } else
680d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez      if (imm0.reg.data.u32 == 1) {
68157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         i->op = OP_MOV;
68257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         i->setSrc(1, NULL);
68357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      } else
684d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez      if (i->dType == TYPE_U32 && imm0.isPow2()) {
6856ab6110133c2d316d98f78bbc38bca0c5b6184a7Christoph Bumiller         i->op = OP_SHR;
686d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez         i->setSrc(1, bld.mkImm(util_logbase2(imm0.reg.data.u32)));
68757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      } else
68857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (i->dType == TYPE_U32) {
68957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         Instruction *mul;
69057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         Value *tA, *tB;
691d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez         const uint32_t d = imm0.reg.data.u32;
69257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         uint32_t m;
69357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         int r, s;
69457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         uint32_t l = util_logbase2(d);
69557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (((uint32_t)1 << l) < d)
69657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            ++l;
69757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         m = (((uint64_t)1 << 32) * (((uint64_t)1 << l) - d)) / d + 1;
69857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         r = l ? 1 : 0;
69957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         s = l ? (l - 1) : 0;
70057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
70157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         tA = bld.getSSA();
70257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         tB = bld.getSSA();
70357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         mul = bld.mkOp2(OP_MUL, TYPE_U32, tA, i->getSrc(0),
70457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller                         bld.loadImm(NULL, m));
70557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         mul->subOp = NV50_IR_SUBOP_MUL_HIGH;
70657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         bld.mkOp2(OP_SUB, TYPE_U32, tB, i->getSrc(0), tA);
70757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         tA = bld.getSSA();
70857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (r)
70957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            bld.mkOp2(OP_SHR, TYPE_U32, tA, tB, bld.mkImm(r));
71057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         else
71157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            tA = tB;
71257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         tB = s ? bld.getSSA() : i->getDef(0);
71357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         bld.mkOp2(OP_ADD, TYPE_U32, tB, mul->getDef(0), tA);
71457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (s)
71557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), tB, bld.mkImm(s));
71657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
71757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         delete_Instruction(prog, i);
71857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      } else
719d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez      if (imm0.reg.data.s32 == -1) {
72057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         i->op = OP_NEG;
72157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         i->setSrc(1, NULL);
72257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      } else {
72357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         LValue *tA, *tB;
72457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         LValue *tD;
725d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez         const int32_t d = imm0.reg.data.s32;
72657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         int32_t m;
72757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         int32_t l = util_logbase2(static_cast<unsigned>(abs(d)));
72857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if ((1 << l) < abs(d))
72957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            ++l;
73057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (!l)
73157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            l = 1;
73257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         m = ((uint64_t)1 << (32 + l - 1)) / abs(d) + 1 - ((uint64_t)1 << 32);
73357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
73457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         tA = bld.getSSA();
73557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         tB = bld.getSSA();
73657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         bld.mkOp3(OP_MAD, TYPE_S32, tA, i->getSrc(0), bld.loadImm(NULL, m),
73757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller                   i->getSrc(0))->subOp = NV50_IR_SUBOP_MUL_HIGH;
73857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (l > 1)
73957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            bld.mkOp2(OP_SHR, TYPE_S32, tB, tA, bld.mkImm(l - 1));
74057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         else
74157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            tB = tA;
74257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         tA = bld.getSSA();
74357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         bld.mkCmp(OP_SET, CC_LT, TYPE_S32, tA, i->getSrc(0), bld.mkImm(0));
74457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         tD = (d < 0) ? bld.getSSA() : i->getDef(0)->asLValue();
74557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         bld.mkOp2(OP_SUB, TYPE_U32, tD, tB, tA);
74657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (d < 0)
74757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            bld.mkOp1(OP_NEG, TYPE_S32, i->getDef(0), tB);
74857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
74957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         delete_Instruction(prog, i);
75057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
75157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      break;
75257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
753ae828413c4a98ba3546f5586f2e20d9da718ab0cChristoph Bumiller   case OP_MOD:
754d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez      if (i->sType == TYPE_U32 && imm0.isPow2()) {
755ae828413c4a98ba3546f5586f2e20d9da718ab0cChristoph Bumiller         bld.setPosition(i, false);
756ae828413c4a98ba3546f5586f2e20d9da718ab0cChristoph Bumiller         i->op = OP_AND;
757d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez         i->setSrc(1, bld.loadImm(NULL, imm0.reg.data.u32 - 1));
758ae828413c4a98ba3546f5586f2e20d9da718ab0cChristoph Bumiller      }
759ae828413c4a98ba3546f5586f2e20d9da718ab0cChristoph Bumiller      break;
760ae828413c4a98ba3546f5586f2e20d9da718ab0cChristoph Bumiller
76157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_SET: // TODO: SET_AND,OR,XOR
76257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   {
76357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      CmpInstruction *si = findOriginForTestWithZero(i->getSrc(t));
76457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      CondCode cc, ccZ;
7659362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller      if (i->src(t).mod != Modifier(0))
76657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return;
767d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez      if (imm0.reg.data.u32 != 0 || !si || si->op != OP_SET)
76857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return;
76957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      cc = si->setCond;
77057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      ccZ = (CondCode)((unsigned int)i->asCmp()->setCond & ~CC_U);
77157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (s == 0)
77257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         ccZ = reverseCondCode(ccZ);
77357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      switch (ccZ) {
77457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case CC_LT: cc = CC_FL; break;
77557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case CC_GE: cc = CC_TR; break;
77657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case CC_EQ: cc = inverseCondCode(cc); break;
77757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case CC_LE: cc = inverseCondCode(cc); break;
77857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case CC_GT: break;
77957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case CC_NE: break;
78057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      default:
78157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return;
78257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
78357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      i->asCmp()->setCond = cc;
7849362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller      i->setSrc(0, si->src(0));
7859362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller      i->setSrc(1, si->src(1));
78657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      i->sType = si->sType;
78757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
78857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      break;
78957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
79057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_SHL:
79157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   {
7929362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller      if (s != 1 || i->src(0).mod != Modifier(0))
79357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         break;
79457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      // try to concatenate shifts
79557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      Instruction *si = i->getSrc(0)->getInsn();
796d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez      if (!si || si->op != OP_SHL)
79757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         break;
798d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez      ImmediateValue imm1;
799d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez      if (si->src(1).getImmediate(imm1)) {
80057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         bld.setPosition(i, false);
80157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         i->setSrc(0, si->getSrc(0));
802d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez         i->setSrc(1, bld.loadImm(NULL, imm0.reg.data.u32 + imm1.reg.data.u32));
80357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
80457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
80557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      break;
80657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
80757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_ABS:
80857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_NEG:
80957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_LG2:
81057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_RCP:
81157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_SQRT:
81257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_RSQ:
81357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_PRESIN:
81457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_SIN:
81557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_COS:
81657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_PREEX2:
81757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   case OP_EX2:
818d6d1f0e4a25c9fbefce7485d77617855a8ea956aFrancisco Jerez      unary(i, imm0);
81957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      break;
82057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   default:
82157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return;
82257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
82357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (i->op != op)
82457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      foldCount++;
82557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
82657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
82757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// =============================================================================
82857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
82957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// Merge modifier operations (ABS, NEG, NOT) into ValueRefs where allowed.
83057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerclass ModifierFolding : public Pass
83157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
83257594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerprivate:
83357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   virtual bool visit(BasicBlock *);
83457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller};
83557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
83657594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool
83757594065c30feec9376be9b2132659f7d87362eeChristoph BumillerModifierFolding::visit(BasicBlock *bb)
83857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
83957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   const Target *target = prog->getTarget();
84057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
84157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Instruction *i, *next, *mi;
84257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Modifier mod;
84357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
84457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   for (i = bb->getEntry(); i; i = next) {
84557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      next = i->next;
84657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
84757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (0 && i->op == OP_SUB) {
84857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         // turn "sub" into "add neg" (do we really want this ?)
84957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         i->op = OP_ADD;
8509362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller         i->src(0).mod = i->src(0).mod ^ Modifier(NV50_IR_MOD_NEG);
85157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
85257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
85357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      for (int s = 0; s < 3 && i->srcExists(s); ++s) {
85457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         mi = i->getSrc(s)->getInsn();
85557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (!mi ||
85657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller             mi->predSrc >= 0 || mi->getDef(0)->refCount() > 8)
85757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            continue;
85857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (i->sType == TYPE_U32 && mi->dType == TYPE_S32) {
85957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            if ((i->op != OP_ADD &&
86057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller                 i->op != OP_MUL) ||
86157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller                (mi->op != OP_ABS &&
86257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller                 mi->op != OP_NEG))
86357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller               continue;
86457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         } else
86557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (i->sType != mi->dType) {
86657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            continue;
86757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         }
86857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if ((mod = Modifier(mi->op)) == Modifier(0))
86957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            continue;
87014d5f975a65c57830077dabf2f95261afbc51773Francisco Jerez         mod *= mi->src(0).mod;
87157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
8729362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller         if ((i->op == OP_ABS) || i->src(s).mod.abs()) {
87357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            // abs neg [abs] = abs
87457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            mod = mod & Modifier(~(NV50_IR_MOD_NEG | NV50_IR_MOD_ABS));
87557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         } else
87657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if ((i->op == OP_NEG) && mod.neg()) {
87757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            assert(s == 0);
87857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            // neg as both opcode and modifier on same insn is prohibited
87957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            // neg neg abs = abs, neg neg = identity
88057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            mod = mod & Modifier(~NV50_IR_MOD_NEG);
88157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            i->op = mod.getOp();
88257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            mod = mod & Modifier(~NV50_IR_MOD_ABS);
88357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            if (mod == Modifier(0))
88457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller               i->op = OP_MOV;
88557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         }
88657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
88757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (target->isModSupported(i, s, mod)) {
88857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            i->setSrc(s, mi->getSrc(0));
88914d5f975a65c57830077dabf2f95261afbc51773Francisco Jerez            i->src(s).mod *= mod;
89057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         }
89157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
89257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
89357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (i->op == OP_SAT) {
89457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         mi = i->getSrc(0)->getInsn();
89557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (mi &&
89657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller             mi->getDef(0)->refCount() <= 1 && target->isSatSupported(mi)) {
89757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            mi->saturate = 1;
89857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            mi->setDef(0, i->getDef(0));
89957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            delete_Instruction(prog, i);
90057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         }
90157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
90257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
90357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
90457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   return true;
90557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
90657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
90757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// =============================================================================
90857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
90957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// MUL + ADD -> MAD/FMA
91057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// MIN/MAX(a, a) -> a, etc.
91157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// SLCT(a, b, const) -> cc(const) ? a : b
91257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// RCP(RCP(a)) -> a
91357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// MUL(MUL(a, b), const) -> MUL_Xconst(a, b)
91457594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerclass AlgebraicOpt : public Pass
91557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
91657594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerprivate:
91757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   virtual bool visit(BasicBlock *);
91857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
9191f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller   void handleABS(Instruction *);
9201f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller   bool handleADD(Instruction *);
9211f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller   bool tryADDToMADOrSAD(Instruction *, operation toOp);
92257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   void handleMINMAX(Instruction *);
92357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   void handleRCP(Instruction *);
92457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   void handleSLCT(Instruction *);
92557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   void handleLOGOP(Instruction *);
92657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   void handleCVT(Instruction *);
9271f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller
9281f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller   BuildUtil bld;
92957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller};
93057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
93157594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid
9321f4c154f0253ed8fb448402532cfa670f74e69cdChristoph BumillerAlgebraicOpt::handleABS(Instruction *abs)
9331f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller{
9341f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller   Instruction *sub = abs->getSrc(0)->getInsn();
9351f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller   DataType ty;
9361f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller   if (!sub ||
9371f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller       !prog->getTarget()->isOpSupported(OP_SAD, abs->dType))
9381f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller      return;
9391f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller   // expect not to have mods yet, if we do, bail
9401f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller   if (sub->src(0).mod || sub->src(1).mod)
9411f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller      return;
9421f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller   // hidden conversion ?
9431f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller   ty = intTypeToSigned(sub->dType);
9441f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller   if (abs->dType != abs->sType || ty != abs->sType)
9451f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller      return;
9461f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller
9471f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller   if ((sub->op != OP_ADD && sub->op != OP_SUB) ||
9481f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller       sub->src(0).getFile() != FILE_GPR || sub->src(0).mod ||
9491f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller       sub->src(1).getFile() != FILE_GPR || sub->src(1).mod)
9501f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller         return;
9511f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller
9521f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller   Value *src0 = sub->getSrc(0);
9531f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller   Value *src1 = sub->getSrc(1);
9541f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller
9551f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller   if (sub->op == OP_ADD) {
9561f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller      Instruction *neg = sub->getSrc(1)->getInsn();
9571f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller      if (neg && neg->op != OP_NEG) {
9581f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller         neg = sub->getSrc(0)->getInsn();
9591f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller         src0 = sub->getSrc(1);
9601f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller      }
9611f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller      if (!neg || neg->op != OP_NEG ||
9621f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller          neg->dType != neg->sType || neg->sType != ty)
9631f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller         return;
9641f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller      src1 = neg->getSrc(0);
9651f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller   }
9661f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller
9671f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller   // found ABS(SUB))
9681f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller   abs->moveSources(1, 2); // move sources >=1 up by 2
9691f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller   abs->op = OP_SAD;
9701f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller   abs->setType(sub->dType);
9711f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller   abs->setSrc(0, src0);
9721f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller   abs->setSrc(1, src1);
9731f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller   bld.setPosition(abs, false);
9741f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller   abs->setSrc(2, bld.loadImm(bld.getSSA(typeSizeof(ty)), 0));
9751f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller}
9761f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller
9771f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumillerbool
97857594065c30feec9376be9b2132659f7d87362eeChristoph BumillerAlgebraicOpt::handleADD(Instruction *add)
97957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
98057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Value *src0 = add->getSrc(0);
98157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Value *src1 = add->getSrc(1);
9821f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller
9831f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller   if (src0->reg.file != FILE_GPR || src1->reg.file != FILE_GPR)
9841f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller      return false;
9851f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller
9861f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller   bool changed = false;
9871f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller   if (!changed && prog->getTarget()->isOpSupported(OP_MAD, add->dType))
9881f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller      changed = tryADDToMADOrSAD(add, OP_MAD);
9891f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller   if (!changed && prog->getTarget()->isOpSupported(OP_SAD, add->dType))
9901f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller      changed = tryADDToMADOrSAD(add, OP_SAD);
9911f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller   return changed;
9921f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller}
9931f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller
9941f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller// ADD(SAD(a,b,0), c) -> SAD(a,b,c)
9951f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller// ADD(MUL(a,b), c) -> MAD(a,b,c)
9961f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumillerbool
9971f4c154f0253ed8fb448402532cfa670f74e69cdChristoph BumillerAlgebraicOpt::tryADDToMADOrSAD(Instruction *add, operation toOp)
9981f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller{
9991f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller   Value *src0 = add->getSrc(0);
10001f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller   Value *src1 = add->getSrc(1);
100157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Value *src;
100257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   int s;
10031f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller   const operation srcOp = toOp == OP_SAD ? OP_SAD : OP_MUL;
10041f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller   const Modifier modBad = Modifier(~((toOp == OP_MAD) ? NV50_IR_MOD_NEG : 0));
100557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Modifier mod[4];
100657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
100757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (src0->refCount() == 1 &&
10081f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller       src0->getUniqueInsn() && src0->getUniqueInsn()->op == srcOp)
100957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      s = 0;
101057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   else
101157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (src1->refCount() == 1 &&
10121f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller       src1->getUniqueInsn() && src1->getUniqueInsn()->op == srcOp)
101357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      s = 1;
101457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   else
10151f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller      return false;
101657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
101757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if ((src0->getUniqueInsn() && src0->getUniqueInsn()->bb != add->bb) ||
101857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller       (src1->getUniqueInsn() && src1->getUniqueInsn()->bb != add->bb))
10191f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller      return false;
102057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
102157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   src = add->getSrc(s);
102257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
102355f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller   if (src->getInsn()->postFactor)
10241f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller      return false;
10251f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller   if (toOp == OP_SAD) {
10261f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller      ImmediateValue imm;
10271f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller      if (!src->getInsn()->src(2).getImmediate(imm))
10281f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller         return false;
10291f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller      if (!imm.isInteger(0))
10301f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller         return false;
10311f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller   }
103255f9bdb64e1f88c74754c8e090cd2cdbe62bba05Christoph Bumiller
10339362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller   mod[0] = add->src(0).mod;
10349362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller   mod[1] = add->src(1).mod;
10359362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller   mod[2] = src->getUniqueInsn()->src(0).mod;
10369362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller   mod[3] = src->getUniqueInsn()->src(1).mod;
103757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
10381f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller   if (((mod[0] | mod[1]) | (mod[2] | mod[3])) & modBad)
10391f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller      return false;
104057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
10411f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller   add->op = toOp;
104257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   add->subOp = src->getInsn()->subOp; // potentially mul-high
104357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
10449362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller   add->setSrc(2, add->src(s ? 0 : 1));
104557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
104657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   add->setSrc(0, src->getInsn()->getSrc(0));
10479362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller   add->src(0).mod = mod[2] ^ mod[s];
104857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   add->setSrc(1, src->getInsn()->getSrc(1));
10499362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller   add->src(1).mod = mod[3];
10501f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller
10511f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller   return true;
105257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
105357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
105457594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid
105557594065c30feec9376be9b2132659f7d87362eeChristoph BumillerAlgebraicOpt::handleMINMAX(Instruction *minmax)
105657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
105757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Value *src0 = minmax->getSrc(0);
105857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Value *src1 = minmax->getSrc(1);
105957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
106057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (src0 != src1 || src0->reg.file != FILE_GPR)
106157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return;
10629362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller   if (minmax->src(0).mod == minmax->src(1).mod) {
106314d5f975a65c57830077dabf2f95261afbc51773Francisco Jerez      if (minmax->def(0).mayReplace(minmax->src(0))) {
106414d5f975a65c57830077dabf2f95261afbc51773Francisco Jerez         minmax->def(0).replace(minmax->src(0), false);
106514d5f975a65c57830077dabf2f95261afbc51773Francisco Jerez         minmax->bb->remove(minmax);
106614d5f975a65c57830077dabf2f95261afbc51773Francisco Jerez      } else {
106757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         minmax->op = OP_CVT;
106857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         minmax->setSrc(1, NULL);
106957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
107057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   } else {
107157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      // TODO:
107257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      // min(x, -x) = -abs(x)
107357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      // min(x, -abs(x)) = -abs(x)
107457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      // min(x, abs(x)) = x
107557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      // max(x, -abs(x)) = x
107657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      // max(x, abs(x)) = abs(x)
107757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      // max(x, -x) = abs(x)
107857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
107957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
108057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
108157594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid
108257594065c30feec9376be9b2132659f7d87362eeChristoph BumillerAlgebraicOpt::handleRCP(Instruction *rcp)
108357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
108457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Instruction *si = rcp->getSrc(0)->getUniqueInsn();
108557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
108657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (si && si->op == OP_RCP) {
10879362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller      Modifier mod = rcp->src(0).mod * si->src(0).mod;
108857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      rcp->op = mod.getOp();
108957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      rcp->setSrc(0, si->getSrc(0));
109057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
109157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
109257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
109357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid
109457594065c30feec9376be9b2132659f7d87362eeChristoph BumillerAlgebraicOpt::handleSLCT(Instruction *slct)
109557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
109657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (slct->getSrc(2)->reg.file == FILE_IMMEDIATE) {
109757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (slct->getSrc(2)->asImm()->compare(slct->asCmp()->setCond, 0.0f))
109857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         slct->setSrc(0, slct->getSrc(1));
109957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   } else
110057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (slct->getSrc(0) != slct->getSrc(1)) {
110157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return;
110257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
110357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   slct->op = OP_MOV;
110457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   slct->setSrc(1, NULL);
110557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   slct->setSrc(2, NULL);
110657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
110757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
110857594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid
110957594065c30feec9376be9b2132659f7d87362eeChristoph BumillerAlgebraicOpt::handleLOGOP(Instruction *logop)
111057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
111157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Value *src0 = logop->getSrc(0);
111257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Value *src1 = logop->getSrc(1);
111357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
111457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (src0->reg.file != FILE_GPR || src1->reg.file != FILE_GPR)
111557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return;
111657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
111757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (src0 == src1) {
111814d5f975a65c57830077dabf2f95261afbc51773Francisco Jerez      if ((logop->op == OP_AND || logop->op == OP_OR) &&
111914d5f975a65c57830077dabf2f95261afbc51773Francisco Jerez          logop->def(0).mayReplace(logop->src(0))) {
112014d5f975a65c57830077dabf2f95261afbc51773Francisco Jerez         logop->def(0).replace(logop->src(0), false);
112157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         delete_Instruction(prog, logop);
112257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
112357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   } else {
112457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      // try AND(SET, SET) -> SET_AND(SET)
112557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      Instruction *set0 = src0->getInsn();
112657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      Instruction *set1 = src1->getInsn();
112757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
112857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (!set0 || set0->fixed || !set1 || set1->fixed)
112957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return;
113057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (set1->op != OP_SET) {
113157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         Instruction *xchg = set0;
113257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         set0 = set1;
113357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         set1 = xchg;
113457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (set1->op != OP_SET)
113557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            return;
113657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
113793508b5b0d0a2b1e966973f1d0119b32d2ccf729Francisco Jerez      operation redOp = (logop->op == OP_AND ? OP_SET_AND :
113893508b5b0d0a2b1e966973f1d0119b32d2ccf729Francisco Jerez                         logop->op == OP_XOR ? OP_SET_XOR : OP_SET_OR);
113993508b5b0d0a2b1e966973f1d0119b32d2ccf729Francisco Jerez      if (!prog->getTarget()->isOpSupported(redOp, set1->sType))
114093508b5b0d0a2b1e966973f1d0119b32d2ccf729Francisco Jerez         return;
114157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (set0->op != OP_SET &&
114257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller          set0->op != OP_SET_AND &&
114357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller          set0->op != OP_SET_OR &&
114457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller          set0->op != OP_SET_XOR)
114557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return;
114657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (set0->getDef(0)->refCount() > 1 &&
114757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller          set1->getDef(0)->refCount() > 1)
114857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return;
114957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (set0->getPredicate() || set1->getPredicate())
115057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return;
115157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      // check that they don't source each other
115257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      for (int s = 0; s < 2; ++s)
115357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (set0->getSrc(s) == set1->getDef(0) ||
115457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller             set1->getSrc(s) == set0->getDef(0))
115557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            return;
115657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
1157a05e6a3fa28168d58a13cfb07f7a664e84b925aeFrancisco Jerez      set0 = cloneForward(func, set0);
1158a05e6a3fa28168d58a13cfb07f7a664e84b925aeFrancisco Jerez      set1 = cloneShallow(func, set1);
115957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      logop->bb->insertAfter(logop, set1);
116057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      logop->bb->insertAfter(logop, set0);
116157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
116257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      set0->dType = TYPE_U8;
116357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      set0->getDef(0)->reg.file = FILE_PREDICATE;
116457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      set0->getDef(0)->reg.size = 1;
116557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      set1->setSrc(2, set0->getDef(0));
116693508b5b0d0a2b1e966973f1d0119b32d2ccf729Francisco Jerez      set1->op = redOp;
116757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      set1->setDef(0, logop->getDef(0));
116857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      delete_Instruction(prog, logop);
116957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
117057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
117157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
117257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// F2I(NEG(SET with result 1.0f/0.0f)) -> SET with result -1/0
1173a6fcf14c239fa4c1542559f8c938fb574e48104fChristoph Bumiller// nv50:
1174a6fcf14c239fa4c1542559f8c938fb574e48104fChristoph Bumiller//  F2I(NEG(I2F(ABS(SET))))
117557594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid
117657594065c30feec9376be9b2132659f7d87362eeChristoph BumillerAlgebraicOpt::handleCVT(Instruction *cvt)
117757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
117857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (cvt->sType != TYPE_F32 ||
11799362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller       cvt->dType != TYPE_S32 || cvt->src(0).mod != Modifier(0))
118057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return;
118157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Instruction *insn = cvt->getSrc(0)->getInsn();
118257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (!insn || insn->op != OP_NEG || insn->dType != TYPE_F32)
118357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return;
11849362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller   if (insn->src(0).mod != Modifier(0))
118557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return;
118657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   insn = insn->getSrc(0)->getInsn();
1187a6fcf14c239fa4c1542559f8c938fb574e48104fChristoph Bumiller
1188a6fcf14c239fa4c1542559f8c938fb574e48104fChristoph Bumiller   // check for nv50 SET(-1,0) -> SET(1.0f/0.0f) chain and nvc0's f32 SET
1189a6fcf14c239fa4c1542559f8c938fb574e48104fChristoph Bumiller   if (insn && insn->op == OP_CVT &&
1190a6fcf14c239fa4c1542559f8c938fb574e48104fChristoph Bumiller       insn->dType == TYPE_F32 &&
1191a6fcf14c239fa4c1542559f8c938fb574e48104fChristoph Bumiller       insn->sType == TYPE_S32) {
1192a6fcf14c239fa4c1542559f8c938fb574e48104fChristoph Bumiller      insn = insn->getSrc(0)->getInsn();
1193a6fcf14c239fa4c1542559f8c938fb574e48104fChristoph Bumiller      if (!insn || insn->op != OP_ABS || insn->sType != TYPE_S32 ||
1194a6fcf14c239fa4c1542559f8c938fb574e48104fChristoph Bumiller          insn->src(0).mod)
1195a6fcf14c239fa4c1542559f8c938fb574e48104fChristoph Bumiller         return;
1196a6fcf14c239fa4c1542559f8c938fb574e48104fChristoph Bumiller      insn = insn->getSrc(0)->getInsn();
1197a6fcf14c239fa4c1542559f8c938fb574e48104fChristoph Bumiller      if (!insn || insn->op != OP_SET || insn->dType != TYPE_U32)
1198a6fcf14c239fa4c1542559f8c938fb574e48104fChristoph Bumiller         return;
1199a6fcf14c239fa4c1542559f8c938fb574e48104fChristoph Bumiller   } else
1200a6fcf14c239fa4c1542559f8c938fb574e48104fChristoph Bumiller   if (!insn || insn->op != OP_SET || insn->dType != TYPE_F32) {
120157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return;
1202a6fcf14c239fa4c1542559f8c938fb574e48104fChristoph Bumiller   }
120357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
1204a05e6a3fa28168d58a13cfb07f7a664e84b925aeFrancisco Jerez   Instruction *bset = cloneShallow(func, insn);
120557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   bset->dType = TYPE_U32;
120657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   bset->setDef(0, cvt->getDef(0));
120757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   cvt->bb->insertAfter(cvt, bset);
120857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   delete_Instruction(prog, cvt);
120957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
121057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
121157594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool
121257594065c30feec9376be9b2132659f7d87362eeChristoph BumillerAlgebraicOpt::visit(BasicBlock *bb)
121357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
121457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Instruction *next;
121557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   for (Instruction *i = bb->getEntry(); i; i = next) {
121657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      next = i->next;
121757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      switch (i->op) {
12181f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller      case OP_ABS:
12191f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller         handleABS(i);
12201f4c154f0253ed8fb448402532cfa670f74e69cdChristoph Bumiller         break;
122157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case OP_ADD:
122257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         handleADD(i);
122357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         break;
122457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case OP_RCP:
122557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         handleRCP(i);
122657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         break;
122757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case OP_MIN:
122857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case OP_MAX:
122957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         handleMINMAX(i);
123057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         break;
123157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case OP_SLCT:
123257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         handleSLCT(i);
123357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         break;
123457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case OP_AND:
123557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case OP_OR:
123657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case OP_XOR:
123757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         handleLOGOP(i);
123857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         break;
123957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case OP_CVT:
124057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         handleCVT(i);
124157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         break;
124257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      default:
124357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         break;
124457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
124557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
124657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
124757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   return true;
124857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
124957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
125057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// =============================================================================
125157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
125257594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerstatic inline void
125357594065c30feec9376be9b2132659f7d87362eeChristoph BumillerupdateLdStOffset(Instruction *ldst, int32_t offset, Function *fn)
125457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
125557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (offset != ldst->getSrc(0)->reg.data.offset) {
125657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (ldst->getSrc(0)->refCount() > 1)
1257a05e6a3fa28168d58a13cfb07f7a664e84b925aeFrancisco Jerez         ldst->setSrc(0, cloneShallow(fn, ldst->getSrc(0)));
125857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      ldst->getSrc(0)->reg.data.offset = offset;
125957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
126057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
126157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
126257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// Combine loads and stores, forward stores to loads where possible.
//
// The pass keeps, per data file, one linked list of Record entries for loads
// and one for stores (loads[] / stores[]).  Records are handed back to
// recordPool by reset().
126357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerclass MemoryOpt : public Pass
126457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
126557594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerprivate:
   // Bookkeeping entry for one load or store instruction.
126657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   class Record
126757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   {
126857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   public:
      // Next entry of the list this record is linked into (followed by
      // MemoryOpt::reset() when releasing records).
126957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      Record *next;
      // The load/store instruction this record stands for; combineLd() and
      // combineSt() widen or replace it.
127057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      Instruction *insn;
      // Filled by set() from ldst->getIndirect(0, 0) and getIndirect(0, 1).
127157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      const Value *rel[2];
      // Filled by set() from getBase() of the instruction's source-0 symbol.
127257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      const Value *base;
      // Filled by set() from reg.data.offset of the source-0 symbol; updated
      // by combineLd()/combineSt() when the merged access starts lower.
127357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      int32_t offset;
      // Filled by set() from reg.fileIndex of the source-0 symbol.
127457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      int8_t fileIndex;
      // Size of the recorded access; combineLd()/combineSt() add the
      // typeSizeof() of the merged instruction to it.
127557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      uint8_t size;
      // NOTE(review): presumably set via lockStores(); its use is not
      // visible in this part of the file - confirm before relying on it.
127657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      bool locked;
      // NOTE(review): presumably the back link matching @next - confirm in
      // link()/unlink().
127757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      Record *prev;
127857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
127957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      bool overlaps(const Instruction *ldst) const;
128057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
      // link()/unlink() take a pointer to a list head pointer; set() fills
      // the address-related fields from @ldst.
128157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      inline void link(Record **);
128257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      inline void unlink(Record **);
128357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      inline void set(const Instruction *ldst);
128457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   };
128557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
128657594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerpublic:
   // Clears all list heads and prevRecord; sizes recordPool for Record.
128757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   MemoryOpt();
128857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
   // List heads, one slot per data file (DATA_FILE_COUNT entries each).
128957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Record *loads[DATA_FILE_COUNT];
129057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Record *stores[DATA_FILE_COUNT];
129157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
   // Pool that reset() releases the Record objects to.
129257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   MemoryPool recordPool;
129357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
129457594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerprivate:
129557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   virtual bool visit(BasicBlock *);
129657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   bool runOpt(BasicBlock *);
129757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
129857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Record **getList(const Instruction *);
129957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
130057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Record *findRecord(const Instruction *, bool load, bool& isAdjacent) const;
130157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
130257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   // merge @insn into load/store instruction from @rec
   // (@insn above is the second parameter, named ld / st below).  Both
   // return false without changing anything if the merged access is not
   // supported by the target or would be misaligned, true otherwise.
130357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   bool combineLd(Record *rec, Instruction *ld);
130457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   bool combineSt(Record *rec, Instruction *st);
130557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
130657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   bool replaceLdFromLd(Instruction *ld, Record *ldRec);
130757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   bool replaceLdFromSt(Instruction *ld, Record *stRec);
130857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   bool replaceStFromSt(Instruction *restrict st, Record *stRec);
130957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
131057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   void addRecord(Instruction *ldst);
131157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   void purgeRecords(Instruction *const st, DataFile);
131257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   void lockStores(Instruction *const ld);
   // Release every record of every load and store list and empty the lists.
131357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   void reset();
131457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
131557594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerprivate:
   // Initialised to NULL by the constructor; not touched by reset().
131657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Record *prevRecord;
131757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller};
131857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
131957594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::MemoryOpt() : recordPool(sizeof(MemoryOpt::Record), 6)
132057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
132157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   for (int i = 0; i < DATA_FILE_COUNT; ++i) {
132257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      loads[i] = NULL;
132357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      stores[i] = NULL;
132457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
132557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   prevRecord = NULL;
132657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
132757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
132857594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid
132957594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::reset()
133057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
133157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   for (unsigned int i = 0; i < DATA_FILE_COUNT; ++i) {
133257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      Record *it, *next;
133357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      for (it = loads[i]; it; it = next) {
133457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         next = it->next;
133557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         recordPool.release(it);
133657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
133757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      loads[i] = NULL;
133857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      for (it = stores[i]; it; it = next) {
133957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         next = it->next;
134057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         recordPool.release(it);
134157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
134257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      stores[i] = NULL;
134357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
134457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
134557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
// Merge the load @ld into the load instruction tracked by @rec, so that
// rec->insn becomes one wider load producing the definitions of both.
//
// Returns false, leaving both instructions untouched, if the target does not
// support an access of the combined size in the symbol's register file or if
// the combined access would start at a misaligned offset.  Otherwise the
// definitions of @ld are moved into rec->insn (ordered by offset), the record
// and the instruction are widened, @ld is deleted and true is returned.
134657594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool
134757594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::combineLd(Record *rec, Instruction *ld)
134857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
134957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   int32_t offRc = rec->offset;
135057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   int32_t offLd = ld->getSrc(0)->reg.data.offset;
135157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   int sizeRc = rec->size;
135257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   int sizeLd = typeSizeof(ld->dType);
135357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   int size = sizeRc + sizeLd;
135457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   int d, j;
135557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
1356286abcb51ec2c27970e901ed815a814b3f0bebf6Christoph Bumiller   if (!prog->getTarget()->
1357286abcb51ec2c27970e901ed815a814b3f0bebf6Christoph Bumiller       isAccessSupported(ld->getSrc(0)->reg.file, typeOfSize(size)))
135857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return false;
135957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   // no unaligned loads
   // (a combined size of 8 needs the lower offset to be a multiple of 8,
   //  a combined size of 12 needs it to be a multiple of 16)
136057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (((size == 0x8) && (MIN2(offLd, offRc) & 0x7)) ||
136157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller       ((size == 0xc) && (MIN2(offLd, offRc) & 0xf)))
136257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return false;
136357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
136457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   assert(sizeRc + sizeLd <= 16 && offRc != offLd);
136557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
   // Count the definitions of rec->insn: step through them until their
   // sizes have used up the record's size.  Afterwards j is that count and
   // sizeRc is 0.
136657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   for (j = 0; sizeRc; sizeRc -= rec->insn->getDef(j)->reg.size, ++j);
136757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
136857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (offLd < offRc) {
      // @ld reads the lower offset, so its values have to come first.
136957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      int sz;
137057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      for (sz = 0, d = 0; sz < sizeLd; sz += ld->getDef(d)->reg.size, ++d);
137157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      // d: nr of definitions in ld
137257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      // j: nr of definitions in rec->insn, move:
      // Shift the existing definitions up by d slots, highest one first so
      // that no slot is overwritten before it has been moved.
137357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      for (d = d + j - 1; j > 0; --j, --d)
137457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         rec->insn->setDef(d, rec->insn->getDef(j - 1));
137557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
      // The merged load starts at @ld's offset.  Give rec->insn its own copy
      // of the source-0 value first if that value has a refCount() above one.
137657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (rec->insn->getSrc(0)->refCount() > 1)
1377a05e6a3fa28168d58a13cfb07f7a664e84b925aeFrancisco Jerez         rec->insn->setSrc(0, cloneShallow(func, rec->insn->getSrc(0)));
137857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      rec->offset = rec->insn->getSrc(0)->reg.data.offset = offLd;
137957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
      // @ld's definitions go into the slots freed at the front.
138057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      d = 0;
138157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   } else {
      // @ld reads the higher offset: append after the existing definitions.
138257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      d = j;
138357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
138457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   // move definitions of @ld to @rec->insn
138557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   for (j = 0; sizeLd; ++j, ++d) {
138657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      sizeLd -= ld->getDef(j)->reg.size;
138757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      rec->insn->setDef(d, ld->getDef(j));
138857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
138957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
   // Widen the record, the source-0 value and the instruction type to the
   // combined size.
139057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   rec->size = size;
139190f0fac65524fbc4e2f2d396d20d9808e4a0a95cFrancisco Jerez   rec->insn->getSrc(0)->reg.size = size;
139257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   rec->insn->setType(typeOfSize(size));
139357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
139457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   delete_Instruction(prog, ld);
139557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
139657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   return true;
139757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
139857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
// Merge the store tracked by @rec into the store @st, so that @st becomes
// one wider store writing the values of both.
//
// Source 0 of a store is its address value; the stored values follow from
// source 1 onwards.  Returns false, leaving everything untouched, if the
// target does not support an access of the combined size in that register
// file, or if the combined size is 8 and the lower offset is not a multiple
// of 8.  Otherwise the old rec->insn is deleted, @rec is re-pointed at @st
// and widened, and true is returned.
139957594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool
140057594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::combineSt(Record *rec, Instruction *st)
140157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
140257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   int32_t offRc = rec->offset;
140357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   int32_t offSt = st->getSrc(0)->reg.data.offset;
140457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   int sizeRc = rec->size;
140557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   int sizeSt = typeSizeof(st->dType);
   // NOTE: this initial value is never read; both branches below assign s
   // again before using it.
140657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   int s = sizeSt / 4;
140757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   int size = sizeRc + sizeSt;
140857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   int j, k;
140957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Value *src[4]; // no modifiers in ValueRef allowed for st
141057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Value *extra[3];
141157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
1412286abcb51ec2c27970e901ed815a814b3f0bebf6Christoph Bumiller   if (!prog->getTarget()->
1413286abcb51ec2c27970e901ed815a814b3f0bebf6Christoph Bumiller       isAccessSupported(st->getSrc(0)->reg.file, typeOfSize(size)))
141457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return false;
141557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (size == 8 && MIN2(offRc, offSt) & 0x7)
141657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return false;
141757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
   // From here on @st is modified, so there are no more early returns.
141857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   st->takeExtraSources(0, extra); // save predicate and indirect address
141957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
142057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (offRc < offSt) {
      // The recorded store covers the lower offset: its values must become
      // the low sources of @st and @st's own values must move up.
142157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      // save values from @st
142257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      for (s = 0; sizeSt; ++s) {
142357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         sizeSt -= st->getSrc(s + 1)->reg.size;
142457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         src[s] = st->getSrc(s + 1);
142557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
142657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      // set record's values as low sources of @st
142757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      for (j = 1; sizeRc; ++j) {
1428ef7f9f68cfe71b1f812e59abc644a54a0b80dd06Christoph Bumiller         sizeRc -= rec->insn->getSrc(j)->reg.size;
142957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         st->setSrc(j, rec->insn->getSrc(j));
143057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
143157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      // set saved values as high sources of @st
      // (j is the first slot after the record's values, s the number of
      //  values saved from @st)
143257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      for (k = j, j = 0; j < s; ++j)
143357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         st->setSrc(k++, src[j]);
143457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
      // The merged store starts at the record's offset.
143557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      updateLdStOffset(st, offRc, func);
143657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   } else {
      // @st already covers the lower offset: step past its own values to
      // find the first free source slot j, then append the record's values.
143757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      for (j = 1; sizeSt; ++j)
143857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         sizeSt -= st->getSrc(j)->reg.size;
143957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      for (s = 1; sizeRc; ++j, ++s) {
144057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         sizeRc -= rec->insn->getSrc(s)->reg.size;
144157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         st->setSrc(j, rec->insn->getSrc(s));
144257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
144357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      rec->offset = offSt;
144457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
144557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   st->putExtraSources(0, extra); // restore pointer and predicate
144657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
   // The previously recorded store is now redundant; @st takes its place in
   // the record and is widened to the combined size.
144757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   delete_Instruction(prog, rec->insn);
144857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   rec->insn = st;
144957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   rec->size = size;
145090f0fac65524fbc4e2f2d396d20d9808e4a0a95cFrancisco Jerez   rec->insn->getSrc(0)->reg.size = size;
145157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   rec->insn->setType(typeOfSize(size));
145257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   return true;
145357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
145457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
145557594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid
145657594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::Record::set(const Instruction *ldst)
145757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
145857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   const Symbol *mem = ldst->getSrc(0)->asSym();
145957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   fileIndex = mem->reg.fileIndex;
146057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   rel[0] = ldst->getIndirect(0, 0);
146157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   rel[1] = ldst->getIndirect(0, 1);
146257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   offset = mem->reg.data.offset;
146357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   base = mem->getBase();
146457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   size = typeSizeof(ldst->sType);
146557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
146657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
146757594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid
146857594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::Record::link(Record **list)
146957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
147057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   next = *list;
147157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (next)
147257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      next->prev = this;
147357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   prev = NULL;
147457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   *list = this;
147557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
147657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
147757594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid
147857594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::Record::unlink(Record **list)
147957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
148057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (next)
148157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      next->prev = prev;
148257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (prev)
148357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      prev->next = next;
148457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   else
148557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      *list = next;
148657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
148757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
148857594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::Record **
148957594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::getList(const Instruction *insn)
149057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
149157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (insn->op == OP_LOAD || insn->op == OP_VFETCH)
14929362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller      return &loads[insn->src(0).getFile()];
14939362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller   return &stores[insn->src(0).getFile()];
149457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
149557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
149657594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid
149757594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::addRecord(Instruction *i)
149857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
149957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Record **list = getList(i);
150057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Record *it = reinterpret_cast<Record *>(recordPool.allocate());
150157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
150257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   it->link(list);
150357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   it->set(i);
150457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   it->insn = i;
150557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   it->locked = false;
150657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
150757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
150857594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::Record *
150957594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::findRecord(const Instruction *insn, bool load, bool& isAdj) const
151057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
151157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   const Symbol *sym = insn->getSrc(0)->asSym();
151257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   const int size = typeSizeof(insn->sType);
151357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Record *rec = NULL;
151457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Record *it = load ? loads[sym->reg.file] : stores[sym->reg.file];
151557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
151657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   for (; it; it = it->next) {
151757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (it->locked && insn->op != OP_LOAD)
151857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         continue;
151957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if ((it->offset >> 4) != (sym->reg.data.offset >> 4) ||
152057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller          it->rel[0] != insn->getIndirect(0, 0) ||
152157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller          it->fileIndex != sym->reg.fileIndex ||
152257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller          it->rel[1] != insn->getIndirect(0, 1))
152357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         continue;
152457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
152557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (it->offset < sym->reg.data.offset) {
152657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (it->offset + it->size >= sym->reg.data.offset) {
152757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            isAdj = (it->offset + it->size == sym->reg.data.offset);
152857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            if (!isAdj)
152957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller               return it;
153057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            if (!(it->offset & 0x7))
153157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller               rec = it;
153257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         }
153357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      } else {
153457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         isAdj = it->offset != sym->reg.data.offset;
153557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (size <= it->size && !isAdj)
153657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            return it;
153757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         else
153857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (!(sym->reg.data.offset & 0x7))
153957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            if (it->offset - size <= sym->reg.data.offset)
154057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller               rec = it;
154157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
154257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
154357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   return rec;
154457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
154557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
154657594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool
154757594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::replaceLdFromSt(Instruction *ld, Record *rec)
154857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
154957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Instruction *st = rec->insn;
155057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   int32_t offSt = rec->offset;
155157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   int32_t offLd = ld->getSrc(0)->reg.data.offset;
155257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   int d, s;
155357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
155457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   for (s = 1; offSt != offLd && st->srcExists(s); ++s)
155557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      offSt += st->getSrc(s)->reg.size;
155657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (offSt != offLd)
155757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return false;
155857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
155957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   for (d = 0; ld->defExists(d) && st->srcExists(s); ++d, ++s) {
156057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (ld->getDef(d)->reg.size != st->getSrc(s)->reg.size)
156157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return false;
156257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (st->getSrc(s)->reg.file != FILE_GPR)
156357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return false;
156414d5f975a65c57830077dabf2f95261afbc51773Francisco Jerez      ld->def(d).replace(st->src(s), false);
156557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
156657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   ld->bb->remove(ld);
156757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   return true;
156857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
156957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
157057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool
157157594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::replaceLdFromLd(Instruction *ldE, Record *rec)
157257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
157357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Instruction *ldR = rec->insn;
157457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   int32_t offR = rec->offset;
157557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   int32_t offE = ldE->getSrc(0)->reg.data.offset;
157657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   int dR, dE;
157757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
157857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   assert(offR <= offE);
157957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   for (dR = 0; offR < offE && ldR->defExists(dR); ++dR)
158057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      offR += ldR->getDef(dR)->reg.size;
158157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (offR != offE)
158257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return false;
158357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
158457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   for (dE = 0; ldE->defExists(dE) && ldR->defExists(dR); ++dE, ++dR) {
158557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (ldE->getDef(dE)->reg.size != ldR->getDef(dR)->reg.size)
158657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return false;
15879362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller      ldE->def(dE).replace(ldR->getDef(dR), false);
158857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
158957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
159057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   delete_Instruction(prog, ldE);
159157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   return true;
159257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
159357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
159457594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool
159557594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::replaceStFromSt(Instruction *restrict st, Record *rec)
159657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
159757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   const Instruction *const ri = rec->insn;
159857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Value *extra[3];
159957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
160057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   int32_t offS = st->getSrc(0)->reg.data.offset;
160157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   int32_t offR = rec->offset;
160257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   int32_t endS = offS + typeSizeof(st->dType);
160357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   int32_t endR = offR + typeSizeof(ri->dType);
160457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
160557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   rec->size = MAX2(endS, endR) - MIN2(offS, offR);
160657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
160757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   st->takeExtraSources(0, extra);
160857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
160957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (offR < offS) {
1610541bb2e33f89b07bcbea2e27275df858760c8ec8Brian Paul      Value *vals[10];
161157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      int s, n;
161257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      int k = 0;
161357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      // get non-replaced sources of ri
161457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      for (s = 1; offR < offS; offR += ri->getSrc(s)->reg.size, ++s)
161557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         vals[k++] = ri->getSrc(s);
161657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      n = s;
161757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      // get replaced sources of st
161857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      for (s = 1; st->srcExists(s); offS += st->getSrc(s)->reg.size, ++s)
161957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         vals[k++] = st->getSrc(s);
162057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      // skip replaced sources of ri
162157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      for (s = n; offR < endS; offR += ri->getSrc(s)->reg.size, ++s);
162257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      // get non-replaced sources after values covered by st
162357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      for (; offR < endR; offR += ri->getSrc(s)->reg.size, ++s)
162457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         vals[k++] = ri->getSrc(s);
162551327a2df283da9a77c6e537751c6a45baed6951Christoph Bumiller      assert((unsigned int)k <= Elements(vals));
162657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      for (s = 0; s < k; ++s)
162757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         st->setSrc(s + 1, vals[s]);
162857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      st->setSrc(0, ri->getSrc(0));
162957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   } else
163057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (endR > endS) {
163157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      int j, s;
163257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      for (j = 1; offR < endS; offR += ri->getSrc(j++)->reg.size);
163357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      for (s = 1; offS < endS; offS += st->getSrc(s++)->reg.size);
163457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      for (; offR < endR; offR += ri->getSrc(j++)->reg.size)
163557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         st->setSrc(s++, ri->getSrc(j));
163657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
163757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   st->putExtraSources(0, extra);
163857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
163957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   delete_Instruction(prog, rec->insn);
164057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
164157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   rec->insn = st;
164257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   rec->offset = st->getSrc(0)->reg.data.offset;
164357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
164457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   st->setType(typeOfSize(rec->size));
164557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
164657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   return true;
164757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
164857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
164957594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool
165057594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::Record::overlaps(const Instruction *ldst) const
165157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
165257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Record that;
165357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   that.set(ldst);
165457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
165557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (this->fileIndex != that.fileIndex)
165657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return false;
165757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
165857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (this->rel[0] || that.rel[0])
165957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return this->base == that.base;
166057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   return
166157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      (this->offset < that.offset + that.size) &&
166257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      (this->offset + this->size > that.offset);
166357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
166457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
166557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// We must not eliminate stores that affect the result of @ld if
166657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// we find later stores to the same location, and we may no longer
166757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// merge them with later stores.
166857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// The stored value can, however, still be used to determine the value
166957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// returned by future loads.
167057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid
167157594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::lockStores(Instruction *const ld)
167257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
16739362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller   for (Record *r = stores[ld->src(0).getFile()]; r; r = r->next)
167457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (!r->locked && r->overlaps(ld))
167557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         r->locked = true;
167657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
167757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
167857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// Prior loads from the location of @st are no longer valid.
167957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// Stores to the location of @st may no longer be used to derive
168057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// the value at it nor be coalesced into later stores.
168157594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid
168257594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::purgeRecords(Instruction *const st, DataFile f)
168357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
168457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (st)
16859362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller      f = st->src(0).getFile();
168657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
168757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   for (Record *r = loads[f]; r; r = r->next)
168857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (!st || r->overlaps(st))
168957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         r->unlink(&loads[f]);
169057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
169157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   for (Record *r = stores[f]; r; r = r->next)
169257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (!st || r->overlaps(st))
169357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         r->unlink(&stores[f]);
169457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
169557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
169657594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool
169757594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::visit(BasicBlock *bb)
169857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
169957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   bool ret = runOpt(bb);
170057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   // Run again, one pass won't combine 4 32 bit ld/st to a single 128 bit ld/st
170157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   // where 96 bit memory operations are forbidden.
170257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (ret)
170357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      ret = runOpt(bb);
170457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   return ret;
170557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
170657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
170757594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool
170857594065c30feec9376be9b2132659f7d87362eeChristoph BumillerMemoryOpt::runOpt(BasicBlock *bb)
170957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
171057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Instruction *ldst, *next;
171157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Record *rec;
171257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   bool isAdjacent = true;
171357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
171457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   for (ldst = bb->getEntry(); ldst; ldst = next) {
171557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      bool keep = true;
171657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      bool isLoad = true;
171757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      next = ldst->next;
171857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
171957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (ldst->op == OP_LOAD || ldst->op == OP_VFETCH) {
172057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (ldst->isDead()) {
172157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            // might have been produced by earlier optimization
172257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            delete_Instruction(prog, ldst);
172357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            continue;
172457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         }
172557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      } else
172657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (ldst->op == OP_STORE || ldst->op == OP_EXPORT) {
172757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         isLoad = false;
172857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      } else {
172957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         // TODO: maybe have all fixed ops act as barrier ?
173057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (ldst->op == OP_CALL) {
173157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            purgeRecords(NULL, FILE_MEMORY_LOCAL);
173257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            purgeRecords(NULL, FILE_MEMORY_GLOBAL);
173357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            purgeRecords(NULL, FILE_MEMORY_SHARED);
173457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            purgeRecords(NULL, FILE_SHADER_OUTPUT);
173557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         } else
173657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (ldst->op == OP_EMIT || ldst->op == OP_RESTART) {
173757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            purgeRecords(NULL, FILE_SHADER_OUTPUT);
173857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         }
173957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         continue;
174057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
174157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (ldst->getPredicate()) // TODO: handle predicated ld/st
174257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         continue;
174357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
174457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (isLoad) {
17459362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller         DataFile file = ldst->src(0).getFile();
174657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
174757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         // if ld l[]/g[] look for previous store to eliminate the reload
174857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (file == FILE_MEMORY_GLOBAL || file == FILE_MEMORY_LOCAL) {
174957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            // TODO: shared memory ?
175057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            rec = findRecord(ldst, false, isAdjacent);
175157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            if (rec && !isAdjacent)
175257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller               keep = !replaceLdFromSt(ldst, rec);
175357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         }
175457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
175557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         // or look for ld from the same location and replace this one
175657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         rec = keep ? findRecord(ldst, true, isAdjacent) : NULL;
175757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (rec) {
175857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            if (!isAdjacent)
175957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller               keep = !replaceLdFromLd(ldst, rec);
176057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            else
176157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller               // or combine a previous load with this one
176257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller               keep = !combineLd(rec, ldst);
176357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         }
176457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (keep)
176557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            lockStores(ldst);
176657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      } else {
176757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         rec = findRecord(ldst, false, isAdjacent);
176857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (rec) {
176957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            if (!isAdjacent)
177057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller               keep = !replaceStFromSt(ldst, rec);
177157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            else
177257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller               keep = !combineSt(rec, ldst);
177357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         }
177457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (keep)
177557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            purgeRecords(ldst, DATA_FILE_COUNT);
177657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
177757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (keep)
177857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         addRecord(ldst);
177957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
178057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   reset();
178157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
178257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   return true;
178357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
178457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
178557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// =============================================================================
178657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
178757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// Turn control flow into predicated instructions (after register allocation !).
178857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// TODO:
178957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// Could move this to before register allocation on NVC0 and also handle nested
179057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// constructs.
179157594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerclass FlatteningPass : public Pass
179257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
179357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerprivate:
179457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   virtual bool visit(BasicBlock *);
179557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
179657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   bool tryPredicateConditional(BasicBlock *);
179757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   void predicateInstructions(BasicBlock *, Value *pred, CondCode cc);
179857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   void tryPropagateBranch(BasicBlock *);
179957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   inline bool isConstantCondition(Value *pred);
180057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   inline bool mayPredicate(const Instruction *, const Value *pred) const;
180157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   inline void removeFlow(Instruction *);
180257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller};
180357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
180457594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool
180557594065c30feec9376be9b2132659f7d87362eeChristoph BumillerFlatteningPass::isConstantCondition(Value *pred)
180657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
180757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Instruction *insn = pred->getUniqueInsn();
180857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   assert(insn);
180957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (insn->op != OP_SET || insn->srcExists(2))
181057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return false;
181157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
181257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   for (int s = 0; s < 2 && insn->srcExists(s); ++s) {
181357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      Instruction *ld = insn->getSrc(s)->getUniqueInsn();
181457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      DataFile file;
181557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (ld) {
181657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (ld->op != OP_MOV && ld->op != OP_LOAD)
181757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            return false;
18189362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller         if (ld->src(0).isIndirect(0))
181957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            return false;
18209362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller         file = ld->src(0).getFile();
182157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      } else {
18229362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller         file = insn->src(s).getFile();
182357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         // catch $r63 on NVC0
182457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (file == FILE_GPR && insn->getSrc(s)->reg.data.id > prog->maxGPR)
182557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            file = FILE_IMMEDIATE;
182657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
182757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (file != FILE_IMMEDIATE && file != FILE_MEMORY_CONST)
182857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return false;
182957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
183057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   return true;
183157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
183257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
183357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid
183457594065c30feec9376be9b2132659f7d87362eeChristoph BumillerFlatteningPass::removeFlow(Instruction *insn)
183557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
183657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   FlowInstruction *term = insn ? insn->asFlow() : NULL;
183757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (!term)
183857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return;
183957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Graph::Edge::Type ty = term->bb->cfg.outgoing().getType();
184057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
184157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (term->op == OP_BRA) {
184257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      // TODO: this might get more difficult when we get arbitrary BRAs
184357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (ty == Graph::Edge::CROSS || ty == Graph::Edge::BACK)
184457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return;
184557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   } else
184657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (term->op != OP_JOIN)
184757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return;
184857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
184957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Value *pred = term->getPredicate();
185057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
1851d41f293bf014e08df3df4324cdc02de5ce49d5edChristoph Bumiller   delete_Instruction(prog, term);
1852d41f293bf014e08df3df4324cdc02de5ce49d5edChristoph Bumiller
185357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (pred && pred->refCount() == 0) {
185457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      Instruction *pSet = pred->getUniqueInsn();
185557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      pred->join->reg.data.id = -1; // deallocate
185657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (pSet->isDead())
185757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         delete_Instruction(prog, pSet);
185857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
185957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
186057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
186157594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid
186257594065c30feec9376be9b2132659f7d87362eeChristoph BumillerFlatteningPass::predicateInstructions(BasicBlock *bb, Value *pred, CondCode cc)
186357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
186457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   for (Instruction *i = bb->getEntry(); i; i = i->next) {
186557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (i->isNop())
186657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         continue;
186757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      assert(!i->getPredicate());
186857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      i->setPredicate(cc, pred);
186957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
187057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   removeFlow(bb->getExit());
187157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
187257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
187357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool
187457594065c30feec9376be9b2132659f7d87362eeChristoph BumillerFlatteningPass::mayPredicate(const Instruction *insn, const Value *pred) const
187557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
187657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (insn->isPseudo())
187757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return true;
187857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   // TODO: calls where we don't know which registers are modified
187957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
188057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (!prog->getTarget()->mayPredicate(insn, pred))
188157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return false;
188257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   for (int d = 0; insn->defExists(d); ++d)
188357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (insn->getDef(d)->equals(pred))
188457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return false;
188557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   return true;
188657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
188757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
188857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// If we conditionally skip over or to a branch instruction, replace it.
188957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// NOTE: We do not update the CFG anymore here !
189057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid
189157594065c30feec9376be9b2132659f7d87362eeChristoph BumillerFlatteningPass::tryPropagateBranch(BasicBlock *bb)
189257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
189357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   BasicBlock *bf = NULL;
189457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   unsigned int i;
189557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
189657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (bb->cfg.outgoingCount() != 2)
189757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return;
189857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (!bb->getExit() || bb->getExit()->op != OP_BRA)
189957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return;
190057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Graph::EdgeIterator ei = bb->cfg.outgoing();
190157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
190257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   for (i = 0; !ei.end(); ++i, ei.next()) {
190357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      bf = BasicBlock::get(ei.getNode());
190457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (bf->getInsnCount() == 1)
190557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         break;
190657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
190757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (ei.end() || !bf->getExit())
190857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return;
190957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   FlowInstruction *bra = bb->getExit()->asFlow();
191057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   FlowInstruction *rep = bf->getExit()->asFlow();
191157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
191257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (rep->getPredicate())
191357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return;
191457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (rep->op != OP_BRA &&
191557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller       rep->op != OP_JOIN &&
191657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller       rep->op != OP_EXIT)
191757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return;
191857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
191957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   bra->op = rep->op;
192057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   bra->target.bb = rep->target.bb;
192157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (i) // 2nd out block means branch not taken
192257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      bra->cc = inverseCondCode(bra->cc);
192357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   bf->remove(rep);
192457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
192557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
192657594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool
192757594065c30feec9376be9b2132659f7d87362eeChristoph BumillerFlatteningPass::visit(BasicBlock *bb)
192857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
192957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (tryPredicateConditional(bb))
193057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return true;
193157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
193257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   // try to attach join to previous instruction
193357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Instruction *insn = bb->getExit();
193457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (insn && insn->op == OP_JOIN && !insn->getPredicate()) {
193557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      insn = insn->prev;
193600fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller      if (insn && !insn->getPredicate() &&
193700fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller          !insn->asFlow() &&
193800fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller          insn->op != OP_TEXBAR &&
19390d818cdacce0299fabe4ac2aa735247c651fdcfaChristoph Bumiller          !isTextureOp(insn->op) && // probably just nve4
1940d46f969b84a405dff6bbc647a7addd0902adc1e4Christoph Bumiller          insn->op != OP_LINTERP && // probably just nve4
1941d46f969b84a405dff6bbc647a7addd0902adc1e4Christoph Bumiller          insn->op != OP_PINTERP && // probably just nve4
194279eed0d2246e8e7be505784af0078507c712a02cChristoph Bumiller          ((insn->op != OP_LOAD && insn->op != OP_STORE) ||
194379eed0d2246e8e7be505784af0078507c712a02cChristoph Bumiller           typeSizeof(insn->dType) <= 4) &&
194400fe442253744c4c4e7e68da44d6983da053968bChristoph Bumiller          !insn->isNop()) {
194557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         insn->join = 1;
194657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         bb->remove(bb->getExit());
194757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return true;
194857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
194957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
195057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
195157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   tryPropagateBranch(bb);
195257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
195357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   return true;
195457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
195557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
195657594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool
195757594065c30feec9376be9b2132659f7d87362eeChristoph BumillerFlatteningPass::tryPredicateConditional(BasicBlock *bb)
195857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
195957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   BasicBlock *bL = NULL, *bR = NULL;
196057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   unsigned int nL = 0, nR = 0, limit = 12;
196157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Instruction *insn;
196257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   unsigned int mask;
196357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
196457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   mask = bb->initiatesSimpleConditional();
196557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (!mask)
196657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return false;
196757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
196857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   assert(bb->getExit());
196957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Value *pred = bb->getExit()->getPredicate();
197057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   assert(pred);
197157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
197257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (isConstantCondition(pred))
197357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      limit = 4;
197457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
197557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Graph::EdgeIterator ei = bb->cfg.outgoing();
197657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
197757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (mask & 1) {
197857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      bL = BasicBlock::get(ei.getNode());
197957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      for (insn = bL->getEntry(); insn; insn = insn->next, ++nL)
198057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (!mayPredicate(insn, pred))
198157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            return false;
198257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (nL > limit)
198357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return false; // too long, do a real branch
198457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
198557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   ei.next();
198657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
198757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (mask & 2) {
198857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      bR = BasicBlock::get(ei.getNode());
198957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      for (insn = bR->getEntry(); insn; insn = insn->next, ++nR)
199057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (!mayPredicate(insn, pred))
199157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            return false;
199257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (nR > limit)
199357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return false; // too long, do a real branch
199457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
199557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
199657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (bL)
199757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      predicateInstructions(bL, pred, bb->getExit()->cc);
199857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (bR)
199957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      predicateInstructions(bR, pred, inverseCondCode(bb->getExit()->cc));
200057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
200157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (bb->joinAt) {
200257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      bb->remove(bb->joinAt);
200357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      bb->joinAt = NULL;
200457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
200557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   removeFlow(bb->getExit()); // delete the branch/join at the fork point
200657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
200757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   // remove potential join operations at the end of the conditional
200857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (prog->getTarget()->joinAnterior) {
200957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      bb = BasicBlock::get((bL ? bL : bR)->cfg.outgoing().getNode());
201057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (bb->getEntry() && bb->getEntry()->op == OP_JOIN)
201157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         removeFlow(bb->getEntry());
201257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
201357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
201457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   return true;
201557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
201657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
201757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// =============================================================================
201857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
201957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// Common subexpression elimination. Stupid O^2 implementation.
202057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerclass LocalCSE : public Pass
202157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
202257594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerprivate:
202357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   virtual bool visit(BasicBlock *);
202457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
202557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   inline bool tryReplace(Instruction **, Instruction *);
202657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
202757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   DLList ops[OP_LAST + 1];
202857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller};
202957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
203057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerclass GlobalCSE : public Pass
203157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
203257594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerprivate:
203357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   virtual bool visit(BasicBlock *);
203457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller};
203557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
203657594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool
203757594065c30feec9376be9b2132659f7d87362eeChristoph BumillerInstruction::isActionEqual(const Instruction *that) const
203857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
203957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (this->op != that->op ||
204057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller       this->dType != that->dType ||
204157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller       this->sType != that->sType)
204257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return false;
204357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (this->cc != that->cc)
204457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return false;
204557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
204657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (this->asTex()) {
204757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (memcmp(&this->asTex()->tex,
204857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller                 &that->asTex()->tex,
204957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller                 sizeof(this->asTex()->tex)))
205057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return false;
205157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   } else
205257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (this->asCmp()) {
205357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (this->asCmp()->setCond != that->asCmp()->setCond)
205457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return false;
205557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   } else
205657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (this->asFlow()) {
205757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return false;
205857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   } else {
205957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (this->atomic != that->atomic ||
206057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller          this->ipa != that->ipa ||
206157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller          this->lanes != that->lanes ||
206257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller          this->perPatch != that->perPatch)
206357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return false;
206457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (this->postFactor != that->postFactor)
206557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return false;
206657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
206757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
206857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (this->subOp != that->subOp ||
206957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller       this->saturate != that->saturate ||
207057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller       this->rnd != that->rnd ||
207157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller       this->ftz != that->ftz ||
207257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller       this->dnz != that->dnz ||
207357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller       this->cache != that->cache)
207457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return false;
207557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
207657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   return true;
207757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
207857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
207957594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool
208057594065c30feec9376be9b2132659f7d87362eeChristoph BumillerInstruction::isResultEqual(const Instruction *that) const
208157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
208257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   unsigned int d, s;
208357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
208457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   // NOTE: location of discard only affects tex with liveOnly and quadops
208557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (!this->defExists(0) && this->op != OP_DISCARD)
208657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return false;
208757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
208857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (!isActionEqual(that))
208957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return false;
209057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
209157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (this->predSrc != that->predSrc)
209257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return false;
209357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
209457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   for (d = 0; this->defExists(d); ++d) {
209557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (!that->defExists(d) ||
209657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller          !this->getDef(d)->equals(that->getDef(d), false))
209757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return false;
209857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
209957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (that->defExists(d))
210057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return false;
210157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
210257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   for (s = 0; this->srcExists(s); ++s) {
210357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (!that->srcExists(s))
210457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return false;
21059362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller      if (this->src(s).mod != that->src(s).mod)
210657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return false;
210757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (!this->getSrc(s)->equals(that->getSrc(s), true))
210857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return false;
210957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
211057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (that->srcExists(s))
211157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return false;
211257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
211357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (op == OP_LOAD || op == OP_VFETCH) {
21149362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller      switch (src(0).getFile()) {
211557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case FILE_MEMORY_CONST:
211657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      case FILE_SHADER_INPUT:
211757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return true;
211857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      default:
211957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return false;
212057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
212157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
212257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
212357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   return true;
212457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
212557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
212657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// pull through common expressions from different in-blocks
212757594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool
212857594065c30feec9376be9b2132659f7d87362eeChristoph BumillerGlobalCSE::visit(BasicBlock *bb)
212957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
213057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Instruction *phi, *next, *ik;
213157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   int s;
213257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
2133ca1fc2b86400e3fc9dd0517863e22721b5e91c77Christoph Bumiller   // TODO: maybe do this with OP_UNION, too
2134ca1fc2b86400e3fc9dd0517863e22721b5e91c77Christoph Bumiller
213557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = next) {
213657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      next = phi->next;
213757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (phi->getSrc(0)->refCount() > 1)
213857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         continue;
213957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      ik = phi->getSrc(0)->getInsn();
2140bb9c15bac42cf323ef267095b33031ffc1d4fba4Christoph Bumiller      if (!ik)
2141bb9c15bac42cf323ef267095b33031ffc1d4fba4Christoph Bumiller         continue; // probably a function input
214257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      for (s = 1; phi->srcExists(s); ++s) {
214357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (phi->getSrc(s)->refCount() > 1)
214457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            break;
21451e957941735fae514de658c836b8bdaf6c66bc06Francisco Jerez         if (!phi->getSrc(s)->getInsn() ||
21461e957941735fae514de658c836b8bdaf6c66bc06Francisco Jerez             !phi->getSrc(s)->getInsn()->isResultEqual(ik))
214757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            break;
214857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
214957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (!phi->srcExists(s)) {
215057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         Instruction *entry = bb->getEntry();
215157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         ik->bb->remove(ik);
215257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (!entry || entry->op != OP_JOIN)
215357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            bb->insertHead(ik);
215457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         else
215557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            bb->insertAfter(entry, ik);
215657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         ik->setDef(0, phi->getDef(0));
215757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         delete_Instruction(prog, phi);
215857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
215957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
216057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
216157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   return true;
216257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
216357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
216457594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool
216557594065c30feec9376be9b2132659f7d87362eeChristoph BumillerLocalCSE::tryReplace(Instruction **ptr, Instruction *i)
216657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
216757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Instruction *old = *ptr;
2168ca1fc2b86400e3fc9dd0517863e22721b5e91c77Christoph Bumiller
2169ca1fc2b86400e3fc9dd0517863e22721b5e91c77Christoph Bumiller   // TODO: maybe relax this later (causes trouble with OP_UNION)
2170ca1fc2b86400e3fc9dd0517863e22721b5e91c77Christoph Bumiller   if (i->isPredicated())
2171ca1fc2b86400e3fc9dd0517863e22721b5e91c77Christoph Bumiller      return false;
2172ca1fc2b86400e3fc9dd0517863e22721b5e91c77Christoph Bumiller
217357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (!old->isResultEqual(i))
217457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return false;
2175ca1fc2b86400e3fc9dd0517863e22721b5e91c77Christoph Bumiller
217657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   for (int d = 0; old->defExists(d); ++d)
21779362d4bc0a03860ec386156cf499e855a9c2d2a5Christoph Bumiller      old->def(d).replace(i->getDef(d), false);
217857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   delete_Instruction(prog, old);
217957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   *ptr = NULL;
218057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   return true;
218157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
218257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
218357594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool
218457594065c30feec9376be9b2132659f7d87362eeChristoph BumillerLocalCSE::visit(BasicBlock *bb)
218557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
218657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   unsigned int replaced;
218757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
218857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   do {
218957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      Instruction *ir, *next;
219057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
219157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      replaced = 0;
219257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
219357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      // will need to know the order of instructions
219457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      int serial = 0;
219515ce0f76e2e014374a292550505f58da88333fb7Christoph Bumiller      for (ir = bb->getFirst(); ir; ir = ir->next)
219657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         ir->serial = serial++;
219757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
219857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      for (ir = bb->getEntry(); ir; ir = next) {
219957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         int s;
220057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         Value *src = NULL;
220157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
220257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         next = ir->next;
220357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
220457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (ir->fixed) {
220557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            ops[ir->op].insert(ir);
220657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            continue;
220757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         }
220857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
220957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         for (s = 0; ir->srcExists(s); ++s)
221057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            if (ir->getSrc(s)->asLValue())
221157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller               if (!src || ir->getSrc(s)->refCount() < src->refCount())
221257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller                  src = ir->getSrc(s);
221357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
221457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (src) {
22158cc2eca5df0116aa7fb8233a9ab6ad1c9e4203cdFrancisco Jerez            for (Value::UseIterator it = src->uses.begin();
22168cc2eca5df0116aa7fb8233a9ab6ad1c9e4203cdFrancisco Jerez                 it != src->uses.end(); ++it) {
22178cc2eca5df0116aa7fb8233a9ab6ad1c9e4203cdFrancisco Jerez               Instruction *ik = (*it)->getInsn();
2218be161e66d6108e56d40c116a4ee12668d6b8d960Christoph Bumiller               if (ik && ik->bb == ir->bb && ik->serial < ir->serial)
221957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller                  if (tryReplace(&ir, ik))
222057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller                     break;
222157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            }
222257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         } else {
222357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            DLLIST_FOR_EACH(&ops[ir->op], iter)
222457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            {
222557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller               Instruction *ik = reinterpret_cast<Instruction *>(iter.get());
222657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller               if (tryReplace(&ir, ik))
222757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller                  break;
222857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            }
222957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         }
223057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
223157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (ir)
223257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            ops[ir->op].insert(ir);
223357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         else
223457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            ++replaced;
223557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
223657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      for (unsigned int i = 0; i <= OP_LAST; ++i)
223757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         ops[i].clear();
223857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
223957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   } while (replaced);
224057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
224157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   return true;
224257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
224357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
224457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// =============================================================================
224557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
224657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// Remove computations of unused values.
224757594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerclass DeadCodeElim : public Pass
224857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
224957594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerpublic:
225057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   bool buryAll(Program *);
225157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
225257594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerprivate:
225357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   virtual bool visit(BasicBlock *);
225457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
225557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   void checkSplitLoad(Instruction *ld); // for partially dead loads
225657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
225757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   unsigned int deadCount;
225857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller};
225957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
226057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool
226157594065c30feec9376be9b2132659f7d87362eeChristoph BumillerDeadCodeElim::buryAll(Program *prog)
226257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
226357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   do {
226457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      deadCount = 0;
226557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (!this->run(prog, false, false))
226657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         return false;
226757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   } while (deadCount);
226857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
226957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   return true;
227057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
227157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
227257594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool
227357594065c30feec9376be9b2132659f7d87362eeChristoph BumillerDeadCodeElim::visit(BasicBlock *bb)
227457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
227557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Instruction *next;
227657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
227757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   for (Instruction *i = bb->getFirst(); i; i = next) {
227857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      next = i->next;
227957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (i->isDead()) {
228057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         ++deadCount;
228157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         delete_Instruction(prog, i);
228257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      } else
228357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (i->defExists(1) && (i->op == OP_VFETCH || i->op == OP_LOAD)) {
228457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         checkSplitLoad(i);
228557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
228657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
228757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   return true;
228857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
228957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
229057594065c30feec9376be9b2132659f7d87362eeChristoph Bumillervoid
229157594065c30feec9376be9b2132659f7d87362eeChristoph BumillerDeadCodeElim::checkSplitLoad(Instruction *ld1)
229257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
229357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Instruction *ld2 = NULL; // can get at most 2 loads
229457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Value *def1[4];
229557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   Value *def2[4];
229657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   int32_t addr1, addr2;
229757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   int32_t size1, size2;
229857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   int d, n1, n2;
229957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   uint32_t mask = 0xffffffff;
230057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
230157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   for (d = 0; ld1->defExists(d); ++d)
230257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (!ld1->getDef(d)->refCount() && ld1->getDef(d)->reg.data.id < 0)
230357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         mask &= ~(1 << d);
230457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (mask == 0xffffffff)
230557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return;
230657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
230757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   addr1 = ld1->getSrc(0)->reg.data.offset;
230857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   n1 = n2 = 0;
230957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   size1 = size2 = 0;
231057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   for (d = 0; ld1->defExists(d); ++d) {
231157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (mask & (1 << d)) {
231257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         if (size1 && (addr1 & 0x7))
231357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller            break;
231457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         def1[n1] = ld1->getDef(d);
231557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         size1 += def1[n1++]->reg.size;
231657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      } else
231757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (!n1) {
231857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         addr1 += ld1->getDef(d)->reg.size;
231957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      } else {
232057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         break;
232157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
232257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
232357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   for (addr2 = addr1 + size1; ld1->defExists(d); ++d) {
232457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      if (mask & (1 << d)) {
232557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         def2[n2] = ld1->getDef(d);
232657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         size2 += def2[n2++]->reg.size;
232757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      } else {
232857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         assert(!n2);
232957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller         addr2 += ld1->getDef(d)->reg.size;
233057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      }
233157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   }
233257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
233357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   updateLdStOffset(ld1, addr1, func);
233457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   ld1->setType(typeOfSize(size1));
233557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   for (d = 0; d < 4; ++d)
233657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      ld1->setDef(d, (d < n1) ? def1[d] : NULL);
233757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
233857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   if (!n2)
233957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      return;
234057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
2341a05e6a3fa28168d58a13cfb07f7a664e84b925aeFrancisco Jerez   ld2 = cloneShallow(func, ld1);
234257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   updateLdStOffset(ld2, addr2, func);
234357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   ld2->setType(typeOfSize(size2));
234457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   for (d = 0; d < 4; ++d)
234557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller      ld2->setDef(d, (d < n2) ? def2[d] : NULL);
234657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
234757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   ld1->bb->insertAfter(ld1, ld2);
234857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
234957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
235057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller// =============================================================================
235157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
// Run the pass class `n` through its member function `f` if the optimization
// level is at least `l`. Expects `level`, `dbgFlags` and `this` to be in
// scope; makes the enclosing function return false if the pass fails.
//
// Wrapped in do { } while (0) so that "RUN_PASS(...);" is a single statement
// and stays correct inside an unbraced if/else.
#define RUN_PASS(l, n, f)                       \
   do {                                         \
      if (level >= (l)) {                       \
         if (dbgFlags & NV50_IR_DEBUG_VERBOSE)  \
            INFO("PEEPHOLE: %s\n", #n);         \
         n pass;                                \
         if (!pass.f(this))                     \
            return false;                       \
      }                                         \
   } while (0)
236057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
236157594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool
236257594065c30feec9376be9b2132659f7d87362eeChristoph BumillerProgram::optimizeSSA(int level)
236357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
236457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   RUN_PASS(1, DeadCodeElim, buryAll);
236557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   RUN_PASS(1, CopyPropagation, run);
236657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   RUN_PASS(2, GlobalCSE, run);
236757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   RUN_PASS(1, LocalCSE, run);
236857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   RUN_PASS(2, AlgebraicOpt, run);
236957594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   RUN_PASS(2, ModifierFolding, run); // before load propagation -> less checks
237057594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   RUN_PASS(1, ConstantFolding, foldAll);
237157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   RUN_PASS(1, LoadPropagation, run);
237257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   RUN_PASS(2, MemoryOpt, run);
237357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   RUN_PASS(2, LocalCSE, run);
237457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   RUN_PASS(0, DeadCodeElim, buryAll);
2375ca1fc2b86400e3fc9dd0517863e22721b5e91c77Christoph Bumiller
237657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   return true;
237757594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
237857594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
237957594065c30feec9376be9b2132659f7d87362eeChristoph Bumillerbool
238057594065c30feec9376be9b2132659f7d87362eeChristoph BumillerProgram::optimizePostRA(int level)
238157594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller{
238257594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   RUN_PASS(2, FlatteningPass, run);
238357594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller   return true;
238457594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
238557594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller
238657594065c30feec9376be9b2132659f7d87362eeChristoph Bumiller}
2387