/*
 * Copyright 2011 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL 17f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 18f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF 19f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * SOFTWARE. 21f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 22f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 23f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "nv50_ir.h" 24f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "nv50_ir_target.h" 25f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "nv50_ir_build_util.h" 26f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 27f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgextern "C" { 28f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "util/u_math.h" 29f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 30f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 31f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgnamespace nv50_ir { 32f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 33f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 34f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgInstruction::isNop() const 35f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 36f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (op == OP_PHI || op == OP_SPLIT || op == OP_MERGE || op == OP_CONSTRAINT) 37f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 38f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (terminator || join) // XXX: should terminator imply flow ? 
39f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 40f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!fixed && op == OP_NOP) 41f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 42f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 43f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (defExists(0) && def(0).rep()->reg.data.id < 0) { 44f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int d = 1; defExists(d); ++d) 45f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (def(d).rep()->reg.data.id >= 0) 46f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org WARN("part of vector result is unused !\n"); 47f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 48f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 49f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 50f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (op == OP_MOV || op == OP_UNION) { 51f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!getDef(0)->equals(getSrc(0))) 52f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 53f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (op == OP_UNION) 54f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!def(0).rep()->equals(getSrc(1))) 55f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 56f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 57f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 58f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 59f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 60f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 61f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 62f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool Instruction::isDead() const 63f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 64f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if 
(op == OP_STORE || 65f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org op == OP_EXPORT || 66f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org op == OP_WRSV) 67f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 68f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 69f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int d = 0; defExists(d); ++d) 70f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (getDef(d)->refCount() || getDef(d)->reg.data.id >= 0) 71f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 72f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 73f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (terminator || asFlow()) 74f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 75f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (fixed) 76f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 77f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 78f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 79f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}; 80f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 81f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// ============================================================================= 82f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 83f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgclass CopyPropagation : public Pass 84f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 85f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgprivate: 86f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org virtual bool visit(BasicBlock *); 87f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}; 88f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 89f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// Propagate all MOVs forward to make subsequent optimization easier, except if 
90f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// the sources stem from a phi, in which case we don't want to mess up potential 91f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// swaps $rX <-> $rY, i.e. do not create live range overlaps of phi src and def. 92f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 93f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgCopyPropagation::visit(BasicBlock *bb) 94f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 95f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *mov, *si, *next; 96f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 97f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (mov = bb->getEntry(); mov; mov = next) { 98f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org next = mov->next; 99f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mov->op != OP_MOV || mov->fixed || !mov->getSrc(0)->asLValue()) 100f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 101f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mov->getPredicate()) 102f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 103f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mov->def(0).getFile() != mov->src(0).getFile()) 104f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 105f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org si = mov->getSrc(0)->getInsn(); 106f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mov->getDef(0)->reg.data.id < 0 && si && si->op != OP_PHI) { 107f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // propagate 108f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mov->def(0).replace(mov->getSrc(0), false); 109f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org delete_Instruction(prog, mov); 110f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 111f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 
112f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 113f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 114f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 115f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// ============================================================================= 116f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 117f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgclass LoadPropagation : public Pass 118f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 119f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgprivate: 120f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org virtual bool visit(BasicBlock *); 121f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 122f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org void checkSwapSrc01(Instruction *); 123f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 124f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool isCSpaceLoad(Instruction *); 125f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool isImmd32Load(Instruction *); 126f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool isAttribOrSharedLoad(Instruction *); 127f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}; 128f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 129f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 130f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgLoadPropagation::isCSpaceLoad(Instruction *ld) 131f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 132f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return ld && ld->op == OP_LOAD && ld->src(0).getFile() == FILE_MEMORY_CONST; 133f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 134f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 135f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 136f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgLoadPropagation::isImmd32Load(Instruction *ld) 
137f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 138f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!ld || (ld->op != OP_MOV) || (typeSizeof(ld->dType) != 4)) 139f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 140f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return ld->src(0).getFile() == FILE_IMMEDIATE; 141f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 142f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 143f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 144f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgLoadPropagation::isAttribOrSharedLoad(Instruction *ld) 145f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 146f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return ld && 147f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org (ld->op == OP_VFETCH || 148f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org (ld->op == OP_LOAD && 149f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org (ld->src(0).getFile() == FILE_SHADER_INPUT || 150f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ld->src(0).getFile() == FILE_MEMORY_SHARED))); 151f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 152f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 153f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 154f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgLoadPropagation::checkSwapSrc01(Instruction *insn) 155f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 156f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!prog->getTarget()->getOpInfo(insn).commutative) 157f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (insn->op != OP_SET && insn->op != OP_SLCT) 158f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 159f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (insn->src(1).getFile() != FILE_GPR) 160f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 
161f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 162f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *i0 = insn->getSrc(0)->getInsn(); 163f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *i1 = insn->getSrc(1)->getInsn(); 164f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 165f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (isCSpaceLoad(i0)) { 166f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!isCSpaceLoad(i1)) 167f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org insn->swapSources(0, 1); 168f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 169f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 170f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else 171f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (isImmd32Load(i0)) { 172f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!isCSpaceLoad(i1) && !isImmd32Load(i1)) 173f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org insn->swapSources(0, 1); 174f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 175f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 176f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else 177f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (isAttribOrSharedLoad(i1)) { 178f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!isAttribOrSharedLoad(i0)) 179f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org insn->swapSources(0, 1); 180f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 181f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 182f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 183f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 184f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 185f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 186f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (insn->op == 
OP_SET) 187f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org insn->asCmp()->setCond = reverseCondCode(insn->asCmp()->setCond); 188f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 189f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (insn->op == OP_SLCT) 190f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org insn->asCmp()->setCond = inverseCondCode(insn->asCmp()->setCond); 191f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 192f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 193f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 194f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgLoadPropagation::visit(BasicBlock *bb) 195f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 196f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const Target *targ = prog->getTarget(); 197f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *next; 198f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 199f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (Instruction *i = bb->getEntry(); i; i = next) { 200f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org next = i->next; 201f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 202f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (i->srcExists(1)) 203f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org checkSwapSrc01(i); 204f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 205f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int s = 0; i->srcExists(s); ++s) { 206f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *ld = i->getSrc(s)->getInsn(); 207f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 208f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!ld || ld->fixed || (ld->op != OP_LOAD && ld->op != OP_MOV)) 209f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 210f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if 
(!targ->insnCanLoad(i, s, ld)) 211f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 212f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 213f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // propagate ! 214f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->setSrc(s, ld->getSrc(0)); 215f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (ld->src(0).isIndirect(0)) 216f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->setIndirect(s, 0, ld->getIndirect(0, 0)); 217f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 218f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (ld->getDef(0)->refCount() == 0) 219f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org delete_Instruction(prog, ld); 220f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 221f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 222f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 223f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 224f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 225f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// ============================================================================= 226f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 227f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// Evaluate constant expressions. 
228f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgclass ConstantFolding : public Pass 229f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 230f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgpublic: 231f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool foldAll(Program *); 232f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 233f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgprivate: 234f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org virtual bool visit(BasicBlock *); 235f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 236f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org void expr(Instruction *, ImmediateValue&, ImmediateValue&); 237f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org void opnd(Instruction *, ImmediateValue&, int s); 238f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 239f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org void unary(Instruction *, const ImmediateValue&); 240f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 241f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org void tryCollapseChainedMULs(Instruction *, const int s, ImmediateValue&); 242f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 243f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // TGSI 'true' is converted to -1 by F2I(NEG(SET)), track back to SET 244f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CmpInstruction *findOriginForTestWithZero(Value *); 245f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 246f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int foldCount; 247f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 248f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org BuildUtil bld; 249f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}; 250f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 251f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// TODO: remember generated immediates and only 
revisit these 252f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 253f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgConstantFolding::foldAll(Program *prog) 254f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 255f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int iterCount = 0; 256f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org do { 257f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org foldCount = 0; 258f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!run(prog)) 259f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 260f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } while (foldCount && ++iterCount < 2); 261f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 262f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 263f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 264f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 265f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgConstantFolding::visit(BasicBlock *bb) 266f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 267f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *i, *next; 268f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 269f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (i = bb->getEntry(); i; i = next) { 270f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org next = i->next; 271f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (i->op == OP_MOV || i->op == OP_CALL) 272f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 273f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 274f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ImmediateValue src0, src1; 275f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 276f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (i->srcExists(1) && 277f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 
i->src(0).getImmediate(src0) && i->src(1).getImmediate(src1)) 278f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org expr(i, src0, src1); 279f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 280f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (i->srcExists(0) && i->src(0).getImmediate(src0)) 281f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org opnd(i, src0, 0); 282f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 283f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (i->srcExists(1) && i->src(1).getImmediate(src1)) 284f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org opnd(i, src1, 1); 285f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 286f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 287f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 288f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 289f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgCmpInstruction * 290f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgConstantFolding::findOriginForTestWithZero(Value *value) 291f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 292f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!value) 293f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return NULL; 294f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *insn = value->getInsn(); 295f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 296f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org while (insn && insn->op != OP_SET) { 297f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *next = NULL; 298f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (insn->op) { 299f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_NEG: 300f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_ABS: 301f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_CVT: 
302f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org next = insn->getSrc(0)->getInsn(); 303f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (insn->sType != next->dType) 304f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return NULL; 305f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 306f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_MOV: 307f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org next = insn->getSrc(0)->getInsn(); 308f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 309f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 310f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return NULL; 311f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 312f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org insn = next; 313f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 314f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return insn ? insn->asCmp() : NULL; 315f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 316f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 317f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 318f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgModifier::applyTo(ImmediateValue& imm) const 319f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 320f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (imm.reg.type) { 321f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case TYPE_F32: 322f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (bits & NV50_IR_MOD_ABS) 323f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org imm.reg.data.f32 = fabsf(imm.reg.data.f32); 324f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (bits & NV50_IR_MOD_NEG) 325f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org imm.reg.data.f32 = -imm.reg.data.f32; 326f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (bits & NV50_IR_MOD_SAT) { 
327f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (imm.reg.data.f32 < 0.0f) 328f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org imm.reg.data.f32 = 0.0f; 329f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 330f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (imm.reg.data.f32 > 1.0f) 331f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org imm.reg.data.f32 = 1.0f; 332f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 333f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(!(bits & NV50_IR_MOD_NOT)); 334f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 335f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 336f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case TYPE_S8: // NOTE: will be extended 337f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case TYPE_S16: 338f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case TYPE_S32: 339f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case TYPE_U8: // NOTE: treated as signed 340f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case TYPE_U16: 341f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case TYPE_U32: 342f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (bits & NV50_IR_MOD_ABS) 343f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org imm.reg.data.s32 = (imm.reg.data.s32 >= 0) ? 
344f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org imm.reg.data.s32 : -imm.reg.data.s32; 345f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (bits & NV50_IR_MOD_NEG) 346f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org imm.reg.data.s32 = -imm.reg.data.s32; 347f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (bits & NV50_IR_MOD_NOT) 348f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org imm.reg.data.s32 = ~imm.reg.data.s32; 349f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 350f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 351f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case TYPE_F64: 352f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (bits & NV50_IR_MOD_ABS) 353f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org imm.reg.data.f64 = fabs(imm.reg.data.f64); 354f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (bits & NV50_IR_MOD_NEG) 355f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org imm.reg.data.f64 = -imm.reg.data.f64; 356f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (bits & NV50_IR_MOD_SAT) { 357f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (imm.reg.data.f64 < 0.0) 358f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org imm.reg.data.f64 = 0.0; 359f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 360f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (imm.reg.data.f64 > 1.0) 361f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org imm.reg.data.f64 = 1.0; 362f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 363f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(!(bits & NV50_IR_MOD_NOT)); 364f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 365f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 366f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 367f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(!"invalid/unhandled type"); 
368f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org imm.reg.data.u64 = 0; 369f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 370f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 371f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 372f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 373f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgoperation 374f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgModifier::getOp() const 375f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 376f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (bits) { 377f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case NV50_IR_MOD_ABS: return OP_ABS; 378f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case NV50_IR_MOD_NEG: return OP_NEG; 379f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case NV50_IR_MOD_SAT: return OP_SAT; 380f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case NV50_IR_MOD_NOT: return OP_NOT; 381f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case 0: 382f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return OP_MOV; 383f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 384f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return OP_CVT; 385f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 386f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 387f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 388f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 389f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgConstantFolding::expr(Instruction *i, 390f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ImmediateValue &imm0, ImmediateValue &imm1) 391f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 392f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct Storage *const a = &imm0.reg, *const b = &imm1.reg; 393f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct Storage 
res; 394f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 395f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org memset(&res.data, 0, sizeof(res.data)); 396f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 397f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (i->op) { 398f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_MAD: 399f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_FMA: 400f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_MUL: 401f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (i->dnz && i->dType == TYPE_F32) { 402f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!isfinite(a->data.f32)) 403f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org a->data.f32 = 0.0f; 404f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!isfinite(b->data.f32)) 405f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org b->data.f32 = 0.0f; 406f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 407f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (i->dType) { 408f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case TYPE_F32: res.data.f32 = a->data.f32 * b->data.f32; break; 409f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case TYPE_F64: res.data.f64 = a->data.f64 * b->data.f64; break; 410f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case TYPE_S32: 411f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case TYPE_U32: res.data.u32 = a->data.u32 * b->data.u32; break; 412f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 413f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 414f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 415f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 416f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_DIV: 417f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (b->data.u32 == 0) 
418f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 419f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (i->dType) { 420f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case TYPE_F32: res.data.f32 = a->data.f32 / b->data.f32; break; 421f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case TYPE_F64: res.data.f64 = a->data.f64 / b->data.f64; break; 422f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case TYPE_S32: res.data.s32 = a->data.s32 / b->data.s32; break; 423f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case TYPE_U32: res.data.u32 = a->data.u32 / b->data.u32; break; 424f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 425f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 426f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 427f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 428f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_ADD: 429f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (i->dType) { 430f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case TYPE_F32: res.data.f32 = a->data.f32 + b->data.f32; break; 431f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case TYPE_F64: res.data.f64 = a->data.f64 + b->data.f64; break; 432f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case TYPE_S32: 433f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case TYPE_U32: res.data.u32 = a->data.u32 + b->data.u32; break; 434f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 435f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 436f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 437f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 438f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_POW: 439f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (i->dType) { 440f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case 
TYPE_F32: res.data.f32 = pow(a->data.f32, b->data.f32); break; 441f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case TYPE_F64: res.data.f64 = pow(a->data.f64, b->data.f64); break; 442f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 443f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 444f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 445f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 446f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_MAX: 447f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (i->dType) { 448f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case TYPE_F32: res.data.f32 = MAX2(a->data.f32, b->data.f32); break; 449f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case TYPE_F64: res.data.f64 = MAX2(a->data.f64, b->data.f64); break; 450f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case TYPE_S32: res.data.s32 = MAX2(a->data.s32, b->data.s32); break; 451f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case TYPE_U32: res.data.u32 = MAX2(a->data.u32, b->data.u32); break; 452f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 453f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 454f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 455f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 456f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_MIN: 457f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (i->dType) { 458f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case TYPE_F32: res.data.f32 = MIN2(a->data.f32, b->data.f32); break; 459f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case TYPE_F64: res.data.f64 = MIN2(a->data.f64, b->data.f64); break; 460f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case TYPE_S32: res.data.s32 = MIN2(a->data.s32, b->data.s32); break; 461f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case 
TYPE_U32: res.data.u32 = MIN2(a->data.u32, b->data.u32); break; 462f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 463f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 464f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 465f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 466f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_AND: 467f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org res.data.u64 = a->data.u64 & b->data.u64; 468f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 469f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_OR: 470f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org res.data.u64 = a->data.u64 | b->data.u64; 471f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 472f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_XOR: 473f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org res.data.u64 = a->data.u64 ^ b->data.u64; 474f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 475f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_SHL: 476f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org res.data.u32 = a->data.u32 << b->data.u32; 477f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 478f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_SHR: 479f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (i->dType) { 480f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case TYPE_S32: res.data.s32 = a->data.s32 >> b->data.u32; break; 481f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case TYPE_U32: res.data.u32 = a->data.u32 >> b->data.u32; break; 482f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 483f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 484f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 485f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 
486f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_SLCT: 487f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (a->data.u32 != b->data.u32) 488f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 489f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org res.data.u32 = a->data.u32; 490f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 491f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 492f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 493f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 494f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ++foldCount; 495f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 496f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->src(0).mod = Modifier(0); 497f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->src(1).mod = Modifier(0); 498f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 499f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res.data.u32)); 500f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->setSrc(1, NULL); 501f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 502f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->getSrc(0)->reg.data = res.data; 503f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 504f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (i->op == OP_MAD || i->op == OP_FMA) { 505f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->op = OP_ADD; 506f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 507f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->setSrc(1, i->getSrc(0)); 508f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->src(1).mod = i->src(2).mod; 509f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->setSrc(0, i->getSrc(2)); 510f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->setSrc(2, NULL); 
511f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 512f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ImmediateValue src0; 513f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (i->src(0).getImmediate(src0)) 514f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org expr(i, src0, *i->getSrc(1)->asImm()); 515f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 516f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->op = OP_MOV; 517f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 518f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 519f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 520f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 521f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgConstantFolding::unary(Instruction *i, const ImmediateValue &imm) 522f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 523f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Storage res; 524f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 525f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (i->dType != TYPE_F32) 526f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 527f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (i->op) { 528f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_NEG: res.data.f32 = -imm.reg.data.f32; break; 529f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_ABS: res.data.f32 = fabsf(imm.reg.data.f32); break; 530f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_RCP: res.data.f32 = 1.0f / imm.reg.data.f32; break; 531f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_RSQ: res.data.f32 = 1.0f / sqrtf(imm.reg.data.f32); break; 532f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_LG2: res.data.f32 = log2f(imm.reg.data.f32); break; 533f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_EX2: res.data.f32 = exp2f(imm.reg.data.f32); break; 
534f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_SIN: res.data.f32 = sinf(imm.reg.data.f32); break; 535f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_COS: res.data.f32 = cosf(imm.reg.data.f32); break; 536f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_SQRT: res.data.f32 = sqrtf(imm.reg.data.f32); break; 537f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_PRESIN: 538f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_PREEX2: 539f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // these should be handled in subsequent OP_SIN/COS/EX2 540f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org res.data.f32 = imm.reg.data.f32; 541f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 542f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 543f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 544f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 545f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->op = OP_MOV; 546f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res.data.f32)); 547f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->src(0).mod = Modifier(0); 548f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 549f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 550f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 551f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgConstantFolding::tryCollapseChainedMULs(Instruction *mul2, 552f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const int s, ImmediateValue& imm2) 553f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 554f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const int t = s ? 
0 : 1; 555f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *insn; 556f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *mul1 = NULL; // mul1 before mul2 557f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int e = 0; 558f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org float f = imm2.reg.data.f32; 559f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ImmediateValue imm1; 560f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 561f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(mul2->op == OP_MUL && mul2->dType == TYPE_F32); 562f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 563f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mul2->getSrc(t)->refCount() == 1) { 564f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org insn = mul2->getSrc(t)->getInsn(); 565f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!mul2->src(t).mod && insn->op == OP_MUL && insn->dType == TYPE_F32) 566f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mul1 = insn; 567f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mul1 && !mul1->saturate) { 568f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int s1; 569f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 570f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mul1->src(s1 = 0).getImmediate(imm1) || 571f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mul1->src(s1 = 1).getImmediate(imm1)) { 572f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld.setPosition(mul1, false); 573f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // a = mul r, imm1 574f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // d = mul a, imm2 -> d = mul r, (imm1 * imm2) 575f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mul1->setSrc(s1, bld.loadImm(NULL, f * imm1.reg.data.f32)); 576f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mul1->src(s1).mod = Modifier(0); 
577f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mul2->def(0).replace(mul1->getDef(0), false); 578f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else 579f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (prog->getTarget()->isPostMultiplySupported(OP_MUL, f, e)) { 580f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // c = mul a, b 581f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // d = mul c, imm -> d = mul_x_imm a, b 582f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mul1->postFactor = e; 583f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mul2->def(0).replace(mul1->getDef(0), false); 584f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (f < 0) 585f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mul1->src(0).mod *= Modifier(NV50_IR_MOD_NEG); 586f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 587f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mul1->saturate = mul2->saturate; 588f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 589f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 590f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 591f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mul2->getDef(0)->refCount() == 1 && !mul2->saturate) { 592f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // b = mul a, imm 593f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // d = mul b, c -> d = mul_x_imm a, c 594f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int s2, t2; 595f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org insn = mul2->getDef(0)->uses.front()->getInsn(); 596f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!insn) 597f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 598f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mul1 = mul2; 599f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mul2 = NULL; 
600f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org s2 = insn->getSrc(0) == mul1->getDef(0) ? 0 : 1; 601f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org t2 = s2 ? 0 : 1; 602f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (insn->op == OP_MUL && insn->dType == TYPE_F32) 603f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!insn->src(s2).mod && !insn->src(t2).getImmediate(imm1)) 604f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mul2 = insn; 605f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mul2 && prog->getTarget()->isPostMultiplySupported(OP_MUL, f, e)) { 606f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mul2->postFactor = e; 607f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mul2->setSrc(s2, mul1->src(t)); 608f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (f < 0) 609f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mul2->src(s2).mod *= Modifier(NV50_IR_MOD_NEG); 610f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 611f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 612f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 613f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 614f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 615f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) 616f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 617f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const int t = !s; 618f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const operation op = i->op; 619f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 620f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (i->op) { 621f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_MUL: 622f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (i->dType == TYPE_F32) 623f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 
tryCollapseChainedMULs(i, s, imm0); 624f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 625f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (imm0.isInteger(0)) { 626f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->op = OP_MOV; 627f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->setSrc(0, new_ImmediateValue(prog, 0u)); 628f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->src(0).mod = Modifier(0); 629f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->setSrc(1, NULL); 630f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else 631f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (imm0.isInteger(1) || imm0.isInteger(-1)) { 632f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (imm0.isNegative()) 633f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->src(t).mod = i->src(t).mod ^ Modifier(NV50_IR_MOD_NEG); 634f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->op = i->src(t).mod.getOp(); 635f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (s == 0) { 636f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->setSrc(0, i->getSrc(1)); 637f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->src(0).mod = i->src(1).mod; 638f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->src(1).mod = 0; 639f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 640f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (i->op != OP_CVT) 641f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->src(0).mod = 0; 642f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->setSrc(1, NULL); 643f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else 644f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (imm0.isInteger(2) || imm0.isInteger(-2)) { 645f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (imm0.isNegative()) 646f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->src(t).mod = i->src(t).mod ^ 
Modifier(NV50_IR_MOD_NEG); 647f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->op = OP_ADD; 648f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->setSrc(s, i->getSrc(t)); 649f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->src(s).mod = i->src(t).mod; 650f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else 651f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!isFloatType(i->sType) && !imm0.isNegative() && imm0.isPow2()) { 652f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->op = OP_SHL; 653f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org imm0.applyLog2(); 654f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->setSrc(0, i->getSrc(t)); 655f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->src(0).mod = i->src(t).mod; 656f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->setSrc(1, new_ImmediateValue(prog, imm0.reg.data.u32)); 657f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->src(1).mod = 0; 658f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 659f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 660f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_ADD: 661f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (imm0.isInteger(0)) { 662f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (s == 0) { 663f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->setSrc(0, i->getSrc(1)); 664f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->src(0).mod = i->src(1).mod; 665f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 666f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->setSrc(1, NULL); 667f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->op = i->src(0).mod.getOp(); 668f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (i->op != OP_CVT) 669f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->src(0).mod = Modifier(0); 
670f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 671f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 672f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 673f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_DIV: 674f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (s != 1 || (i->dType != TYPE_S32 && i->dType != TYPE_U32)) 675f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 676f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld.setPosition(i, false); 677f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (imm0.reg.data.u32 == 0) { 678f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 679f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else 680f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (imm0.reg.data.u32 == 1) { 681f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->op = OP_MOV; 682f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->setSrc(1, NULL); 683f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else 684f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (i->dType == TYPE_U32 && imm0.isPow2()) { 685f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->op = OP_SHR; 686f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->setSrc(1, bld.mkImm(util_logbase2(imm0.reg.data.u32))); 687f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else 688f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (i->dType == TYPE_U32) { 689f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *mul; 690f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Value *tA, *tB; 691f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const uint32_t d = imm0.reg.data.u32; 692f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org uint32_t m; 693f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int r, s; 694f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org uint32_t l = 
util_logbase2(d); 695f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (((uint32_t)1 << l) < d) 696f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ++l; 697f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org m = (((uint64_t)1 << 32) * (((uint64_t)1 << l) - d)) / d + 1; 698f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org r = l ? 1 : 0; 699f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org s = l ? (l - 1) : 0; 700f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 701f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org tA = bld.getSSA(); 702f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org tB = bld.getSSA(); 703f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mul = bld.mkOp2(OP_MUL, TYPE_U32, tA, i->getSrc(0), 704f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld.loadImm(NULL, m)); 705f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mul->subOp = NV50_IR_SUBOP_MUL_HIGH; 706f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld.mkOp2(OP_SUB, TYPE_U32, tB, i->getSrc(0), tA); 707f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org tA = bld.getSSA(); 708f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (r) 709f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld.mkOp2(OP_SHR, TYPE_U32, tA, tB, bld.mkImm(r)); 710f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 711f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org tA = tB; 712f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org tB = s ? 
bld.getSSA() : i->getDef(0); 713f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld.mkOp2(OP_ADD, TYPE_U32, tB, mul->getDef(0), tA); 714f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (s) 715f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), tB, bld.mkImm(s)); 716f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 717f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org delete_Instruction(prog, i); 718f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else 719f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (imm0.reg.data.s32 == -1) { 720f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->op = OP_NEG; 721f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->setSrc(1, NULL); 722f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 723f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LValue *tA, *tB; 724f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LValue *tD; 725f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const int32_t d = imm0.reg.data.s32; 726f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int32_t m; 727f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int32_t l = util_logbase2(static_cast<unsigned>(abs(d))); 728f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if ((1 << l) < abs(d)) 729f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ++l; 730f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!l) 731f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org l = 1; 732f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org m = ((uint64_t)1 << (32 + l - 1)) / abs(d) + 1 - ((uint64_t)1 << 32); 733f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 734f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org tA = bld.getSSA(); 735f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org tB = bld.getSSA(); 736f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 
bld.mkOp3(OP_MAD, TYPE_S32, tA, i->getSrc(0), bld.loadImm(NULL, m), 737f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->getSrc(0))->subOp = NV50_IR_SUBOP_MUL_HIGH; 738f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (l > 1) 739f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld.mkOp2(OP_SHR, TYPE_S32, tB, tA, bld.mkImm(l - 1)); 740f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 741f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org tB = tA; 742f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org tA = bld.getSSA(); 743f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld.mkCmp(OP_SET, CC_LT, TYPE_S32, tA, i->getSrc(0), bld.mkImm(0)); 744f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org tD = (d < 0) ? bld.getSSA() : i->getDef(0)->asLValue(); 745f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld.mkOp2(OP_SUB, TYPE_U32, tD, tB, tA); 746f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (d < 0) 747f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld.mkOp1(OP_NEG, TYPE_S32, i->getDef(0), tB); 748f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 749f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org delete_Instruction(prog, i); 750f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 751f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 752f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 753f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_MOD: 754f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (i->sType == TYPE_U32 && imm0.isPow2()) { 755f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld.setPosition(i, false); 756f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->op = OP_AND; 757f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->setSrc(1, bld.loadImm(NULL, imm0.reg.data.u32 - 1)); 758f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 
759f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 760f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 761f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_SET: // TODO: SET_AND,OR,XOR 762f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org { 763f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CmpInstruction *si = findOriginForTestWithZero(i->getSrc(t)); 764f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CondCode cc, ccZ; 765f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (i->src(t).mod != Modifier(0)) 766f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 767f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (imm0.reg.data.u32 != 0 || !si || si->op != OP_SET) 768f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 769f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org cc = si->setCond; 770f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ccZ = (CondCode)((unsigned int)i->asCmp()->setCond & ~CC_U); 771f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (s == 0) 772f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ccZ = reverseCondCode(ccZ); 773f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (ccZ) { 774f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case CC_LT: cc = CC_FL; break; 775f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case CC_GE: cc = CC_TR; break; 776f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case CC_EQ: cc = inverseCondCode(cc); break; 777f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case CC_LE: cc = inverseCondCode(cc); break; 778f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case CC_GT: break; 779f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case CC_NE: break; 780f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 781f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 
782f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 783f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->asCmp()->setCond = cc; 784f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->setSrc(0, si->src(0)); 785f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->setSrc(1, si->src(1)); 786f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->sType = si->sType; 787f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 788f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 789f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 790f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_SHL: 791f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org { 792f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (s != 1 || i->src(0).mod != Modifier(0)) 793f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 794f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // try to concatenate shifts 795f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *si = i->getSrc(0)->getInsn(); 796f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!si || si->op != OP_SHL) 797f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 798f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ImmediateValue imm1; 799f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (si->src(1).getImmediate(imm1)) { 800f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld.setPosition(i, false); 801f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->setSrc(0, si->getSrc(0)); 802f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->setSrc(1, bld.loadImm(NULL, imm0.reg.data.u32 + imm1.reg.data.u32)); 803f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 804f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 805f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 806f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 
807f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_ABS: 808f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_NEG: 809f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_LG2: 810f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_RCP: 811f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_SQRT: 812f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_RSQ: 813f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_PRESIN: 814f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_SIN: 815f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_COS: 816f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_PREEX2: 817f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_EX2: 818f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unary(i, imm0); 819f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 820f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 821f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 822f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 823f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (i->op != op) 824f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org foldCount++; 825f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 826f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 827f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// ============================================================================= 828f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 829f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// Merge modifier operations (ABS, NEG, NOT) into ValueRefs where allowed. 
830f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgclass ModifierFolding : public Pass 831f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 832f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgprivate: 833f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org virtual bool visit(BasicBlock *); 834f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}; 835f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 836f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 837f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgModifierFolding::visit(BasicBlock *bb) 838f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 839f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const Target *target = prog->getTarget(); 840f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 841f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *i, *next, *mi; 842f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Modifier mod; 843f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 844f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (i = bb->getEntry(); i; i = next) { 845f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org next = i->next; 846f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 847f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (0 && i->op == OP_SUB) { 848f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // turn "sub" into "add neg" (do we really want this ?) 
849f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->op = OP_ADD; 850f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->src(0).mod = i->src(0).mod ^ Modifier(NV50_IR_MOD_NEG); 851f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 852f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 853f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int s = 0; s < 3 && i->srcExists(s); ++s) { 854f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mi = i->getSrc(s)->getInsn(); 855f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!mi || 856f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mi->predSrc >= 0 || mi->getDef(0)->refCount() > 8) 857f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 858f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (i->sType == TYPE_U32 && mi->dType == TYPE_S32) { 859f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if ((i->op != OP_ADD && 860f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->op != OP_MUL) || 861f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org (mi->op != OP_ABS && 862f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mi->op != OP_NEG)) 863f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 864f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else 865f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (i->sType != mi->dType) { 866f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 867f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 868f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if ((mod = Modifier(mi->op)) == Modifier(0)) 869f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 870f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mod *= mi->src(0).mod; 871f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 872f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if ((i->op == OP_ABS) || i->src(s).mod.abs()) { 
873f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // abs neg [abs] = abs 874f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mod = mod & Modifier(~(NV50_IR_MOD_NEG | NV50_IR_MOD_ABS)); 875f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else 876f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if ((i->op == OP_NEG) && mod.neg()) { 877f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(s == 0); 878f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // neg as both opcode and modifier on same insn is prohibited 879f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // neg neg abs = abs, neg neg = identity 880f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mod = mod & Modifier(~NV50_IR_MOD_NEG); 881f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->op = mod.getOp(); 882f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mod = mod & Modifier(~NV50_IR_MOD_ABS); 883f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mod == Modifier(0)) 884f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->op = OP_MOV; 885f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 886f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 887f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (target->isModSupported(i, s, mod)) { 888f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->setSrc(s, mi->getSrc(0)); 889f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->src(s).mod *= mod; 890f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 891f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 892f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 893f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (i->op == OP_SAT) { 894f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mi = i->getSrc(0)->getInsn(); 895f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mi && 896f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 
mi->getDef(0)->refCount() <= 1 && target->isSatSupported(mi)) { 897f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mi->saturate = 1; 898f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mi->setDef(0, i->getDef(0)); 899f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org delete_Instruction(prog, i); 900f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 901f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 902f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 903f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 904f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 905f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 906f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 907f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// ============================================================================= 908f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 909f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// MUL + ADD -> MAD/FMA 910f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// MIN/MAX(a, a) -> a, etc. 911f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// SLCT(a, b, const) -> cc(const) ? 
a : b 912f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// RCP(RCP(a)) -> a 913f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// MUL(MUL(a, b), const) -> MUL_Xconst(a, b) 914f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgclass AlgebraicOpt : public Pass 915f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 916f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgprivate: 917f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org virtual bool visit(BasicBlock *); 918f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 919f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org void handleABS(Instruction *); 920f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool handleADD(Instruction *); 921f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool tryADDToMADOrSAD(Instruction *, operation toOp); 922f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org void handleMINMAX(Instruction *); 923f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org void handleRCP(Instruction *); 924f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org void handleSLCT(Instruction *); 925f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org void handleLOGOP(Instruction *); 926f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org void handleCVT(Instruction *); 927f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 928f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org BuildUtil bld; 929f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}; 930f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 931f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 932f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAlgebraicOpt::handleABS(Instruction *abs) 933f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 934f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *sub = abs->getSrc(0)->getInsn(); 935f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org DataType ty; 
936f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!sub || 937f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org !prog->getTarget()->isOpSupported(OP_SAD, abs->dType)) 938f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 939f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // expect not to have mods yet, if we do, bail 940f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (sub->src(0).mod || sub->src(1).mod) 941f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 942f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // hidden conversion ? 943f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ty = intTypeToSigned(sub->dType); 944f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (abs->dType != abs->sType || ty != abs->sType) 945f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 946f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 947f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if ((sub->op != OP_ADD && sub->op != OP_SUB) || 948f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org sub->src(0).getFile() != FILE_GPR || sub->src(0).mod || 949f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org sub->src(1).getFile() != FILE_GPR || sub->src(1).mod) 950f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 951f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 952f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Value *src0 = sub->getSrc(0); 953f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Value *src1 = sub->getSrc(1); 954f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 955f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (sub->op == OP_ADD) { 956f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *neg = sub->getSrc(1)->getInsn(); 957f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (neg && neg->op != OP_NEG) { 958f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org neg = 
sub->getSrc(0)->getInsn(); 959f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org src0 = sub->getSrc(1); 960f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 961f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!neg || neg->op != OP_NEG || 962f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org neg->dType != neg->sType || neg->sType != ty) 963f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 964f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org src1 = neg->getSrc(0); 965f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 966f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 967f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // found ABS(SUB)) 968f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org abs->moveSources(1, 2); // move sources >=1 up by 2 969f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org abs->op = OP_SAD; 970f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org abs->setType(sub->dType); 971f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org abs->setSrc(0, src0); 972f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org abs->setSrc(1, src1); 973f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld.setPosition(abs, false); 974f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org abs->setSrc(2, bld.loadImm(bld.getSSA(typeSizeof(ty)), 0)); 975f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 976f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 977f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 978f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAlgebraicOpt::handleADD(Instruction *add) 979f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 980f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Value *src0 = add->getSrc(0); 981f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Value *src1 = add->getSrc(1); 982f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 
983f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (src0->reg.file != FILE_GPR || src1->reg.file != FILE_GPR) 984f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 985f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 986f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool changed = false; 987f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!changed && prog->getTarget()->isOpSupported(OP_MAD, add->dType)) 988f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org changed = tryADDToMADOrSAD(add, OP_MAD); 989f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!changed && prog->getTarget()->isOpSupported(OP_SAD, add->dType)) 990f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org changed = tryADDToMADOrSAD(add, OP_SAD); 991f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return changed; 992f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 993f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 994f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// ADD(SAD(a,b,0), c) -> SAD(a,b,c) 995f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// ADD(MUL(a,b), c) -> MAD(a,b,c) 996f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 997f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAlgebraicOpt::tryADDToMADOrSAD(Instruction *add, operation toOp) 998f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 999f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Value *src0 = add->getSrc(0); 1000f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Value *src1 = add->getSrc(1); 1001f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Value *src; 1002f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int s; 1003f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const operation srcOp = toOp == OP_SAD ? OP_SAD : OP_MUL; 1004f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const Modifier modBad = Modifier(~((toOp == OP_MAD) ? 
NV50_IR_MOD_NEG : 0)); 1005f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Modifier mod[4]; 1006f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1007f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (src0->refCount() == 1 && 1008f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org src0->getUniqueInsn() && src0->getUniqueInsn()->op == srcOp) 1009f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org s = 0; 1010f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 1011f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (src1->refCount() == 1 && 1012f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org src1->getUniqueInsn() && src1->getUniqueInsn()->op == srcOp) 1013f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org s = 1; 1014f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 1015f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 1016f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1017f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if ((src0->getUniqueInsn() && src0->getUniqueInsn()->bb != add->bb) || 1018f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org (src1->getUniqueInsn() && src1->getUniqueInsn()->bb != add->bb)) 1019f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 1020f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1021f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org src = add->getSrc(s); 1022f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1023f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (src->getInsn()->postFactor) 1024f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 1025f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (toOp == OP_SAD) { 1026f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ImmediateValue imm; 1027f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!src->getInsn()->src(2).getImmediate(imm)) 
1028f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 1029f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!imm.isInteger(0)) 1030f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 1031f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1032f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1033f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mod[0] = add->src(0).mod; 1034f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mod[1] = add->src(1).mod; 1035f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mod[2] = src->getUniqueInsn()->src(0).mod; 1036f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mod[3] = src->getUniqueInsn()->src(1).mod; 1037f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1038f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (((mod[0] | mod[1]) | (mod[2] | mod[3])) & modBad) 1039f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 1040f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1041f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org add->op = toOp; 1042f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org add->subOp = src->getInsn()->subOp; // potentially mul-high 1043f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1044f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org add->setSrc(2, add->src(s ? 
0 : 1)); 1045f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1046f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org add->setSrc(0, src->getInsn()->getSrc(0)); 1047f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org add->src(0).mod = mod[2] ^ mod[s]; 1048f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org add->setSrc(1, src->getInsn()->getSrc(1)); 1049f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org add->src(1).mod = mod[3]; 1050f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1051f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 1052f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1053f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1054f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 1055f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAlgebraicOpt::handleMINMAX(Instruction *minmax) 1056f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1057f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Value *src0 = minmax->getSrc(0); 1058f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Value *src1 = minmax->getSrc(1); 1059f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1060f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (src0 != src1 || src0->reg.file != FILE_GPR) 1061f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 1062f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (minmax->src(0).mod == minmax->src(1).mod) { 1063f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (minmax->def(0).mayReplace(minmax->src(0))) { 1064f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org minmax->def(0).replace(minmax->src(0), false); 1065f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org minmax->bb->remove(minmax); 1066f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 1067f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org minmax->op = OP_CVT; 
1068f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org minmax->setSrc(1, NULL); 1069f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1070f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 1071f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // TODO: 1072f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // min(x, -x) = -abs(x) 1073f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // min(x, -abs(x)) = -abs(x) 1074f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // min(x, abs(x)) = x 1075f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // max(x, -abs(x)) = x 1076f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // max(x, abs(x)) = abs(x) 1077f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // max(x, -x) = abs(x) 1078f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1079f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1080f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1081f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 1082f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAlgebraicOpt::handleRCP(Instruction *rcp) 1083f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1084f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *si = rcp->getSrc(0)->getUniqueInsn(); 1085f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1086f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (si && si->op == OP_RCP) { 1087f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Modifier mod = rcp->src(0).mod * si->src(0).mod; 1088f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rcp->op = mod.getOp(); 1089f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rcp->setSrc(0, si->getSrc(0)); 1090f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1091f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1092f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 
1093f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 1094f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAlgebraicOpt::handleSLCT(Instruction *slct) 1095f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1096f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (slct->getSrc(2)->reg.file == FILE_IMMEDIATE) { 1097f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (slct->getSrc(2)->asImm()->compare(slct->asCmp()->setCond, 0.0f)) 1098f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org slct->setSrc(0, slct->getSrc(1)); 1099f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else 1100f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (slct->getSrc(0) != slct->getSrc(1)) { 1101f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 1102f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1103f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org slct->op = OP_MOV; 1104f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org slct->setSrc(1, NULL); 1105f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org slct->setSrc(2, NULL); 1106f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1107f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1108f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 1109f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAlgebraicOpt::handleLOGOP(Instruction *logop) 1110f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1111f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Value *src0 = logop->getSrc(0); 1112f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Value *src1 = logop->getSrc(1); 1113f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1114f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (src0->reg.file != FILE_GPR || src1->reg.file != FILE_GPR) 1115f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 1116f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 
1117f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (src0 == src1) { 1118f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if ((logop->op == OP_AND || logop->op == OP_OR) && 1119f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org logop->def(0).mayReplace(logop->src(0))) { 1120f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org logop->def(0).replace(logop->src(0), false); 1121f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org delete_Instruction(prog, logop); 1122f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1123f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 1124f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // try AND(SET, SET) -> SET_AND(SET) 1125f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *set0 = src0->getInsn(); 1126f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *set1 = src1->getInsn(); 1127f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1128f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!set0 || set0->fixed || !set1 || set1->fixed) 1129f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 1130f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (set1->op != OP_SET) { 1131f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *xchg = set0; 1132f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org set0 = set1; 1133f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org set1 = xchg; 1134f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (set1->op != OP_SET) 1135f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 1136f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1137f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org operation redOp = (logop->op == OP_AND ? OP_SET_AND : 1138f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org logop->op == OP_XOR ? 
OP_SET_XOR : OP_SET_OR); 1139f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!prog->getTarget()->isOpSupported(redOp, set1->sType)) 1140f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 1141f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (set0->op != OP_SET && 1142f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org set0->op != OP_SET_AND && 1143f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org set0->op != OP_SET_OR && 1144f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org set0->op != OP_SET_XOR) 1145f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 1146f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (set0->getDef(0)->refCount() > 1 && 1147f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org set1->getDef(0)->refCount() > 1) 1148f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 1149f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (set0->getPredicate() || set1->getPredicate()) 1150f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 1151f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // check that they don't source each other 1152f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int s = 0; s < 2; ++s) 1153f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (set0->getSrc(s) == set1->getDef(0) || 1154f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org set1->getSrc(s) == set0->getDef(0)) 1155f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 1156f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1157f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org set0 = cloneForward(func, set0); 1158f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org set1 = cloneShallow(func, set1); 1159f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org logop->bb->insertAfter(logop, set1); 1160f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org logop->bb->insertAfter(logop, set0); 
1161f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1162f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org set0->dType = TYPE_U8; 1163f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org set0->getDef(0)->reg.file = FILE_PREDICATE; 1164f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org set0->getDef(0)->reg.size = 1; 1165f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org set1->setSrc(2, set0->getDef(0)); 1166f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org set1->op = redOp; 1167f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org set1->setDef(0, logop->getDef(0)); 1168f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org delete_Instruction(prog, logop); 1169f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1170f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1171f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1172f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// F2I(NEG(SET with result 1.0f/0.0f)) -> SET with result -1/0 1173f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// nv50: 1174f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// F2I(NEG(I2F(ABS(SET)))) 1175f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 1176f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAlgebraicOpt::handleCVT(Instruction *cvt) 1177f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1178f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (cvt->sType != TYPE_F32 || 1179f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org cvt->dType != TYPE_S32 || cvt->src(0).mod != Modifier(0)) 1180f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 1181f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *insn = cvt->getSrc(0)->getInsn(); 1182f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!insn || insn->op != OP_NEG || insn->dType != TYPE_F32) 1183f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 
1184f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (insn->src(0).mod != Modifier(0)) 1185f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 1186f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org insn = insn->getSrc(0)->getInsn(); 1187f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1188f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // check for nv50 SET(-1,0) -> SET(1.0f/0.0f) chain and nvc0's f32 SET 1189f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (insn && insn->op == OP_CVT && 1190f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org insn->dType == TYPE_F32 && 1191f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org insn->sType == TYPE_S32) { 1192f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org insn = insn->getSrc(0)->getInsn(); 1193f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!insn || insn->op != OP_ABS || insn->sType != TYPE_S32 || 1194f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org insn->src(0).mod) 1195f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 1196f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org insn = insn->getSrc(0)->getInsn(); 1197f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!insn || insn->op != OP_SET || insn->dType != TYPE_U32) 1198f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 1199f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else 1200f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!insn || insn->op != OP_SET || insn->dType != TYPE_F32) { 1201f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 1202f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1203f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1204f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *bset = cloneShallow(func, insn); 1205f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bset->dType = TYPE_U32; 
1206f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bset->setDef(0, cvt->getDef(0)); 1207f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org cvt->bb->insertAfter(cvt, bset); 1208f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org delete_Instruction(prog, cvt); 1209f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1210f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1211f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 1212f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgAlgebraicOpt::visit(BasicBlock *bb) 1213f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1214f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *next; 1215f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (Instruction *i = bb->getEntry(); i; i = next) { 1216f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org next = i->next; 1217f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (i->op) { 1218f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_ABS: 1219f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org handleABS(i); 1220f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 1221f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_ADD: 1222f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org handleADD(i); 1223f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 1224f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_RCP: 1225f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org handleRCP(i); 1226f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 1227f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_MIN: 1228f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_MAX: 1229f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org handleMINMAX(i); 1230f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 1231f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_SLCT: 
1232f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org handleSLCT(i); 1233f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 1234f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_AND: 1235f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_OR: 1236f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_XOR: 1237f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org handleLOGOP(i); 1238f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 1239f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case OP_CVT: 1240f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org handleCVT(i); 1241f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 1242f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 1243f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 1244f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1245f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1246f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1247f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 1248f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1249f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1250f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// ============================================================================= 1251f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1252f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic inline void 1253f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgupdateLdStOffset(Instruction *ldst, int32_t offset, Function *fn) 1254f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1255f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (offset != ldst->getSrc(0)->reg.data.offset) { 1256f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (ldst->getSrc(0)->refCount() > 1) 1257f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 
ldst->setSrc(0, cloneShallow(fn, ldst->getSrc(0))); 1258f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ldst->getSrc(0)->reg.data.offset = offset; 1259f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1260f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1261f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1262f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// Combine loads and stores, forward stores to loads where possible. 1263f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgclass MemoryOpt : public Pass 1264f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1265f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgprivate: 1266f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org class Record 1267f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org { 1268f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org public: 1269f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Record *next; 1270f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *insn; 1271f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const Value *rel[2]; 1272f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const Value *base; 1273f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int32_t offset; 1274f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int8_t fileIndex; 1275f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org uint8_t size; 1276f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool locked; 1277f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Record *prev; 1278f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1279f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool overlaps(const Instruction *ldst) const; 1280f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1281f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inline void link(Record **); 1282f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inline void 
unlink(Record **); 1283f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inline void set(const Instruction *ldst); 1284f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org }; 1285f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1286f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgpublic: 1287f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MemoryOpt(); 1288f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1289f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Record *loads[DATA_FILE_COUNT]; 1290f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Record *stores[DATA_FILE_COUNT]; 1291f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1292f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MemoryPool recordPool; 1293f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1294f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgprivate: 1295f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org virtual bool visit(BasicBlock *); 1296f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool runOpt(BasicBlock *); 1297f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1298f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Record **getList(const Instruction *); 1299f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1300f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Record *findRecord(const Instruction *, bool load, bool& isAdjacent) const; 1301f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1302f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // merge @insn into load/store instruction from @rec 1303f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool combineLd(Record *rec, Instruction *ld); 1304f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool combineSt(Record *rec, Instruction *st); 1305f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1306f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool replaceLdFromLd(Instruction 
*ld, Record *ldRec); 1307f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool replaceLdFromSt(Instruction *ld, Record *stRec); 1308f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool replaceStFromSt(Instruction *restrict st, Record *stRec); 1309f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1310f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org void addRecord(Instruction *ldst); 1311f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org void purgeRecords(Instruction *const st, DataFile); 1312f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org void lockStores(Instruction *const ld); 1313f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org void reset(); 1314f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1315f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgprivate: 1316f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Record *prevRecord; 1317f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}; 1318f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1319f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgMemoryOpt::MemoryOpt() : recordPool(sizeof(MemoryOpt::Record), 6) 1320f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1321f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int i = 0; i < DATA_FILE_COUNT; ++i) { 1322f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org loads[i] = NULL; 1323f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org stores[i] = NULL; 1324f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1325f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org prevRecord = NULL; 1326f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1327f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1328f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 1329f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgMemoryOpt::reset() 1330f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 
1331f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (unsigned int i = 0; i < DATA_FILE_COUNT; ++i) { 1332f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Record *it, *next; 1333f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (it = loads[i]; it; it = next) { 1334f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org next = it->next; 1335f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org recordPool.release(it); 1336f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1337f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org loads[i] = NULL; 1338f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (it = stores[i]; it; it = next) { 1339f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org next = it->next; 1340f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org recordPool.release(it); 1341f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1342f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org stores[i] = NULL; 1343f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1344f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1345f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1346f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 1347f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgMemoryOpt::combineLd(Record *rec, Instruction *ld) 1348f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1349f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int32_t offRc = rec->offset; 1350f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int32_t offLd = ld->getSrc(0)->reg.data.offset; 1351f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int sizeRc = rec->size; 1352f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int sizeLd = typeSizeof(ld->dType); 1353f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int size = sizeRc + sizeLd; 1354f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int d, j; 
1355f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1356f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!prog->getTarget()-> 1357f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org isAccessSupported(ld->getSrc(0)->reg.file, typeOfSize(size))) 1358f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 1359f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // no unaligned loads 1360f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (((size == 0x8) && (MIN2(offLd, offRc) & 0x7)) || 1361f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ((size == 0xc) && (MIN2(offLd, offRc) & 0xf))) 1362f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 1363f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1364f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(sizeRc + sizeLd <= 16 && offRc != offLd); 1365f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1366f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (j = 0; sizeRc; sizeRc -= rec->insn->getDef(j)->reg.size, ++j); 1367f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1368f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (offLd < offRc) { 1369f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int sz; 1370f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (sz = 0, d = 0; sz < sizeLd; sz += ld->getDef(d)->reg.size, ++d); 1371f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // d: nr of definitions in ld 1372f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // j: nr of definitions in rec->insn, move: 1373f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (d = d + j - 1; j > 0; --j, --d) 1374f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rec->insn->setDef(d, rec->insn->getDef(j - 1)); 1375f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1376f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (rec->insn->getSrc(0)->refCount() > 1) 
1377f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rec->insn->setSrc(0, cloneShallow(func, rec->insn->getSrc(0))); 1378f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rec->offset = rec->insn->getSrc(0)->reg.data.offset = offLd; 1379f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1380f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org d = 0; 1381f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 1382f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org d = j; 1383f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1384f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // move definitions of @ld to @rec->insn 1385f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (j = 0; sizeLd; ++j, ++d) { 1386f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org sizeLd -= ld->getDef(j)->reg.size; 1387f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rec->insn->setDef(d, ld->getDef(j)); 1388f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1389f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1390f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rec->size = size; 1391f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rec->insn->getSrc(0)->reg.size = size; 1392f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rec->insn->setType(typeOfSize(size)); 1393f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1394f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org delete_Instruction(prog, ld); 1395f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1396f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 1397f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1398f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1399f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 1400f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgMemoryOpt::combineSt(Record *rec, Instruction *st) 
1401f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1402f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int32_t offRc = rec->offset; 1403f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int32_t offSt = st->getSrc(0)->reg.data.offset; 1404f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int sizeRc = rec->size; 1405f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int sizeSt = typeSizeof(st->dType); 1406f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int s = sizeSt / 4; 1407f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int size = sizeRc + sizeSt; 1408f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int j, k; 1409f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Value *src[4]; // no modifiers in ValueRef allowed for st 1410f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Value *extra[3]; 1411f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1412f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!prog->getTarget()-> 1413f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org isAccessSupported(st->getSrc(0)->reg.file, typeOfSize(size))) 1414f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 1415f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (size == 8 && MIN2(offRc, offSt) & 0x7) 1416f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 1417f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1418f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org st->takeExtraSources(0, extra); // save predicate and indirect address 1419f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1420f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (offRc < offSt) { 1421f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // save values from @st 1422f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (s = 0; sizeSt; ++s) { 1423f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org sizeSt -= st->getSrc(s + 
1)->reg.size; 1424f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org src[s] = st->getSrc(s + 1); 1425f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1426f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // set record's values as low sources of @st 1427f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (j = 1; sizeRc; ++j) { 1428f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org sizeRc -= rec->insn->getSrc(j)->reg.size; 1429f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org st->setSrc(j, rec->insn->getSrc(j)); 1430f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1431f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // set saved values as high sources of @st 1432f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (k = j, j = 0; j < s; ++j) 1433f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org st->setSrc(k++, src[j]); 1434f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1435f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org updateLdStOffset(st, offRc, func); 1436f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 1437f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (j = 1; sizeSt; ++j) 1438f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org sizeSt -= st->getSrc(j)->reg.size; 1439f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (s = 1; sizeRc; ++j, ++s) { 1440f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org sizeRc -= rec->insn->getSrc(s)->reg.size; 1441f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org st->setSrc(j, rec->insn->getSrc(s)); 1442f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1443f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rec->offset = offSt; 1444f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1445f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org st->putExtraSources(0, extra); // restore pointer and predicate 
1446f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1447f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org delete_Instruction(prog, rec->insn); 1448f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rec->insn = st; 1449f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rec->size = size; 1450f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rec->insn->getSrc(0)->reg.size = size; 1451f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rec->insn->setType(typeOfSize(size)); 1452f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 1453f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1454f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1455f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 1456f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgMemoryOpt::Record::set(const Instruction *ldst) 1457f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1458f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const Symbol *mem = ldst->getSrc(0)->asSym(); 1459f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org fileIndex = mem->reg.fileIndex; 1460f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rel[0] = ldst->getIndirect(0, 0); 1461f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rel[1] = ldst->getIndirect(0, 1); 1462f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org offset = mem->reg.data.offset; 1463f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org base = mem->getBase(); 1464f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org size = typeSizeof(ldst->sType); 1465f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1466f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1467f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 1468f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgMemoryOpt::Record::link(Record **list) 1469f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 
1470f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org next = *list; 1471f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (next) 1472f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org next->prev = this; 1473f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org prev = NULL; 1474f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *list = this; 1475f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1476f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1477f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 1478f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgMemoryOpt::Record::unlink(Record **list) 1479f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1480f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (next) 1481f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org next->prev = prev; 1482f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (prev) 1483f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org prev->next = next; 1484f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 1485f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *list = next; 1486f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1487f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1488f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgMemoryOpt::Record ** 1489f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgMemoryOpt::getList(const Instruction *insn) 1490f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1491f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (insn->op == OP_LOAD || insn->op == OP_VFETCH) 1492f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return &loads[insn->src(0).getFile()]; 1493f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return &stores[insn->src(0).getFile()]; 1494f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1495f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 
1496f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 1497f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgMemoryOpt::addRecord(Instruction *i) 1498f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1499f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Record **list = getList(i); 1500f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Record *it = reinterpret_cast<Record *>(recordPool.allocate()); 1501f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1502f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org it->link(list); 1503f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org it->set(i); 1504f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org it->insn = i; 1505f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org it->locked = false; 1506f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1507f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1508f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgMemoryOpt::Record * 1509f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgMemoryOpt::findRecord(const Instruction *insn, bool load, bool& isAdj) const 1510f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1511f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const Symbol *sym = insn->getSrc(0)->asSym(); 1512f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const int size = typeSizeof(insn->sType); 1513f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Record *rec = NULL; 1514f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Record *it = load ? 
loads[sym->reg.file] : stores[sym->reg.file]; 1515f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1516f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (; it; it = it->next) { 1517f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (it->locked && insn->op != OP_LOAD) 1518f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 1519f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if ((it->offset >> 4) != (sym->reg.data.offset >> 4) || 1520f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org it->rel[0] != insn->getIndirect(0, 0) || 1521f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org it->fileIndex != sym->reg.fileIndex || 1522f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org it->rel[1] != insn->getIndirect(0, 1)) 1523f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 1524f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1525f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (it->offset < sym->reg.data.offset) { 1526f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (it->offset + it->size >= sym->reg.data.offset) { 1527f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org isAdj = (it->offset + it->size == sym->reg.data.offset); 1528f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!isAdj) 1529f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return it; 1530f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!(it->offset & 0x7)) 1531f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rec = it; 1532f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1533f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 1534f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org isAdj = it->offset != sym->reg.data.offset; 1535f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (size <= it->size && !isAdj) 1536f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return it; 
1537f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 1538f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!(sym->reg.data.offset & 0x7)) 1539f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (it->offset - size <= sym->reg.data.offset) 1540f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rec = it; 1541f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1542f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1543f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return rec; 1544f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1545f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1546f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 1547f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgMemoryOpt::replaceLdFromSt(Instruction *ld, Record *rec) 1548f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1549f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *st = rec->insn; 1550f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int32_t offSt = rec->offset; 1551f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int32_t offLd = ld->getSrc(0)->reg.data.offset; 1552f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int d, s; 1553f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1554f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (s = 1; offSt != offLd && st->srcExists(s); ++s) 1555f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org offSt += st->getSrc(s)->reg.size; 1556f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (offSt != offLd) 1557f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 1558f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1559f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (d = 0; ld->defExists(d) && st->srcExists(s); ++d, ++s) { 1560f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (ld->getDef(d)->reg.size != st->getSrc(s)->reg.size) 
1561f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 1562f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (st->getSrc(s)->reg.file != FILE_GPR) 1563f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 1564f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ld->def(d).replace(st->src(s), false); 1565f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1566f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ld->bb->remove(ld); 1567f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 1568f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1569f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1570f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 1571f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgMemoryOpt::replaceLdFromLd(Instruction *ldE, Record *rec) 1572f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1573f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *ldR = rec->insn; 1574f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int32_t offR = rec->offset; 1575f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int32_t offE = ldE->getSrc(0)->reg.data.offset; 1576f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int dR, dE; 1577f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1578f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(offR <= offE); 1579f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (dR = 0; offR < offE && ldR->defExists(dR); ++dR) 1580f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org offR += ldR->getDef(dR)->reg.size; 1581f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (offR != offE) 1582f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 1583f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1584f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (dE = 0; ldE->defExists(dE) && ldR->defExists(dR); ++dE, ++dR) { 
1585f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (ldE->getDef(dE)->reg.size != ldR->getDef(dR)->reg.size) 1586f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 1587f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ldE->def(dE).replace(ldR->getDef(dR), false); 1588f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1589f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1590f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org delete_Instruction(prog, ldE); 1591f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 1592f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1593f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1594f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 1595f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgMemoryOpt::replaceStFromSt(Instruction *restrict st, Record *rec) 1596f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1597f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const Instruction *const ri = rec->insn; 1598f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Value *extra[3]; 1599f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1600f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int32_t offS = st->getSrc(0)->reg.data.offset; 1601f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int32_t offR = rec->offset; 1602f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int32_t endS = offS + typeSizeof(st->dType); 1603f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int32_t endR = offR + typeSizeof(ri->dType); 1604f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1605f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rec->size = MAX2(endS, endR) - MIN2(offS, offR); 1606f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1607f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org st->takeExtraSources(0, extra); 
1608f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1609f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (offR < offS) { 1610f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Value *vals[10]; 1611f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int s, n; 1612f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int k = 0; 1613f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // get non-replaced sources of ri 1614f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (s = 1; offR < offS; offR += ri->getSrc(s)->reg.size, ++s) 1615f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org vals[k++] = ri->getSrc(s); 1616f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org n = s; 1617f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // get replaced sources of st 1618f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (s = 1; st->srcExists(s); offS += st->getSrc(s)->reg.size, ++s) 1619f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org vals[k++] = st->getSrc(s); 1620f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // skip replaced sources of ri 1621f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (s = n; offR < endS; offR += ri->getSrc(s)->reg.size, ++s); 1622f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // get non-replaced sources after values covered by st 1623f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (; offR < endR; offR += ri->getSrc(s)->reg.size, ++s) 1624f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org vals[k++] = ri->getSrc(s); 1625f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert((unsigned int)k <= Elements(vals)); 1626f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (s = 0; s < k; ++s) 1627f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org st->setSrc(s + 1, vals[s]); 1628f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org st->setSrc(0, ri->getSrc(0)); 
1629f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else 1630f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (endR > endS) { 1631f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int j, s; 1632f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (j = 1; offR < endS; offR += ri->getSrc(j++)->reg.size); 1633f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (s = 1; offS < endS; offS += st->getSrc(s++)->reg.size); 1634f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (; offR < endR; offR += ri->getSrc(j++)->reg.size) 1635f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org st->setSrc(s++, ri->getSrc(j)); 1636f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1637f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org st->putExtraSources(0, extra); 1638f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1639f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org delete_Instruction(prog, rec->insn); 1640f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1641f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rec->insn = st; 1642f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rec->offset = st->getSrc(0)->reg.data.offset; 1643f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1644f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org st->setType(typeOfSize(rec->size)); 1645f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1646f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 1647f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1648f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1649f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 1650f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgMemoryOpt::Record::overlaps(const Instruction *ldst) const 1651f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1652f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Record that; 
1653f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org that.set(ldst); 1654f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1655f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (this->fileIndex != that.fileIndex) 1656f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 1657f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1658f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (this->rel[0] || that.rel[0]) 1659f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return this->base == that.base; 1660f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return 1661f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org (this->offset < that.offset + that.size) && 1662f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org (this->offset + this->size > that.offset); 1663f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1664f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1665f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// We must not eliminate stores that affect the result of @ld if 1666f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// we find later stores to the same location, and we may no longer 1667f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// merge them with later stores. 1668f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// The stored value can, however, still be used to determine the value 1669f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// returned by future loads. 
1670f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 1671f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgMemoryOpt::lockStores(Instruction *const ld) 1672f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1673f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (Record *r = stores[ld->src(0).getFile()]; r; r = r->next) 1674f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!r->locked && r->overlaps(ld)) 1675f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org r->locked = true; 1676f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1677f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1678f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// Prior loads from the location of @st are no longer valid. 1679f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// Stores to the location of @st may no longer be used to derive 1680f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// the value at it nor be coalesced into later stores. 
1681f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 1682f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgMemoryOpt::purgeRecords(Instruction *const st, DataFile f) 1683f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1684f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (st) 1685f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org f = st->src(0).getFile(); 1686f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1687f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (Record *r = loads[f]; r; r = r->next) 1688f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!st || r->overlaps(st)) 1689f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org r->unlink(&loads[f]); 1690f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1691f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (Record *r = stores[f]; r; r = r->next) 1692f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!st || r->overlaps(st)) 1693f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org r->unlink(&stores[f]); 1694f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1695f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1696f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 1697f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgMemoryOpt::visit(BasicBlock *bb) 1698f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1699f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool ret = runOpt(bb); 1700f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // Run again, one pass won't combine 4 32 bit ld/st to a single 128 bit ld/st 1701f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // where 96 bit memory operations are forbidden. 
1702f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (ret) 1703f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ret = runOpt(bb); 1704f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return ret; 1705f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1706f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1707f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 1708f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgMemoryOpt::runOpt(BasicBlock *bb) 1709f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1710f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *ldst, *next; 1711f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Record *rec; 1712f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool isAdjacent = true; 1713f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1714f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (ldst = bb->getEntry(); ldst; ldst = next) { 1715f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool keep = true; 1716f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool isLoad = true; 1717f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org next = ldst->next; 1718f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1719f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (ldst->op == OP_LOAD || ldst->op == OP_VFETCH) { 1720f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (ldst->isDead()) { 1721f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // might have been produced by earlier optimization 1722f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org delete_Instruction(prog, ldst); 1723f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 1724f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1725f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else 1726f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (ldst->op == OP_STORE || ldst->op == OP_EXPORT) 
{ 1727f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org isLoad = false; 1728f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 1729f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // TODO: maybe have all fixed ops act as barrier ? 1730f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (ldst->op == OP_CALL) { 1731f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org purgeRecords(NULL, FILE_MEMORY_LOCAL); 1732f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org purgeRecords(NULL, FILE_MEMORY_GLOBAL); 1733f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org purgeRecords(NULL, FILE_MEMORY_SHARED); 1734f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org purgeRecords(NULL, FILE_SHADER_OUTPUT); 1735f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else 1736f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (ldst->op == OP_EMIT || ldst->op == OP_RESTART) { 1737f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org purgeRecords(NULL, FILE_SHADER_OUTPUT); 1738f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1739f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 1740f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1741f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (ldst->getPredicate()) // TODO: handle predicated ld/st 1742f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 1743f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1744f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (isLoad) { 1745f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org DataFile file = ldst->src(0).getFile(); 1746f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1747f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // if ld l[]/g[] look for previous store to eliminate the reload 1748f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (file == FILE_MEMORY_GLOBAL || file == FILE_MEMORY_LOCAL) { 
1749f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // TODO: shared memory ? 1750f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rec = findRecord(ldst, false, isAdjacent); 1751f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (rec && !isAdjacent) 1752f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org keep = !replaceLdFromSt(ldst, rec); 1753f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1754f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1755f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // or look for ld from the same location and replace this one 1756f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rec = keep ? findRecord(ldst, true, isAdjacent) : NULL; 1757f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (rec) { 1758f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!isAdjacent) 1759f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org keep = !replaceLdFromLd(ldst, rec); 1760f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 1761f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // or combine a previous load with this one 1762f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org keep = !combineLd(rec, ldst); 1763f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1764f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (keep) 1765f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org lockStores(ldst); 1766f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 1767f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rec = findRecord(ldst, false, isAdjacent); 1768f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (rec) { 1769f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!isAdjacent) 1770f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org keep = !replaceStFromSt(ldst, rec); 1771f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 
1772f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org keep = !combineSt(rec, ldst); 1773f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1774f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (keep) 1775f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org purgeRecords(ldst, DATA_FILE_COUNT); 1776f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1777f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (keep) 1778f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org addRecord(ldst); 1779f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1780f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org reset(); 1781f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1782f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 1783f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1784f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1785f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// ============================================================================= 1786f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1787f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// Turn control flow into predicated instructions (after register allocation !). 1788f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// TODO: 1789f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// Could move this to before register allocation on NVC0 and also handle nested 1790f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// constructs. 
1791f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgclass FlatteningPass : public Pass 1792f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1793f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgprivate: 1794f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org virtual bool visit(BasicBlock *); 1795f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1796f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool tryPredicateConditional(BasicBlock *); 1797f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org void predicateInstructions(BasicBlock *, Value *pred, CondCode cc); 1798f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org void tryPropagateBranch(BasicBlock *); 1799f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inline bool isConstantCondition(Value *pred); 1800f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inline bool mayPredicate(const Instruction *, const Value *pred) const; 1801f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inline void removeFlow(Instruction *); 1802f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}; 1803f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1804f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 1805f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgFlatteningPass::isConstantCondition(Value *pred) 1806f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1807f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *insn = pred->getUniqueInsn(); 1808f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(insn); 1809f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (insn->op != OP_SET || insn->srcExists(2)) 1810f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 1811f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1812f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int s = 0; s < 2 && insn->srcExists(s); ++s) { 
1813f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *ld = insn->getSrc(s)->getUniqueInsn(); 1814f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org DataFile file; 1815f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (ld) { 1816f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (ld->op != OP_MOV && ld->op != OP_LOAD) 1817f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 1818f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (ld->src(0).isIndirect(0)) 1819f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 1820f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org file = ld->src(0).getFile(); 1821f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 1822f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org file = insn->src(s).getFile(); 1823f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // catch $r63 on NVC0 1824f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (file == FILE_GPR && insn->getSrc(s)->reg.data.id > prog->maxGPR) 1825f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org file = FILE_IMMEDIATE; 1826f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1827f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (file != FILE_IMMEDIATE && file != FILE_MEMORY_CONST) 1828f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 1829f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1830f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 1831f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1832f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1833f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 1834f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgFlatteningPass::removeFlow(Instruction *insn) 1835f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1836f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org FlowInstruction *term = insn ? 
insn->asFlow() : NULL; 1837f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!term) 1838f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 1839f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Graph::Edge::Type ty = term->bb->cfg.outgoing().getType(); 1840f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1841f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (term->op == OP_BRA) { 1842f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // TODO: this might get more difficult when we get arbitrary BRAs 1843f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (ty == Graph::Edge::CROSS || ty == Graph::Edge::BACK) 1844f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 1845f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else 1846f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (term->op != OP_JOIN) 1847f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 1848f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1849f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Value *pred = term->getPredicate(); 1850f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1851f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org delete_Instruction(prog, term); 1852f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1853f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (pred && pred->refCount() == 0) { 1854f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *pSet = pred->getUniqueInsn(); 1855f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org pred->join->reg.data.id = -1; // deallocate 1856f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (pSet->isDead()) 1857f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org delete_Instruction(prog, pSet); 1858f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1859f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 
1860f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1861f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 1862f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgFlatteningPass::predicateInstructions(BasicBlock *bb, Value *pred, CondCode cc) 1863f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1864f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (Instruction *i = bb->getEntry(); i; i = i->next) { 1865f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (i->isNop()) 1866f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 1867f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(!i->getPredicate()); 1868f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org i->setPredicate(cc, pred); 1869f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1870f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org removeFlow(bb->getExit()); 1871f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1872f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1873f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 1874f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgFlatteningPass::mayPredicate(const Instruction *insn, const Value *pred) const 1875f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1876f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (insn->isPseudo()) 1877f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 1878f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // TODO: calls where we don't know which registers are modified 1879f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1880f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!prog->getTarget()->mayPredicate(insn, pred)) 1881f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 1882f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int d = 0; insn->defExists(d); ++d) 
1883f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (insn->getDef(d)->equals(pred)) 1884f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 1885f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 1886f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1887f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1888f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// If we conditionally skip over or to a branch instruction, replace it. 1889f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// NOTE: We do not update the CFG anymore here ! 1890f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 1891f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgFlatteningPass::tryPropagateBranch(BasicBlock *bb) 1892f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1893f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org BasicBlock *bf = NULL; 1894f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int i; 1895f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1896f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (bb->cfg.outgoingCount() != 2) 1897f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 1898f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!bb->getExit() || bb->getExit()->op != OP_BRA) 1899f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 1900f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Graph::EdgeIterator ei = bb->cfg.outgoing(); 1901f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1902f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (i = 0; !ei.end(); ++i, ei.next()) { 1903f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bf = BasicBlock::get(ei.getNode()); 1904f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (bf->getInsnCount() == 1) 1905f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 1906f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 
} 1907f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (ei.end() || !bf->getExit()) 1908f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 1909f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org FlowInstruction *bra = bb->getExit()->asFlow(); 1910f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org FlowInstruction *rep = bf->getExit()->asFlow(); 1911f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1912f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (rep->getPredicate()) 1913f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 1914f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (rep->op != OP_BRA && 1915f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rep->op != OP_JOIN && 1916f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org rep->op != OP_EXIT) 1917f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 1918f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1919f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bra->op = rep->op; 1920f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bra->target.bb = rep->target.bb; 1921f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (i) // 2nd out block means branch not taken 1922f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bra->cc = inverseCondCode(bra->cc); 1923f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bf->remove(rep); 1924f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1925f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1926f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 1927f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgFlatteningPass::visit(BasicBlock *bb) 1928f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1929f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (tryPredicateConditional(bb)) 1930f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 
1931f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1932f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // try to attach join to previous instruction 1933f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *insn = bb->getExit(); 1934f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (insn && insn->op == OP_JOIN && !insn->getPredicate()) { 1935f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org insn = insn->prev; 1936f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (insn && !insn->getPredicate() && 1937f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org !insn->asFlow() && 1938f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org insn->op != OP_TEXBAR && 1939f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org !isTextureOp(insn->op) && // probably just nve4 1940f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org insn->op != OP_LINTERP && // probably just nve4 1941f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org insn->op != OP_PINTERP && // probably just nve4 1942f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ((insn->op != OP_LOAD && insn->op != OP_STORE) || 1943f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org typeSizeof(insn->dType) <= 4) && 1944f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org !insn->isNop()) { 1945f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org insn->join = 1; 1946f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bb->remove(bb->getExit()); 1947f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 1948f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1949f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1950f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1951f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org tryPropagateBranch(bb); 1952f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1953f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 
1954f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 1955f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1956f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 1957f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgFlatteningPass::tryPredicateConditional(BasicBlock *bb) 1958f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 1959f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org BasicBlock *bL = NULL, *bR = NULL; 1960f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int nL = 0, nR = 0, limit = 12; 1961f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *insn; 1962f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int mask; 1963f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1964f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mask = bb->initiatesSimpleConditional(); 1965f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!mask) 1966f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 1967f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1968f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(bb->getExit()); 1969f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Value *pred = bb->getExit()->getPredicate(); 1970f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(pred); 1971f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1972f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (isConstantCondition(pred)) 1973f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org limit = 4; 1974f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1975f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Graph::EdgeIterator ei = bb->cfg.outgoing(); 1976f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1977f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mask & 1) { 1978f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bL = BasicBlock::get(ei.getNode()); 
1979f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (insn = bL->getEntry(); insn; insn = insn->next, ++nL) 1980f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!mayPredicate(insn, pred)) 1981f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 1982f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (nL > limit) 1983f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; // too long, do a real branch 1984f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1985f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ei.next(); 1986f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1987f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mask & 2) { 1988f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bR = BasicBlock::get(ei.getNode()); 1989f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (insn = bR->getEntry(); insn; insn = insn->next, ++nR) 1990f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!mayPredicate(insn, pred)) 1991f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 1992f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (nR > limit) 1993f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; // too long, do a real branch 1994f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 1995f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 1996f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (bL) 1997f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org predicateInstructions(bL, pred, bb->getExit()->cc); 1998f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (bR) 1999f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org predicateInstructions(bR, pred, inverseCondCode(bb->getExit()->cc)); 2000f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2001f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (bb->joinAt) { 
2002f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bb->remove(bb->joinAt); 2003f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bb->joinAt = NULL; 2004f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2005f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org removeFlow(bb->getExit()); // delete the branch/join at the fork point 2006f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2007f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // remove potential join operations at the end of the conditional 2008f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (prog->getTarget()->joinAnterior) { 2009f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bb = BasicBlock::get((bL ? bL : bR)->cfg.outgoing().getNode()); 2010f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (bb->getEntry() && bb->getEntry()->op == OP_JOIN) 2011f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org removeFlow(bb->getEntry()); 2012f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2013f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2014f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 2015f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 2016f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2017f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// ============================================================================= 2018f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2019f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// Common subexpression elimination. Stupid O^2 implementation. 
2020f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgclass LocalCSE : public Pass 2021f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 2022f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgprivate: 2023f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org virtual bool visit(BasicBlock *); 2024f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2025f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org inline bool tryReplace(Instruction **, Instruction *); 2026f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2027f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org DLList ops[OP_LAST + 1]; 2028f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}; 2029f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2030f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgclass GlobalCSE : public Pass 2031f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 2032f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgprivate: 2033f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org virtual bool visit(BasicBlock *); 2034f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}; 2035f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2036f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 2037f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgInstruction::isActionEqual(const Instruction *that) const 2038f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 2039f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (this->op != that->op || 2040f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->dType != that->dType || 2041f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->sType != that->sType) 2042f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 2043f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (this->cc != that->cc) 2044f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 
2045f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2046f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (this->asTex()) { 2047f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (memcmp(&this->asTex()->tex, 2048f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org &that->asTex()->tex, 2049f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org sizeof(this->asTex()->tex))) 2050f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 2051f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else 2052f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (this->asCmp()) { 2053f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (this->asCmp()->setCond != that->asCmp()->setCond) 2054f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 2055f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else 2056f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (this->asFlow()) { 2057f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 2058f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 2059f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (this->atomic != that->atomic || 2060f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->ipa != that->ipa || 2061f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->lanes != that->lanes || 2062f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->perPatch != that->perPatch) 2063f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 2064f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (this->postFactor != that->postFactor) 2065f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 2066f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2067f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2068f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (this->subOp != that->subOp || 
2069f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->saturate != that->saturate || 2070f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->rnd != that->rnd || 2071f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->ftz != that->ftz || 2072f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->dnz != that->dnz || 2073f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org this->cache != that->cache) 2074f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 2075f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2076f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 2077f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 2078f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2079f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 2080f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgInstruction::isResultEqual(const Instruction *that) const 2081f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 2082f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int d, s; 2083f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2084f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // NOTE: location of discard only affects tex with liveOnly and quadops 2085f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!this->defExists(0) && this->op != OP_DISCARD) 2086f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 2087f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2088f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!isActionEqual(that)) 2089f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 2090f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2091f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (this->predSrc != that->predSrc) 2092f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 
2093f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2094f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (d = 0; this->defExists(d); ++d) { 2095f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!that->defExists(d) || 2096f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org !this->getDef(d)->equals(that->getDef(d), false)) 2097f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 2098f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2099f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (that->defExists(d)) 2100f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 2101f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2102f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (s = 0; this->srcExists(s); ++s) { 2103f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!that->srcExists(s)) 2104f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 2105f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (this->src(s).mod != that->src(s).mod) 2106f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 2107f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!this->getSrc(s)->equals(that->getSrc(s), true)) 2108f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 2109f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2110f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (that->srcExists(s)) 2111f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 2112f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2113f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (op == OP_LOAD || op == OP_VFETCH) { 2114f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (src(0).getFile()) { 2115f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case FILE_MEMORY_CONST: 2116f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case FILE_SHADER_INPUT: 
2117f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 2118f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 2119f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 2120f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2121f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2122f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2123f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 2124f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 2125f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2126f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// pull through common expressions from different in-blocks 2127f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 2128f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgGlobalCSE::visit(BasicBlock *bb) 2129f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 2130f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *phi, *next, *ik; 2131f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int s; 2132f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2133f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // TODO: maybe do this with OP_UNION, too 2134f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2135f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = next) { 2136f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org next = phi->next; 2137f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (phi->getSrc(0)->refCount() > 1) 2138f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 2139f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ik = phi->getSrc(0)->getInsn(); 2140f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!ik) 2141f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; // probably a function input 
2142f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (s = 1; phi->srcExists(s); ++s) { 2143f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (phi->getSrc(s)->refCount() > 1) 2144f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 2145f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!phi->getSrc(s)->getInsn() || 2146f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org !phi->getSrc(s)->getInsn()->isResultEqual(ik)) 2147f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 2148f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2149f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!phi->srcExists(s)) { 2150f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *entry = bb->getEntry(); 2151f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ik->bb->remove(ik); 2152f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!entry || entry->op != OP_JOIN) 2153f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bb->insertHead(ik); 2154f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 2155f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bb->insertAfter(entry, ik); 2156f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ik->setDef(0, phi->getDef(0)); 2157f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org delete_Instruction(prog, phi); 2158f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2159f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2160f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2161f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 2162f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 2163f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2164f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 2165f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgLocalCSE::tryReplace(Instruction **ptr, Instruction *i) 
2166f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 2167f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *old = *ptr; 2168f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2169f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // TODO: maybe relax this later (causes trouble with OP_UNION) 2170f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (i->isPredicated()) 2171f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 2172f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2173f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!old->isResultEqual(i)) 2174f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 2175f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2176f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (int d = 0; old->defExists(d); ++d) 2177f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org old->def(d).replace(i->getDef(d), false); 2178f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org delete_Instruction(prog, old); 2179f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *ptr = NULL; 2180f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 2181f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 2182f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2183f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 2184f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgLocalCSE::visit(BasicBlock *bb) 2185f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 2186f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int replaced; 2187f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2188f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org do { 2189f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *ir, *next; 2190f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2191f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org replaced = 0; 
2192f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2193f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // will need to know the order of instructions 2194f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int serial = 0; 2195f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (ir = bb->getFirst(); ir; ir = ir->next) 2196f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ir->serial = serial++; 2197f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2198f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (ir = bb->getEntry(); ir; ir = next) { 2199f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int s; 2200f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Value *src = NULL; 2201f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2202f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org next = ir->next; 2203f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2204f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (ir->fixed) { 2205f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ops[ir->op].insert(ir); 2206f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org continue; 2207f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2208f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2209f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (s = 0; ir->srcExists(s); ++s) 2210f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (ir->getSrc(s)->asLValue()) 2211f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!src || ir->getSrc(s)->refCount() < src->refCount()) 2212f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org src = ir->getSrc(s); 2213f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2214f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (src) { 2215f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (Value::UseIterator it = src->uses.begin(); 2216f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 
it != src->uses.end(); ++it) { 2217f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *ik = (*it)->getInsn(); 2218f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (ik && ik->bb == ir->bb && ik->serial < ir->serial) 2219f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (tryReplace(&ir, ik)) 2220f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 2221f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2222f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 2223f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org DLLIST_FOR_EACH(&ops[ir->op], iter) 2224f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org { 2225f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *ik = reinterpret_cast<Instruction *>(iter.get()); 2226f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (tryReplace(&ir, ik)) 2227f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 2228f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2229f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2230f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2231f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (ir) 2232f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ops[ir->op].insert(ir); 2233f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 2234f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ++replaced; 2235f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2236f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (unsigned int i = 0; i <= OP_LAST; ++i) 2237f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ops[i].clear(); 2238f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2239f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } while (replaced); 2240f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2241f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 
2242f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 2243f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2244f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// ============================================================================= 2245f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2246f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// Remove computations of unused values. 2247f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgclass DeadCodeElim : public Pass 2248f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 2249f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgpublic: 2250f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bool buryAll(Program *); 2251f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2252f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgprivate: 2253f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org virtual bool visit(BasicBlock *); 2254f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2255f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org void checkSplitLoad(Instruction *ld); // for partially dead loads 2256f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2257f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned int deadCount; 2258f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}; 2259f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2260f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 2261f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgDeadCodeElim::buryAll(Program *prog) 2262f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 2263f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org do { 2264f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org deadCount = 0; 2265f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!this->run(prog, false, false)) 2266f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; 
2267f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } while (deadCount); 2268f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2269f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 2270f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 2271f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2272f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgbool 2273f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgDeadCodeElim::visit(BasicBlock *bb) 2274f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 2275f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *next; 2276f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2277f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (Instruction *i = bb->getFirst(); i; i = next) { 2278f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org next = i->next; 2279f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (i->isDead()) { 2280f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ++deadCount; 2281f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org delete_Instruction(prog, i); 2282f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else 2283f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (i->defExists(1) && (i->op == OP_VFETCH || i->op == OP_LOAD)) { 2284f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org checkSplitLoad(i); 2285f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2286f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2287f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return true; 2288f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 2289f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2290f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid 2291f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgDeadCodeElim::checkSplitLoad(Instruction *ld1) 2292f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 
2293f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Instruction *ld2 = NULL; // can get at most 2 loads 2294f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Value *def1[4]; 2295f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Value *def2[4]; 2296f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int32_t addr1, addr2; 2297f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int32_t size1, size2; 2298f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org int d, n1, n2; 2299f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org uint32_t mask = 0xffffffff; 2300f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2301f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (d = 0; ld1->defExists(d); ++d) 2302f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!ld1->getDef(d)->refCount() && ld1->getDef(d)->reg.data.id < 0) 2303f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mask &= ~(1 << d); 2304f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mask == 0xffffffff) 2305f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 2306f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2307f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org addr1 = ld1->getSrc(0)->reg.data.offset; 2308f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org n1 = n2 = 0; 2309f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org size1 = size2 = 0; 2310f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (d = 0; ld1->defExists(d); ++d) { 2311f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mask & (1 << d)) { 2312f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (size1 && (addr1 & 0x7)) 2313f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 2314f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org def1[n1] = ld1->getDef(d); 2315f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org size1 += def1[n1++]->reg.size; 
2316f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else 2317f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!n1) { 2318f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org addr1 += ld1->getDef(d)->reg.size; 2319f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 2320f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 2321f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2322f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2323f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (addr2 = addr1 + size1; ld1->defExists(d); ++d) { 2324f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (mask & (1 << d)) { 2325f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org def2[n2] = ld1->getDef(d); 2326f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org size2 += def2[n2++]->reg.size; 2327f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 2328f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(!n2); 2329f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org addr2 += ld1->getDef(d)->reg.size; 2330f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2331f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2332f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2333f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org updateLdStOffset(ld1, addr1, func); 2334f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ld1->setType(typeOfSize(size1)); 2335f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (d = 0; d < 4; ++d) 2336f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ld1->setDef(d, (d < n1) ? 
def1[d] : NULL); 2337f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2338f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!n2) 2339f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return; 2340f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2341f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ld2 = cloneShallow(func, ld1); 2342f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org updateLdStOffset(ld2, addr2, func); 2343f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ld2->setType(typeOfSize(size2)); 2344f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (d = 0; d < 4; ++d) 2345f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ld2->setDef(d, (d < n2) ? def2[d] : NULL); 2346f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2347f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ld1->bb->insertAfter(ld1, ld2); 2348f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 2349f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2350f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// ============================================================================= 2351f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 2352f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#define RUN_PASS(l, n, f) \ 2353f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (level >= (l)) { \ 2354f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (dbgFlags & NV50_IR_DEBUG_VERBOSE) \ 2355f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org INFO("PEEPHOLE: %s\n", #n); \ 2356f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org n pass; \ 2357f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!pass.f(this)) \ 2358f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return false; \ 2359f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 2360f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 
// Run the optimization passes listed below on this program, in the order
// given. Each RUN_PASS entry names the minimum @level at which its pass is
// enabled, the pass class, and the entry point to call on it.
// Returns false as soon as one pass fails, true otherwise.
bool
Program::optimizeSSA(int level)
{
   RUN_PASS(1, DeadCodeElim, buryAll);
   RUN_PASS(1, CopyPropagation, run);
   RUN_PASS(2, GlobalCSE, run);
   RUN_PASS(1, LocalCSE, run);
   RUN_PASS(2, AlgebraicOpt, run);
   RUN_PASS(2, ModifierFolding, run); // before load propagation -> less checks
   RUN_PASS(1, ConstantFolding, foldAll);
   RUN_PASS(1, LoadPropagation, run);
   RUN_PASS(2, MemoryOpt, run);
   RUN_PASS(2, LocalCSE, run);
   // enabled from level 0 on, unlike the other passes in this list
   RUN_PASS(0, DeadCodeElim, buryAll);

   return true;
}

// Run FlatteningPass on this program if @level is at least 2 (going by the
// name, this is meant to be called after register allocation -- confirm
// against the caller). Returns false if the pass fails, true otherwise.
bool
Program::optimizePostRA(int level)
{
   RUN_PASS(2, FlatteningPass, run);
   return true;
}

} // namespace nv50_ir