X86FloatingPoint.cpp revision 3cdd9f65ed2630e999d5034ad02d07cbb73abc88
1b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===// 2b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// 3b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// The LLVM Compiler Infrastructure 4b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// 5b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// This file was developed by the LLVM research group and is distributed under 6b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// the University of Illinois Open Source License. See LICENSE.TXT for details. 7b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// 8b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//===----------------------------------------------------------------------===// 9b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// 10b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// This file defines the pass which converts floating point instructions from 11b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// virtual registers into register stack instructions. This pass uses live 12b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// variable information to indicate where the FPn registers are used and their 13b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// lifetimes. 14b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// 15b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// This pass is hampered by the lack of decent CFG manipulation routines for 16b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// machine code. In particular, this wants to be able to split critical edges 17b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// as necessary, traverse the machine basic block CFG in depth-first order, and 18b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// allow there to be multiple machine basic blocks for each LLVM basicblock 19b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// (needed for critical edge splitting). 20b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// 21b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// In particular, this pass currently barfs on critical edges. Because of this, 22b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// it requires the instruction selector to insert FP_REG_KILL instructions on 23b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// the exits of any basic block that has critical edges going from it, or which 24b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// branch to a critical basic block. 25b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// 26b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// FIXME: this is not implemented yet. The stackifier pass only works on local 27b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// basic blocks. 28b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru// 29b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru//===----------------------------------------------------------------------===// 30b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 31b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#define DEBUG_TYPE "fp" 32b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "X86.h" 33b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "X86InstrInfo.h" 34b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "llvm/CodeGen/MachineFunctionPass.h" 35b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "llvm/CodeGen/MachineInstrBuilder.h" 36b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "llvm/CodeGen/LiveVariables.h" 37b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "llvm/CodeGen/Passes.h" 38b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "llvm/Target/TargetInstrInfo.h" 39b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "llvm/Target/TargetMachine.h" 40b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "llvm/Support/Debug.h" 41b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "llvm/Support/Compiler.h" 4250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "llvm/ADT/DepthFirstIterator.h" 43b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "llvm/ADT/Statistic.h" 44b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include "llvm/ADT/STLExtras.h" 45b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include <algorithm> 46b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include <iostream> 47b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru#include <set> 48b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruusing namespace llvm; 49b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 5050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehonamespace { 51b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru Statistic<> NumFXCH("x86-codegen", "Number of fxch instructions inserted"); 52b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru Statistic<> NumFP ("x86-codegen", "Number of floating point instructions"); 53b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 54b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru struct VISIBILITY_HIDDEN FPS : public MachineFunctionPass { 55b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru virtual bool runOnMachineFunction(MachineFunction &MF); 56b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 57b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru virtual const char *getPassName() const { return "X86 FP Stackifier"; } 5850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 59b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru virtual void getAnalysisUsage(AnalysisUsage &AU) const { 60b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru AU.addRequired<LiveVariables>(); 61b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru MachineFunctionPass::getAnalysisUsage(AU); 62b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 63b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru private: 64b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru LiveVariables *LV; // Live variable info for current function... 6550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho MachineBasicBlock *MBB; // Current basic block 66b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru unsigned Stack[8]; // FP<n> Registers in each stack slot... 67b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru unsigned RegMap[8]; // Track which stack slot contains each register 68b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru unsigned StackTop; // The current top of the FP stack. 69b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 70b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru void dumpStack() const { 71b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru std::cerr << "Stack contents:"; 72b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for (unsigned i = 0; i != StackTop; ++i) { 7350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho std::cerr << " FP" << Stack[i]; 74b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!"); 75b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 76b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru std::cerr << "\n"; 77b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 78b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru private: 79b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // getSlot - Return the stack slot number a particular register number is 80b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // in... 8150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho unsigned getSlot(unsigned RegNo) const { 82b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru assert(RegNo < 8 && "Regno out of range!"); 83b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return RegMap[RegNo]; 84b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 85b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 86b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // getStackEntry - Return the X86::FP<n> register in register ST(i) 87b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru unsigned getStackEntry(unsigned STi) const { 88b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru assert(STi < StackTop && "Access past stack top!"); 8950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return Stack[StackTop-1-STi]; 90b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 91b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 92b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // getSTReg - Return the X86::ST(i) register which contains the specified 93b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // FP<RegNo> register 94b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru unsigned getSTReg(unsigned RegNo) const { 95b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return StackTop - 1 - getSlot(RegNo) + llvm::X86::ST0; 96b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 9750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 98b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // pushReg - Push the specified FP<n> register onto the stack 99b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru void pushReg(unsigned Reg) { 100b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru assert(Reg < 8 && "Register number out of range!"); 101b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru assert(StackTop < 8 && "Stack overflow!"); 102b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru Stack[StackTop] = Reg; 103b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru RegMap[Reg] = StackTop++; 104b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 10550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 106b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; } 107b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru void moveToTop(unsigned RegNo, MachineBasicBlock::iterator &I) { 108b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (!isAtTop(RegNo)) { 109b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru unsigned STReg = getSTReg(RegNo); 110b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru unsigned RegOnTop = getStackEntry(0); 111b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 112b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Swap the slots the regs are in 113b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru std::swap(RegMap[RegNo], RegMap[RegOnTop]); 11450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 115b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Swap stack slot contents 116b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru assert(RegMap[RegOnTop] < StackTop); 117b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]); 118b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 119b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Emit an fxch to update the runtime processors version of the state 120b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru BuildMI(*MBB, I, X86::FXCH, 1).addReg(STReg); 121b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru NumFXCH++; 122b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 123b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 124b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 12550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho void duplicateToTop(unsigned RegNo, unsigned AsReg, MachineInstr *I) { 126b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru unsigned STReg = getSTReg(RegNo); 127b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru pushReg(AsReg); // New register on top of stack 128b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 129b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru BuildMI(*MBB, I, X86::FLDrr, 1).addReg(STReg); 130b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 131b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 132b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // popStackAfter - Pop the current value off of the top of the FP stack 133b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // after the specified instruction. 13450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho void popStackAfter(MachineBasicBlock::iterator &I); 135b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 136b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // freeStackSlotAfter - Free the specified register from the register stack, 137b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // so that it is no longer in a register. If the register is currently at 138b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // the top of the stack, we just pop the current instruction, otherwise we 139b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // store the current top-of-stack into the specified slot, then pop the top 140b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // of stack. 141b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg); 142b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 143b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB); 144b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 145b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru void handleZeroArgFP(MachineBasicBlock::iterator &I); 146b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru void handleOneArgFP(MachineBasicBlock::iterator &I); 14750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho void handleOneArgFPRW(MachineBasicBlock::iterator &I); 148b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru void handleTwoArgFP(MachineBasicBlock::iterator &I); 149b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru void handleCompareFP(MachineBasicBlock::iterator &I); 150b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru void handleCondMovFP(MachineBasicBlock::iterator &I); 151b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru void handleSpecialFP(MachineBasicBlock::iterator &I); 152b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru }; 153b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 154b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 155b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruFunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); } 156b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 157b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/// runOnMachineFunction - Loop over all of the basic blocks, transforming FP 158b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/// register references into FP stack references. 159b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/// 160b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querubool FPS::runOnMachineFunction(MachineFunction &MF) { 161b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // We only need to run this pass if there are any FP registers used in this 162b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // function. If it is all integer, there is nothing for us to do! 16350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const bool *PhysRegsUsed = MF.getUsedPhysregs(); 164b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru bool FPIsUsed = false; 165b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 166b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru assert(X86::FP6 == X86::FP0+6 && "Register enums aren't sorted right!"); 167b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for (unsigned i = 0; i <= 6; ++i) 168b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (PhysRegsUsed[X86::FP0+i]) { 169b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru FPIsUsed = true; 170b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru break; 171b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 172b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 173b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Early exit. 17450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (!FPIsUsed) return false; 175b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 176b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru LV = &getAnalysis<LiveVariables>(); 177b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru StackTop = 0; 178b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 179b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Process the function in depth first order so that we process at least one 180b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // of the predecessors for every reachable block in the function. 18150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho std::set<MachineBasicBlock*> Processed; 182b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru MachineBasicBlock *Entry = MF.begin(); 183b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 184b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru bool Changed = false; 185b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for (df_ext_iterator<MachineBasicBlock*, std::set<MachineBasicBlock*> > 186b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru I = df_ext_begin(Entry, Processed), E = df_ext_end(Entry, Processed); 187b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru I != E; ++I) 18850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Changed |= processBasicBlock(MF, **I); 189b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 190b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return Changed; 191b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 192b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 193b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/// processBasicBlock - Loop over all of the instructions in the basic block, 194b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/// transforming FP instructions into their stack form. 195b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru/// 196b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Querubool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { 197b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); 198b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru bool Changed = false; 199b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru MBB = &BB; 200b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 201b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) { 202b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru MachineInstr *MI = I; 203b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru unsigned Flags = TII.get(MI->getOpcode()).TSFlags; 204b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if ((Flags & X86II::FPTypeMask) == X86II::NotFP) 205b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru continue; // Efficiently ignore non-fp insts! 206b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 207b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru MachineInstr *PrevMI = 0; 208b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (I != BB.begin()) 209b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru PrevMI = prior(I); 210b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 211b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ++NumFP; // Keep track of # of pseudo instrs 212b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru DEBUG(std::cerr << "\nFPInst:\t"; MI->print(std::cerr, &(MF.getTarget()))); 213b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 214b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Get dead variables list now because the MI pointer may be deleted as part 215b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // of processing! 216b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru LiveVariables::killed_iterator IB, IE; 217b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru tie(IB, IE) = LV->dead_range(MI); 218b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 219b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru DEBUG( 220b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const MRegisterInfo *MRI = MF.getTarget().getRegisterInfo(); 221b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru LiveVariables::killed_iterator I = LV->killed_begin(MI); 222b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho LiveVariables::killed_iterator E = LV->killed_end(MI); 223b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (I != E) { 224b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru std::cerr << "Killed Operands:"; 225b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for (; I != E; ++I) 226b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru std::cerr << " %" << MRI->getName(*I); 227b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru std::cerr << "\n"; 228b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 229b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ); 230b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 231b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru switch (Flags & X86II::FPTypeMask) { 232b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru case X86II::ZeroArgFP: handleZeroArgFP(I); break; 233b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru case X86II::OneArgFP: handleOneArgFP(I); break; // fstp ST(0) 234b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru case X86II::OneArgFPRW: handleOneArgFPRW(I); break; // ST(0) = fsqrt(ST(0)) 235b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru case X86II::TwoArgFP: handleTwoArgFP(I); break; 236b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru case X86II::CompareFP: handleCompareFP(I); break; 237b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru case X86II::CondMovFP: handleCondMovFP(I); break; 238b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru case X86II::SpecialFP: handleSpecialFP(I); break; 239b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru default: assert(0 && "Unknown FP Type!"); 240b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 241b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 242b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Check to see if any of the values defined by this instruction are dead 243b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // after definition. If so, pop them. 244b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru for (; IB != IE; ++IB) { 245b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru unsigned Reg = *IB; 246b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (Reg >= X86::FP0 && Reg <= X86::FP6) { 247b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru DEBUG(std::cerr << "Register FP#" << Reg-X86::FP0 << " is dead!\n"); 248b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru freeStackSlotAfter(I, Reg-X86::FP0); 249b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 250b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 251b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 252b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Print out all of the instructions expanded to if -debug 253b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru DEBUG( 254b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru MachineBasicBlock::iterator PrevI(PrevMI); 255b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (I == PrevI) { 256b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru std::cerr << "Just deleted pseudo instruction\n"; 257b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 258b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru MachineBasicBlock::iterator Start = I; 259 // Rewind to first instruction newly inserted. 260 while (Start != BB.begin() && prior(Start) != PrevI) --Start; 261 std::cerr << "Inserted instructions:\n\t"; 262 Start->print(std::cerr, &MF.getTarget()); 263 while (++Start != next(I)); 264 } 265 dumpStack(); 266 ); 267 268 Changed = true; 269 } 270 271 assert(StackTop == 0 && "Stack not empty at end of basic block?"); 272 return Changed; 273} 274 275//===----------------------------------------------------------------------===// 276// Efficient Lookup Table Support 277//===----------------------------------------------------------------------===// 278 279namespace { 280 struct TableEntry { 281 unsigned from; 282 unsigned to; 283 bool operator<(const TableEntry &TE) const { return from < TE.from; } 284 friend bool operator<(const TableEntry &TE, unsigned V) { 285 return TE.from < V; 286 } 287 friend bool operator<(unsigned V, const TableEntry &TE) { 288 return V < TE.from; 289 } 290 }; 291} 292 293static bool TableIsSorted(const TableEntry *Table, unsigned NumEntries) { 294 for (unsigned i = 0; i != NumEntries-1; ++i) 295 if (!(Table[i] < Table[i+1])) return false; 296 return true; 297} 298 299static int Lookup(const TableEntry *Table, unsigned N, unsigned Opcode) { 300 const TableEntry *I = std::lower_bound(Table, Table+N, Opcode); 301 if (I != Table+N && I->from == Opcode) 302 return I->to; 303 return -1; 304} 305 306#define ARRAY_SIZE(TABLE) \ 307 (sizeof(TABLE)/sizeof(TABLE[0])) 308 309#ifdef NDEBUG 310#define ASSERT_SORTED(TABLE) 311#else 312#define ASSERT_SORTED(TABLE) \ 313 { static bool TABLE##Checked = false; \ 314 if (!TABLE##Checked) { \ 315 assert(TableIsSorted(TABLE, ARRAY_SIZE(TABLE)) && \ 316 "All lookup tables must be sorted for efficient access!"); \ 317 TABLE##Checked = true; \ 318 } \ 319 } 320#endif 321 322//===----------------------------------------------------------------------===// 323// Register File -> Register Stack Mapping Methods 324//===----------------------------------------------------------------------===// 325 326// OpcodeTable - Sorted map of register instructions to their stack version. 327// The first element is an register file pseudo instruction, the second is the 328// concrete X86 instruction which uses the register stack. 329// 330static const TableEntry OpcodeTable[] = { 331 { X86::FpABS , X86::FABS }, 332 { X86::FpADD32m , X86::FADD32m }, 333 { X86::FpADD64m , X86::FADD64m }, 334 { X86::FpCHS , X86::FCHS }, 335 { X86::FpCMOVB , X86::FCMOVB }, 336 { X86::FpCMOVBE , X86::FCMOVBE }, 337 { X86::FpCMOVE , X86::FCMOVE }, 338 { X86::FpCMOVNB , X86::FCMOVNB }, 339 { X86::FpCMOVNBE , X86::FCMOVNBE }, 340 { X86::FpCMOVNE , X86::FCMOVNE }, 341 { X86::FpCMOVNP , X86::FCMOVNP }, 342 { X86::FpCMOVP , X86::FCMOVP }, 343 { X86::FpCOS , X86::FCOS }, 344 { X86::FpDIV32m , X86::FDIV32m }, 345 { X86::FpDIV64m , X86::FDIV64m }, 346 { X86::FpDIVR32m , X86::FDIVR32m }, 347 { X86::FpDIVR64m , X86::FDIVR64m }, 348 { X86::FpIADD16m , X86::FIADD16m }, 349 { X86::FpIADD32m , X86::FIADD32m }, 350 { X86::FpIDIV16m , X86::FIDIV16m }, 351 { X86::FpIDIV32m , X86::FIDIV32m }, 352 { X86::FpIDIVR16m, X86::FIDIVR16m}, 353 { X86::FpIDIVR32m, X86::FIDIVR32m}, 354 { X86::FpILD16m , X86::FILD16m }, 355 { X86::FpILD32m , X86::FILD32m }, 356 { X86::FpILD64m , X86::FILD64m }, 357 { X86::FpIMUL16m , X86::FIMUL16m }, 358 { X86::FpIMUL32m , X86::FIMUL32m }, 359 { X86::FpIST16m , X86::FIST16m }, 360 { X86::FpIST32m , X86::FIST32m }, 361 { X86::FpIST64m , X86::FISTP64m }, 362 { X86::FpISTT16m , X86::FISTTP16m}, 363 { X86::FpISTT32m , X86::FISTTP32m}, 364 { X86::FpISTT64m , X86::FISTTP64m}, 365 { X86::FpISUB16m , X86::FISUB16m }, 366 { X86::FpISUB32m , X86::FISUB32m }, 367 { X86::FpISUBR16m, X86::FISUBR16m}, 368 { X86::FpISUBR32m, X86::FISUBR32m}, 369 { X86::FpLD0 , X86::FLD0 }, 370 { X86::FpLD1 , X86::FLD1 }, 371 { X86::FpLD32m , X86::FLD32m }, 372 { X86::FpLD64m , X86::FLD64m }, 373 { X86::FpMUL32m , X86::FMUL32m }, 374 { X86::FpMUL64m , X86::FMUL64m }, 375 { X86::FpSIN , X86::FSIN }, 376 { X86::FpSQRT , X86::FSQRT }, 377 { X86::FpST32m , X86::FST32m }, 378 { X86::FpST64m , X86::FST64m }, 379 { X86::FpSUB32m , X86::FSUB32m }, 380 { X86::FpSUB64m , X86::FSUB64m }, 381 { X86::FpSUBR32m , X86::FSUBR32m }, 382 { X86::FpSUBR64m , X86::FSUBR64m }, 383 { X86::FpTST , X86::FTST }, 384 { X86::FpUCOMIr , X86::FUCOMIr }, 385 { X86::FpUCOMr , X86::FUCOMr }, 386}; 387 388static unsigned getConcreteOpcode(unsigned Opcode) { 389 ASSERT_SORTED(OpcodeTable); 390 int Opc = Lookup(OpcodeTable, ARRAY_SIZE(OpcodeTable), Opcode); 391 assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!"); 392 return Opc; 393} 394 395//===----------------------------------------------------------------------===// 396// Helper Methods 397//===----------------------------------------------------------------------===// 398 399// PopTable - Sorted map of instructions to their popping version. The first 400// element is an instruction, the second is the version which pops. 401// 402static const TableEntry PopTable[] = { 403 { X86::FADDrST0 , X86::FADDPrST0 }, 404 405 { X86::FDIVRrST0, X86::FDIVRPrST0 }, 406 { X86::FDIVrST0 , X86::FDIVPrST0 }, 407 408 { X86::FIST16m , X86::FISTP16m }, 409 { X86::FIST32m , X86::FISTP32m }, 410 411 { X86::FMULrST0 , X86::FMULPrST0 }, 412 413 { X86::FST32m , X86::FSTP32m }, 414 { X86::FST64m , X86::FSTP64m }, 415 { X86::FSTrr , X86::FSTPrr }, 416 417 { X86::FSUBRrST0, X86::FSUBRPrST0 }, 418 { X86::FSUBrST0 , X86::FSUBPrST0 }, 419 420 { X86::FUCOMIr , X86::FUCOMIPr }, 421 422 { X86::FUCOMPr , X86::FUCOMPPr }, 423 { X86::FUCOMr , X86::FUCOMPr }, 424}; 425 426/// popStackAfter - Pop the current value off of the top of the FP stack after 427/// the specified instruction. This attempts to be sneaky and combine the pop 428/// into the instruction itself if possible. The iterator is left pointing to 429/// the last instruction, be it a new pop instruction inserted, or the old 430/// instruction if it was modified in place. 431/// 432void FPS::popStackAfter(MachineBasicBlock::iterator &I) { 433 ASSERT_SORTED(PopTable); 434 assert(StackTop > 0 && "Cannot pop empty stack!"); 435 RegMap[Stack[--StackTop]] = ~0; // Update state 436 437 // Check to see if there is a popping version of this instruction... 438 int Opcode = Lookup(PopTable, ARRAY_SIZE(PopTable), I->getOpcode()); 439 if (Opcode != -1) { 440 I->setOpcode(Opcode); 441 if (Opcode == X86::FUCOMPPr) 442 I->RemoveOperand(0); 443 444 } else { // Insert an explicit pop 445 I = BuildMI(*MBB, ++I, X86::FSTPrr, 1).addReg(X86::ST0); 446 } 447} 448 449/// freeStackSlotAfter - Free the specified register from the register stack, so 450/// that it is no longer in a register. If the register is currently at the top 451/// of the stack, we just pop the current instruction, otherwise we store the 452/// current top-of-stack into the specified slot, then pop the top of stack. 453void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) { 454 if (getStackEntry(0) == FPRegNo) { // already at the top of stack? easy. 455 popStackAfter(I); 456 return; 457 } 458 459 // Otherwise, store the top of stack into the dead slot, killing the operand 460 // without having to add in an explicit xchg then pop. 461 // 462 unsigned STReg = getSTReg(FPRegNo); 463 unsigned OldSlot = getSlot(FPRegNo); 464 unsigned TopReg = Stack[StackTop-1]; 465 Stack[OldSlot] = TopReg; 466 RegMap[TopReg] = OldSlot; 467 RegMap[FPRegNo] = ~0; 468 Stack[--StackTop] = ~0; 469 I = BuildMI(*MBB, ++I, X86::FSTPrr, 1).addReg(STReg); 470} 471 472 473static unsigned getFPReg(const MachineOperand &MO) { 474 assert(MO.isRegister() && "Expected an FP register!"); 475 unsigned Reg = MO.getReg(); 476 assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!"); 477 return Reg - X86::FP0; 478} 479 480 481//===----------------------------------------------------------------------===// 482// Instruction transformation implementation 483//===----------------------------------------------------------------------===// 484 485/// handleZeroArgFP - ST(0) = fld0 ST(0) = flds <mem> 486/// 487void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) { 488 MachineInstr *MI = I; 489 unsigned DestReg = getFPReg(MI->getOperand(0)); 490 491 // Change from the pseudo instruction to the concrete instruction. 492 MI->RemoveOperand(0); // Remove the explicit ST(0) operand 493 MI->setOpcode(getConcreteOpcode(MI->getOpcode())); 494 495 // Result gets pushed on the stack. 496 pushReg(DestReg); 497} 498 499/// handleOneArgFP - fst <mem>, ST(0) 500/// 501void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) { 502 MachineInstr *MI = I; 503 MachineFunction *MF = MI->getParent()->getParent(); 504 const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo(); 505 unsigned NumOps = TII.getNumOperands(MI->getOpcode()); 506 assert((NumOps == 5 || NumOps == 1) && 507 "Can only handle fst* & ftst instructions!"); 508 509 // Is this the last use of the source register? 510 unsigned Reg = getFPReg(MI->getOperand(NumOps-1)); 511 bool KillsSrc = LV->KillsRegister(MI, X86::FP0+Reg); 512 513 // FISTP64m is strange because there isn't a non-popping versions. 514 // If we have one _and_ we don't want to pop the operand, duplicate the value 515 // on the stack instead of moving it. This ensure that popping the value is 516 // always ok. 517 // Ditto FISTTP16m, FISTTP32m, FISTTP64m. 518 // 519 if (!KillsSrc && 520 (MI->getOpcode() == X86::FpIST64m || 521 MI->getOpcode() == X86::FpISTT16m || 522 MI->getOpcode() == X86::FpISTT32m || 523 MI->getOpcode() == X86::FpISTT64m)) { 524 duplicateToTop(Reg, 7 /*temp register*/, I); 525 } else { 526 moveToTop(Reg, I); // Move to the top of the stack... 527 } 528 529 // Convert from the pseudo instruction to the concrete instruction. 530 MI->RemoveOperand(NumOps-1); // Remove explicit ST(0) operand 531 MI->setOpcode(getConcreteOpcode(MI->getOpcode())); 532 533 if (MI->getOpcode() == X86::FISTP64m || 534 MI->getOpcode() == X86::FISTTP16m || 535 MI->getOpcode() == X86::FISTTP32m || 536 MI->getOpcode() == X86::FISTTP64m) { 537 assert(StackTop > 0 && "Stack empty??"); 538 --StackTop; 539 } else if (KillsSrc) { // Last use of operand? 540 popStackAfter(I); 541 } 542} 543 544 545/// handleOneArgFPRW: Handle instructions that read from the top of stack and 546/// replace the value with a newly computed value. These instructions may have 547/// non-fp operands after their FP operands. 548/// 549/// Examples: 550/// R1 = fchs R2 551/// R1 = fadd R2, [mem] 552/// 553void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) { 554 MachineInstr *MI = I; 555 MachineFunction *MF = MI->getParent()->getParent(); 556 const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo(); 557 unsigned NumOps = TII.getNumOperands(MI->getOpcode()); 558 assert(NumOps >= 2 && "FPRW instructions must have 2 ops!!"); 559 560 // Is this the last use of the source register? 561 unsigned Reg = getFPReg(MI->getOperand(1)); 562 bool KillsSrc = LV->KillsRegister(MI, X86::FP0+Reg); 563 564 if (KillsSrc) { 565 // If this is the last use of the source register, just make sure it's on 566 // the top of the stack. 567 moveToTop(Reg, I); 568 assert(StackTop > 0 && "Stack cannot be empty!"); 569 --StackTop; 570 pushReg(getFPReg(MI->getOperand(0))); 571 } else { 572 // If this is not the last use of the source register, _copy_ it to the top 573 // of the stack. 574 duplicateToTop(Reg, getFPReg(MI->getOperand(0)), I); 575 } 576 577 // Change from the pseudo instruction to the concrete instruction. 578 MI->RemoveOperand(1); // Drop the source operand. 579 MI->RemoveOperand(0); // Drop the destination operand. 580 MI->setOpcode(getConcreteOpcode(MI->getOpcode())); 581} 582 583 584//===----------------------------------------------------------------------===// 585// Define tables of various ways to map pseudo instructions 586// 587 588// ForwardST0Table - Map: A = B op C into: ST(0) = ST(0) op ST(i) 589static const TableEntry ForwardST0Table[] = { 590 { X86::FpADD , X86::FADDST0r }, 591 { X86::FpDIV , X86::FDIVST0r }, 592 { X86::FpMUL , X86::FMULST0r }, 593 { X86::FpSUB , X86::FSUBST0r }, 594}; 595 596// ReverseST0Table - Map: A = B op C into: ST(0) = ST(i) op ST(0) 597static const TableEntry ReverseST0Table[] = { 598 { X86::FpADD , X86::FADDST0r }, // commutative 599 { X86::FpDIV , X86::FDIVRST0r }, 600 { X86::FpMUL , X86::FMULST0r }, // commutative 601 { X86::FpSUB , X86::FSUBRST0r }, 602}; 603 604// ForwardSTiTable - Map: A = B op C into: ST(i) = ST(0) op ST(i) 605static const TableEntry ForwardSTiTable[] = { 606 { X86::FpADD , X86::FADDrST0 }, // commutative 607 { X86::FpDIV , X86::FDIVRrST0 }, 608 { X86::FpMUL , X86::FMULrST0 }, // commutative 609 { X86::FpSUB , X86::FSUBRrST0 }, 610}; 611 612// ReverseSTiTable - Map: A = B op C into: ST(i) = ST(i) op ST(0) 613static const TableEntry ReverseSTiTable[] = { 614 { X86::FpADD , X86::FADDrST0 }, 615 { X86::FpDIV , X86::FDIVrST0 }, 616 { X86::FpMUL , X86::FMULrST0 }, 617 { X86::FpSUB , X86::FSUBrST0 }, 618}; 619 620 621/// handleTwoArgFP - Handle instructions like FADD and friends which are virtual 622/// instructions which need to be simplified and possibly transformed. 623/// 624/// Result: ST(0) = fsub ST(0), ST(i) 625/// ST(i) = fsub ST(0), ST(i) 626/// ST(0) = fsubr ST(0), ST(i) 627/// ST(i) = fsubr ST(0), ST(i) 628/// 629void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) { 630 ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table); 631 ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable); 632 MachineInstr *MI = I; 633 634 MachineFunction *MF = MI->getParent()->getParent(); 635 const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo(); 636 unsigned NumOperands = TII.getNumOperands(MI->getOpcode()); 637 assert(NumOperands == 3 && "Illegal TwoArgFP instruction!"); 638 unsigned Dest = getFPReg(MI->getOperand(0)); 639 unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2)); 640 unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1)); 641 bool KillsOp0 = LV->KillsRegister(MI, X86::FP0+Op0); 642 bool KillsOp1 = LV->KillsRegister(MI, X86::FP0+Op1); 643 644 unsigned TOS = getStackEntry(0); 645 646 // One of our operands must be on the top of the stack. If neither is yet, we 647 // need to move one. 648 if (Op0 != TOS && Op1 != TOS) { // No operand at TOS? 649 // We can choose to move either operand to the top of the stack. If one of 650 // the operands is killed by this instruction, we want that one so that we 651 // can update right on top of the old version. 652 if (KillsOp0) { 653 moveToTop(Op0, I); // Move dead operand to TOS. 654 TOS = Op0; 655 } else if (KillsOp1) { 656 moveToTop(Op1, I); 657 TOS = Op1; 658 } else { 659 // All of the operands are live after this instruction executes, so we 660 // cannot update on top of any operand. Because of this, we must 661 // duplicate one of the stack elements to the top. It doesn't matter 662 // which one we pick. 663 // 664 duplicateToTop(Op0, Dest, I); 665 Op0 = TOS = Dest; 666 KillsOp0 = true; 667 } 668 } else if (!KillsOp0 && !KillsOp1) { 669 // If we DO have one of our operands at the top of the stack, but we don't 670 // have a dead operand, we must duplicate one of the operands to a new slot 671 // on the stack. 672 duplicateToTop(Op0, Dest, I); 673 Op0 = TOS = Dest; 674 KillsOp0 = true; 675 } 676 677 // Now we know that one of our operands is on the top of the stack, and at 678 // least one of our operands is killed by this instruction. 679 assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) && 680 "Stack conditions not set up right!"); 681 682 // We decide which form to use based on what is on the top of the stack, and 683 // which operand is killed by this instruction. 684 const TableEntry *InstTable; 685 bool isForward = TOS == Op0; 686 bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0); 687 if (updateST0) { 688 if (isForward) 689 InstTable = ForwardST0Table; 690 else 691 InstTable = ReverseST0Table; 692 } else { 693 if (isForward) 694 InstTable = ForwardSTiTable; 695 else 696 InstTable = ReverseSTiTable; 697 } 698 699 int Opcode = Lookup(InstTable, ARRAY_SIZE(ForwardST0Table), MI->getOpcode()); 700 assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!"); 701 702 // NotTOS - The register which is not on the top of stack... 703 unsigned NotTOS = (TOS == Op0) ? Op1 : Op0; 704 705 // Replace the old instruction with a new instruction 706 MBB->remove(I++); 707 I = BuildMI(*MBB, I, Opcode, 1).addReg(getSTReg(NotTOS)); 708 709 // If both operands are killed, pop one off of the stack in addition to 710 // overwriting the other one. 711 if (KillsOp0 && KillsOp1 && Op0 != Op1) { 712 assert(!updateST0 && "Should have updated other operand!"); 713 popStackAfter(I); // Pop the top of stack 714 } 715 716 // Update stack information so that we know the destination register is now on 717 // the stack. 718 unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS); 719 assert(UpdatedSlot < StackTop && Dest < 7); 720 Stack[UpdatedSlot] = Dest; 721 RegMap[Dest] = UpdatedSlot; 722 delete MI; // Remove the old instruction 723} 724 725/// handleCompareFP - Handle FUCOM and FUCOMI instructions, which have two FP 726/// register arguments and no explicit destinations. 727/// 728void FPS::handleCompareFP(MachineBasicBlock::iterator &I) { 729 ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table); 730 ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable); 731 MachineInstr *MI = I; 732 733 MachineFunction *MF = MI->getParent()->getParent(); 734 const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo(); 735 unsigned NumOperands = TII.getNumOperands(MI->getOpcode()); 736 assert(NumOperands == 2 && "Illegal FUCOM* instruction!"); 737 unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2)); 738 unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1)); 739 bool KillsOp0 = LV->KillsRegister(MI, X86::FP0+Op0); 740 bool KillsOp1 = LV->KillsRegister(MI, X86::FP0+Op1); 741 742 // Make sure the first operand is on the top of stack, the other one can be 743 // anywhere. 744 moveToTop(Op0, I); 745 746 // Change from the pseudo instruction to the concrete instruction. 747 MI->getOperand(0).setReg(getSTReg(Op1)); 748 MI->RemoveOperand(1); 749 MI->setOpcode(getConcreteOpcode(MI->getOpcode())); 750 751 // If any of the operands are killed by this instruction, free them. 752 if (KillsOp0) freeStackSlotAfter(I, Op0); 753 if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(I, Op1); 754} 755 756/// handleCondMovFP - Handle two address conditional move instructions. These 757/// instructions move a st(i) register to st(0) iff a condition is true. These 758/// instructions require that the first operand is at the top of the stack, but 759/// otherwise don't modify the stack at all. 760void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) { 761 MachineInstr *MI = I; 762 763 unsigned Op0 = getFPReg(MI->getOperand(0)); 764 unsigned Op1 = getFPReg(MI->getOperand(2)); 765 766 // The first operand *must* be on the top of the stack. 767 moveToTop(Op0, I); 768 769 // Change the second operand to the stack register that the operand is in. 770 // Change from the pseudo instruction to the concrete instruction. 771 MI->RemoveOperand(0); 772 MI->RemoveOperand(1); 773 MI->getOperand(0).setReg(getSTReg(Op1)); 774 MI->setOpcode(getConcreteOpcode(MI->getOpcode())); 775 776 777 // If we kill the second operand, make sure to pop it from the stack. 778 if (Op0 != Op1 && LV->KillsRegister(MI, X86::FP0+Op1)) { 779 // Get this value off of the register stack. 780 freeStackSlotAfter(I, Op1); 781 } 782} 783 784 785/// handleSpecialFP - Handle special instructions which behave unlike other 786/// floating point instructions. This is primarily intended for use by pseudo 787/// instructions. 788/// 789void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { 790 MachineInstr *MI = I; 791 switch (MI->getOpcode()) { 792 default: assert(0 && "Unknown SpecialFP instruction!"); 793 case X86::FpGETRESULT: // Appears immediately after a call returning FP type! 794 assert(StackTop == 0 && "Stack should be empty after a call!"); 795 pushReg(getFPReg(MI->getOperand(0))); 796 break; 797 case X86::FpSETRESULT: 798 assert(StackTop == 1 && "Stack should have one element on it to return!"); 799 --StackTop; // "Forget" we have something on the top of stack! 800 break; 801 case X86::FpMOV: { 802 unsigned SrcReg = getFPReg(MI->getOperand(1)); 803 unsigned DestReg = getFPReg(MI->getOperand(0)); 804 805 if (LV->KillsRegister(MI, X86::FP0+SrcReg)) { 806 // If the input operand is killed, we can just change the owner of the 807 // incoming stack slot into the result. 808 unsigned Slot = getSlot(SrcReg); 809 assert(Slot < 7 && DestReg < 7 && "FpMOV operands invalid!"); 810 Stack[Slot] = DestReg; 811 RegMap[DestReg] = Slot; 812 813 } else { 814 // For FMOV we just duplicate the specified value to a new stack slot. 815 // This could be made better, but would require substantial changes. 816 duplicateToTop(SrcReg, DestReg, I); 817 } 818 break; 819 } 820 } 821 822 I = MBB->erase(I); // Remove the pseudo instruction 823 --I; 824} 825