X86FloatingPoint.cpp revision afb23f48a4f5f76b4a0fca870ae5a28c27dde028
1//===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the pass which converts floating point instructions from
11// virtual registers into register stack instructions.  This pass uses live
12// variable information to indicate where the FPn registers are used and their
13// lifetimes.
14//
15// This pass is hampered by the lack of decent CFG manipulation routines for
16// machine code.  In particular, this wants to be able to split critical edges
17// as necessary, traverse the machine basic block CFG in depth-first order, and
18// allow there to be multiple machine basic blocks for each LLVM basicblock
19// (needed for critical edge splitting).
20//
21// In particular, this pass currently barfs on critical edges.  Because of this,
22// it requires the instruction selector to insert FP_REG_KILL instructions on
23// the exits of any basic block that has critical edges going from it, or which
24// branch to a critical basic block.
25//
26// FIXME: this is not implemented yet.  The stackifier pass only works on local
27// basic blocks.
28//
29//===----------------------------------------------------------------------===//
30
31#define DEBUG_TYPE "x86-codegen"
32#include "X86.h"
33#include "X86InstrInfo.h"
34#include "llvm/CodeGen/MachineFunctionPass.h"
35#include "llvm/CodeGen/MachineInstrBuilder.h"
36#include "llvm/CodeGen/MachineRegisterInfo.h"
37#include "llvm/CodeGen/Passes.h"
38#include "llvm/Target/TargetInstrInfo.h"
39#include "llvm/Target/TargetMachine.h"
40#include "llvm/Support/Debug.h"
41#include "llvm/Support/Compiler.h"
42#include "llvm/ADT/DepthFirstIterator.h"
43#include "llvm/ADT/SmallVector.h"
44#include "llvm/ADT/Statistic.h"
45#include "llvm/ADT/STLExtras.h"
46#include <algorithm>
47#include <set>
48using namespace llvm;
49
50STATISTIC(NumFXCH, "Number of fxch instructions inserted");
51STATISTIC(NumFP  , "Number of floating point instructions");
52
53namespace {
54  struct VISIBILITY_HIDDEN FPS : public MachineFunctionPass {
55    static char ID;
56    FPS() : MachineFunctionPass((intptr_t)&ID) {}
57
58    virtual bool runOnMachineFunction(MachineFunction &MF);
59
60    virtual const char *getPassName() const { return "X86 FP Stackifier"; }
61
62  private:
63    const TargetInstrInfo *TII; // Machine instruction info.
64    MachineBasicBlock *MBB;     // Current basic block
65    unsigned Stack[8];          // FP<n> Registers in each stack slot...
66    unsigned RegMap[8];         // Track which stack slot contains each register
67    unsigned StackTop;          // The current top of the FP stack.
68
69    void dumpStack() const {
70      cerr << "Stack contents:";
71      for (unsigned i = 0; i != StackTop; ++i) {
72        cerr << " FP" << Stack[i];
73        assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!");
74      }
75      cerr << "\n";
76    }
77  private:
78    // getSlot - Return the stack slot number a particular register number is
79    // in...
80    unsigned getSlot(unsigned RegNo) const {
81      assert(RegNo < 8 && "Regno out of range!");
82      return RegMap[RegNo];
83    }
84
85    // getStackEntry - Return the X86::FP<n> register in register ST(i)
86    unsigned getStackEntry(unsigned STi) const {
87      assert(STi < StackTop && "Access past stack top!");
88      return Stack[StackTop-1-STi];
89    }
90
91    // getSTReg - Return the X86::ST(i) register which contains the specified
92    // FP<RegNo> register
93    unsigned getSTReg(unsigned RegNo) const {
94      return StackTop - 1 - getSlot(RegNo) + llvm::X86::ST0;
95    }
96
97    // pushReg - Push the specified FP<n> register onto the stack
98    void pushReg(unsigned Reg) {
99      assert(Reg < 8 && "Register number out of range!");
100      assert(StackTop < 8 && "Stack overflow!");
101      Stack[StackTop] = Reg;
102      RegMap[Reg] = StackTop++;
103    }
104
105    bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; }
106    void moveToTop(unsigned RegNo, MachineBasicBlock::iterator &I) {
107      if (!isAtTop(RegNo)) {
108        unsigned STReg = getSTReg(RegNo);
109        unsigned RegOnTop = getStackEntry(0);
110
111        // Swap the slots the regs are in
112        std::swap(RegMap[RegNo], RegMap[RegOnTop]);
113
114        // Swap stack slot contents
115        assert(RegMap[RegOnTop] < StackTop);
116        std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
117
118        // Emit an fxch to update the runtime processors version of the state
119        BuildMI(*MBB, I, TII->get(X86::XCH_F)).addReg(STReg);
120        NumFXCH++;
121      }
122    }
123
124    void duplicateToTop(unsigned RegNo, unsigned AsReg, MachineInstr *I) {
125      unsigned STReg = getSTReg(RegNo);
126      pushReg(AsReg);   // New register on top of stack
127
128      BuildMI(*MBB, I, TII->get(X86::LD_Frr)).addReg(STReg);
129    }
130
131    // popStackAfter - Pop the current value off of the top of the FP stack
132    // after the specified instruction.
133    void popStackAfter(MachineBasicBlock::iterator &I);
134
135    // freeStackSlotAfter - Free the specified register from the register stack,
136    // so that it is no longer in a register.  If the register is currently at
137    // the top of the stack, we just pop the current instruction, otherwise we
138    // store the current top-of-stack into the specified slot, then pop the top
139    // of stack.
140    void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg);
141
142    bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
143
144    void handleZeroArgFP(MachineBasicBlock::iterator &I);
145    void handleOneArgFP(MachineBasicBlock::iterator &I);
146    void handleOneArgFPRW(MachineBasicBlock::iterator &I);
147    void handleTwoArgFP(MachineBasicBlock::iterator &I);
148    void handleCompareFP(MachineBasicBlock::iterator &I);
149    void handleCondMovFP(MachineBasicBlock::iterator &I);
150    void handleSpecialFP(MachineBasicBlock::iterator &I);
151  };
152  char FPS::ID = 0;
153}
154
155FunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); }
156
157/// getFPReg - Return the X86::FPx register number for the specified operand.
158/// For example, this returns 3 for X86::FP3.
159static unsigned getFPReg(const MachineOperand &MO) {
160  assert(MO.isRegister() && "Expected an FP register!");
161  unsigned Reg = MO.getReg();
162  assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!");
163  return Reg - X86::FP0;
164}
165
166
167/// runOnMachineFunction - Loop over all of the basic blocks, transforming FP
168/// register references into FP stack references.
169///
170bool FPS::runOnMachineFunction(MachineFunction &MF) {
171  // We only need to run this pass if there are any FP registers used in this
172  // function.  If it is all integer, there is nothing for us to do!
173  bool FPIsUsed = false;
174
175  assert(X86::FP6 == X86::FP0+6 && "Register enums aren't sorted right!");
176  for (unsigned i = 0; i <= 6; ++i)
177    if (MF.getRegInfo().isPhysRegUsed(X86::FP0+i)) {
178      FPIsUsed = true;
179      break;
180    }
181
182  // Early exit.
183  if (!FPIsUsed) return false;
184
185  TII = MF.getTarget().getInstrInfo();
186  StackTop = 0;
187
188  // Process the function in depth first order so that we process at least one
189  // of the predecessors for every reachable block in the function.
190  std::set<MachineBasicBlock*> Processed;
191  MachineBasicBlock *Entry = MF.begin();
192
193  bool Changed = false;
194  for (df_ext_iterator<MachineBasicBlock*, std::set<MachineBasicBlock*> >
195         I = df_ext_begin(Entry, Processed), E = df_ext_end(Entry, Processed);
196       I != E; ++I)
197    Changed |= processBasicBlock(MF, **I);
198
199  return Changed;
200}
201
202/// processBasicBlock - Loop over all of the instructions in the basic block,
203/// transforming FP instructions into their stack form.
204///
205bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
206  bool Changed = false;
207  MBB = &BB;
208
209  for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
210    MachineInstr *MI = I;
211    unsigned Flags = MI->getDesc().TSFlags;
212    if ((Flags & X86II::FPTypeMask) == X86II::NotFP)
213      continue;  // Efficiently ignore non-fp insts!
214
215    MachineInstr *PrevMI = 0;
216    if (I != BB.begin())
217      PrevMI = prior(I);
218
219    ++NumFP;  // Keep track of # of pseudo instrs
220    DOUT << "\nFPInst:\t" << *MI;
221
222    // Get dead variables list now because the MI pointer may be deleted as part
223    // of processing!
224    SmallVector<unsigned, 8> DeadRegs;
225    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
226      const MachineOperand &MO = MI->getOperand(i);
227      if (MO.isRegister() && MO.isDead())
228        DeadRegs.push_back(MO.getReg());
229    }
230
231    switch (Flags & X86II::FPTypeMask) {
232    case X86II::ZeroArgFP:  handleZeroArgFP(I); break;
233    case X86II::OneArgFP:   handleOneArgFP(I);  break;  // fstp ST(0)
234    case X86II::OneArgFPRW: handleOneArgFPRW(I); break; // ST(0) = fsqrt(ST(0))
235    case X86II::TwoArgFP:   handleTwoArgFP(I);  break;
236    case X86II::CompareFP:  handleCompareFP(I); break;
237    case X86II::CondMovFP:  handleCondMovFP(I); break;
238    case X86II::SpecialFP:  handleSpecialFP(I); break;
239    default: assert(0 && "Unknown FP Type!");
240    }
241
242    // Check to see if any of the values defined by this instruction are dead
243    // after definition.  If so, pop them.
244    for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) {
245      unsigned Reg = DeadRegs[i];
246      if (Reg >= X86::FP0 && Reg <= X86::FP6) {
247        DOUT << "Register FP#" << Reg-X86::FP0 << " is dead!\n";
248        freeStackSlotAfter(I, Reg-X86::FP0);
249      }
250    }
251
252    // Print out all of the instructions expanded to if -debug
253    DEBUG(
254      MachineBasicBlock::iterator PrevI(PrevMI);
255      if (I == PrevI) {
256        cerr << "Just deleted pseudo instruction\n";
257      } else {
258        MachineBasicBlock::iterator Start = I;
259        // Rewind to first instruction newly inserted.
260        while (Start != BB.begin() && prior(Start) != PrevI) --Start;
261        cerr << "Inserted instructions:\n\t";
262        Start->print(*cerr.stream(), &MF.getTarget());
263        while (++Start != next(I)) {}
264      }
265      dumpStack();
266    );
267
268    Changed = true;
269  }
270
271  assert(StackTop == 0 && "Stack not empty at end of basic block?");
272  return Changed;
273}
274
275//===----------------------------------------------------------------------===//
276// Efficient Lookup Table Support
277//===----------------------------------------------------------------------===//
278
279namespace {
280  struct TableEntry {
281    unsigned from;
282    unsigned to;
283    bool operator<(const TableEntry &TE) const { return from < TE.from; }
284    friend bool operator<(const TableEntry &TE, unsigned V) {
285      return TE.from < V;
286    }
287    friend bool operator<(unsigned V, const TableEntry &TE) {
288      return V < TE.from;
289    }
290  };
291}
292
293static bool TableIsSorted(const TableEntry *Table, unsigned NumEntries) {
294  for (unsigned i = 0; i != NumEntries-1; ++i)
295    if (!(Table[i] < Table[i+1])) return false;
296  return true;
297}
298
299static int Lookup(const TableEntry *Table, unsigned N, unsigned Opcode) {
300  const TableEntry *I = std::lower_bound(Table, Table+N, Opcode);
301  if (I != Table+N && I->from == Opcode)
302    return I->to;
303  return -1;
304}
305
306#ifdef NDEBUG
307#define ASSERT_SORTED(TABLE)
308#else
309#define ASSERT_SORTED(TABLE)                                              \
310  { static bool TABLE##Checked = false;                                   \
311    if (!TABLE##Checked) {                                                \
312       assert(TableIsSorted(TABLE, array_lengthof(TABLE)) &&              \
313              "All lookup tables must be sorted for efficient access!");  \
314       TABLE##Checked = true;                                             \
315    }                                                                     \
316  }
317#endif
318
319//===----------------------------------------------------------------------===//
320// Register File -> Register Stack Mapping Methods
321//===----------------------------------------------------------------------===//
322
323// OpcodeTable - Sorted map of register instructions to their stack version.
324// The first element is an register file pseudo instruction, the second is the
325// concrete X86 instruction which uses the register stack.
326//
327static const TableEntry OpcodeTable[] = {
328  { X86::ABS_Fp32     , X86::ABS_F     },
329  { X86::ABS_Fp64     , X86::ABS_F     },
330  { X86::ABS_Fp80     , X86::ABS_F     },
331  { X86::ADD_Fp32m    , X86::ADD_F32m  },
332  { X86::ADD_Fp64m    , X86::ADD_F64m  },
333  { X86::ADD_Fp64m32  , X86::ADD_F32m  },
334  { X86::ADD_Fp80m32  , X86::ADD_F32m  },
335  { X86::ADD_Fp80m64  , X86::ADD_F64m  },
336  { X86::ADD_FpI16m32 , X86::ADD_FI16m },
337  { X86::ADD_FpI16m64 , X86::ADD_FI16m },
338  { X86::ADD_FpI16m80 , X86::ADD_FI16m },
339  { X86::ADD_FpI32m32 , X86::ADD_FI32m },
340  { X86::ADD_FpI32m64 , X86::ADD_FI32m },
341  { X86::ADD_FpI32m80 , X86::ADD_FI32m },
342  { X86::CHS_Fp32     , X86::CHS_F     },
343  { X86::CHS_Fp64     , X86::CHS_F     },
344  { X86::CHS_Fp80     , X86::CHS_F     },
345  { X86::CMOVBE_Fp32  , X86::CMOVBE_F  },
346  { X86::CMOVBE_Fp64  , X86::CMOVBE_F  },
347  { X86::CMOVBE_Fp80  , X86::CMOVBE_F  },
348  { X86::CMOVB_Fp32   , X86::CMOVB_F   },
349  { X86::CMOVB_Fp64   , X86::CMOVB_F  },
350  { X86::CMOVB_Fp80   , X86::CMOVB_F  },
351  { X86::CMOVE_Fp32   , X86::CMOVE_F  },
352  { X86::CMOVE_Fp64   , X86::CMOVE_F   },
353  { X86::CMOVE_Fp80   , X86::CMOVE_F   },
354  { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F },
355  { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F },
356  { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F },
357  { X86::CMOVNB_Fp32  , X86::CMOVNB_F  },
358  { X86::CMOVNB_Fp64  , X86::CMOVNB_F  },
359  { X86::CMOVNB_Fp80  , X86::CMOVNB_F  },
360  { X86::CMOVNE_Fp32  , X86::CMOVNE_F  },
361  { X86::CMOVNE_Fp64  , X86::CMOVNE_F  },
362  { X86::CMOVNE_Fp80  , X86::CMOVNE_F  },
363  { X86::CMOVNP_Fp32  , X86::CMOVNP_F  },
364  { X86::CMOVNP_Fp64  , X86::CMOVNP_F  },
365  { X86::CMOVNP_Fp80  , X86::CMOVNP_F  },
366  { X86::CMOVP_Fp32   , X86::CMOVP_F   },
367  { X86::CMOVP_Fp64   , X86::CMOVP_F   },
368  { X86::CMOVP_Fp80   , X86::CMOVP_F   },
369  { X86::COS_Fp32     , X86::COS_F     },
370  { X86::COS_Fp64     , X86::COS_F     },
371  { X86::COS_Fp80     , X86::COS_F     },
372  { X86::DIVR_Fp32m   , X86::DIVR_F32m },
373  { X86::DIVR_Fp64m   , X86::DIVR_F64m },
374  { X86::DIVR_Fp64m32 , X86::DIVR_F32m },
375  { X86::DIVR_Fp80m32 , X86::DIVR_F32m },
376  { X86::DIVR_Fp80m64 , X86::DIVR_F64m },
377  { X86::DIVR_FpI16m32, X86::DIVR_FI16m},
378  { X86::DIVR_FpI16m64, X86::DIVR_FI16m},
379  { X86::DIVR_FpI16m80, X86::DIVR_FI16m},
380  { X86::DIVR_FpI32m32, X86::DIVR_FI32m},
381  { X86::DIVR_FpI32m64, X86::DIVR_FI32m},
382  { X86::DIVR_FpI32m80, X86::DIVR_FI32m},
383  { X86::DIV_Fp32m    , X86::DIV_F32m  },
384  { X86::DIV_Fp64m    , X86::DIV_F64m  },
385  { X86::DIV_Fp64m32  , X86::DIV_F32m  },
386  { X86::DIV_Fp80m32  , X86::DIV_F32m  },
387  { X86::DIV_Fp80m64  , X86::DIV_F64m  },
388  { X86::DIV_FpI16m32 , X86::DIV_FI16m },
389  { X86::DIV_FpI16m64 , X86::DIV_FI16m },
390  { X86::DIV_FpI16m80 , X86::DIV_FI16m },
391  { X86::DIV_FpI32m32 , X86::DIV_FI32m },
392  { X86::DIV_FpI32m64 , X86::DIV_FI32m },
393  { X86::DIV_FpI32m80 , X86::DIV_FI32m },
394  { X86::ILD_Fp16m32  , X86::ILD_F16m  },
395  { X86::ILD_Fp16m64  , X86::ILD_F16m  },
396  { X86::ILD_Fp16m80  , X86::ILD_F16m  },
397  { X86::ILD_Fp32m32  , X86::ILD_F32m  },
398  { X86::ILD_Fp32m64  , X86::ILD_F32m  },
399  { X86::ILD_Fp32m80  , X86::ILD_F32m  },
400  { X86::ILD_Fp64m32  , X86::ILD_F64m  },
401  { X86::ILD_Fp64m64  , X86::ILD_F64m  },
402  { X86::ILD_Fp64m80  , X86::ILD_F64m  },
403  { X86::ISTT_Fp16m32 , X86::ISTT_FP16m},
404  { X86::ISTT_Fp16m64 , X86::ISTT_FP16m},
405  { X86::ISTT_Fp16m80 , X86::ISTT_FP16m},
406  { X86::ISTT_Fp32m32 , X86::ISTT_FP32m},
407  { X86::ISTT_Fp32m64 , X86::ISTT_FP32m},
408  { X86::ISTT_Fp32m80 , X86::ISTT_FP32m},
409  { X86::ISTT_Fp64m32 , X86::ISTT_FP64m},
410  { X86::ISTT_Fp64m64 , X86::ISTT_FP64m},
411  { X86::ISTT_Fp64m80 , X86::ISTT_FP64m},
412  { X86::IST_Fp16m32  , X86::IST_F16m  },
413  { X86::IST_Fp16m64  , X86::IST_F16m  },
414  { X86::IST_Fp16m80  , X86::IST_F16m  },
415  { X86::IST_Fp32m32  , X86::IST_F32m  },
416  { X86::IST_Fp32m64  , X86::IST_F32m  },
417  { X86::IST_Fp32m80  , X86::IST_F32m  },
418  { X86::IST_Fp64m32  , X86::IST_FP64m },
419  { X86::IST_Fp64m64  , X86::IST_FP64m },
420  { X86::IST_Fp64m80  , X86::IST_FP64m },
421  { X86::LD_Fp032     , X86::LD_F0     },
422  { X86::LD_Fp064     , X86::LD_F0     },
423  { X86::LD_Fp080     , X86::LD_F0     },
424  { X86::LD_Fp132     , X86::LD_F1     },
425  { X86::LD_Fp164     , X86::LD_F1     },
426  { X86::LD_Fp180     , X86::LD_F1     },
427  { X86::LD_Fp32m     , X86::LD_F32m   },
428  { X86::LD_Fp32m64   , X86::LD_F32m   },
429  { X86::LD_Fp32m80   , X86::LD_F32m   },
430  { X86::LD_Fp64m     , X86::LD_F64m   },
431  { X86::LD_Fp64m80   , X86::LD_F64m   },
432  { X86::LD_Fp80m     , X86::LD_F80m   },
433  { X86::MUL_Fp32m    , X86::MUL_F32m  },
434  { X86::MUL_Fp64m    , X86::MUL_F64m  },
435  { X86::MUL_Fp64m32  , X86::MUL_F32m  },
436  { X86::MUL_Fp80m32  , X86::MUL_F32m  },
437  { X86::MUL_Fp80m64  , X86::MUL_F64m  },
438  { X86::MUL_FpI16m32 , X86::MUL_FI16m },
439  { X86::MUL_FpI16m64 , X86::MUL_FI16m },
440  { X86::MUL_FpI16m80 , X86::MUL_FI16m },
441  { X86::MUL_FpI32m32 , X86::MUL_FI32m },
442  { X86::MUL_FpI32m64 , X86::MUL_FI32m },
443  { X86::MUL_FpI32m80 , X86::MUL_FI32m },
444  { X86::SIN_Fp32     , X86::SIN_F     },
445  { X86::SIN_Fp64     , X86::SIN_F     },
446  { X86::SIN_Fp80     , X86::SIN_F     },
447  { X86::SQRT_Fp32    , X86::SQRT_F    },
448  { X86::SQRT_Fp64    , X86::SQRT_F    },
449  { X86::SQRT_Fp80    , X86::SQRT_F    },
450  { X86::ST_Fp32m     , X86::ST_F32m   },
451  { X86::ST_Fp64m     , X86::ST_F64m   },
452  { X86::ST_Fp64m32   , X86::ST_F32m   },
453  { X86::ST_Fp80m32   , X86::ST_F32m   },
454  { X86::ST_Fp80m64   , X86::ST_F64m   },
455  { X86::ST_FpP80m    , X86::ST_FP80m  },
456  { X86::SUBR_Fp32m   , X86::SUBR_F32m },
457  { X86::SUBR_Fp64m   , X86::SUBR_F64m },
458  { X86::SUBR_Fp64m32 , X86::SUBR_F32m },
459  { X86::SUBR_Fp80m32 , X86::SUBR_F32m },
460  { X86::SUBR_Fp80m64 , X86::SUBR_F64m },
461  { X86::SUBR_FpI16m32, X86::SUBR_FI16m},
462  { X86::SUBR_FpI16m64, X86::SUBR_FI16m},
463  { X86::SUBR_FpI16m80, X86::SUBR_FI16m},
464  { X86::SUBR_FpI32m32, X86::SUBR_FI32m},
465  { X86::SUBR_FpI32m64, X86::SUBR_FI32m},
466  { X86::SUBR_FpI32m80, X86::SUBR_FI32m},
467  { X86::SUB_Fp32m    , X86::SUB_F32m  },
468  { X86::SUB_Fp64m    , X86::SUB_F64m  },
469  { X86::SUB_Fp64m32  , X86::SUB_F32m  },
470  { X86::SUB_Fp80m32  , X86::SUB_F32m  },
471  { X86::SUB_Fp80m64  , X86::SUB_F64m  },
472  { X86::SUB_FpI16m32 , X86::SUB_FI16m },
473  { X86::SUB_FpI16m64 , X86::SUB_FI16m },
474  { X86::SUB_FpI16m80 , X86::SUB_FI16m },
475  { X86::SUB_FpI32m32 , X86::SUB_FI32m },
476  { X86::SUB_FpI32m64 , X86::SUB_FI32m },
477  { X86::SUB_FpI32m80 , X86::SUB_FI32m },
478  { X86::TST_Fp32     , X86::TST_F     },
479  { X86::TST_Fp64     , X86::TST_F     },
480  { X86::TST_Fp80     , X86::TST_F     },
481  { X86::UCOM_FpIr32  , X86::UCOM_FIr  },
482  { X86::UCOM_FpIr64  , X86::UCOM_FIr  },
483  { X86::UCOM_FpIr80  , X86::UCOM_FIr  },
484  { X86::UCOM_Fpr32   , X86::UCOM_Fr   },
485  { X86::UCOM_Fpr64   , X86::UCOM_Fr   },
486  { X86::UCOM_Fpr80   , X86::UCOM_Fr   },
487};
488
489static unsigned getConcreteOpcode(unsigned Opcode) {
490  ASSERT_SORTED(OpcodeTable);
491  int Opc = Lookup(OpcodeTable, array_lengthof(OpcodeTable), Opcode);
492  assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!");
493  return Opc;
494}
495
496//===----------------------------------------------------------------------===//
497// Helper Methods
498//===----------------------------------------------------------------------===//
499
500// PopTable - Sorted map of instructions to their popping version.  The first
501// element is an instruction, the second is the version which pops.
502//
503static const TableEntry PopTable[] = {
504  { X86::ADD_FrST0 , X86::ADD_FPrST0  },
505
506  { X86::DIVR_FrST0, X86::DIVR_FPrST0 },
507  { X86::DIV_FrST0 , X86::DIV_FPrST0  },
508
509  { X86::IST_F16m  , X86::IST_FP16m   },
510  { X86::IST_F32m  , X86::IST_FP32m   },
511
512  { X86::MUL_FrST0 , X86::MUL_FPrST0  },
513
514  { X86::ST_F32m   , X86::ST_FP32m    },
515  { X86::ST_F64m   , X86::ST_FP64m    },
516  { X86::ST_Frr    , X86::ST_FPrr     },
517
518  { X86::SUBR_FrST0, X86::SUBR_FPrST0 },
519  { X86::SUB_FrST0 , X86::SUB_FPrST0  },
520
521  { X86::UCOM_FIr  , X86::UCOM_FIPr   },
522
523  { X86::UCOM_FPr  , X86::UCOM_FPPr   },
524  { X86::UCOM_Fr   , X86::UCOM_FPr    },
525};
526
527/// popStackAfter - Pop the current value off of the top of the FP stack after
528/// the specified instruction.  This attempts to be sneaky and combine the pop
529/// into the instruction itself if possible.  The iterator is left pointing to
530/// the last instruction, be it a new pop instruction inserted, or the old
531/// instruction if it was modified in place.
532///
533void FPS::popStackAfter(MachineBasicBlock::iterator &I) {
534  ASSERT_SORTED(PopTable);
535  assert(StackTop > 0 && "Cannot pop empty stack!");
536  RegMap[Stack[--StackTop]] = ~0;     // Update state
537
538  // Check to see if there is a popping version of this instruction...
539  int Opcode = Lookup(PopTable, array_lengthof(PopTable), I->getOpcode());
540  if (Opcode != -1) {
541    I->setDesc(TII->get(Opcode));
542    if (Opcode == X86::UCOM_FPPr)
543      I->RemoveOperand(0);
544  } else {    // Insert an explicit pop
545    I = BuildMI(*MBB, ++I, TII->get(X86::ST_FPrr)).addReg(X86::ST0);
546  }
547}
548
549/// freeStackSlotAfter - Free the specified register from the register stack, so
550/// that it is no longer in a register.  If the register is currently at the top
551/// of the stack, we just pop the current instruction, otherwise we store the
552/// current top-of-stack into the specified slot, then pop the top of stack.
553void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) {
554  if (getStackEntry(0) == FPRegNo) {  // already at the top of stack? easy.
555    popStackAfter(I);
556    return;
557  }
558
559  // Otherwise, store the top of stack into the dead slot, killing the operand
560  // without having to add in an explicit xchg then pop.
561  //
562  unsigned STReg    = getSTReg(FPRegNo);
563  unsigned OldSlot  = getSlot(FPRegNo);
564  unsigned TopReg   = Stack[StackTop-1];
565  Stack[OldSlot]    = TopReg;
566  RegMap[TopReg]    = OldSlot;
567  RegMap[FPRegNo]   = ~0;
568  Stack[--StackTop] = ~0;
569  I = BuildMI(*MBB, ++I, TII->get(X86::ST_FPrr)).addReg(STReg);
570}
571
572
573//===----------------------------------------------------------------------===//
574// Instruction transformation implementation
575//===----------------------------------------------------------------------===//
576
577/// handleZeroArgFP - ST(0) = fld0    ST(0) = flds <mem>
578///
579void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
580  MachineInstr *MI = I;
581  unsigned DestReg = getFPReg(MI->getOperand(0));
582
583  // Change from the pseudo instruction to the concrete instruction.
584  MI->RemoveOperand(0);   // Remove the explicit ST(0) operand
585  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
586
587  // Result gets pushed on the stack.
588  pushReg(DestReg);
589}
590
591/// handleOneArgFP - fst <mem>, ST(0)
592///
593void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
594  MachineInstr *MI = I;
595  unsigned NumOps = MI->getDesc().getNumOperands();
596  assert((NumOps == 5 || NumOps == 1) &&
597         "Can only handle fst* & ftst instructions!");
598
599  // Is this the last use of the source register?
600  unsigned Reg = getFPReg(MI->getOperand(NumOps-1));
601  bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
602
603  // FISTP64m is strange because there isn't a non-popping versions.
604  // If we have one _and_ we don't want to pop the operand, duplicate the value
605  // on the stack instead of moving it.  This ensure that popping the value is
606  // always ok.
607  // Ditto FISTTP16m, FISTTP32m, FISTTP64m, ST_FpP80m.
608  //
609  if (!KillsSrc &&
610      (MI->getOpcode() == X86::IST_Fp64m32 ||
611       MI->getOpcode() == X86::ISTT_Fp16m32 ||
612       MI->getOpcode() == X86::ISTT_Fp32m32 ||
613       MI->getOpcode() == X86::ISTT_Fp64m32 ||
614       MI->getOpcode() == X86::IST_Fp64m64 ||
615       MI->getOpcode() == X86::ISTT_Fp16m64 ||
616       MI->getOpcode() == X86::ISTT_Fp32m64 ||
617       MI->getOpcode() == X86::ISTT_Fp64m64 ||
618       MI->getOpcode() == X86::IST_Fp64m80 ||
619       MI->getOpcode() == X86::ISTT_Fp16m80 ||
620       MI->getOpcode() == X86::ISTT_Fp32m80 ||
621       MI->getOpcode() == X86::ISTT_Fp64m80 ||
622       MI->getOpcode() == X86::ST_FpP80m)) {
623    duplicateToTop(Reg, 7 /*temp register*/, I);
624  } else {
625    moveToTop(Reg, I);            // Move to the top of the stack...
626  }
627
628  // Convert from the pseudo instruction to the concrete instruction.
629  MI->RemoveOperand(NumOps-1);    // Remove explicit ST(0) operand
630  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
631
632  if (MI->getOpcode() == X86::IST_FP64m ||
633      MI->getOpcode() == X86::ISTT_FP16m ||
634      MI->getOpcode() == X86::ISTT_FP32m ||
635      MI->getOpcode() == X86::ISTT_FP64m ||
636      MI->getOpcode() == X86::ST_FP80m) {
637    assert(StackTop > 0 && "Stack empty??");
638    --StackTop;
639  } else if (KillsSrc) { // Last use of operand?
640    popStackAfter(I);
641  }
642}
643
644
645/// handleOneArgFPRW: Handle instructions that read from the top of stack and
646/// replace the value with a newly computed value.  These instructions may have
647/// non-fp operands after their FP operands.
648///
649///  Examples:
650///     R1 = fchs R2
651///     R1 = fadd R2, [mem]
652///
653void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) {
654  MachineInstr *MI = I;
655  unsigned NumOps = MI->getDesc().getNumOperands();
656  assert(NumOps >= 2 && "FPRW instructions must have 2 ops!!");
657
658  // Is this the last use of the source register?
659  unsigned Reg = getFPReg(MI->getOperand(1));
660  bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
661
662  if (KillsSrc) {
663    // If this is the last use of the source register, just make sure it's on
664    // the top of the stack.
665    moveToTop(Reg, I);
666    assert(StackTop > 0 && "Stack cannot be empty!");
667    --StackTop;
668    pushReg(getFPReg(MI->getOperand(0)));
669  } else {
670    // If this is not the last use of the source register, _copy_ it to the top
671    // of the stack.
672    duplicateToTop(Reg, getFPReg(MI->getOperand(0)), I);
673  }
674
675  // Change from the pseudo instruction to the concrete instruction.
676  MI->RemoveOperand(1);   // Drop the source operand.
677  MI->RemoveOperand(0);   // Drop the destination operand.
678  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
679}
680
681
682//===----------------------------------------------------------------------===//
683// Define tables of various ways to map pseudo instructions
684//
685
686// ForwardST0Table - Map: A = B op C  into: ST(0) = ST(0) op ST(i)
687static const TableEntry ForwardST0Table[] = {
688  { X86::ADD_Fp32  , X86::ADD_FST0r },
689  { X86::ADD_Fp64  , X86::ADD_FST0r },
690  { X86::ADD_Fp80  , X86::ADD_FST0r },
691  { X86::DIV_Fp32  , X86::DIV_FST0r },
692  { X86::DIV_Fp64  , X86::DIV_FST0r },
693  { X86::DIV_Fp80  , X86::DIV_FST0r },
694  { X86::MUL_Fp32  , X86::MUL_FST0r },
695  { X86::MUL_Fp64  , X86::MUL_FST0r },
696  { X86::MUL_Fp80  , X86::MUL_FST0r },
697  { X86::SUB_Fp32  , X86::SUB_FST0r },
698  { X86::SUB_Fp64  , X86::SUB_FST0r },
699  { X86::SUB_Fp80  , X86::SUB_FST0r },
700};
701
702// ReverseST0Table - Map: A = B op C  into: ST(0) = ST(i) op ST(0)
703static const TableEntry ReverseST0Table[] = {
704  { X86::ADD_Fp32  , X86::ADD_FST0r  },   // commutative
705  { X86::ADD_Fp64  , X86::ADD_FST0r  },   // commutative
706  { X86::ADD_Fp80  , X86::ADD_FST0r  },   // commutative
707  { X86::DIV_Fp32  , X86::DIVR_FST0r },
708  { X86::DIV_Fp64  , X86::DIVR_FST0r },
709  { X86::DIV_Fp80  , X86::DIVR_FST0r },
710  { X86::MUL_Fp32  , X86::MUL_FST0r  },   // commutative
711  { X86::MUL_Fp64  , X86::MUL_FST0r  },   // commutative
712  { X86::MUL_Fp80  , X86::MUL_FST0r  },   // commutative
713  { X86::SUB_Fp32  , X86::SUBR_FST0r },
714  { X86::SUB_Fp64  , X86::SUBR_FST0r },
715  { X86::SUB_Fp80  , X86::SUBR_FST0r },
716};
717
718// ForwardSTiTable - Map: A = B op C  into: ST(i) = ST(0) op ST(i)
719static const TableEntry ForwardSTiTable[] = {
720  { X86::ADD_Fp32  , X86::ADD_FrST0  },   // commutative
721  { X86::ADD_Fp64  , X86::ADD_FrST0  },   // commutative
722  { X86::ADD_Fp80  , X86::ADD_FrST0  },   // commutative
723  { X86::DIV_Fp32  , X86::DIVR_FrST0 },
724  { X86::DIV_Fp64  , X86::DIVR_FrST0 },
725  { X86::DIV_Fp80  , X86::DIVR_FrST0 },
726  { X86::MUL_Fp32  , X86::MUL_FrST0  },   // commutative
727  { X86::MUL_Fp64  , X86::MUL_FrST0  },   // commutative
728  { X86::MUL_Fp80  , X86::MUL_FrST0  },   // commutative
729  { X86::SUB_Fp32  , X86::SUBR_FrST0 },
730  { X86::SUB_Fp64  , X86::SUBR_FrST0 },
731  { X86::SUB_Fp80  , X86::SUBR_FrST0 },
732};
733
734// ReverseSTiTable - Map: A = B op C  into: ST(i) = ST(i) op ST(0)
735static const TableEntry ReverseSTiTable[] = {
736  { X86::ADD_Fp32  , X86::ADD_FrST0 },
737  { X86::ADD_Fp64  , X86::ADD_FrST0 },
738  { X86::ADD_Fp80  , X86::ADD_FrST0 },
739  { X86::DIV_Fp32  , X86::DIV_FrST0 },
740  { X86::DIV_Fp64  , X86::DIV_FrST0 },
741  { X86::DIV_Fp80  , X86::DIV_FrST0 },
742  { X86::MUL_Fp32  , X86::MUL_FrST0 },
743  { X86::MUL_Fp64  , X86::MUL_FrST0 },
744  { X86::MUL_Fp80  , X86::MUL_FrST0 },
745  { X86::SUB_Fp32  , X86::SUB_FrST0 },
746  { X86::SUB_Fp64  , X86::SUB_FrST0 },
747  { X86::SUB_Fp80  , X86::SUB_FrST0 },
748};
749
750
751/// handleTwoArgFP - Handle instructions like FADD and friends which are virtual
752/// instructions which need to be simplified and possibly transformed.
753///
754/// Result: ST(0) = fsub  ST(0), ST(i)
755///         ST(i) = fsub  ST(0), ST(i)
756///         ST(0) = fsubr ST(0), ST(i)
757///         ST(i) = fsubr ST(0), ST(i)
758///
759void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) {
760  ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table);
761  ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable);
762  MachineInstr *MI = I;
763
764  unsigned NumOperands = MI->getDesc().getNumOperands();
765  assert(NumOperands == 3 && "Illegal TwoArgFP instruction!");
766  unsigned Dest = getFPReg(MI->getOperand(0));
767  unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2));
768  unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1));
769  bool KillsOp0 = MI->killsRegister(X86::FP0+Op0);
770  bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
771
772  unsigned TOS = getStackEntry(0);
773
774  // One of our operands must be on the top of the stack.  If neither is yet, we
775  // need to move one.
776  if (Op0 != TOS && Op1 != TOS) {   // No operand at TOS?
777    // We can choose to move either operand to the top of the stack.  If one of
778    // the operands is killed by this instruction, we want that one so that we
779    // can update right on top of the old version.
780    if (KillsOp0) {
781      moveToTop(Op0, I);         // Move dead operand to TOS.
782      TOS = Op0;
783    } else if (KillsOp1) {
784      moveToTop(Op1, I);
785      TOS = Op1;
786    } else {
787      // All of the operands are live after this instruction executes, so we
788      // cannot update on top of any operand.  Because of this, we must
789      // duplicate one of the stack elements to the top.  It doesn't matter
790      // which one we pick.
791      //
792      duplicateToTop(Op0, Dest, I);
793      Op0 = TOS = Dest;
794      KillsOp0 = true;
795    }
796  } else if (!KillsOp0 && !KillsOp1) {
797    // If we DO have one of our operands at the top of the stack, but we don't
798    // have a dead operand, we must duplicate one of the operands to a new slot
799    // on the stack.
800    duplicateToTop(Op0, Dest, I);
801    Op0 = TOS = Dest;
802    KillsOp0 = true;
803  }
804
805  // Now we know that one of our operands is on the top of the stack, and at
806  // least one of our operands is killed by this instruction.
807  assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) &&
808         "Stack conditions not set up right!");
809
810  // We decide which form to use based on what is on the top of the stack, and
811  // which operand is killed by this instruction.
812  const TableEntry *InstTable;
813  bool isForward = TOS == Op0;
814  bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0);
815  if (updateST0) {
816    if (isForward)
817      InstTable = ForwardST0Table;
818    else
819      InstTable = ReverseST0Table;
820  } else {
821    if (isForward)
822      InstTable = ForwardSTiTable;
823    else
824      InstTable = ReverseSTiTable;
825  }
826
827  int Opcode = Lookup(InstTable, array_lengthof(ForwardST0Table),
828                      MI->getOpcode());
829  assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!");
830
831  // NotTOS - The register which is not on the top of stack...
832  unsigned NotTOS = (TOS == Op0) ? Op1 : Op0;
833
834  // Replace the old instruction with a new instruction
835  MBB->remove(I++);
836  I = BuildMI(*MBB, I, TII->get(Opcode)).addReg(getSTReg(NotTOS));
837
838  // If both operands are killed, pop one off of the stack in addition to
839  // overwriting the other one.
840  if (KillsOp0 && KillsOp1 && Op0 != Op1) {
841    assert(!updateST0 && "Should have updated other operand!");
842    popStackAfter(I);   // Pop the top of stack
843  }
844
845  // Update stack information so that we know the destination register is now on
846  // the stack.
847  unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS);
848  assert(UpdatedSlot < StackTop && Dest < 7);
849  Stack[UpdatedSlot]   = Dest;
850  RegMap[Dest]         = UpdatedSlot;
851  delete MI;   // Remove the old instruction
852}
853
854/// handleCompareFP - Handle FUCOM and FUCOMI instructions, which have two FP
855/// register arguments and no explicit destinations.
856///
857void FPS::handleCompareFP(MachineBasicBlock::iterator &I) {
858  ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table);
859  ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable);
860  MachineInstr *MI = I;
861
862  unsigned NumOperands = MI->getDesc().getNumOperands();
863  assert(NumOperands == 2 && "Illegal FUCOM* instruction!");
864  unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2));
865  unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1));
866  bool KillsOp0 = MI->killsRegister(X86::FP0+Op0);
867  bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
868
869  // Make sure the first operand is on the top of stack, the other one can be
870  // anywhere.
871  moveToTop(Op0, I);
872
873  // Change from the pseudo instruction to the concrete instruction.
874  MI->getOperand(0).setReg(getSTReg(Op1));
875  MI->RemoveOperand(1);
876  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
877
878  // If any of the operands are killed by this instruction, free them.
879  if (KillsOp0) freeStackSlotAfter(I, Op0);
880  if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(I, Op1);
881}
882
883/// handleCondMovFP - Handle two address conditional move instructions.  These
884/// instructions move a st(i) register to st(0) iff a condition is true.  These
885/// instructions require that the first operand is at the top of the stack, but
886/// otherwise don't modify the stack at all.
887void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) {
888  MachineInstr *MI = I;
889
890  unsigned Op0 = getFPReg(MI->getOperand(0));
891  unsigned Op1 = getFPReg(MI->getOperand(2));
892  bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
893
894  // The first operand *must* be on the top of the stack.
895  moveToTop(Op0, I);
896
897  // Change the second operand to the stack register that the operand is in.
898  // Change from the pseudo instruction to the concrete instruction.
899  MI->RemoveOperand(0);
900  MI->RemoveOperand(1);
901  MI->getOperand(0).setReg(getSTReg(Op1));
902  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
903
904  // If we kill the second operand, make sure to pop it from the stack.
905  if (Op0 != Op1 && KillsOp1) {
906    // Get this value off of the register stack.
907    freeStackSlotAfter(I, Op1);
908  }
909}
910
911
912/// handleSpecialFP - Handle special instructions which behave unlike other
913/// floating point instructions.  This is primarily intended for use by pseudo
914/// instructions.
915///
916void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
917  MachineInstr *MI = I;
918  switch (MI->getOpcode()) {
919  default: assert(0 && "Unknown SpecialFP instruction!");
920  case X86::FpGET_ST0_32:// Appears immediately after a call returning FP type!
921  case X86::FpGET_ST0_64:// Appears immediately after a call returning FP type!
922  case X86::FpGET_ST0_80:// Appears immediately after a call returning FP type!
923    assert(StackTop == 0 && "Stack should be empty after a call!");
924    pushReg(getFPReg(MI->getOperand(0)));
925    break;
926  case X86::FpGET_ST0_ST1:
927    assert(StackTop == 0 && "Stack should be empty after a call!");
928    pushReg(getFPReg(MI->getOperand(0)));
929    pushReg(getFPReg(MI->getOperand(1)));
930    break;
931  case X86::FpSET_ST0_32:
932  case X86::FpSET_ST0_64:
933  case X86::FpSET_ST0_80:
934    assert(StackTop == 1 && "Stack should have one element on it to return!");
935    --StackTop;   // "Forget" we have something on the top of stack!
936    break;
937  case X86::MOV_Fp3232:
938  case X86::MOV_Fp3264:
939  case X86::MOV_Fp6432:
940  case X86::MOV_Fp6464:
941  case X86::MOV_Fp3280:
942  case X86::MOV_Fp6480:
943  case X86::MOV_Fp8032:
944  case X86::MOV_Fp8064:
945  case X86::MOV_Fp8080: {
946    unsigned SrcReg = getFPReg(MI->getOperand(1));
947    unsigned DestReg = getFPReg(MI->getOperand(0));
948
949    if (MI->killsRegister(X86::FP0+SrcReg)) {
950      // If the input operand is killed, we can just change the owner of the
951      // incoming stack slot into the result.
952      unsigned Slot = getSlot(SrcReg);
953      assert(Slot < 7 && DestReg < 7 && "FpMOV operands invalid!");
954      Stack[Slot] = DestReg;
955      RegMap[DestReg] = Slot;
956
957    } else {
958      // For FMOV we just duplicate the specified value to a new stack slot.
959      // This could be made better, but would require substantial changes.
960      duplicateToTop(SrcReg, DestReg, I);
961    }
962    break;
963  }
964  }
965
966  I = MBB->erase(I);  // Remove the pseudo instruction
967  --I;
968}
969