1//===-- PTXFPRoundingModePass.cpp - Assign rounding modes pass ------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines a machine function pass that sets appropriate FP rounding
11// modes for all relevant instructions.
12//
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "ptx-fp-rounding-mode"
16
17#include "PTX.h"
18#include "PTXTargetMachine.h"
19#include "llvm/ADT/DenseMap.h"
20#include "llvm/CodeGen/MachineFunctionPass.h"
21#include "llvm/CodeGen/MachineRegisterInfo.h"
22#include "llvm/Support/Debug.h"
23#include "llvm/Support/ErrorHandling.h"
24#include "llvm/Support/raw_ostream.h"
25
26// NOTE: PTXFPRoundingModePass should be executed just before emission.
27
28namespace llvm {
29  /// PTXFPRoundingModePass - Pass to assign appropriate FP rounding modes to
30  /// all FP instructions. Essentially, this pass just looks for all FP
31  /// instructions that have a rounding mode set to RndDefault, and sets an
32  /// appropriate rounding mode based on the target device.
33  ///
34  class PTXFPRoundingModePass : public MachineFunctionPass {
35    private:
36      static char ID;
37
38      typedef std::pair<unsigned, unsigned> RndModeDesc;
39
40      PTXTargetMachine& TargetMachine;
41      DenseMap<unsigned, RndModeDesc> Instrs;
42
43    public:
44      PTXFPRoundingModePass(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel)
45        : MachineFunctionPass(ID),
46          TargetMachine(TM) {
47        initializeMap();
48      }
49
50      virtual bool runOnMachineFunction(MachineFunction &MF);
51
52      virtual const char *getPassName() const {
53        return "PTX FP Rounding Mode Pass";
54      }
55
56    private:
57
58      void initializeMap();
59      void processInstruction(MachineInstr &MI);
60  }; // class PTXFPRoundingModePass
61} // namespace llvm
62
63using namespace llvm;
64
65char PTXFPRoundingModePass::ID = 0;
66
67bool PTXFPRoundingModePass::runOnMachineFunction(MachineFunction &MF) {
68  // Look at each basic block
69  for (MachineFunction::iterator bbi = MF.begin(), bbe = MF.end(); bbi != bbe;
70       ++bbi) {
71    MachineBasicBlock &MBB = *bbi;
72    // Look at each instruction
73    for (MachineBasicBlock::iterator ii = MBB.begin(), ie = MBB.end();
74         ii != ie; ++ii) {
75      MachineInstr &MI = *ii;
76      processInstruction(MI);
77    }
78  }
79  return false;
80}
81
82void PTXFPRoundingModePass::initializeMap() {
83  using namespace PTXRoundingMode;
84  const PTXSubtarget& ST = TargetMachine.getSubtarget<PTXSubtarget>();
85
86  // Build a map of default rounding mode for all instructions that need a
87  // rounding mode.
88  Instrs[PTX::FADDrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
89  Instrs[PTX::FADDri32] = std::make_pair(1U, (unsigned)RndNearestEven);
90  Instrs[PTX::FADDrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
91  Instrs[PTX::FADDri64] = std::make_pair(1U, (unsigned)RndNearestEven);
92  Instrs[PTX::FSUBrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
93  Instrs[PTX::FSUBri32] = std::make_pair(1U, (unsigned)RndNearestEven);
94  Instrs[PTX::FSUBrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
95  Instrs[PTX::FSUBri64] = std::make_pair(1U, (unsigned)RndNearestEven);
96  Instrs[PTX::FMULrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
97  Instrs[PTX::FMULri32] = std::make_pair(1U, (unsigned)RndNearestEven);
98  Instrs[PTX::FMULrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
99  Instrs[PTX::FMULri64] = std::make_pair(1U, (unsigned)RndNearestEven);
100
101  Instrs[PTX::FNEGrr32] = std::make_pair(1U, (unsigned)RndNone);
102  Instrs[PTX::FNEGri32] = std::make_pair(1U, (unsigned)RndNone);
103  Instrs[PTX::FNEGrr64] = std::make_pair(1U, (unsigned)RndNone);
104  Instrs[PTX::FNEGri64] = std::make_pair(1U, (unsigned)RndNone);
105
106  unsigned FDivRndMode = ST.fdivNeedsRoundingMode() ? RndNearestEven : RndNone;
107  Instrs[PTX::FDIVrr32] = std::make_pair(1U, FDivRndMode);
108  Instrs[PTX::FDIVri32] = std::make_pair(1U, FDivRndMode);
109  Instrs[PTX::FDIVrr64] = std::make_pair(1U, FDivRndMode);
110  Instrs[PTX::FDIVri64] = std::make_pair(1U, FDivRndMode);
111
112  unsigned FMADRndMode = ST.fmadNeedsRoundingMode() ? RndNearestEven : RndNone;
113  Instrs[PTX::FMADrrr32] = std::make_pair(1U, FMADRndMode);
114  Instrs[PTX::FMADrri32] = std::make_pair(1U, FMADRndMode);
115  Instrs[PTX::FMADrii32] = std::make_pair(1U, FMADRndMode);
116  Instrs[PTX::FMADrrr64] = std::make_pair(1U, FMADRndMode);
117  Instrs[PTX::FMADrri64] = std::make_pair(1U, FMADRndMode);
118  Instrs[PTX::FMADrii64] = std::make_pair(1U, FMADRndMode);
119
120  Instrs[PTX::FSQRTrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
121  Instrs[PTX::FSQRTri32] = std::make_pair(1U, (unsigned)RndNearestEven);
122  Instrs[PTX::FSQRTrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
123  Instrs[PTX::FSQRTri64] = std::make_pair(1U, (unsigned)RndNearestEven);
124
125  Instrs[PTX::FSINrr32] = std::make_pair(1U, (unsigned)RndApprox);
126  Instrs[PTX::FSINri32] = std::make_pair(1U, (unsigned)RndApprox);
127  Instrs[PTX::FSINrr64] = std::make_pair(1U, (unsigned)RndApprox);
128  Instrs[PTX::FSINri64] = std::make_pair(1U, (unsigned)RndApprox);
129  Instrs[PTX::FCOSrr32] = std::make_pair(1U, (unsigned)RndApprox);
130  Instrs[PTX::FCOSri32] = std::make_pair(1U, (unsigned)RndApprox);
131  Instrs[PTX::FCOSrr64] = std::make_pair(1U, (unsigned)RndApprox);
132  Instrs[PTX::FCOSri64] = std::make_pair(1U, (unsigned)RndApprox);
133
134  Instrs[PTX::CVTu16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
135  Instrs[PTX::CVTs16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
136  Instrs[PTX::CVTu16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
137  Instrs[PTX::CVTs16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
138  Instrs[PTX::CVTu32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
139  Instrs[PTX::CVTs32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
140  Instrs[PTX::CVTu32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
141  Instrs[PTX::CVTs32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
142  Instrs[PTX::CVTu64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
143  Instrs[PTX::CVTs64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
144  Instrs[PTX::CVTu64f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
145  Instrs[PTX::CVTs64f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
146
147  Instrs[PTX::CVTf32u16] = std::make_pair(1U, (unsigned)RndNearestEven);
148  Instrs[PTX::CVTf32s16] = std::make_pair(1U, (unsigned)RndNearestEven);
149  Instrs[PTX::CVTf32u32] = std::make_pair(1U, (unsigned)RndNearestEven);
150  Instrs[PTX::CVTf32s32] = std::make_pair(1U, (unsigned)RndNearestEven);
151  Instrs[PTX::CVTf32u64] = std::make_pair(1U, (unsigned)RndNearestEven);
152  Instrs[PTX::CVTf32s64] = std::make_pair(1U, (unsigned)RndNearestEven);
153  Instrs[PTX::CVTf32f64] = std::make_pair(1U, (unsigned)RndNearestEven);
154  Instrs[PTX::CVTf64u16] = std::make_pair(1U, (unsigned)RndNearestEven);
155  Instrs[PTX::CVTf64s16] = std::make_pair(1U, (unsigned)RndNearestEven);
156  Instrs[PTX::CVTf64u32] = std::make_pair(1U, (unsigned)RndNearestEven);
157  Instrs[PTX::CVTf64s32] = std::make_pair(1U, (unsigned)RndNearestEven);
158  Instrs[PTX::CVTf64u64] = std::make_pair(1U, (unsigned)RndNearestEven);
159  Instrs[PTX::CVTf64s64] = std::make_pair(1U, (unsigned)RndNearestEven);
160}
161
162void PTXFPRoundingModePass::processInstruction(MachineInstr &MI) {
163  // Is this an instruction that needs a rounding mode?
164  if (Instrs.count(MI.getOpcode())) {
165    const RndModeDesc &Desc = Instrs[MI.getOpcode()];
166    // Get the rounding mode operand
167    MachineOperand &Op = MI.getOperand(Desc.first);
168    // Update the rounding mode if needed
169    if (Op.getImm() == PTXRoundingMode::RndDefault) {
170      Op.setImm(Desc.second);
171    }
172  }
173}
174
175FunctionPass *llvm::createPTXFPRoundingModePass(PTXTargetMachine &TM,
176                                                CodeGenOpt::Level OptLevel) {
177  return new PTXFPRoundingModePass(TM, OptLevel);
178}
179
180