//===-- SILowerI1Copies.cpp - Lower I1 Copies -----------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// i1 values are usually inserted by the CFG Structurize pass and they are
/// unique in that they can be copied from VALU to SALU registers.
/// This is not possible for any other value type.  Since there are no
/// MOV instructions for i1, we need to use V_CMP_* and V_CNDMASK to move the
/// i1.
///
//===----------------------------------------------------------------------===//
//
15dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
16dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#define DEBUG_TYPE "si-i1-copies"
17dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#include "AMDGPU.h"
18dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#include "SIInstrInfo.h"
19dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#include "llvm/CodeGen/LiveIntervalAnalysis.h"
20dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#include "llvm/CodeGen/MachineDominators.h"
21dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#include "llvm/CodeGen/MachineFunctionPass.h"
22dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#include "llvm/CodeGen/MachineInstrBuilder.h"
23dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#include "llvm/CodeGen/MachineRegisterInfo.h"
24dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#include "llvm/IR/LLVMContext.h"
25dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#include "llvm/IR/Function.h"
26dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#include "llvm/Support/Debug.h"
27dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#include "llvm/Target/TargetMachine.h"
28dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
29dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesusing namespace llvm;
30dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
31dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesnamespace {
32dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
33dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesclass SILowerI1Copies : public MachineFunctionPass {
34dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinespublic:
35dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  static char ID;
36dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
37dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinespublic:
38dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  SILowerI1Copies() : MachineFunctionPass(ID) {
39dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    initializeSILowerI1CopiesPass(*PassRegistry::getPassRegistry());
40dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  }
41dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
42dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  virtual bool runOnMachineFunction(MachineFunction &MF) override;
43dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
44dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  virtual const char *getPassName() const override {
45dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return "SI Lower il Copies";
46dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  }
47dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
48dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
49dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  AU.addRequired<MachineDominatorTree>();
50dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    AU.setPreservesCFG();
51dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    MachineFunctionPass::getAnalysisUsage(AU);
52dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  }
53dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines};
54dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
55dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines} // End anonymous namespace.
56dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
57dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen HinesINITIALIZE_PASS_BEGIN(SILowerI1Copies, DEBUG_TYPE,
58dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                      "SI Lower il Copies", false, false)
59dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen HinesINITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
60dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen HinesINITIALIZE_PASS_END(SILowerI1Copies, DEBUG_TYPE,
61dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                    "SI Lower il Copies", false, false)
62dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
63dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hineschar SILowerI1Copies::ID = 0;
64dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
65dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hineschar &llvm::SILowerI1CopiesID = SILowerI1Copies::ID;
66dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
67dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen HinesFunctionPass *llvm::createSILowerI1CopiesPass() {
68dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  return new SILowerI1Copies();
69dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines}
70dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
71dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesbool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) {
72dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  MachineRegisterInfo &MRI = MF.getRegInfo();
73dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
74dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      MF.getTarget().getInstrInfo());
75dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
76dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  std::vector<unsigned> I1Defs;
77dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
78dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
79dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                                                  BI != BE; ++BI) {
80dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
81dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    MachineBasicBlock &MBB = *BI;
82dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    MachineBasicBlock::iterator I, Next;
83dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    for (I = MBB.begin(); I != MBB.end(); I = Next) {
84dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      Next = std::next(I);
85dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      MachineInstr &MI = *I;
86dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
87dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      if (MI.getOpcode() == AMDGPU::V_MOV_I1) {
88dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        I1Defs.push_back(MI.getOperand(0).getReg());
89dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        MI.setDesc(TII->get(AMDGPU::V_MOV_B32_e32));
90dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        continue;
91dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      }
92dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
93dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      if (MI.getOpcode() == AMDGPU::V_AND_I1) {
94dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        I1Defs.push_back(MI.getOperand(0).getReg());
95dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        MI.setDesc(TII->get(AMDGPU::V_AND_B32_e32));
96dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        continue;
97dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      }
98dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
99dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      if (MI.getOpcode() == AMDGPU::V_OR_I1) {
100dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        I1Defs.push_back(MI.getOperand(0).getReg());
101dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        MI.setDesc(TII->get(AMDGPU::V_OR_B32_e32));
102dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        continue;
103dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      }
104dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
105dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      if (MI.getOpcode() != AMDGPU::COPY ||
106dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines          !TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()) ||
107dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines          !TargetRegisterInfo::isVirtualRegister(MI.getOperand(1).getReg()))
108dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        continue;
109dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
110dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
111dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      const TargetRegisterClass *DstRC =
112dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines          MRI.getRegClass(MI.getOperand(0).getReg());
113dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      const TargetRegisterClass *SrcRC =
114dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines          MRI.getRegClass(MI.getOperand(1).getReg());
115dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
116dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      if (DstRC == &AMDGPU::VReg_1RegClass &&
117dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines          TRI->getCommonSubClass(SrcRC, &AMDGPU::SGPR_64RegClass)) {
118dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        I1Defs.push_back(MI.getOperand(0).getReg());
119dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CNDMASK_B32_e64))
120dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                .addOperand(MI.getOperand(0))
121dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                .addImm(0)
122dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                .addImm(-1)
123dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                .addOperand(MI.getOperand(1))
124dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                .addImm(0)
125dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                .addImm(0)
126dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                .addImm(0)
127dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                .addImm(0);
128dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        MI.eraseFromParent();
129dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      } else if (TRI->getCommonSubClass(DstRC, &AMDGPU::SGPR_64RegClass) &&
130dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                 SrcRC == &AMDGPU::VReg_1RegClass) {
131dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CMP_NE_I32_e64))
132dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                .addOperand(MI.getOperand(0))
133dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                .addImm(0)
134dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                .addOperand(MI.getOperand(1))
135dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                .addImm(0)
136dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                .addImm(0)
137dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                .addImm(0)
138dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                .addImm(0);
139dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        MI.eraseFromParent();
140dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      }
141dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    }
142dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  }
143dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
144dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  for (unsigned Reg : I1Defs)
145dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    MRI.setRegClass(Reg, &AMDGPU::VReg_32RegClass);
146dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
147dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  return false;
148dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines}
149