SIInstrInfo.cpp revision dce4a407a24b04eebc6a376f8e62b41aaa7b071f
1//===-- SIInstrInfo.cpp - SI Instruction Information  ---------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief SI Implementation of TargetInstrInfo.
12//
13//===----------------------------------------------------------------------===//
14
15
16#include "SIInstrInfo.h"
17#include "AMDGPUTargetMachine.h"
18#include "SIDefines.h"
19#include "SIMachineFunctionInfo.h"
20#include "llvm/CodeGen/MachineInstrBuilder.h"
21#include "llvm/CodeGen/MachineRegisterInfo.h"
22#include "llvm/MC/MCInstrDesc.h"
23
24using namespace llvm;
25
26SIInstrInfo::SIInstrInfo(AMDGPUTargetMachine &tm)
27  : AMDGPUInstrInfo(tm),
28    RI(tm) { }
29
30//===----------------------------------------------------------------------===//
31// TargetInstrInfo callbacks
32//===----------------------------------------------------------------------===//
33
34void
35SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
36                         MachineBasicBlock::iterator MI, DebugLoc DL,
37                         unsigned DestReg, unsigned SrcReg,
38                         bool KillSrc) const {
39
40  // If we are trying to copy to or from SCC, there is a bug somewhere else in
41  // the backend.  While it may be theoretically possible to do this, it should
42  // never be necessary.
43  assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC);
44
45  static const int16_t Sub0_15[] = {
46    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
47    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
48    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
49    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, 0
50  };
51
52  static const int16_t Sub0_7[] = {
53    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
54    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 0
55  };
56
57  static const int16_t Sub0_3[] = {
58    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 0
59  };
60
61  static const int16_t Sub0_2[] = {
62    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, 0
63  };
64
65  static const int16_t Sub0_1[] = {
66    AMDGPU::sub0, AMDGPU::sub1, 0
67  };
68
69  unsigned Opcode;
70  const int16_t *SubIndices;
71
72  if (AMDGPU::M0 == DestReg) {
73    // Skip the copy if M0 is already set to this value.
74    for (MachineBasicBlock::reverse_iterator E = MBB.rend(),
75      I = MachineBasicBlock::reverse_iterator(MI); I != E; ++I) {
76
77      if (!I->definesRegister(AMDGPU::M0))
78        continue;
79
80      unsigned Opc = I->getOpcode();
81      if (Opc != TargetOpcode::COPY && Opc != AMDGPU::S_MOV_B32)
82        break;
83
84      if (!I->readsRegister(SrcReg))
85        break;
86
87      // The copy isn't necessary
88      return;
89    }
90  }
91
92  if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
93    assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
94    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
95            .addReg(SrcReg, getKillRegState(KillSrc));
96    return;
97
98  } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) {
99    assert(AMDGPU::SReg_64RegClass.contains(SrcReg));
100    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
101            .addReg(SrcReg, getKillRegState(KillSrc));
102    return;
103
104  } else if (AMDGPU::SReg_128RegClass.contains(DestReg)) {
105    assert(AMDGPU::SReg_128RegClass.contains(SrcReg));
106    Opcode = AMDGPU::S_MOV_B32;
107    SubIndices = Sub0_3;
108
109  } else if (AMDGPU::SReg_256RegClass.contains(DestReg)) {
110    assert(AMDGPU::SReg_256RegClass.contains(SrcReg));
111    Opcode = AMDGPU::S_MOV_B32;
112    SubIndices = Sub0_7;
113
114  } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) {
115    assert(AMDGPU::SReg_512RegClass.contains(SrcReg));
116    Opcode = AMDGPU::S_MOV_B32;
117    SubIndices = Sub0_15;
118
119  } else if (AMDGPU::VReg_32RegClass.contains(DestReg)) {
120    assert(AMDGPU::VReg_32RegClass.contains(SrcReg) ||
121           AMDGPU::SReg_32RegClass.contains(SrcReg));
122    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
123            .addReg(SrcReg, getKillRegState(KillSrc));
124    return;
125
126  } else if (AMDGPU::VReg_64RegClass.contains(DestReg)) {
127    assert(AMDGPU::VReg_64RegClass.contains(SrcReg) ||
128           AMDGPU::SReg_64RegClass.contains(SrcReg));
129    Opcode = AMDGPU::V_MOV_B32_e32;
130    SubIndices = Sub0_1;
131
132  } else if (AMDGPU::VReg_96RegClass.contains(DestReg)) {
133    assert(AMDGPU::VReg_96RegClass.contains(SrcReg));
134    Opcode = AMDGPU::V_MOV_B32_e32;
135    SubIndices = Sub0_2;
136
137  } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) {
138    assert(AMDGPU::VReg_128RegClass.contains(SrcReg) ||
139           AMDGPU::SReg_128RegClass.contains(SrcReg));
140    Opcode = AMDGPU::V_MOV_B32_e32;
141    SubIndices = Sub0_3;
142
143  } else if (AMDGPU::VReg_256RegClass.contains(DestReg)) {
144    assert(AMDGPU::VReg_256RegClass.contains(SrcReg) ||
145           AMDGPU::SReg_256RegClass.contains(SrcReg));
146    Opcode = AMDGPU::V_MOV_B32_e32;
147    SubIndices = Sub0_7;
148
149  } else if (AMDGPU::VReg_512RegClass.contains(DestReg)) {
150    assert(AMDGPU::VReg_512RegClass.contains(SrcReg) ||
151           AMDGPU::SReg_512RegClass.contains(SrcReg));
152    Opcode = AMDGPU::V_MOV_B32_e32;
153    SubIndices = Sub0_15;
154
155  } else {
156    llvm_unreachable("Can't copy register!");
157  }
158
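  // Expand the copy one 32-bit subregister at a time. Every move except the
  // last also carries an implicit def of the full destination register, so the
  // super-register is treated as live from the first partial write onwards.
  // As a rough sketch, an SReg_128 copy becomes four S_MOV_B32 instructions,
  // with the first three tagged as implicitly defining DestReg.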
159  while (unsigned SubIdx = *SubIndices++) {
160    MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
161      get(Opcode), RI.getSubReg(DestReg, SubIdx));
162
163    Builder.addReg(RI.getSubReg(SrcReg, SubIdx), getKillRegState(KillSrc));
164
165    if (*SubIndices)
166      Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
167  }
168}
169
170unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
171  int NewOpc;
172
173  // Try to map original to commuted opcode
174  if ((NewOpc = AMDGPU::getCommuteRev(Opcode)) != -1)
175    return NewOpc;
176
177  // Try to map commuted to original opcode
178  if ((NewOpc = AMDGPU::getCommuteOrig(Opcode)) != -1)
179    return NewOpc;
180
181  return Opcode;
182}
183
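// Spill an SGPR by stashing it in a lane of the SpillTracker's reserved VGPR.
// A single 32-bit SGPR is written directly with V_WRITELANE_B32; wider SGPR
// tuples are emitted as SI_SPILL_S*_SAVE pseudos and expanded after register
// allocation in expandPostRAPseudo(). Spilling actual VGPRs is not supported
// at this revision.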
184void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
185                                      MachineBasicBlock::iterator MI,
186                                      unsigned SrcReg, bool isKill,
187                                      int FrameIndex,
188                                      const TargetRegisterClass *RC,
189                                      const TargetRegisterInfo *TRI) const {
190  SIMachineFunctionInfo *MFI = MBB.getParent()->getInfo<SIMachineFunctionInfo>();
191  DebugLoc DL = MBB.findDebugLoc(MI);
192  unsigned KillFlag = isKill ? RegState::Kill : 0;
193  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
194
195  if (TRI->getCommonSubClass(RC, &AMDGPU::SGPR_32RegClass)) {
196    unsigned Lane = MFI->SpillTracker.reserveLanes(MRI, MBB.getParent());
197
198    BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32), MFI->SpillTracker.LaneVGPR)
199            .addReg(SrcReg, KillFlag)
200            .addImm(Lane);
201    MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR, Lane);
202  } else if (RI.isSGPRClass(RC)) {
203    // We are only allowed to create one new instruction when spilling
204    // registers, so we need to use a pseudo instruction for vector
205    // registers.
206    //
207    // Reserve a spot in the spill tracker for each sub-register of
208    // the vector register.
209    unsigned NumSubRegs = RC->getSize() / 4;
210    unsigned FirstLane = MFI->SpillTracker.reserveLanes(MRI, MBB.getParent(),
211                                                        NumSubRegs);
212    MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR,
213                                    FirstLane);
214
215    unsigned Opcode;
216    switch (RC->getSize() * 8) {
217    case 64:  Opcode = AMDGPU::SI_SPILL_S64_SAVE;  break;
218    case 128: Opcode = AMDGPU::SI_SPILL_S128_SAVE; break;
219    case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break;
220    case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break;
221    default: llvm_unreachable("Cannot spill register class");
222    }
223
224    BuildMI(MBB, MI, DL, get(Opcode), MFI->SpillTracker.LaneVGPR)
225            .addReg(SrcReg)
226            .addImm(FrameIndex);
227  } else {
228    llvm_unreachable("VGPR spilling not supported");
229  }
230}
231
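// Reload a spilled SGPR from its VGPR lane: a single 32-bit SGPR with
// V_READLANE_B32, wider tuples with the SI_SPILL_S*_RESTORE pseudos. Both
// paths are followed by wait states inserted via insertNOPs().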
232void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
233                                       MachineBasicBlock::iterator MI,
234                                       unsigned DestReg, int FrameIndex,
235                                       const TargetRegisterClass *RC,
236                                       const TargetRegisterInfo *TRI) const {
237  SIMachineFunctionInfo *MFI = MBB.getParent()->getInfo<SIMachineFunctionInfo>();
238  DebugLoc DL = MBB.findDebugLoc(MI);
239  if (TRI->getCommonSubClass(RC, &AMDGPU::SReg_32RegClass)) {
240    SIMachineFunctionInfo::SpilledReg Spill =
241        MFI->SpillTracker.getSpilledReg(FrameIndex);
242    assert(Spill.VGPR);
243    BuildMI(MBB, MI, DL, get(AMDGPU::V_READLANE_B32), DestReg)
244            .addReg(Spill.VGPR)
245            .addImm(Spill.Lane);
246    insertNOPs(MI, 3);
247  } else if (RI.isSGPRClass(RC)) {
248    unsigned Opcode;
249    switch (RC->getSize() * 8) {
250    case 64:  Opcode = AMDGPU::SI_SPILL_S64_RESTORE;  break;
251    case 128: Opcode = AMDGPU::SI_SPILL_S128_RESTORE; break;
252    case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break;
253    case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break;
254    default: llvm_unreachable("Cannot spill register class");
255    }
256
257    SIMachineFunctionInfo::SpilledReg Spill =
258        MFI->SpillTracker.getSpilledReg(FrameIndex);
259
260    BuildMI(MBB, MI, DL, get(Opcode), DestReg)
261            .addReg(Spill.VGPR)
262            .addImm(FrameIndex);
263    insertNOPs(MI, 3);
264  } else {
265    llvm_unreachable("VGPR spilling not supported");
266  }
267}
268
269static unsigned getNumSubRegsForSpillOp(unsigned Op) {
270
271  switch (Op) {
272  case AMDGPU::SI_SPILL_S512_SAVE:
273  case AMDGPU::SI_SPILL_S512_RESTORE:
274    return 16;
275  case AMDGPU::SI_SPILL_S256_SAVE:
276  case AMDGPU::SI_SPILL_S256_RESTORE:
277    return 8;
278  case AMDGPU::SI_SPILL_S128_SAVE:
279  case AMDGPU::SI_SPILL_S128_RESTORE:
280    return 4;
281  case AMDGPU::SI_SPILL_S64_SAVE:
282  case AMDGPU::SI_SPILL_S64_RESTORE:
283    return 2;
284  default: llvm_unreachable("Invalid spill opcode");
285  }
286}
287
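// Emit enough S_NOP instructions to cover 'Count' wait states. The S_NOP
// immediate is treated here as (wait states - 1), so one instruction covers
// at most eight wait states and larger counts are split across several
// S_NOPs.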
288void SIInstrInfo::insertNOPs(MachineBasicBlock::iterator MI,
289                             int Count) const {
290  while (Count > 0) {
291    int Arg;
292    if (Count >= 8)
293      Arg = 7;
294    else
295      Arg = Count - 1;
296    Count -= 8;
297    BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(AMDGPU::S_NOP))
298            .addImm(Arg);
299  }
300}
301
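// Expand the SGPR spill pseudos created in storeRegToStackSlot /
// loadRegFromStackSlot. Each SI_SPILL_S*_SAVE turns into one V_WRITELANE_B32
// per 32-bit subregister, and each SI_SPILL_S*_RESTORE into one
// V_READLANE_B32 per subregister, using consecutive lanes starting at the
// lane recorded in the SpillTracker for the frame index.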
302bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
303  SIMachineFunctionInfo *MFI =
304      MI->getParent()->getParent()->getInfo<SIMachineFunctionInfo>();
305  MachineBasicBlock &MBB = *MI->getParent();
306  DebugLoc DL = MBB.findDebugLoc(MI);
307  switch (MI->getOpcode()) {
308  default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);
309
310  // SGPR register spill
311  case AMDGPU::SI_SPILL_S512_SAVE:
312  case AMDGPU::SI_SPILL_S256_SAVE:
313  case AMDGPU::SI_SPILL_S128_SAVE:
314  case AMDGPU::SI_SPILL_S64_SAVE: {
315    unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
316    unsigned FrameIndex = MI->getOperand(2).getImm();
317
318    for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
319      SIMachineFunctionInfo::SpilledReg Spill;
320      unsigned SubReg = RI.getPhysRegSubReg(MI->getOperand(1).getReg(),
321                                            &AMDGPU::SGPR_32RegClass, i);
322      Spill = MFI->SpillTracker.getSpilledReg(FrameIndex);
323
324      BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32),
325              MI->getOperand(0).getReg())
326              .addReg(SubReg)
327              .addImm(Spill.Lane + i);
328    }
329    MI->eraseFromParent();
330    break;
331  }
332
333  // SGPR register restore
334  case AMDGPU::SI_SPILL_S512_RESTORE:
335  case AMDGPU::SI_SPILL_S256_RESTORE:
336  case AMDGPU::SI_SPILL_S128_RESTORE:
337  case AMDGPU::SI_SPILL_S64_RESTORE: {
338    unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
339
340    for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
341      SIMachineFunctionInfo::SpilledReg Spill;
342      unsigned FrameIndex = MI->getOperand(2).getImm();
343      unsigned SubReg = RI.getPhysRegSubReg(MI->getOperand(0).getReg(),
344                                   &AMDGPU::SGPR_32RegClass, i);
345      Spill = MFI->SpillTracker.getSpilledReg(FrameIndex);
346
347      BuildMI(MBB, MI, DL, get(AMDGPU::V_READLANE_B32), SubReg)
348              .addReg(MI->getOperand(1).getReg())
349              .addImm(Spill.Lane + i);
350    }
351    MI->eraseFromParent();
352    break;
353  }
354  }
355  return true;
356}
357
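// Commute a VOP2/VOP3 instruction. When src1 is an immediate, the swap is
// done by hand here, since TargetInstrInfo::commuteInstruction only handles
// the register/register case; the opcode is then remapped with
// commuteOpcode().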
358MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
359                                              bool NewMI) const {
360
361  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
362  if (MI->getNumOperands() < 3 || !MI->getOperand(1).isReg())
363    return nullptr;
364
365  // Cannot commute VOP2 if src0 is SGPR.
366  if (isVOP2(MI->getOpcode()) && MI->getOperand(1).isReg() &&
367      RI.isSGPRClass(MRI.getRegClass(MI->getOperand(1).getReg())))
368   return nullptr;
369
370  if (!MI->getOperand(2).isReg()) {
371    // XXX: Commute instructions with FPImm operands
372    if (NewMI || MI->getOperand(2).isFPImm() ||
373       (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))) {
374      return nullptr;
375    }
376
377    // XXX: Commute VOP3 instructions with abs and neg set.
378    if (isVOP3(MI->getOpcode()) &&
379        (MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(),
380                        AMDGPU::OpName::abs)).getImm() ||
381         MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(),
382                        AMDGPU::OpName::neg)).getImm()))
383      return nullptr;
384
385    unsigned Reg = MI->getOperand(1).getReg();
386    unsigned SubReg = MI->getOperand(1).getSubReg();
387    MI->getOperand(1).ChangeToImmediate(MI->getOperand(2).getImm());
388    MI->getOperand(2).ChangeToRegister(Reg, false);
389    MI->getOperand(2).setSubReg(SubReg);
390  } else {
391    MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
392  }
393
394  if (MI)
395    MI->setDesc(get(commuteOpcode(MI->getOpcode())));
396
397  return MI;
398}
399
400MachineInstr *SIInstrInfo::buildMovInstr(MachineBasicBlock *MBB,
401                                         MachineBasicBlock::iterator I,
402                                         unsigned DstReg,
403                                         unsigned SrcReg) const {
404  return BuildMI(*MBB, I, MBB->findDebugLoc(I), get(AMDGPU::V_MOV_B32_e32),
405                 DstReg).addReg(SrcReg);
406}
407
408bool SIInstrInfo::isMov(unsigned Opcode) const {
409  switch (Opcode) {
410  default: return false;
411  case AMDGPU::S_MOV_B32:
412  case AMDGPU::S_MOV_B64:
413  case AMDGPU::V_MOV_B32_e32:
414  case AMDGPU::V_MOV_B32_e64:
415    return true;
416  }
417}
418
419bool
420SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
421  return RC != &AMDGPU::EXECRegRegClass;
422}
423
424bool
425SIInstrInfo::isTriviallyReMaterializable(const MachineInstr *MI,
426                                         AliasAnalysis *AA) const {
427  switch (MI->getOpcode()) {
428  default: return AMDGPUInstrInfo::isTriviallyReMaterializable(MI, AA);
429  case AMDGPU::S_MOV_B32:
430  case AMDGPU::S_MOV_B64:
431  case AMDGPU::V_MOV_B32_e32:
432    return MI->getOperand(1).isImm();
433  }
434}
435
436namespace llvm {
437namespace AMDGPU {
438// Helper function generated by tablegen.  We are wrapping this with
439// an SIInstrInfo function that returns bool rather than int.
440int isDS(uint16_t Opcode);
441}
442}
443
444bool SIInstrInfo::isDS(uint16_t Opcode) const {
445  return ::AMDGPU::isDS(Opcode) != -1;
446}
447
448int SIInstrInfo::isMIMG(uint16_t Opcode) const {
449  return get(Opcode).TSFlags & SIInstrFlags::MIMG;
450}
451
452int SIInstrInfo::isSMRD(uint16_t Opcode) const {
453  return get(Opcode).TSFlags & SIInstrFlags::SMRD;
454}
455
456bool SIInstrInfo::isVOP1(uint16_t Opcode) const {
457  return get(Opcode).TSFlags & SIInstrFlags::VOP1;
458}
459
460bool SIInstrInfo::isVOP2(uint16_t Opcode) const {
461  return get(Opcode).TSFlags & SIInstrFlags::VOP2;
462}
463
464bool SIInstrInfo::isVOP3(uint16_t Opcode) const {
465  return get(Opcode).TSFlags & SIInstrFlags::VOP3;
466}
467
468bool SIInstrInfo::isVOPC(uint16_t Opcode) const {
469  return get(Opcode).TSFlags & SIInstrFlags::VOPC;
470}
471
472bool SIInstrInfo::isSALUInstr(const MachineInstr &MI) const {
473  return get(MI.getOpcode()).TSFlags & SIInstrFlags::SALU;
474}
475
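// SI inline constants are the integers -16..64 plus a handful of
// floating-point bit patterns (0.0, +/-0.5, +/-1.0, +/-2.0, +/-4.0). For
// example, 0x3f800000 (1.0f) is an inline immediate, while an arbitrary
// value such as 3.14f is not and would have to be encoded as a literal
// constant instead.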
476bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
477  int32_t Val = Imm.getSExtValue();
478  if (Val >= -16 && Val <= 64)
479    return true;
480
481  // The actual type of the operand does not seem to matter as long
482  // as the bits match one of the inline immediate values.  For example:
483  //
484  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
485  // so it is a legal inline immediate.
486  //
487  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
488  // floating-point, so it is a legal inline immediate.
489
490  return (APInt::floatToBits(0.0f) == Imm) ||
491         (APInt::floatToBits(1.0f) == Imm) ||
492         (APInt::floatToBits(-1.0f) == Imm) ||
493         (APInt::floatToBits(0.5f) == Imm) ||
494         (APInt::floatToBits(-0.5f) == Imm) ||
495         (APInt::floatToBits(2.0f) == Imm) ||
496         (APInt::floatToBits(-2.0f) == Imm) ||
497         (APInt::floatToBits(4.0f) == Imm) ||
498         (APInt::floatToBits(-4.0f) == Imm);
499}
500
501bool SIInstrInfo::isInlineConstant(const MachineOperand &MO) const {
502  if (MO.isImm())
503    return isInlineConstant(APInt(32, MO.getImm(), true));
504
505  if (MO.isFPImm()) {
506    APFloat FpImm = MO.getFPImm()->getValueAPF();
507    return isInlineConstant(FpImm.bitcastToAPInt());
508  }
509
510  return false;
511}
512
513bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO) const {
514  return (MO.isImm() || MO.isFPImm()) && !isInlineConstant(MO);
515}
516
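// Machine verifier hook: checks the operand count, the register class of
// each register operand, the VALU constant bus limit (at most one SGPR, M0,
// VCC, EXEC or literal constant read per VOP* instruction), the rule that
// VOP2/VOPC src1 must not be an immediate, and that VOP3 sources are never
// literal constants.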
517bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
518                                    StringRef &ErrInfo) const {
519  uint16_t Opcode = MI->getOpcode();
520  int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
521  int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
522  int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
523
524  // Make sure the number of operands is correct.
525  const MCInstrDesc &Desc = get(Opcode);
526  if (!Desc.isVariadic() &&
527      Desc.getNumOperands() != MI->getNumExplicitOperands()) {
528     ErrInfo = "Instruction has wrong number of operands.";
529     return false;
530  }
531
532  // Make sure the register classes are correct
533  for (unsigned i = 0, e = Desc.getNumOperands(); i != e; ++i) {
534    switch (Desc.OpInfo[i].OperandType) {
535    case MCOI::OPERAND_REGISTER:
536      break;
537    case MCOI::OPERAND_IMMEDIATE:
538      if (!MI->getOperand(i).isImm() && !MI->getOperand(i).isFPImm()) {
539        ErrInfo = "Expected immediate, but got non-immediate";
540        return false;
541      }
542      // Fall-through
543    default:
544      continue;
545    }
546
547    if (!MI->getOperand(i).isReg())
548      continue;
549
550    int RegClass = Desc.OpInfo[i].RegClass;
551    if (RegClass != -1) {
552      unsigned Reg = MI->getOperand(i).getReg();
553      if (TargetRegisterInfo::isVirtualRegister(Reg))
554        continue;
555
556      const TargetRegisterClass *RC = RI.getRegClass(RegClass);
557      if (!RC->contains(Reg)) {
558        ErrInfo = "Operand has incorrect register class.";
559        return false;
560      }
561    }
562  }
563
564
565  // Verify VOP*
566  if (isVOP1(Opcode) || isVOP2(Opcode) || isVOP3(Opcode) || isVOPC(Opcode)) {
567    unsigned ConstantBusCount = 0;
568    unsigned SGPRUsed = AMDGPU::NoRegister;
569    for (int i = 0, e = MI->getNumOperands(); i != e; ++i) {
570      const MachineOperand &MO = MI->getOperand(i);
571      if (MO.isReg() && MO.isUse() &&
572          !TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
573
574        // EXEC register uses the constant bus.
575        if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
576          ++ConstantBusCount;
577
578        // SGPRs use the constant bus
579        if (MO.getReg() == AMDGPU::M0 || MO.getReg() == AMDGPU::VCC ||
580            (!MO.isImplicit() &&
581            (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
582            AMDGPU::SGPR_64RegClass.contains(MO.getReg())))) {
583          if (SGPRUsed != MO.getReg()) {
584            ++ConstantBusCount;
585            SGPRUsed = MO.getReg();
586          }
587        }
588      }
589      // Literal constants use the constant bus.
590      if (isLiteralConstant(MO))
591        ++ConstantBusCount;
592    }
593    if (ConstantBusCount > 1) {
594      ErrInfo = "VOP* instruction uses the constant bus more than once";
595      return false;
596    }
597  }
598
599  // Verify SRC1 for VOP2 and VOPC
600  if (Src1Idx != -1 && (isVOP2(Opcode) || isVOPC(Opcode))) {
601    const MachineOperand &Src1 = MI->getOperand(Src1Idx);
602    if (Src1.isImm() || Src1.isFPImm()) {
603      ErrInfo = "VOP[2C] src1 cannot be an immediate.";
604      return false;
605    }
606  }
607
608  // Verify VOP3
609  if (isVOP3(Opcode)) {
610    if (Src0Idx != -1 && isLiteralConstant(MI->getOperand(Src0Idx))) {
611      ErrInfo = "VOP3 src0 cannot be a literal constant.";
612      return false;
613    }
614    if (Src1Idx != -1 && isLiteralConstant(MI->getOperand(Src1Idx))) {
615      ErrInfo = "VOP3 src1 cannot be a literal constant.";
616      return false;
617    }
618    if (Src2Idx != -1 && isLiteralConstant(MI->getOperand(Src2Idx))) {
619      ErrInfo = "VOP3 src2 cannot be a literal constant.";
620      return false;
621    }
622  }
623  return true;
624}
625
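// Map a scalar opcode to the VALU (or MUBUF, for SMRD loads) opcode used
// when the instruction has to be moved off the scalar unit. Returns
// INSTRUCTION_LIST_END when no vector equivalent is known.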
626unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
627  switch (MI.getOpcode()) {
628  default: return AMDGPU::INSTRUCTION_LIST_END;
629  case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
630  case AMDGPU::COPY: return AMDGPU::COPY;
631  case AMDGPU::PHI: return AMDGPU::PHI;
632  case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
633  case AMDGPU::S_MOV_B32:
634    return MI.getOperand(1).isReg() ?
635           AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
636  case AMDGPU::S_ADD_I32: return AMDGPU::V_ADD_I32_e32;
637  case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32;
638  case AMDGPU::S_SUB_I32: return AMDGPU::V_SUB_I32_e32;
639  case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
640  case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e32;
641  case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e32;
642  case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e32;
643  case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e32;
644  case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e32;
645  case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e32;
646  case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e32;
647  case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
648  case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
649  case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
650  case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64;
651  case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
652  case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64;
653  case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32;
654  case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
655  case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
656  case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
657  case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
658  case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32;
659  case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32;
660  case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32;
661  case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32;
662  case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32;
663  case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32;
664  case AMDGPU::S_LOAD_DWORD_IMM:
665  case AMDGPU::S_LOAD_DWORD_SGPR: return AMDGPU::BUFFER_LOAD_DWORD_ADDR64;
666  case AMDGPU::S_LOAD_DWORDX2_IMM:
667  case AMDGPU::S_LOAD_DWORDX2_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64;
668  case AMDGPU::S_LOAD_DWORDX4_IMM:
669  case AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64;
670  }
671}
672
673bool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const {
674  return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END;
675}
676
677const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
678                                                      unsigned OpNo) const {
679  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
680  const MCInstrDesc &Desc = get(MI.getOpcode());
681  if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
682      Desc.OpInfo[OpNo].RegClass == -1)
683    return MRI.getRegClass(MI.getOperand(OpNo).getReg());
684
685  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
686  return RI.getRegClass(RCID);
687}
688
689bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const {
690  switch (MI.getOpcode()) {
691  case AMDGPU::COPY:
692  case AMDGPU::REG_SEQUENCE:
693  case AMDGPU::PHI:
694  case AMDGPU::INSERT_SUBREG:
695    return RI.hasVGPRs(getOpRegClass(MI, 0));
696  default:
697    return RI.hasVGPRs(getOpRegClass(MI, OpNo));
698  }
699}
700
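// Make operand OpIdx legal for a VALU encoding by copying it into a fresh
// virtual register of the equivalent VGPR class (via COPY for register
// operands, or a move instruction for immediates) and rewriting the operand
// to use that register.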
701void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const {
702  MachineBasicBlock::iterator I = MI;
703  MachineOperand &MO = MI->getOperand(OpIdx);
704  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
705  unsigned RCID = get(MI->getOpcode()).OpInfo[OpIdx].RegClass;
706  const TargetRegisterClass *RC = RI.getRegClass(RCID);
707  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
708  if (MO.isReg()) {
709    Opcode = AMDGPU::COPY;
710  } else if (RI.isSGPRClass(RC)) {
711    Opcode = AMDGPU::S_MOV_B32;
712  }
713
714  const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC);
715  unsigned Reg = MRI.createVirtualRegister(VRC);
716  BuildMI(*MI->getParent(), I, MI->getParent()->findDebugLoc(I), get(Opcode),
717          Reg).addOperand(MO);
718  MO.ChangeToRegister(Reg, false);
719}
720
721unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
722                                         MachineRegisterInfo &MRI,
723                                         MachineOperand &SuperReg,
724                                         const TargetRegisterClass *SuperRC,
725                                         unsigned SubIdx,
726                                         const TargetRegisterClass *SubRC)
727                                         const {
728  assert(SuperReg.isReg());
729
730  unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
731  unsigned SubReg = MRI.createVirtualRegister(SubRC);
732
733  // Just in case the super register is itself a sub-register, copy it to a new
734// value so we don't need to worry about merging its subreg index with the
735  // SubIdx passed to this function.  The register coalescer should be able to
736  // eliminate this extra copy.
737  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY),
738          NewSuperReg)
739          .addOperand(SuperReg);
740
741  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY),
742          SubReg)
743          .addReg(NewSuperReg, 0, SubIdx);
744  return SubReg;
745}
746
747MachineOperand SIInstrInfo::buildExtractSubRegOrImm(
748  MachineBasicBlock::iterator MII,
749  MachineRegisterInfo &MRI,
750  MachineOperand &Op,
751  const TargetRegisterClass *SuperRC,
752  unsigned SubIdx,
753  const TargetRegisterClass *SubRC) const {
754  if (Op.isImm()) {
755    // XXX - Is there a better way to do this?
756    if (SubIdx == AMDGPU::sub0)
757      return MachineOperand::CreateImm(Op.getImm() & 0xFFFFFFFF);
758    if (SubIdx == AMDGPU::sub1)
759      return MachineOperand::CreateImm(Op.getImm() >> 32);
760
761    llvm_unreachable("Unhandled register index for immediate");
762  }
763
764  unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC,
765                                       SubIdx, SubRC);
766  return MachineOperand::CreateReg(SubReg, false);
767}
768
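// Materialize a 64-bit immediate for the VALU path as two 32-bit S_MOV_B32
// halves combined with a REG_SEQUENCE, roughly:
//   %lo  = S_MOV_B32 (imm & 0xFFFFFFFF)
//   %hi  = S_MOV_B32 (imm >> 32)
//   %dst = REG_SEQUENCE %lo, sub0, %hi, sub1
// The two moves are pushed onto the worklist so they can themselves be moved
// to the VALU later if necessary.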
769unsigned SIInstrInfo::split64BitImm(SmallVectorImpl<MachineInstr *> &Worklist,
770                                    MachineBasicBlock::iterator MI,
771                                    MachineRegisterInfo &MRI,
772                                    const TargetRegisterClass *RC,
773                                    const MachineOperand &Op) const {
774  MachineBasicBlock *MBB = MI->getParent();
775  DebugLoc DL = MI->getDebugLoc();
776  unsigned LoDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
777  unsigned HiDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
778  unsigned Dst = MRI.createVirtualRegister(RC);
779
780  MachineInstr *Lo = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32),
781                             LoDst)
782    .addImm(Op.getImm() & 0xFFFFFFFF);
783  MachineInstr *Hi = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32),
784                             HiDst)
785    .addImm(Op.getImm() >> 32);
786
787  BuildMI(*MBB, MI, DL, get(TargetOpcode::REG_SEQUENCE), Dst)
788    .addReg(LoDst)
789    .addImm(AMDGPU::sub0)
790    .addReg(HiDst)
791    .addImm(AMDGPU::sub1);
792
793  Worklist.push_back(Lo);
794  Worklist.push_back(Hi);
795
796  return Dst;
797}
798
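// Rewrite the operands of MI until they are legal for its encoding: enforce
// the VOP2 src1 and constant bus restrictions, give REG_SEQUENCE / PHI /
// INSERT_SUBREG operands a register class that matches their result, and
// rewrite MUBUF instructions whose srsrc operand does not yet hold a proper
// 128-bit resource descriptor.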
799void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
800  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
801  int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
802                                           AMDGPU::OpName::src0);
803  int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
804                                           AMDGPU::OpName::src1);
805  int Src2Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
806                                           AMDGPU::OpName::src2);
807
808  // Legalize VOP2
809  if (isVOP2(MI->getOpcode()) && Src1Idx != -1) {
810    MachineOperand &Src0 = MI->getOperand(Src0Idx);
811    MachineOperand &Src1 = MI->getOperand(Src1Idx);
812
813    // If the instruction implicitly reads VCC, we can't have any SGPR operands,
814    // so move any SGPR operands into VGPRs.
815    bool ReadsVCC = MI->readsRegister(AMDGPU::VCC, &RI);
816    if (ReadsVCC && Src0.isReg() &&
817        RI.isSGPRClass(MRI.getRegClass(Src0.getReg()))) {
818      legalizeOpWithMove(MI, Src0Idx);
819      return;
820    }
821
822    if (ReadsVCC && Src1.isReg() &&
823        RI.isSGPRClass(MRI.getRegClass(Src1.getReg()))) {
824      legalizeOpWithMove(MI, Src1Idx);
825      return;
826    }
827
828    // Legalize VOP2 instructions where src1 is not a VGPR. An SGPR input must
829    // be the first operand, and there can only be one.
830    if (Src1.isImm() || Src1.isFPImm() ||
831        (Src1.isReg() && RI.isSGPRClass(MRI.getRegClass(Src1.getReg())))) {
832      if (MI->isCommutable()) {
833        if (commuteInstruction(MI))
834          return;
835      }
836      legalizeOpWithMove(MI, Src1Idx);
837    }
838  }
839
840  // XXX - Do any VOP3 instructions read VCC?
841  // Legalize VOP3
842  if (isVOP3(MI->getOpcode())) {
843    int VOP3Idx[3] = {Src0Idx, Src1Idx, Src2Idx};
844    unsigned SGPRReg = AMDGPU::NoRegister;
845    for (unsigned i = 0; i < 3; ++i) {
846      int Idx = VOP3Idx[i];
847      if (Idx == -1)
848        continue;
849      MachineOperand &MO = MI->getOperand(Idx);
850
851      if (MO.isReg()) {
852        if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
853          continue; // VGPRs are legal
854
855        assert(MO.getReg() != AMDGPU::SCC && "SCC operand to VOP3 instruction");
856
857        if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
858          SGPRReg = MO.getReg();
859          // We can use one SGPR in each VOP3 instruction.
860          continue;
861        }
862      } else if (!isLiteralConstant(MO)) {
863        // If it is not a register and not a literal constant, then it must be
864        // an inline constant which is always legal.
865        continue;
866      }
867      // If we make it this far, then the operand is not legal and we must
868      // legalize it.
869      legalizeOpWithMove(MI, Idx);
870    }
871  }
872
873  // Legalize REG_SEQUENCE and PHI
874  // The register class of the operands must match the register class of
875  // the output.
876  if (MI->getOpcode() == AMDGPU::REG_SEQUENCE ||
877      MI->getOpcode() == AMDGPU::PHI) {
878    const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
879    for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
880      if (!MI->getOperand(i).isReg() ||
881          !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
882        continue;
883      const TargetRegisterClass *OpRC =
884              MRI.getRegClass(MI->getOperand(i).getReg());
885      if (RI.hasVGPRs(OpRC)) {
886        VRC = OpRC;
887      } else {
888        SRC = OpRC;
889      }
890    }
891
892    // If any of the operands are VGPR registers, then they must all be VGPRs;
893    // otherwise we will create illegal VGPR->SGPR copies when legalizing
894    // them.
895    if (VRC || !RI.isSGPRClass(getOpRegClass(*MI, 0))) {
896      if (!VRC) {
897        assert(SRC);
898        VRC = RI.getEquivalentVGPRClass(SRC);
899      }
900      RC = VRC;
901    } else {
902      RC = SRC;
903    }
904
905    // Update all the operands so they have the same type.
906    for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
907      if (!MI->getOperand(i).isReg() ||
908          !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
909        continue;
910      unsigned DstReg = MRI.createVirtualRegister(RC);
911      MachineBasicBlock *InsertBB;
912      MachineBasicBlock::iterator Insert;
913      if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) {
914        InsertBB = MI->getParent();
915        Insert = MI;
916      } else {
917        // MI is a PHI instruction.
918        InsertBB = MI->getOperand(i + 1).getMBB();
919        Insert = InsertBB->getFirstTerminator();
920      }
921      BuildMI(*InsertBB, Insert, MI->getDebugLoc(),
922              get(AMDGPU::COPY), DstReg)
923              .addOperand(MI->getOperand(i));
924      MI->getOperand(i).setReg(DstReg);
925    }
926  }
927
928  // Legalize INSERT_SUBREG
929  // src0 must have the same register class as dst
930  if (MI->getOpcode() == AMDGPU::INSERT_SUBREG) {
931    unsigned Dst = MI->getOperand(0).getReg();
932    unsigned Src0 = MI->getOperand(1).getReg();
933    const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
934    const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
935    if (DstRC != Src0RC) {
936      MachineBasicBlock &MBB = *MI->getParent();
937      unsigned NewSrc0 = MRI.createVirtualRegister(DstRC);
938      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), NewSrc0)
939              .addReg(Src0);
940      MI->getOperand(1).setReg(NewSrc0);
941    }
942    return;
943  }
944
945  // Legalize MUBUF* instructions
946  // FIXME: If we start using the non-addr64 instructions for compute, we
947  // may need to legalize them here.
948
949  int SRsrcIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
950                                            AMDGPU::OpName::srsrc);
951  int VAddrIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
952                                             AMDGPU::OpName::vaddr);
953  if (SRsrcIdx != -1 && VAddrIdx != -1) {
954    const TargetRegisterClass *VAddrRC =
955        RI.getRegClass(get(MI->getOpcode()).OpInfo[VAddrIdx].RegClass);
956
957    if (VAddrRC->getSize() == 8 &&
958       MRI.getRegClass(MI->getOperand(SRsrcIdx).getReg()) != VAddrRC) {
959      // We have a MUBUF instruction that uses a 64-bit vaddr register and
960      // srsrc has the incorrect register class.  In order to fix this, we
961      // need to extract the pointer from the resource descriptor (srsrc),
962      // add it to the value of vaddr, then store the result in the vaddr
963      // operand.  Then, we need to set the pointer field of the resource
964      // descriptor to zero.
965
966      MachineBasicBlock &MBB = *MI->getParent();
967      MachineOperand &SRsrcOp = MI->getOperand(SRsrcIdx);
968      MachineOperand &VAddrOp = MI->getOperand(VAddrIdx);
969      unsigned SRsrcPtrLo, SRsrcPtrHi, VAddrLo, VAddrHi;
970      unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
971      unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
972      unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
973      unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
974      unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
975      unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
976      unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
977
978      // SRsrcPtrLo = srsrc:sub0
979      SRsrcPtrLo = buildExtractSubReg(MI, MRI, SRsrcOp,
980          &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);
981
982      // SRsrcPtrHi = srsrc:sub1
983      SRsrcPtrHi = buildExtractSubReg(MI, MRI, SRsrcOp,
984          &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);
985
986      // VAddrLo = vaddr:sub0
987      VAddrLo = buildExtractSubReg(MI, MRI, VAddrOp,
988          &AMDGPU::VReg_64RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);
989
990      // VAddrHi = vaddr:sub1
991      VAddrHi = buildExtractSubReg(MI, MRI, VAddrOp,
992          &AMDGPU::VReg_64RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);
993
994      // NewVaddrLo = SRsrcPtrLo + VAddrLo
995      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32),
996              NewVAddrLo)
997              .addReg(SRsrcPtrLo)
998              .addReg(VAddrLo)
999              .addReg(AMDGPU::VCC, RegState::Define | RegState::Implicit);
1000
1001      // NewVaddrHi = SRsrcPtrHi + VAddrHi
1002      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADDC_U32_e32),
1003              NewVAddrHi)
1004              .addReg(SRsrcPtrHi)
1005              .addReg(VAddrHi)
1006              .addReg(AMDGPU::VCC, RegState::ImplicitDefine)
1007              .addReg(AMDGPU::VCC, RegState::Implicit);
1008
1009      // NewVaddr = {NewVaddrHi, NewVaddrLo}
1010      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
1011              NewVAddr)
1012              .addReg(NewVAddrLo)
1013              .addImm(AMDGPU::sub0)
1014              .addReg(NewVAddrHi)
1015              .addImm(AMDGPU::sub1);
1016
1017      // Zero64 = 0
1018      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
1019              Zero64)
1020              .addImm(0);
1021
1022      // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
1023      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
1024              SRsrcFormatLo)
1025              .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);
1026
1027      // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
1028      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
1029              SRsrcFormatHi)
1030              .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
1031
1032      // NewSRsrc = {Zero64, SRsrcFormat}
1033      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
1034              NewSRsrc)
1035              .addReg(Zero64)
1036              .addImm(AMDGPU::sub0_sub1)
1037              .addReg(SRsrcFormatLo)
1038              .addImm(AMDGPU::sub2)
1039              .addReg(SRsrcFormatHi)
1040              .addImm(AMDGPU::sub3);
1041
1042      // Update the instruction to use NewVaddr
1043      MI->getOperand(VAddrIdx).setReg(NewVAddr);
1044      // Update the instruction to use NewSRsrc
1045      MI->getOperand(SRsrcIdx).setReg(NewSRsrc);
1046    }
1047  }
1048}
1049
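// Rewrite an SMRD load into the corresponding BUFFER_LOAD_*_ADDR64 MUBUF
// load. A 128-bit resource descriptor is synthesized whose first dword holds
// the (possibly materialized) offset and whose last two dwords come from
// RSRC_DATA_FORMAT; the original 64-bit base pointer is moved into the
// address operand, and the offset is scaled from dwords to bytes and carried
// in the trailing immediate when it fits in 12 bits (otherwise it travels
// through the descriptor's first dword).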
1050void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const {
1051  MachineBasicBlock *MBB = MI->getParent();
1052  switch (MI->getOpcode()) {
1053    case AMDGPU::S_LOAD_DWORD_IMM:
1054    case AMDGPU::S_LOAD_DWORD_SGPR:
1055    case AMDGPU::S_LOAD_DWORDX2_IMM:
1056    case AMDGPU::S_LOAD_DWORDX2_SGPR:
1057    case AMDGPU::S_LOAD_DWORDX4_IMM:
1058    case AMDGPU::S_LOAD_DWORDX4_SGPR:
1059      unsigned NewOpcode = getVALUOp(*MI);
1060      unsigned RegOffset;
1061      unsigned ImmOffset;
1062
1063      if (MI->getOperand(2).isReg()) {
1064        RegOffset = MI->getOperand(2).getReg();
1065        ImmOffset = 0;
1066      } else {
1067        assert(MI->getOperand(2).isImm());
1068        // SMRD instructions take a dword offset and MUBUF instructions
1069        // take a byte offset.
1070        ImmOffset = MI->getOperand(2).getImm() << 2;
1071        RegOffset = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
1072        if (isUInt<12>(ImmOffset)) {
1073          BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
1074                  RegOffset)
1075                  .addImm(0);
1076        } else {
1077          BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
1078                  RegOffset)
1079                  .addImm(ImmOffset);
1080          ImmOffset = 0;
1081        }
1082      }
1083
1084      unsigned SRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
1085      unsigned DWord0 = RegOffset;
1086      unsigned DWord1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
1087      unsigned DWord2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
1088      unsigned DWord3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
1089
1090      BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord1)
1091              .addImm(0);
1092      BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord2)
1093              .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);
1094      BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord3)
1095              .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
1096      BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), SRsrc)
1097              .addReg(DWord0)
1098              .addImm(AMDGPU::sub0)
1099              .addReg(DWord1)
1100              .addImm(AMDGPU::sub1)
1101              .addReg(DWord2)
1102              .addImm(AMDGPU::sub2)
1103              .addReg(DWord3)
1104              .addImm(AMDGPU::sub3);
1105     MI->setDesc(get(NewOpcode));
1106     if (MI->getOperand(2).isReg()) {
1107       MI->getOperand(2).setReg(MI->getOperand(1).getReg());
1108     } else {
1109       MI->getOperand(2).ChangeToRegister(MI->getOperand(1).getReg(), false);
1110     }
1111     MI->getOperand(1).setReg(SRsrc);
1112     MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(ImmOffset));
1113  }
1114}
1115
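// Move a scalar instruction to the vector ALU. Works off a worklist: after an
// instruction is rewritten, every user of its (now VGPR) result that cannot
// read a VGPR is queued and converted as well, so whole chains of SALU code
// are migrated together.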
1116void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
1117  SmallVector<MachineInstr *, 128> Worklist;
1118  Worklist.push_back(&TopInst);
1119
1120  while (!Worklist.empty()) {
1121    MachineInstr *Inst = Worklist.pop_back_val();
1122    MachineBasicBlock *MBB = Inst->getParent();
1123    MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
1124
1125    unsigned Opcode = Inst->getOpcode();
1126    unsigned NewOpcode = getVALUOp(*Inst);
1127
1128    // Handle some special cases
1129    switch (Opcode) {
1130    default:
1131      if (isSMRD(Inst->getOpcode())) {
1132        moveSMRDToVALU(Inst, MRI);
1133      }
1134      break;
1135    case AMDGPU::S_MOV_B64: {
1136      DebugLoc DL = Inst->getDebugLoc();
1137
1138      // If the source operand is a register, we can replace this with a
1139      // copy.
1140      if (Inst->getOperand(1).isReg()) {
1141        MachineInstr *Copy = BuildMI(*MBB, Inst, DL, get(TargetOpcode::COPY))
1142          .addOperand(Inst->getOperand(0))
1143          .addOperand(Inst->getOperand(1));
1144        Worklist.push_back(Copy);
1145      } else {
1146        // Otherwise, we need to split this into two movs, because there is
1147        // no 64-bit VALU move instruction.
1148        unsigned Reg = Inst->getOperand(0).getReg();
1149        unsigned Dst = split64BitImm(Worklist,
1150                                     Inst,
1151                                     MRI,
1152                                     MRI.getRegClass(Reg),
1153                                     Inst->getOperand(1));
1154        MRI.replaceRegWith(Reg, Dst);
1155      }
1156      Inst->eraseFromParent();
1157      continue;
1158    }
1159    case AMDGPU::S_AND_B64:
1160      splitScalar64BitOp(Worklist, Inst, AMDGPU::S_AND_B32);
1161      Inst->eraseFromParent();
1162      continue;
1163
1164    case AMDGPU::S_OR_B64:
1165      splitScalar64BitOp(Worklist, Inst, AMDGPU::S_OR_B32);
1166      Inst->eraseFromParent();
1167      continue;
1168
1169    case AMDGPU::S_XOR_B64:
1170      splitScalar64BitOp(Worklist, Inst, AMDGPU::S_XOR_B32);
1171      Inst->eraseFromParent();
1172      continue;
1173
1174    case AMDGPU::S_NOT_B64:
1175      splitScalar64BitOp(Worklist, Inst, AMDGPU::S_NOT_B32);
1176      Inst->eraseFromParent();
1177      continue;
1178
1179    case AMDGPU::S_BFE_U64:
1180    case AMDGPU::S_BFE_I64:
1181    case AMDGPU::S_BFM_B64:
1182      llvm_unreachable("Moving this op to VALU not implemented");
1183    }
1184
1185    if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
1186      // We cannot move this instruction to the VALU, so we should try to
1187      // legalize its operands instead.
1188      legalizeOperands(Inst);
1189      continue;
1190    }
1191
1192    // Use the new VALU Opcode.
1193    const MCInstrDesc &NewDesc = get(NewOpcode);
1194    Inst->setDesc(NewDesc);
1195
1196    // Remove any references to SCC. Vector instructions can't read from it, and
1197    // we're just about to add the implicit use / defs of VCC, and we don't want
1198    // both.
1199    for (unsigned i = Inst->getNumOperands() - 1; i > 0; --i) {
1200      MachineOperand &Op = Inst->getOperand(i);
1201      if (Op.isReg() && Op.getReg() == AMDGPU::SCC)
1202        Inst->RemoveOperand(i);
1203    }
1204
1205    if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
1206      // We are converting these to a BFE, so we need to add the missing
1207      // operands for the size and offset.
1208      unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
1209      Inst->addOperand(Inst->getOperand(1));
1210      Inst->getOperand(1).ChangeToImmediate(0);
1211      Inst->addOperand(MachineOperand::CreateImm(0));
1212      Inst->addOperand(MachineOperand::CreateImm(0));
1213      Inst->addOperand(MachineOperand::CreateImm(0));
1214      Inst->addOperand(MachineOperand::CreateImm(Size));
1215
1216      // XXX - Other pointless operands. There are 4, but it seems you only need
1217      // 3 to not hit an assertion later in MCInstLower.
1218      Inst->addOperand(MachineOperand::CreateImm(0));
1219      Inst->addOperand(MachineOperand::CreateImm(0));
1220    }
1221
1222    addDescImplicitUseDef(NewDesc, Inst);
1223
1224    if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
1225      const MachineOperand &OffsetWidthOp = Inst->getOperand(2);
1226      // If we need to move this to VGPRs, we need to unpack the second operand
1227      // back into the 2 separate ones for bit offset and width.
1228      assert(OffsetWidthOp.isImm() &&
1229             "Scalar BFE is only implemented for constant width and offset");
1230      uint32_t Imm = OffsetWidthOp.getImm();
1231
1232      uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
1233      uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
1234
1235      Inst->RemoveOperand(2); // Remove old immediate.
1236      Inst->addOperand(Inst->getOperand(1));
1237      Inst->getOperand(1).ChangeToImmediate(0);
1238      Inst->addOperand(MachineOperand::CreateImm(0));
1239      Inst->addOperand(MachineOperand::CreateImm(Offset));
1240      Inst->addOperand(MachineOperand::CreateImm(0));
1241      Inst->addOperand(MachineOperand::CreateImm(BitWidth));
1242      Inst->addOperand(MachineOperand::CreateImm(0));
1243      Inst->addOperand(MachineOperand::CreateImm(0));
1244    }
1245
1246    // Update the destination register class.
1247
1248    const TargetRegisterClass *NewDstRC = getOpRegClass(*Inst, 0);
1249
1250    switch (Opcode) {
1251      // For target instructions, getOpRegClass just returns the virtual
1252      // register class associated with the operand, so we need to find an
1253      // equivalent VGPR register class in order to move the instruction to the
1254      // VALU.
1255    case AMDGPU::COPY:
1256    case AMDGPU::PHI:
1257    case AMDGPU::REG_SEQUENCE:
1258    case AMDGPU::INSERT_SUBREG:
1259      if (RI.hasVGPRs(NewDstRC))
1260        continue;
1261      NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
1262      if (!NewDstRC)
1263        continue;
1264      break;
1265    default:
1266      break;
1267    }
1268
1269    unsigned DstReg = Inst->getOperand(0).getReg();
1270    unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC);
1271    MRI.replaceRegWith(DstReg, NewDstReg);
1272
1273    // Legalize the operands
1274    legalizeOperands(Inst);
1275
1276    for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
1277           E = MRI.use_end(); I != E; ++I) {
1278      MachineInstr &UseMI = *I->getParent();
1279      if (!canReadVGPR(UseMI, I.getOperandNo())) {
1280        Worklist.push_back(&UseMI);
1281      }
1282    }
1283  }
1284}
1285
1286//===----------------------------------------------------------------------===//
1287// Indirect addressing callbacks
1288//===----------------------------------------------------------------------===//
1289
1290unsigned SIInstrInfo::calculateIndirectAddress(unsigned RegIndex,
1291                                                 unsigned Channel) const {
1292  assert(Channel == 0);
1293  return RegIndex;
1294}
1295
1296const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const {
1297  return &AMDGPU::VReg_32RegClass;
1298}
1299
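// Split a 64-bit scalar operation into two 32-bit operations of the given
// opcode on the sub0/sub1 halves and recombine the results with a
// REG_SEQUENCE. The two halves are queued on the worklist so they can be
// legalized (and moved to the VALU) afterwards.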
1300void SIInstrInfo::splitScalar64BitOp(SmallVectorImpl<MachineInstr *> &Worklist,
1301                                     MachineInstr *Inst,
1302                                     unsigned Opcode) const {
1303  MachineBasicBlock &MBB = *Inst->getParent();
1304  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
1305
1306  MachineOperand &Dest = Inst->getOperand(0);
1307  MachineOperand &Src0 = Inst->getOperand(1);
1308  MachineOperand &Src1 = Inst->getOperand(2);
1309  DebugLoc DL = Inst->getDebugLoc();
1310
1311  MachineBasicBlock::iterator MII = Inst;
1312
1313  const MCInstrDesc &InstDesc = get(Opcode);
1314  const TargetRegisterClass *Src0RC = Src0.isReg() ?
1315    MRI.getRegClass(Src0.getReg()) :
1316    &AMDGPU::SGPR_32RegClass;
1317
1318  const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
1319  const TargetRegisterClass *Src1RC = Src1.isReg() ?
1320    MRI.getRegClass(Src1.getReg()) :
1321    &AMDGPU::SGPR_32RegClass;
1322
1323  const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);
1324
1325  MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
1326                                                       AMDGPU::sub0, Src0SubRC);
1327  MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
1328                                                       AMDGPU::sub0, Src1SubRC);
1329
1330  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
1331  const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0);
1332
1333  unsigned DestSub0 = MRI.createVirtualRegister(DestRC);
1334  MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
1335    .addOperand(SrcReg0Sub0)
1336    .addOperand(SrcReg1Sub0);
1337
1338  MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
1339                                                       AMDGPU::sub1, Src0SubRC);
1340  MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
1341                                                       AMDGPU::sub1, Src1SubRC);
1342
1343  unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC);
1344  MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
1345    .addOperand(SrcReg0Sub1)
1346    .addOperand(SrcReg1Sub1);
1347
1348  unsigned FullDestReg = MRI.createVirtualRegister(DestRC);
1349  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
1350    .addReg(DestSub0)
1351    .addImm(AMDGPU::sub0)
1352    .addReg(DestSub1)
1353    .addImm(AMDGPU::sub1);
1354
1355  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
1356
1357  // Try to legalize the operands in case we need to swap the order to keep it
1358  // valid.
1359  Worklist.push_back(LoHalf);
1360  Worklist.push_back(HiHalf);
1361}
1362
1363void SIInstrInfo::addDescImplicitUseDef(const MCInstrDesc &NewDesc,
1364                                        MachineInstr *Inst) const {
1365  // Add the implicit register uses and definitions.
1366  if (NewDesc.ImplicitUses) {
1367    for (unsigned i = 0; NewDesc.ImplicitUses[i]; ++i) {
1368      unsigned Reg = NewDesc.ImplicitUses[i];
1369      Inst->addOperand(MachineOperand::CreateReg(Reg, false, true));
1370    }
1371  }
1372
1373  if (NewDesc.ImplicitDefs) {
1374    for (unsigned i = 0; NewDesc.ImplicitDefs[i]; ++i) {
1375      unsigned Reg = NewDesc.ImplicitDefs[i];
1376      Inst->addOperand(MachineOperand::CreateReg(Reg, true, true));
1377    }
1378  }
1379}
1380
1381MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
1382                                   MachineBasicBlock *MBB,
1383                                   MachineBasicBlock::iterator I,
1384                                   unsigned ValueReg,
1385                                   unsigned Address, unsigned OffsetReg) const {
1386  const DebugLoc &DL = MBB->findDebugLoc(I);
1387  unsigned IndirectBaseReg = AMDGPU::VReg_32RegClass.getRegister(
1388                                      getIndirectIndexBegin(*MBB->getParent()));
1389
1390  return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_DST_V1))
1391          .addReg(IndirectBaseReg, RegState::Define)
1392          .addOperand(I->getOperand(0))
1393          .addReg(IndirectBaseReg)
1394          .addReg(OffsetReg)
1395          .addImm(0)
1396          .addReg(ValueReg);
1397}
1398
1399MachineInstrBuilder SIInstrInfo::buildIndirectRead(
1400                                   MachineBasicBlock *MBB,
1401                                   MachineBasicBlock::iterator I,
1402                                   unsigned ValueReg,
1403                                   unsigned Address, unsigned OffsetReg) const {
1404  const DebugLoc &DL = MBB->findDebugLoc(I);
1405  unsigned IndirectBaseReg = AMDGPU::VReg_32RegClass.getRegister(
1406                                      getIndirectIndexBegin(*MBB->getParent()));
1407
1408  return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_SRC))
1409          .addOperand(I->getOperand(0))
1410          .addOperand(I->getOperand(1))
1411          .addReg(IndirectBaseReg)
1412          .addReg(OffsetReg)
1413          .addImm(0);
1414
1415}
1416
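// Reserve, in every VGPR register class, the registers that overlap the index
// range used for indirect addressing, so the allocator cannot hand them out
// for unrelated values. The std::max(0, Begin - N) starting points account
// for wider registers that begin before the range but still overlap it.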
1417void SIInstrInfo::reserveIndirectRegisters(BitVector &Reserved,
1418                                            const MachineFunction &MF) const {
1419  int End = getIndirectIndexEnd(MF);
1420  int Begin = getIndirectIndexBegin(MF);
1421
1422  if (End == -1)
1423    return;
1424
1425
1426  for (int Index = Begin; Index <= End; ++Index)
1427    Reserved.set(AMDGPU::VReg_32RegClass.getRegister(Index));
1428
1429  for (int Index = std::max(0, Begin - 1); Index <= End; ++Index)
1430    Reserved.set(AMDGPU::VReg_64RegClass.getRegister(Index));
1431
1432  for (int Index = std::max(0, Begin - 2); Index <= End; ++Index)
1433    Reserved.set(AMDGPU::VReg_96RegClass.getRegister(Index));
1434
1435  for (int Index = std::max(0, Begin - 3); Index <= End; ++Index)
1436    Reserved.set(AMDGPU::VReg_128RegClass.getRegister(Index));
1437
1438  for (int Index = std::max(0, Begin - 7); Index <= End; ++Index)
1439    Reserved.set(AMDGPU::VReg_256RegClass.getRegister(Index));
1440
1441  for (int Index = std::max(0, Begin - 15); Index <= End; ++Index)
1442    Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index));
1443}
1444