1//===-- SIInstrInfo.cpp - SI Instruction Information  ---------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief SI Implementation of TargetInstrInfo.
12//
13//===----------------------------------------------------------------------===//
14
15
16#include "SIInstrInfo.h"
17#include "AMDGPUTargetMachine.h"
18#include "SIDefines.h"
19#include "SIMachineFunctionInfo.h"
20#include "llvm/CodeGen/MachineInstrBuilder.h"
21#include "llvm/CodeGen/MachineRegisterInfo.h"
22#include "llvm/IR/Function.h"
23#include "llvm/MC/MCInstrDesc.h"
24
25using namespace llvm;
26
27SIInstrInfo::SIInstrInfo(const AMDGPUSubtarget &st)
28  : AMDGPUInstrInfo(st),
29    RI(st) { }
30
31//===----------------------------------------------------------------------===//
32// TargetInstrInfo callbacks
33//===----------------------------------------------------------------------===//
34
35void
36SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
37                         MachineBasicBlock::iterator MI, DebugLoc DL,
38                         unsigned DestReg, unsigned SrcReg,
39                         bool KillSrc) const {
40
41  // If we are trying to copy to or from SCC, there is a bug somewhere else in
42  // the backend.  While it may be theoretically possible to do this, it should
43  // never be necessary.
44  assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC);
45
46  static const int16_t Sub0_15[] = {
47    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
48    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
49    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
50    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, 0
51  };
52
53  static const int16_t Sub0_7[] = {
54    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
55    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 0
56  };
57
58  static const int16_t Sub0_3[] = {
59    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 0
60  };
61
62  static const int16_t Sub0_2[] = {
63    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, 0
64  };
65
66  static const int16_t Sub0_1[] = {
67    AMDGPU::sub0, AMDGPU::sub1, 0
68  };
69
70  unsigned Opcode;
71  const int16_t *SubIndices;
72
73  if (AMDGPU::M0 == DestReg) {
    // Check whether M0 is already set to this value; if so, the copy is
    // unnecessary and can be skipped.
75    for (MachineBasicBlock::reverse_iterator E = MBB.rend(),
76      I = MachineBasicBlock::reverse_iterator(MI); I != E; ++I) {
77
78      if (!I->definesRegister(AMDGPU::M0))
79        continue;
80
81      unsigned Opc = I->getOpcode();
82      if (Opc != TargetOpcode::COPY && Opc != AMDGPU::S_MOV_B32)
83        break;
84
85      if (!I->readsRegister(SrcReg))
86        break;
87
88      // The copy isn't necessary
89      return;
90    }
91  }
92
93  if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
94    assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
95    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
96            .addReg(SrcReg, getKillRegState(KillSrc));
97    return;
98
99  } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) {
100    assert(AMDGPU::SReg_64RegClass.contains(SrcReg));
101    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
102            .addReg(SrcReg, getKillRegState(KillSrc));
103    return;
104
105  } else if (AMDGPU::SReg_128RegClass.contains(DestReg)) {
106    assert(AMDGPU::SReg_128RegClass.contains(SrcReg));
107    Opcode = AMDGPU::S_MOV_B32;
108    SubIndices = Sub0_3;
109
110  } else if (AMDGPU::SReg_256RegClass.contains(DestReg)) {
111    assert(AMDGPU::SReg_256RegClass.contains(SrcReg));
112    Opcode = AMDGPU::S_MOV_B32;
113    SubIndices = Sub0_7;
114
115  } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) {
116    assert(AMDGPU::SReg_512RegClass.contains(SrcReg));
117    Opcode = AMDGPU::S_MOV_B32;
118    SubIndices = Sub0_15;
119
120  } else if (AMDGPU::VReg_32RegClass.contains(DestReg)) {
121    assert(AMDGPU::VReg_32RegClass.contains(SrcReg) ||
122           AMDGPU::SReg_32RegClass.contains(SrcReg));
123    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
124            .addReg(SrcReg, getKillRegState(KillSrc));
125    return;
126
127  } else if (AMDGPU::VReg_64RegClass.contains(DestReg)) {
128    assert(AMDGPU::VReg_64RegClass.contains(SrcReg) ||
129           AMDGPU::SReg_64RegClass.contains(SrcReg));
130    Opcode = AMDGPU::V_MOV_B32_e32;
131    SubIndices = Sub0_1;
132
133  } else if (AMDGPU::VReg_96RegClass.contains(DestReg)) {
134    assert(AMDGPU::VReg_96RegClass.contains(SrcReg));
135    Opcode = AMDGPU::V_MOV_B32_e32;
136    SubIndices = Sub0_2;
137
138  } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) {
139    assert(AMDGPU::VReg_128RegClass.contains(SrcReg) ||
140           AMDGPU::SReg_128RegClass.contains(SrcReg));
141    Opcode = AMDGPU::V_MOV_B32_e32;
142    SubIndices = Sub0_3;
143
144  } else if (AMDGPU::VReg_256RegClass.contains(DestReg)) {
145    assert(AMDGPU::VReg_256RegClass.contains(SrcReg) ||
146           AMDGPU::SReg_256RegClass.contains(SrcReg));
147    Opcode = AMDGPU::V_MOV_B32_e32;
148    SubIndices = Sub0_7;
149
150  } else if (AMDGPU::VReg_512RegClass.contains(DestReg)) {
151    assert(AMDGPU::VReg_512RegClass.contains(SrcReg) ||
152           AMDGPU::SReg_512RegClass.contains(SrcReg));
153    Opcode = AMDGPU::V_MOV_B32_e32;
154    SubIndices = Sub0_15;
155
156  } else {
157    llvm_unreachable("Can't copy register!");
158  }
159
160  while (unsigned SubIdx = *SubIndices++) {
161    MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
162      get(Opcode), RI.getSubReg(DestReg, SubIdx));
163
164    Builder.addReg(RI.getSubReg(SrcReg, SubIdx), getKillRegState(KillSrc));
165
166    if (*SubIndices)
167      Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
168  }
169}
170
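// Return the opcode to use for the commuted form of Opcode, using the
// tablegen-generated commute tables.  If Opcode has no commuted form, it is
// returned unchanged.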
171unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
172  int NewOpc;
173
174  // Try to map original to commuted opcode
175  if ((NewOpc = AMDGPU::getCommuteRev(Opcode)) != -1)
176    return NewOpc;
177
178  // Try to map commuted to original opcode
179  if ((NewOpc = AMDGPU::getCommuteOrig(Opcode)) != -1)
180    return NewOpc;
181
182  return Opcode;
183}
184
185void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
186                                      MachineBasicBlock::iterator MI,
187                                      unsigned SrcReg, bool isKill,
188                                      int FrameIndex,
189                                      const TargetRegisterClass *RC,
190                                      const TargetRegisterInfo *TRI) const {
191  MachineFunction *MF = MBB.getParent();
192  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
193  MachineRegisterInfo &MRI = MF->getRegInfo();
194  DebugLoc DL = MBB.findDebugLoc(MI);
195  unsigned KillFlag = isKill ? RegState::Kill : 0;
196
197  if (RI.hasVGPRs(RC)) {
198    LLVMContext &Ctx = MF->getFunction()->getContext();
199    Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Can't spill VGPR!");
200    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), AMDGPU::VGPR0)
201            .addReg(SrcReg);
202  } else if (TRI->getCommonSubClass(RC, &AMDGPU::SGPR_32RegClass)) {
203    unsigned Lane = MFI->SpillTracker.reserveLanes(MRI, MF);
204    unsigned TgtReg = MFI->SpillTracker.LaneVGPR;
205
206    BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32), TgtReg)
207            .addReg(SrcReg, KillFlag)
208            .addImm(Lane);
209    MFI->SpillTracker.addSpilledReg(FrameIndex, TgtReg, Lane);
210  } else if (RI.isSGPRClass(RC)) {
    // We are only allowed to create one new instruction when spilling
    // registers, so we need to use a pseudo instruction for multi-dword
    // SGPRs.
214    //
215    // Reserve a spot in the spill tracker for each sub-register of
216    // the vector register.
217    unsigned NumSubRegs = RC->getSize() / 4;
218    unsigned FirstLane = MFI->SpillTracker.reserveLanes(MRI, MF, NumSubRegs);
219    MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR,
220                                    FirstLane);
221
222    unsigned Opcode;
223    switch (RC->getSize() * 8) {
224    case 64:  Opcode = AMDGPU::SI_SPILL_S64_SAVE;  break;
225    case 128: Opcode = AMDGPU::SI_SPILL_S128_SAVE; break;
226    case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break;
227    case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break;
228    default: llvm_unreachable("Cannot spill register class");
229    }
230
231    BuildMI(MBB, MI, DL, get(Opcode), MFI->SpillTracker.LaneVGPR)
232            .addReg(SrcReg)
233            .addImm(FrameIndex);
234  } else {
235    llvm_unreachable("VGPR spilling not supported");
236  }
237}
238
239void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
240                                       MachineBasicBlock::iterator MI,
241                                       unsigned DestReg, int FrameIndex,
242                                       const TargetRegisterClass *RC,
243                                       const TargetRegisterInfo *TRI) const {
244  MachineFunction *MF = MBB.getParent();
245  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
246  DebugLoc DL = MBB.findDebugLoc(MI);
247
248  if (RI.hasVGPRs(RC)) {
249    LLVMContext &Ctx = MF->getFunction()->getContext();
    Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Can't retrieve spilled VGPR!");
251    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
252            .addImm(0);
253  } else if (RI.isSGPRClass(RC)){
254    unsigned Opcode;
255    switch(RC->getSize() * 8) {
256    case 32:  Opcode = AMDGPU::SI_SPILL_S32_RESTORE; break;
257    case 64:  Opcode = AMDGPU::SI_SPILL_S64_RESTORE;  break;
258    case 128: Opcode = AMDGPU::SI_SPILL_S128_RESTORE; break;
259    case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break;
260    case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break;
261    default: llvm_unreachable("Cannot spill register class");
262    }
263
264    SIMachineFunctionInfo::SpilledReg Spill =
265        MFI->SpillTracker.getSpilledReg(FrameIndex);
266
267    BuildMI(MBB, MI, DL, get(Opcode), DestReg)
268            .addReg(Spill.VGPR)
269            .addImm(FrameIndex);
270  } else {
271    llvm_unreachable("VGPR spilling not supported");
272  }
273}
274
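// Number of 32-bit lanes covered by an SGPR spill/restore pseudo opcode.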
275static unsigned getNumSubRegsForSpillOp(unsigned Op) {
276
277  switch (Op) {
278  case AMDGPU::SI_SPILL_S512_SAVE:
279  case AMDGPU::SI_SPILL_S512_RESTORE:
280    return 16;
281  case AMDGPU::SI_SPILL_S256_SAVE:
282  case AMDGPU::SI_SPILL_S256_RESTORE:
283    return 8;
284  case AMDGPU::SI_SPILL_S128_SAVE:
285  case AMDGPU::SI_SPILL_S128_RESTORE:
286    return 4;
287  case AMDGPU::SI_SPILL_S64_SAVE:
288  case AMDGPU::SI_SPILL_S64_RESTORE:
289    return 2;
290  case AMDGPU::SI_SPILL_S32_RESTORE:
291    return 1;
292  default: llvm_unreachable("Invalid spill opcode");
293  }
294}
295
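// Insert Count wait states before MI.  A single S_NOP encodes up to 8 wait
// states (its immediate holds the count minus one), so emit as many S_NOPs
// as needed.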
296void SIInstrInfo::insertNOPs(MachineBasicBlock::iterator MI,
297                             int Count) const {
298  while (Count > 0) {
299    int Arg;
300    if (Count >= 8)
301      Arg = 7;
302    else
303      Arg = Count - 1;
304    Count -= 8;
305    BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(AMDGPU::S_NOP))
306            .addImm(Arg);
307  }
308}
309
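// Expand the SGPR spill/restore pseudos into one V_WRITELANE_B32 /
// V_READLANE_B32 per 32-bit sub-register of the spilled value.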
310bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
311  SIMachineFunctionInfo *MFI =
312      MI->getParent()->getParent()->getInfo<SIMachineFunctionInfo>();
313  MachineBasicBlock &MBB = *MI->getParent();
314  DebugLoc DL = MBB.findDebugLoc(MI);
315  switch (MI->getOpcode()) {
316  default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);
317
318  // SGPR register spill
319  case AMDGPU::SI_SPILL_S512_SAVE:
320  case AMDGPU::SI_SPILL_S256_SAVE:
321  case AMDGPU::SI_SPILL_S128_SAVE:
322  case AMDGPU::SI_SPILL_S64_SAVE: {
323    unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
324    unsigned FrameIndex = MI->getOperand(2).getImm();
325
326    for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
327      SIMachineFunctionInfo::SpilledReg Spill;
328      unsigned SubReg = RI.getPhysRegSubReg(MI->getOperand(1).getReg(),
329                                            &AMDGPU::SGPR_32RegClass, i);
330      Spill = MFI->SpillTracker.getSpilledReg(FrameIndex);
331
332      BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32),
333              MI->getOperand(0).getReg())
334              .addReg(SubReg)
335              .addImm(Spill.Lane + i);
336    }
337    MI->eraseFromParent();
338    break;
339  }
340
341  // SGPR register restore
342  case AMDGPU::SI_SPILL_S512_RESTORE:
343  case AMDGPU::SI_SPILL_S256_RESTORE:
344  case AMDGPU::SI_SPILL_S128_RESTORE:
345  case AMDGPU::SI_SPILL_S64_RESTORE:
346  case AMDGPU::SI_SPILL_S32_RESTORE: {
347    unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
348
349    for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
350      SIMachineFunctionInfo::SpilledReg Spill;
351      unsigned FrameIndex = MI->getOperand(2).getImm();
352      unsigned SubReg = RI.getPhysRegSubReg(MI->getOperand(0).getReg(),
353                                   &AMDGPU::SGPR_32RegClass, i);
354      Spill = MFI->SpillTracker.getSpilledReg(FrameIndex);
355
356      BuildMI(MBB, MI, DL, get(AMDGPU::V_READLANE_B32), SubReg)
357              .addReg(MI->getOperand(1).getReg())
358              .addImm(Spill.Lane + i);
359    }
360    insertNOPs(MI, 3);
361    MI->eraseFromParent();
362    break;
363  }
364  }
365  return true;
366}
367
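// Commute MI if possible.  In addition to the generic register-register case,
// this handles VOP2/VOP3 instructions whose second source is an immediate by
// swapping the immediate into src0.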
368MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
369                                              bool NewMI) const {
370
371  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
372  if (MI->getNumOperands() < 3 || !MI->getOperand(1).isReg())
373    return nullptr;
374
375  // Cannot commute VOP2 if src0 is SGPR.
376  if (isVOP2(MI->getOpcode()) && MI->getOperand(1).isReg() &&
377      RI.isSGPRClass(MRI.getRegClass(MI->getOperand(1).getReg())))
    return nullptr;
379
380  if (!MI->getOperand(2).isReg()) {
381    // XXX: Commute instructions with FPImm operands
382    if (NewMI || MI->getOperand(2).isFPImm() ||
383       (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))) {
384      return nullptr;
385    }
386
387    // XXX: Commute VOP3 instructions with abs and neg set.
388    if (isVOP3(MI->getOpcode()) &&
389        (MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(),
390                        AMDGPU::OpName::abs)).getImm() ||
391         MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(),
392                        AMDGPU::OpName::neg)).getImm()))
393      return nullptr;
394
395    unsigned Reg = MI->getOperand(1).getReg();
396    unsigned SubReg = MI->getOperand(1).getSubReg();
397    MI->getOperand(1).ChangeToImmediate(MI->getOperand(2).getImm());
398    MI->getOperand(2).ChangeToRegister(Reg, false);
399    MI->getOperand(2).setSubReg(SubReg);
400  } else {
401    MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
402  }
403
404  if (MI)
405    MI->setDesc(get(commuteOpcode(MI->getOpcode())));
406
407  return MI;
408}
409
410MachineInstr *SIInstrInfo::buildMovInstr(MachineBasicBlock *MBB,
411                                         MachineBasicBlock::iterator I,
412                                         unsigned DstReg,
413                                         unsigned SrcReg) const {
  return BuildMI(*MBB, I, MBB->findDebugLoc(I), get(AMDGPU::V_MOV_B32_e32),
                 DstReg).addReg(SrcReg);
416}
417
418bool SIInstrInfo::isMov(unsigned Opcode) const {
419  switch(Opcode) {
420  default: return false;
421  case AMDGPU::S_MOV_B32:
422  case AMDGPU::S_MOV_B64:
423  case AMDGPU::V_MOV_B32_e32:
424  case AMDGPU::V_MOV_B32_e64:
425    return true;
426  }
427}
428
429bool
430SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
431  return RC != &AMDGPU::EXECRegRegClass;
432}
433
434bool
435SIInstrInfo::isTriviallyReMaterializable(const MachineInstr *MI,
436                                         AliasAnalysis *AA) const {
437  switch(MI->getOpcode()) {
438  default: return AMDGPUInstrInfo::isTriviallyReMaterializable(MI, AA);
439  case AMDGPU::S_MOV_B32:
440  case AMDGPU::S_MOV_B64:
441  case AMDGPU::V_MOV_B32_e32:
442    return MI->getOperand(1).isImm();
443  }
444}
445
446namespace llvm {
447namespace AMDGPU {
// Helper function generated by tablegen.  We are wrapping this with
// an SIInstrInfo function that returns bool rather than int.
450int isDS(uint16_t Opcode);
451}
452}
453
454bool SIInstrInfo::isDS(uint16_t Opcode) const {
455  return ::AMDGPU::isDS(Opcode) != -1;
456}
457
458int SIInstrInfo::isMIMG(uint16_t Opcode) const {
459  return get(Opcode).TSFlags & SIInstrFlags::MIMG;
460}
461
462int SIInstrInfo::isSMRD(uint16_t Opcode) const {
463  return get(Opcode).TSFlags & SIInstrFlags::SMRD;
464}
465
466bool SIInstrInfo::isVOP1(uint16_t Opcode) const {
467  return get(Opcode).TSFlags & SIInstrFlags::VOP1;
468}
469
470bool SIInstrInfo::isVOP2(uint16_t Opcode) const {
471  return get(Opcode).TSFlags & SIInstrFlags::VOP2;
472}
473
474bool SIInstrInfo::isVOP3(uint16_t Opcode) const {
475  return get(Opcode).TSFlags & SIInstrFlags::VOP3;
476}
477
478bool SIInstrInfo::isVOPC(uint16_t Opcode) const {
479  return get(Opcode).TSFlags & SIInstrFlags::VOPC;
480}
481
482bool SIInstrInfo::isSALUInstr(const MachineInstr &MI) const {
483  return get(MI.getOpcode()).TSFlags & SIInstrFlags::SALU;
484}
485
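// Return true if Imm can be encoded as an SI inline constant: a small integer
// in [-16, 64] or one of the supported floating-point bit patterns below.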
486bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
487  int32_t Val = Imm.getSExtValue();
488  if (Val >= -16 && Val <= 64)
489    return true;
490
491  // The actual type of the operand does not seem to matter as long
492  // as the bits match one of the inline immediate values.  For example:
493  //
494  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
495  // so it is a legal inline immediate.
496  //
497  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
498  // floating-point, so it is a legal inline immediate.
499
500  return (APInt::floatToBits(0.0f) == Imm) ||
501         (APInt::floatToBits(1.0f) == Imm) ||
502         (APInt::floatToBits(-1.0f) == Imm) ||
503         (APInt::floatToBits(0.5f) == Imm) ||
504         (APInt::floatToBits(-0.5f) == Imm) ||
505         (APInt::floatToBits(2.0f) == Imm) ||
506         (APInt::floatToBits(-2.0f) == Imm) ||
507         (APInt::floatToBits(4.0f) == Imm) ||
508         (APInt::floatToBits(-4.0f) == Imm);
509}
510
511bool SIInstrInfo::isInlineConstant(const MachineOperand &MO) const {
512  if (MO.isImm())
513    return isInlineConstant(APInt(32, MO.getImm(), true));
514
515  if (MO.isFPImm()) {
516    APFloat FpImm = MO.getFPImm()->getValueAPF();
517    return isInlineConstant(FpImm.bitcastToAPInt());
518  }
519
520  return false;
521}
522
523bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO) const {
524  return (MO.isImm() || MO.isFPImm()) && !isInlineConstant(MO);
525}
526
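// Return true if the two operands are of the same kind and hold the same
// register or immediate value.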
527static bool compareMachineOp(const MachineOperand &Op0,
528                             const MachineOperand &Op1) {
529  if (Op0.getType() != Op1.getType())
530    return false;
531
532  switch (Op0.getType()) {
533  case MachineOperand::MO_Register:
534    return Op0.getReg() == Op1.getReg();
535  case MachineOperand::MO_Immediate:
536    return Op0.getImm() == Op1.getImm();
537  case MachineOperand::MO_FPImmediate:
538    return Op0.getFPImm() == Op1.getFPImm();
539  default:
540    llvm_unreachable("Didn't expect to be comparing these operand types");
541  }
542}
543
544bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
545                                    StringRef &ErrInfo) const {
546  uint16_t Opcode = MI->getOpcode();
547  int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
548  int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
549  int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
550
551  // Make sure the number of operands is correct.
552  const MCInstrDesc &Desc = get(Opcode);
553  if (!Desc.isVariadic() &&
554      Desc.getNumOperands() != MI->getNumExplicitOperands()) {
555     ErrInfo = "Instruction has wrong number of operands.";
556     return false;
557  }
558
559  // Make sure the register classes are correct
560  for (unsigned i = 0, e = Desc.getNumOperands(); i != e; ++i) {
561    switch (Desc.OpInfo[i].OperandType) {
562    case MCOI::OPERAND_REGISTER: {
563      int RegClass = Desc.OpInfo[i].RegClass;
564      if (!RI.regClassCanUseImmediate(RegClass) &&
565          (MI->getOperand(i).isImm() || MI->getOperand(i).isFPImm())) {
566        ErrInfo = "Expected register, but got immediate";
567        return false;
568      }
569    }
570      break;
571    case MCOI::OPERAND_IMMEDIATE:
572      if (!MI->getOperand(i).isImm() && !MI->getOperand(i).isFPImm()) {
573        ErrInfo = "Expected immediate, but got non-immediate";
574        return false;
575      }
576      // Fall-through
577    default:
578      continue;
579    }
580
581    if (!MI->getOperand(i).isReg())
582      continue;
583
584    int RegClass = Desc.OpInfo[i].RegClass;
585    if (RegClass != -1) {
586      unsigned Reg = MI->getOperand(i).getReg();
587      if (TargetRegisterInfo::isVirtualRegister(Reg))
588        continue;
589
590      const TargetRegisterClass *RC = RI.getRegClass(RegClass);
591      if (!RC->contains(Reg)) {
592        ErrInfo = "Operand has incorrect register class.";
593        return false;
594      }
595    }
596  }
597
  // Verify VOP*
600  if (isVOP1(Opcode) || isVOP2(Opcode) || isVOP3(Opcode) || isVOPC(Opcode)) {
601    unsigned ConstantBusCount = 0;
602    unsigned SGPRUsed = AMDGPU::NoRegister;
603    for (int i = 0, e = MI->getNumOperands(); i != e; ++i) {
604      const MachineOperand &MO = MI->getOperand(i);
605      if (MO.isReg() && MO.isUse() &&
606          !TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
607
608        // EXEC register uses the constant bus.
609        if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
610          ++ConstantBusCount;
611
612        // SGPRs use the constant bus
613        if (MO.getReg() == AMDGPU::M0 || MO.getReg() == AMDGPU::VCC ||
614            (!MO.isImplicit() &&
615            (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
616            AMDGPU::SGPR_64RegClass.contains(MO.getReg())))) {
617          if (SGPRUsed != MO.getReg()) {
618            ++ConstantBusCount;
619            SGPRUsed = MO.getReg();
620          }
621        }
622      }
623      // Literal constants use the constant bus.
624      if (isLiteralConstant(MO))
625        ++ConstantBusCount;
626    }
627    if (ConstantBusCount > 1) {
628      ErrInfo = "VOP* instruction uses the constant bus more than once";
629      return false;
630    }
631  }
632
633  // Verify SRC1 for VOP2 and VOPC
634  if (Src1Idx != -1 && (isVOP2(Opcode) || isVOPC(Opcode))) {
635    const MachineOperand &Src1 = MI->getOperand(Src1Idx);
636    if (Src1.isImm() || Src1.isFPImm()) {
637      ErrInfo = "VOP[2C] src1 cannot be an immediate.";
638      return false;
639    }
640  }
641
642  // Verify VOP3
643  if (isVOP3(Opcode)) {
644    if (Src0Idx != -1 && isLiteralConstant(MI->getOperand(Src0Idx))) {
645      ErrInfo = "VOP3 src0 cannot be a literal constant.";
646      return false;
647    }
648    if (Src1Idx != -1 && isLiteralConstant(MI->getOperand(Src1Idx))) {
649      ErrInfo = "VOP3 src1 cannot be a literal constant.";
650      return false;
651    }
652    if (Src2Idx != -1 && isLiteralConstant(MI->getOperand(Src2Idx))) {
653      ErrInfo = "VOP3 src2 cannot be a literal constant.";
654      return false;
655    }
656  }
657
658  // Verify misc. restrictions on specific instructions.
659  if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 ||
660      Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) {
662
663    const MachineOperand &Src0 = MI->getOperand(2);
664    const MachineOperand &Src1 = MI->getOperand(3);
665    const MachineOperand &Src2 = MI->getOperand(4);
666    if (Src0.isReg() && Src1.isReg() && Src2.isReg()) {
667      if (!compareMachineOp(Src0, Src1) &&
668          !compareMachineOp(Src0, Src2)) {
669        ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";
670        return false;
671      }
672    }
673  }
674
675  return true;
676}
677
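// Map a scalar (SALU/SMRD) opcode to its VALU/MUBUF equivalent, or
// INSTRUCTION_LIST_END if there is no equivalent vector opcode.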
678unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
679  switch (MI.getOpcode()) {
680  default: return AMDGPU::INSTRUCTION_LIST_END;
681  case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
682  case AMDGPU::COPY: return AMDGPU::COPY;
683  case AMDGPU::PHI: return AMDGPU::PHI;
684  case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
685  case AMDGPU::S_MOV_B32:
686    return MI.getOperand(1).isReg() ?
687           AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
688  case AMDGPU::S_ADD_I32: return AMDGPU::V_ADD_I32_e32;
689  case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32;
690  case AMDGPU::S_SUB_I32: return AMDGPU::V_SUB_I32_e32;
691  case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
692  case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e32;
693  case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e32;
694  case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e32;
695  case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e32;
696  case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e32;
697  case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e32;
698  case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e32;
699  case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
700  case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
701  case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
702  case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64;
703  case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
704  case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64;
705  case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32;
706  case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
707  case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
708  case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
709  case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
710  case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
711  case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
712  case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32;
713  case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32;
714  case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32;
715  case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32;
716  case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32;
717  case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32;
718  case AMDGPU::S_LOAD_DWORD_IMM:
719  case AMDGPU::S_LOAD_DWORD_SGPR: return AMDGPU::BUFFER_LOAD_DWORD_ADDR64;
720  case AMDGPU::S_LOAD_DWORDX2_IMM:
721  case AMDGPU::S_LOAD_DWORDX2_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64;
722  case AMDGPU::S_LOAD_DWORDX4_IMM:
723  case AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64;
724  case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e32;
725  case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
726  case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
727  }
728}
729
730bool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const {
731  return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END;
732}
733
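// Return the register class required for operand OpNo of MI.  For pseudo
// instructions and operands without a fixed class, fall back to the class of
// the virtual register currently assigned to the operand.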
734const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
735                                                      unsigned OpNo) const {
736  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
737  const MCInstrDesc &Desc = get(MI.getOpcode());
738  if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
739      Desc.OpInfo[OpNo].RegClass == -1)
740    return MRI.getRegClass(MI.getOperand(OpNo).getReg());
741
742  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
743  return RI.getRegClass(RCID);
744}
745
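// Return true if operand OpNo of MI is allowed to be a VGPR.  Copy-like
// pseudos can read VGPRs only when their result is in a VGPR class.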
746bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const {
747  switch (MI.getOpcode()) {
748  case AMDGPU::COPY:
749  case AMDGPU::REG_SEQUENCE:
750  case AMDGPU::PHI:
751  case AMDGPU::INSERT_SUBREG:
752    return RI.hasVGPRs(getOpRegClass(MI, 0));
753  default:
754    return RI.hasVGPRs(getOpRegClass(MI, OpNo));
755  }
756}
757
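// Replace operand OpIdx with a new VGPR virtual register, initialized by a
// MOV/COPY of the original operand inserted before MI.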
758void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const {
759  MachineBasicBlock::iterator I = MI;
760  MachineOperand &MO = MI->getOperand(OpIdx);
761  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
762  unsigned RCID = get(MI->getOpcode()).OpInfo[OpIdx].RegClass;
763  const TargetRegisterClass *RC = RI.getRegClass(RCID);
764  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
765  if (MO.isReg()) {
766    Opcode = AMDGPU::COPY;
767  } else if (RI.isSGPRClass(RC)) {
768    Opcode = AMDGPU::S_MOV_B32;
769  }
770
771  const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC);
772  unsigned Reg = MRI.createVirtualRegister(VRC);
773  BuildMI(*MI->getParent(), I, MI->getParent()->findDebugLoc(I), get(Opcode),
774          Reg).addOperand(MO);
775  MO.ChangeToRegister(Reg, false);
776}
777
778unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
779                                         MachineRegisterInfo &MRI,
780                                         MachineOperand &SuperReg,
781                                         const TargetRegisterClass *SuperRC,
782                                         unsigned SubIdx,
783                                         const TargetRegisterClass *SubRC)
784                                         const {
785  assert(SuperReg.isReg());
786
787  unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
788  unsigned SubReg = MRI.createVirtualRegister(SubRC);
789
790  // Just in case the super register is itself a sub-register, copy it to a new
791  // value so we don't need to worry about merging its subreg index with the
792  // SubIdx passed to this function. The register coalescer should be able to
793  // eliminate this extra copy.
794  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY),
795          NewSuperReg)
796          .addOperand(SuperReg);
797
798  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY),
799          SubReg)
800          .addReg(NewSuperReg, 0, SubIdx);
801  return SubReg;
802}
803
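// Like buildExtractSubReg, but an immediate super operand is simply split
// into its low or high 32 bits instead of being copied.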
804MachineOperand SIInstrInfo::buildExtractSubRegOrImm(
805  MachineBasicBlock::iterator MII,
806  MachineRegisterInfo &MRI,
807  MachineOperand &Op,
808  const TargetRegisterClass *SuperRC,
809  unsigned SubIdx,
810  const TargetRegisterClass *SubRC) const {
811  if (Op.isImm()) {
812    // XXX - Is there a better way to do this?
813    if (SubIdx == AMDGPU::sub0)
814      return MachineOperand::CreateImm(Op.getImm() & 0xFFFFFFFF);
815    if (SubIdx == AMDGPU::sub1)
816      return MachineOperand::CreateImm(Op.getImm() >> 32);
817
818    llvm_unreachable("Unhandled register index for immediate");
819  }
820
821  unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC,
822                                       SubIdx, SubRC);
823  return MachineOperand::CreateReg(SubReg, false);
824}
825
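// Materialize the 64-bit immediate Op as two S_MOV_B32s combined with a
// REG_SEQUENCE.  The two moves are pushed onto Worklist so they can later be
// moved to the VALU themselves.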
826unsigned SIInstrInfo::split64BitImm(SmallVectorImpl<MachineInstr *> &Worklist,
827                                    MachineBasicBlock::iterator MI,
828                                    MachineRegisterInfo &MRI,
829                                    const TargetRegisterClass *RC,
830                                    const MachineOperand &Op) const {
831  MachineBasicBlock *MBB = MI->getParent();
832  DebugLoc DL = MI->getDebugLoc();
833  unsigned LoDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
834  unsigned HiDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
835  unsigned Dst = MRI.createVirtualRegister(RC);
836
837  MachineInstr *Lo = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32),
838                             LoDst)
839    .addImm(Op.getImm() & 0xFFFFFFFF);
840  MachineInstr *Hi = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32),
841                             HiDst)
842    .addImm(Op.getImm() >> 32);
843
844  BuildMI(*MBB, MI, DL, get(TargetOpcode::REG_SEQUENCE), Dst)
845    .addReg(LoDst)
846    .addImm(AMDGPU::sub0)
847    .addReg(HiDst)
848    .addImm(AMDGPU::sub1);
849
850  Worklist.push_back(Lo);
851  Worklist.push_back(Hi);
852
853  return Dst;
854}
855
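// Rewrite the operands of MI so they are legal for its opcode: enforce the
// VOP2/VOP3 SGPR and constant-bus restrictions, give REG_SEQUENCE / PHI /
// INSERT_SUBREG operands a register class matching their result, and fix up
// the srsrc/vaddr operands of ADDR64 MUBUF instructions.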
856void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
857  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
858  int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
859                                           AMDGPU::OpName::src0);
860  int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
861                                           AMDGPU::OpName::src1);
862  int Src2Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
863                                           AMDGPU::OpName::src2);
864
865  // Legalize VOP2
866  if (isVOP2(MI->getOpcode()) && Src1Idx != -1) {
867    MachineOperand &Src0 = MI->getOperand(Src0Idx);
868    MachineOperand &Src1 = MI->getOperand(Src1Idx);
869
    // If the instruction implicitly reads VCC, we can't have any SGPR operands,
    // so move any SGPR operands to VGPRs.
872    bool ReadsVCC = MI->readsRegister(AMDGPU::VCC, &RI);
873    if (ReadsVCC && Src0.isReg() &&
874        RI.isSGPRClass(MRI.getRegClass(Src0.getReg()))) {
875      legalizeOpWithMove(MI, Src0Idx);
876      return;
877    }
878
879    if (ReadsVCC && Src1.isReg() &&
880        RI.isSGPRClass(MRI.getRegClass(Src1.getReg()))) {
881      legalizeOpWithMove(MI, Src1Idx);
882      return;
883    }
884
885    // Legalize VOP2 instructions where src1 is not a VGPR. An SGPR input must
886    // be the first operand, and there can only be one.
887    if (Src1.isImm() || Src1.isFPImm() ||
888        (Src1.isReg() && RI.isSGPRClass(MRI.getRegClass(Src1.getReg())))) {
889      if (MI->isCommutable()) {
890        if (commuteInstruction(MI))
891          return;
892      }
893      legalizeOpWithMove(MI, Src1Idx);
894    }
895  }
896
897  // XXX - Do any VOP3 instructions read VCC?
898  // Legalize VOP3
899  if (isVOP3(MI->getOpcode())) {
900    int VOP3Idx[3] = {Src0Idx, Src1Idx, Src2Idx};
901    unsigned SGPRReg = AMDGPU::NoRegister;
902    for (unsigned i = 0; i < 3; ++i) {
903      int Idx = VOP3Idx[i];
904      if (Idx == -1)
905        continue;
906      MachineOperand &MO = MI->getOperand(Idx);
907
908      if (MO.isReg()) {
909        if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
910          continue; // VGPRs are legal
911
912        assert(MO.getReg() != AMDGPU::SCC && "SCC operand to VOP3 instruction");
913
914        if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
915          SGPRReg = MO.getReg();
916          // We can use one SGPR in each VOP3 instruction.
917          continue;
918        }
919      } else if (!isLiteralConstant(MO)) {
920        // If it is not a register and not a literal constant, then it must be
921        // an inline constant which is always legal.
922        continue;
923      }
924      // If we make it this far, then the operand is not legal and we must
925      // legalize it.
926      legalizeOpWithMove(MI, Idx);
927    }
928  }
929
  // Legalize REG_SEQUENCE and PHI
  // The register class of the operands must be the same type as the register
  // class of the output.
933  if (MI->getOpcode() == AMDGPU::REG_SEQUENCE ||
934      MI->getOpcode() == AMDGPU::PHI) {
935    const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
936    for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
937      if (!MI->getOperand(i).isReg() ||
938          !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
939        continue;
940      const TargetRegisterClass *OpRC =
941              MRI.getRegClass(MI->getOperand(i).getReg());
942      if (RI.hasVGPRs(OpRC)) {
943        VRC = OpRC;
944      } else {
945        SRC = OpRC;
946      }
947    }
948
    // If any of the operands are VGPR registers, then they all must be VGPRs;
    // otherwise we will create illegal VGPR->SGPR copies when legalizing
    // them.
952    if (VRC || !RI.isSGPRClass(getOpRegClass(*MI, 0))) {
953      if (!VRC) {
954        assert(SRC);
955        VRC = RI.getEquivalentVGPRClass(SRC);
956      }
957      RC = VRC;
958    } else {
959      RC = SRC;
960    }
961
962    // Update all the operands so they have the same type.
963    for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
964      if (!MI->getOperand(i).isReg() ||
965          !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
966        continue;
967      unsigned DstReg = MRI.createVirtualRegister(RC);
968      MachineBasicBlock *InsertBB;
969      MachineBasicBlock::iterator Insert;
970      if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) {
971        InsertBB = MI->getParent();
972        Insert = MI;
973      } else {
974        // MI is a PHI instruction.
975        InsertBB = MI->getOperand(i + 1).getMBB();
976        Insert = InsertBB->getFirstTerminator();
977      }
978      BuildMI(*InsertBB, Insert, MI->getDebugLoc(),
979              get(AMDGPU::COPY), DstReg)
980              .addOperand(MI->getOperand(i));
981      MI->getOperand(i).setReg(DstReg);
982    }
983  }
984
985  // Legalize INSERT_SUBREG
986  // src0 must have the same register class as dst
987  if (MI->getOpcode() == AMDGPU::INSERT_SUBREG) {
988    unsigned Dst = MI->getOperand(0).getReg();
989    unsigned Src0 = MI->getOperand(1).getReg();
990    const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
991    const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
992    if (DstRC != Src0RC) {
993      MachineBasicBlock &MBB = *MI->getParent();
994      unsigned NewSrc0 = MRI.createVirtualRegister(DstRC);
995      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), NewSrc0)
996              .addReg(Src0);
997      MI->getOperand(1).setReg(NewSrc0);
998    }
999    return;
1000  }
1001
1002  // Legalize MUBUF* instructions
1003  // FIXME: If we start using the non-addr64 instructions for compute, we
1004  // may need to legalize them here.
1005
1006  int SRsrcIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
1007                                            AMDGPU::OpName::srsrc);
1008  int VAddrIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
1009                                             AMDGPU::OpName::vaddr);
1010  if (SRsrcIdx != -1 && VAddrIdx != -1) {
1011    const TargetRegisterClass *VAddrRC =
1012        RI.getRegClass(get(MI->getOpcode()).OpInfo[VAddrIdx].RegClass);
1013
    if (VAddrRC->getSize() == 8 &&
        MRI.getRegClass(MI->getOperand(SRsrcIdx).getReg()) != VAddrRC) {
      // We have a MUBUF instruction that uses a 64-bit vaddr register and
      // srsrc has the incorrect register class.  In order to fix this, we
      // need to extract the pointer from the resource descriptor (srsrc),
      // add it to the value of vaddr, then store the result in the vaddr
      // operand.  Then, we need to set the pointer field of the resource
      // descriptor to zero.
1022
1023      MachineBasicBlock &MBB = *MI->getParent();
1024      MachineOperand &SRsrcOp = MI->getOperand(SRsrcIdx);
1025      MachineOperand &VAddrOp = MI->getOperand(VAddrIdx);
1026      unsigned SRsrcPtrLo, SRsrcPtrHi, VAddrLo, VAddrHi;
1027      unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
1028      unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
1029      unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
1030      unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
1031      unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
1032      unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
1033      unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
1034
1035      // SRsrcPtrLo = srsrc:sub0
1036      SRsrcPtrLo = buildExtractSubReg(MI, MRI, SRsrcOp,
1037          &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);
1038
1039      // SRsrcPtrHi = srsrc:sub1
1040      SRsrcPtrHi = buildExtractSubReg(MI, MRI, SRsrcOp,
1041          &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);
1042
1043      // VAddrLo = vaddr:sub0
1044      VAddrLo = buildExtractSubReg(MI, MRI, VAddrOp,
1045          &AMDGPU::VReg_64RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);
1046
1047      // VAddrHi = vaddr:sub1
1048      VAddrHi = buildExtractSubReg(MI, MRI, VAddrOp,
1049          &AMDGPU::VReg_64RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);
1050
1051      // NewVaddrLo = SRsrcPtrLo + VAddrLo
1052      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32),
1053              NewVAddrLo)
1054              .addReg(SRsrcPtrLo)
1055              .addReg(VAddrLo)
1056              .addReg(AMDGPU::VCC, RegState::Define | RegState::Implicit);
1057
1058      // NewVaddrHi = SRsrcPtrHi + VAddrHi
1059      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADDC_U32_e32),
1060              NewVAddrHi)
1061              .addReg(SRsrcPtrHi)
1062              .addReg(VAddrHi)
1063              .addReg(AMDGPU::VCC, RegState::ImplicitDefine)
1064              .addReg(AMDGPU::VCC, RegState::Implicit);
1065
1066      // NewVaddr = {NewVaddrHi, NewVaddrLo}
1067      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
1068              NewVAddr)
1069              .addReg(NewVAddrLo)
1070              .addImm(AMDGPU::sub0)
1071              .addReg(NewVAddrHi)
1072              .addImm(AMDGPU::sub1);
1073
1074      // Zero64 = 0
1075      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
1076              Zero64)
1077              .addImm(0);
1078
1079      // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
1080      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
1081              SRsrcFormatLo)
1082              .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);
1083
1084      // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
1085      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
1086              SRsrcFormatHi)
1087              .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
1088
1089      // NewSRsrc = {Zero64, SRsrcFormat}
1090      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
1091              NewSRsrc)
1092              .addReg(Zero64)
1093              .addImm(AMDGPU::sub0_sub1)
1094              .addReg(SRsrcFormatLo)
1095              .addImm(AMDGPU::sub2)
1096              .addReg(SRsrcFormatHi)
1097              .addImm(AMDGPU::sub3);
1098
1099      // Update the instruction to use NewVaddr
1100      MI->getOperand(VAddrIdx).setReg(NewVAddr);
1101      // Update the instruction to use NewSRsrc
1102      MI->getOperand(SRsrcIdx).setReg(NewSRsrc);
1103    }
1104  }
1105}
1106
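// Convert an SMRD load into the equivalent BUFFER_LOAD_*_ADDR64: the SMRD
// base pointer becomes the 64-bit vaddr, and a resource descriptor is built
// from the offset and RSRC_DATA_FORMAT.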
1107void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const {
1108  MachineBasicBlock *MBB = MI->getParent();
1109  switch (MI->getOpcode()) {
1110    case AMDGPU::S_LOAD_DWORD_IMM:
1111    case AMDGPU::S_LOAD_DWORD_SGPR:
1112    case AMDGPU::S_LOAD_DWORDX2_IMM:
1113    case AMDGPU::S_LOAD_DWORDX2_SGPR:
1114    case AMDGPU::S_LOAD_DWORDX4_IMM:
1115    case AMDGPU::S_LOAD_DWORDX4_SGPR:
1116      unsigned NewOpcode = getVALUOp(*MI);
1117      unsigned RegOffset;
1118      unsigned ImmOffset;
1119
1120      if (MI->getOperand(2).isReg()) {
1121        RegOffset = MI->getOperand(2).getReg();
1122        ImmOffset = 0;
1123      } else {
1124        assert(MI->getOperand(2).isImm());
        // SMRD instructions take a dword offset, while MUBUF instructions
        // take a byte offset, so scale the offset by four.
1127        ImmOffset = MI->getOperand(2).getImm() << 2;
1128        RegOffset = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
1129        if (isUInt<12>(ImmOffset)) {
1130          BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
1131                  RegOffset)
1132                  .addImm(0);
1133        } else {
1134          BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
1135                  RegOffset)
1136                  .addImm(ImmOffset);
1137          ImmOffset = 0;
1138        }
1139      }
1140
1141      unsigned SRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
1142      unsigned DWord0 = RegOffset;
1143      unsigned DWord1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
1144      unsigned DWord2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
1145      unsigned DWord3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
1146
1147      BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord1)
1148              .addImm(0);
1149      BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord2)
1150              .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);
1151      BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord3)
1152              .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
1153      BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), SRsrc)
1154              .addReg(DWord0)
1155              .addImm(AMDGPU::sub0)
1156              .addReg(DWord1)
1157              .addImm(AMDGPU::sub1)
1158              .addReg(DWord2)
1159              .addImm(AMDGPU::sub2)
1160              .addReg(DWord3)
1161              .addImm(AMDGPU::sub3);
      MI->setDesc(get(NewOpcode));
      if (MI->getOperand(2).isReg()) {
        MI->getOperand(2).setReg(MI->getOperand(1).getReg());
      } else {
        MI->getOperand(2).ChangeToRegister(MI->getOperand(1).getReg(), false);
      }
      MI->getOperand(1).setReg(SRsrc);
      MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(ImmOffset));
1170  }
1171}
1172
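// Move TopInst from the SALU to the VALU, then iteratively fix up any users
// that become illegal as a result.  Instructions that cannot be moved are
// legalized in place instead.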
1173void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
1174  SmallVector<MachineInstr *, 128> Worklist;
1175  Worklist.push_back(&TopInst);
1176
1177  while (!Worklist.empty()) {
1178    MachineInstr *Inst = Worklist.pop_back_val();
1179    MachineBasicBlock *MBB = Inst->getParent();
1180    MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
1181
1182    unsigned Opcode = Inst->getOpcode();
1183    unsigned NewOpcode = getVALUOp(*Inst);
1184
1185    // Handle some special cases
1186    switch (Opcode) {
1187    default:
1188      if (isSMRD(Inst->getOpcode())) {
1189        moveSMRDToVALU(Inst, MRI);
1190      }
1191      break;
1192    case AMDGPU::S_MOV_B64: {
1193      DebugLoc DL = Inst->getDebugLoc();
1194
1195      // If the source operand is a register we can replace this with a
1196      // copy.
1197      if (Inst->getOperand(1).isReg()) {
1198        MachineInstr *Copy = BuildMI(*MBB, Inst, DL, get(TargetOpcode::COPY))
1199          .addOperand(Inst->getOperand(0))
1200          .addOperand(Inst->getOperand(1));
1201        Worklist.push_back(Copy);
1202      } else {
1203        // Otherwise, we need to split this into two movs, because there is
1204        // no 64-bit VALU move instruction.
1205        unsigned Reg = Inst->getOperand(0).getReg();
1206        unsigned Dst = split64BitImm(Worklist,
1207                                     Inst,
1208                                     MRI,
1209                                     MRI.getRegClass(Reg),
1210                                     Inst->getOperand(1));
1211        MRI.replaceRegWith(Reg, Dst);
1212      }
1213      Inst->eraseFromParent();
1214      continue;
1215    }
1216    case AMDGPU::S_AND_B64:
1217      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32);
1218      Inst->eraseFromParent();
1219      continue;
1220
1221    case AMDGPU::S_OR_B64:
1222      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32);
1223      Inst->eraseFromParent();
1224      continue;
1225
1226    case AMDGPU::S_XOR_B64:
1227      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32);
1228      Inst->eraseFromParent();
1229      continue;
1230
1231    case AMDGPU::S_NOT_B64:
1232      splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
1233      Inst->eraseFromParent();
1234      continue;
1235
1236    case AMDGPU::S_BCNT1_I32_B64:
1237      splitScalar64BitBCNT(Worklist, Inst);
1238      Inst->eraseFromParent();
1239      continue;
1240
1241    case AMDGPU::S_BFE_U64:
1242    case AMDGPU::S_BFE_I64:
1243    case AMDGPU::S_BFM_B64:
1244      llvm_unreachable("Moving this op to VALU not implemented");
1245    }
1246
1247    if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
1248      // We cannot move this instruction to the VALU, so we should try to
1249      // legalize its operands instead.
1250      legalizeOperands(Inst);
1251      continue;
1252    }
1253
1254    // Use the new VALU Opcode.
1255    const MCInstrDesc &NewDesc = get(NewOpcode);
1256    Inst->setDesc(NewDesc);
1257
    // Remove any references to SCC. Vector instructions can't read from it,
    // and we're just about to add the implicit use / defs of VCC, and we
    // don't want both.
1261    for (unsigned i = Inst->getNumOperands() - 1; i > 0; --i) {
1262      MachineOperand &Op = Inst->getOperand(i);
1263      if (Op.isReg() && Op.getReg() == AMDGPU::SCC)
1264        Inst->RemoveOperand(i);
1265    }
1266
1267    if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
1268      // We are converting these to a BFE, so we need to add the missing
1269      // operands for the size and offset.
1270      unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
1271      Inst->addOperand(Inst->getOperand(1));
1272      Inst->getOperand(1).ChangeToImmediate(0);
1273      Inst->addOperand(MachineOperand::CreateImm(0));
1274      Inst->addOperand(MachineOperand::CreateImm(0));
1275      Inst->addOperand(MachineOperand::CreateImm(0));
1276      Inst->addOperand(MachineOperand::CreateImm(Size));
1277
1278      // XXX - Other pointless operands. There are 4, but it seems you only need
1279      // 3 to not hit an assertion later in MCInstLower.
1280      Inst->addOperand(MachineOperand::CreateImm(0));
1281      Inst->addOperand(MachineOperand::CreateImm(0));
1282    } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
1283      // The VALU version adds the second operand to the result, so insert an
1284      // extra 0 operand.
1285      Inst->addOperand(MachineOperand::CreateImm(0));
1286    }
1287
1288    addDescImplicitUseDef(NewDesc, Inst);
1289
1290    if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
1291      const MachineOperand &OffsetWidthOp = Inst->getOperand(2);
1292      // If we need to move this to VGPRs, we need to unpack the second operand
1293      // back into the 2 separate ones for bit offset and width.
1294      assert(OffsetWidthOp.isImm() &&
1295             "Scalar BFE is only implemented for constant width and offset");
1296      uint32_t Imm = OffsetWidthOp.getImm();
1297
1298      uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
1299      uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
1300
1301      Inst->RemoveOperand(2); // Remove old immediate.
1302      Inst->addOperand(Inst->getOperand(1));
1303      Inst->getOperand(1).ChangeToImmediate(0);
1304      Inst->addOperand(MachineOperand::CreateImm(0));
1305      Inst->addOperand(MachineOperand::CreateImm(Offset));
1306      Inst->addOperand(MachineOperand::CreateImm(0));
1307      Inst->addOperand(MachineOperand::CreateImm(BitWidth));
1308      Inst->addOperand(MachineOperand::CreateImm(0));
1309      Inst->addOperand(MachineOperand::CreateImm(0));
1310    }
1311
1312    // Update the destination register class.
1313
1314    const TargetRegisterClass *NewDstRC = getOpRegClass(*Inst, 0);
1315
1316    switch (Opcode) {
1317      // For target instructions, getOpRegClass just returns the virtual
1318      // register class associated with the operand, so we need to find an
1319      // equivalent VGPR register class in order to move the instruction to the
1320      // VALU.
1321    case AMDGPU::COPY:
1322    case AMDGPU::PHI:
1323    case AMDGPU::REG_SEQUENCE:
1324    case AMDGPU::INSERT_SUBREG:
1325      if (RI.hasVGPRs(NewDstRC))
1326        continue;
1327      NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
1328      if (!NewDstRC)
1329        continue;
1330      break;
1331    default:
1332      break;
1333    }
1334
1335    unsigned DstReg = Inst->getOperand(0).getReg();
1336    unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC);
1337    MRI.replaceRegWith(DstReg, NewDstReg);
1338
1339    // Legalize the operands
1340    legalizeOperands(Inst);
1341
1342    for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
1343           E = MRI.use_end(); I != E; ++I) {
1344      MachineInstr &UseMI = *I->getParent();
1345      if (!canReadVGPR(UseMI, I.getOperandNo())) {
1346        Worklist.push_back(&UseMI);
1347      }
1348    }
1349  }
1350}
1351
1352//===----------------------------------------------------------------------===//
1353// Indirect addressing callbacks
1354//===----------------------------------------------------------------------===//
1355
1356unsigned SIInstrInfo::calculateIndirectAddress(unsigned RegIndex,
1357                                                 unsigned Channel) const {
1358  assert(Channel == 0);
1359  return RegIndex;
1360}
1361
1362const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const {
1363  return &AMDGPU::VReg_32RegClass;
1364}
1365
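// Split a 64-bit scalar unary operation into two 32-bit operations on the
// sub0 and sub1 halves, recombined with a REG_SEQUENCE.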
1366void SIInstrInfo::splitScalar64BitUnaryOp(
1367  SmallVectorImpl<MachineInstr *> &Worklist,
1368  MachineInstr *Inst,
1369  unsigned Opcode) const {
1370  MachineBasicBlock &MBB = *Inst->getParent();
1371  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
1372
1373  MachineOperand &Dest = Inst->getOperand(0);
1374  MachineOperand &Src0 = Inst->getOperand(1);
1375  DebugLoc DL = Inst->getDebugLoc();
1376
1377  MachineBasicBlock::iterator MII = Inst;
1378
1379  const MCInstrDesc &InstDesc = get(Opcode);
1380  const TargetRegisterClass *Src0RC = Src0.isReg() ?
1381    MRI.getRegClass(Src0.getReg()) :
1382    &AMDGPU::SGPR_32RegClass;
1383
1384  const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
1385
1386  MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
1387                                                       AMDGPU::sub0, Src0SubRC);
1388
1389  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
1390  const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0);
1391
1392  unsigned DestSub0 = MRI.createVirtualRegister(DestRC);
1393  MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
1394    .addOperand(SrcReg0Sub0);
1395
1396  MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
1397                                                       AMDGPU::sub1, Src0SubRC);
1398
1399  unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC);
1400  MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
1401    .addOperand(SrcReg0Sub1);
1402
1403  unsigned FullDestReg = MRI.createVirtualRegister(DestRC);
1404  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
1405    .addReg(DestSub0)
1406    .addImm(AMDGPU::sub0)
1407    .addReg(DestSub1)
1408    .addImm(AMDGPU::sub1);
1409
1410  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
1411
1412  // Try to legalize the operands in case we need to swap the order to keep it
1413  // valid.
1414  Worklist.push_back(LoHalf);
1415  Worklist.push_back(HiHalf);
1416}
1417
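// Split a 64-bit scalar binary operation into two 32-bit operations on the
// sub0 and sub1 halves of both sources, recombined with a REG_SEQUENCE.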
1418void SIInstrInfo::splitScalar64BitBinaryOp(
1419  SmallVectorImpl<MachineInstr *> &Worklist,
1420  MachineInstr *Inst,
1421  unsigned Opcode) const {
1422  MachineBasicBlock &MBB = *Inst->getParent();
1423  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
1424
1425  MachineOperand &Dest = Inst->getOperand(0);
1426  MachineOperand &Src0 = Inst->getOperand(1);
1427  MachineOperand &Src1 = Inst->getOperand(2);
1428  DebugLoc DL = Inst->getDebugLoc();
1429
1430  MachineBasicBlock::iterator MII = Inst;
1431
1432  const MCInstrDesc &InstDesc = get(Opcode);
1433  const TargetRegisterClass *Src0RC = Src0.isReg() ?
1434    MRI.getRegClass(Src0.getReg()) :
1435    &AMDGPU::SGPR_32RegClass;
1436
1437  const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
1438  const TargetRegisterClass *Src1RC = Src1.isReg() ?
1439    MRI.getRegClass(Src1.getReg()) :
1440    &AMDGPU::SGPR_32RegClass;
1441
1442  const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);
1443
1444  MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
1445                                                       AMDGPU::sub0, Src0SubRC);
1446  MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
1447                                                       AMDGPU::sub0, Src1SubRC);
1448
1449  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
1450  const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0);
1451
1452  unsigned DestSub0 = MRI.createVirtualRegister(DestRC);
1453  MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
1454    .addOperand(SrcReg0Sub0)
1455    .addOperand(SrcReg1Sub0);
1456
1457  MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
1458                                                       AMDGPU::sub1, Src0SubRC);
1459  MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
1460                                                       AMDGPU::sub1, Src1SubRC);
1461
1462  unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC);
1463  MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
1464    .addOperand(SrcReg0Sub1)
1465    .addOperand(SrcReg1Sub1);
1466
1467  unsigned FullDestReg = MRI.createVirtualRegister(DestRC);
1468  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
1469    .addReg(DestSub0)
1470    .addImm(AMDGPU::sub0)
1471    .addReg(DestSub1)
1472    .addImm(AMDGPU::sub1);
1473
1474  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
1475
1476  // Try to legalize the operands in case we need to swap the order to keep it
1477  // valid.
1478  Worklist.push_back(LoHalf);
1479  Worklist.push_back(HiHalf);
1480}
1481
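// Lower S_BCNT1_I32_B64 to two V_BCNT_U32_B32_e32 instructions; the second
// one adds the population count of the high half to that of the low half.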
1482void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist,
1483                                       MachineInstr *Inst) const {
1484  MachineBasicBlock &MBB = *Inst->getParent();
1485  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
1486
1487  MachineBasicBlock::iterator MII = Inst;
1488  DebugLoc DL = Inst->getDebugLoc();
1489
1490  MachineOperand &Dest = Inst->getOperand(0);
1491  MachineOperand &Src = Inst->getOperand(1);
1492
1493  const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e32);
1494  const TargetRegisterClass *SrcRC = Src.isReg() ?
1495    MRI.getRegClass(Src.getReg()) :
1496    &AMDGPU::SGPR_32RegClass;
1497
1498  unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1499  unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1500
1501  const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0);
1502
1503  MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
1504                                                      AMDGPU::sub0, SrcSubRC);
1505  MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
1506                                                      AMDGPU::sub1, SrcSubRC);
1507
1508  MachineInstr *First = BuildMI(MBB, MII, DL, InstDesc, MidReg)
1509    .addOperand(SrcRegSub0)
1510    .addImm(0);
1511
1512  MachineInstr *Second = BuildMI(MBB, MII, DL, InstDesc, ResultReg)
1513    .addOperand(SrcRegSub1)
1514    .addReg(MidReg);
1515
1516  MRI.replaceRegWith(Dest.getReg(), ResultReg);
1517
1518  Worklist.push_back(First);
1519  Worklist.push_back(Second);
1520}
1521
1522void SIInstrInfo::addDescImplicitUseDef(const MCInstrDesc &NewDesc,
1523                                        MachineInstr *Inst) const {
  // Add the implicit register uses and definitions from the new descriptor.
1525  if (NewDesc.ImplicitUses) {
1526    for (unsigned i = 0; NewDesc.ImplicitUses[i]; ++i) {
1527      unsigned Reg = NewDesc.ImplicitUses[i];
1528      Inst->addOperand(MachineOperand::CreateReg(Reg, false, true));
1529    }
1530  }
1531
1532  if (NewDesc.ImplicitDefs) {
1533    for (unsigned i = 0; NewDesc.ImplicitDefs[i]; ++i) {
1534      unsigned Reg = NewDesc.ImplicitDefs[i];
1535      Inst->addOperand(MachineOperand::CreateReg(Reg, true, true));
1536    }
1537  }
1538}
1539
1540MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
1541                                   MachineBasicBlock *MBB,
1542                                   MachineBasicBlock::iterator I,
1543                                   unsigned ValueReg,
1544                                   unsigned Address, unsigned OffsetReg) const {
1545  const DebugLoc &DL = MBB->findDebugLoc(I);
1546  unsigned IndirectBaseReg = AMDGPU::VReg_32RegClass.getRegister(
1547                                      getIndirectIndexBegin(*MBB->getParent()));
1548
1549  return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_DST_V1))
1550          .addReg(IndirectBaseReg, RegState::Define)
1551          .addOperand(I->getOperand(0))
1552          .addReg(IndirectBaseReg)
1553          .addReg(OffsetReg)
1554          .addImm(0)
1555          .addReg(ValueReg);
1556}
1557
1558MachineInstrBuilder SIInstrInfo::buildIndirectRead(
1559                                   MachineBasicBlock *MBB,
1560                                   MachineBasicBlock::iterator I,
1561                                   unsigned ValueReg,
1562                                   unsigned Address, unsigned OffsetReg) const {
1563  const DebugLoc &DL = MBB->findDebugLoc(I);
1564  unsigned IndirectBaseReg = AMDGPU::VReg_32RegClass.getRegister(
1565                                      getIndirectIndexBegin(*MBB->getParent()));
1566
1567  return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_SRC))
1568          .addOperand(I->getOperand(0))
1569          .addOperand(I->getOperand(1))
1570          .addReg(IndirectBaseReg)
1571          .addReg(OffsetReg)
1572          .addImm(0);
1573
1574}
1575
1576void SIInstrInfo::reserveIndirectRegisters(BitVector &Reserved,
1577                                            const MachineFunction &MF) const {
1578  int End = getIndirectIndexEnd(MF);
1579  int Begin = getIndirectIndexBegin(MF);
1580
1581  if (End == -1)
1582    return;
1583
  for (int Index = Begin; Index <= End; ++Index)
1586    Reserved.set(AMDGPU::VReg_32RegClass.getRegister(Index));
1587
1588  for (int Index = std::max(0, Begin - 1); Index <= End; ++Index)
1589    Reserved.set(AMDGPU::VReg_64RegClass.getRegister(Index));
1590
1591  for (int Index = std::max(0, Begin - 2); Index <= End; ++Index)
1592    Reserved.set(AMDGPU::VReg_96RegClass.getRegister(Index));
1593
1594  for (int Index = std::max(0, Begin - 3); Index <= End; ++Index)
1595    Reserved.set(AMDGPU::VReg_128RegClass.getRegister(Index));
1596
1597  for (int Index = std::max(0, Begin - 7); Index <= End; ++Index)
1598    Reserved.set(AMDGPU::VReg_256RegClass.getRegister(Index));
1599
1600  for (int Index = std::max(0, Begin - 15); Index <= End; ++Index)
1601    Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index));
1602}
1603