1//===-- PPCFastISel.cpp - PowerPC FastISel implementation -----------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the PowerPC-specific support for the FastISel class. Some
11// of the target-specific code is generated by tablegen in the file
12// PPCGenFastISel.inc, which is #included here.
13//
14//===----------------------------------------------------------------------===//
15
16#include "PPC.h"
17#include "MCTargetDesc/PPCPredicates.h"
18#include "PPCCallingConv.h"
19#include "PPCCCState.h"
20#include "PPCISelLowering.h"
21#include "PPCMachineFunctionInfo.h"
22#include "PPCSubtarget.h"
23#include "PPCTargetMachine.h"
24#include "llvm/ADT/Optional.h"
25#include "llvm/CodeGen/CallingConvLower.h"
26#include "llvm/CodeGen/FastISel.h"
27#include "llvm/CodeGen/FunctionLoweringInfo.h"
28#include "llvm/CodeGen/MachineConstantPool.h"
29#include "llvm/CodeGen/MachineFrameInfo.h"
30#include "llvm/CodeGen/MachineInstrBuilder.h"
31#include "llvm/CodeGen/MachineRegisterInfo.h"
32#include "llvm/IR/CallingConv.h"
33#include "llvm/IR/GetElementPtrTypeIterator.h"
34#include "llvm/IR/GlobalAlias.h"
35#include "llvm/IR/GlobalVariable.h"
36#include "llvm/IR/IntrinsicInst.h"
37#include "llvm/IR/Operator.h"
38#include "llvm/Support/Debug.h"
39#include "llvm/Target/TargetLowering.h"
40#include "llvm/Target/TargetMachine.h"
41
42//===----------------------------------------------------------------------===//
43//
44// TBD:
45//   fastLowerArguments: Handle simple cases.
46//   PPCMaterializeGV: Handle TLS.
47//   SelectCall: Handle function pointers.
48//   SelectCall: Handle multi-register return values.
49//   SelectCall: Optimize away nops for local calls.
50//   processCallArgs: Handle bit-converted arguments.
51//   finishCall: Handle multi-register return values.
52//   PPCComputeAddress: Handle parameter references as FrameIndex's.
53//   PPCEmitCmp: Handle immediate as operand 1.
54//   SelectCall: Handle small byval arguments.
55//   SelectIntrinsicCall: Implement.
56//   SelectSelect: Implement.
57//   Consider factoring isTypeLegal into the base class.
58//   Implement switches and jump tables.
59//
60//===----------------------------------------------------------------------===//
61using namespace llvm;
62
63#define DEBUG_TYPE "ppcfastisel"
64
65namespace {
66
// Describes a memory operand as a base plus a constant byte offset.  The
// base is either a register or a stack frame index, as selected by BaseType.
struct Address {
  // Says which member of Base is currently meaningful.
  enum {
    RegBase,
    FrameIndexBase
  } BaseType = RegBase;

  union {
    unsigned Reg; // Used when BaseType is RegBase.
    int FI;       // Used when BaseType is FrameIndexBase.
  } Base;

  long Offset = 0;

  // Innocuous defaults: register-based, no register assigned, zero offset.
  Address() { Base.Reg = 0; }
};
86
87class PPCFastISel final : public FastISel {
88
89  const TargetMachine &TM;
90  const PPCSubtarget *PPCSubTarget;
91  PPCFunctionInfo *PPCFuncInfo;
92  const TargetInstrInfo &TII;
93  const TargetLowering &TLI;
94  LLVMContext *Context;
95
96  public:
97    explicit PPCFastISel(FunctionLoweringInfo &FuncInfo,
98                         const TargetLibraryInfo *LibInfo)
99        : FastISel(FuncInfo, LibInfo), TM(FuncInfo.MF->getTarget()),
100          PPCSubTarget(&FuncInfo.MF->getSubtarget<PPCSubtarget>()),
101          PPCFuncInfo(FuncInfo.MF->getInfo<PPCFunctionInfo>()),
102          TII(*PPCSubTarget->getInstrInfo()),
103          TLI(*PPCSubTarget->getTargetLowering()),
104          Context(&FuncInfo.Fn->getContext()) {}
105
106  // Backend specific FastISel code.
107  private:
108    bool fastSelectInstruction(const Instruction *I) override;
109    unsigned fastMaterializeConstant(const Constant *C) override;
110    unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
111    bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
112                             const LoadInst *LI) override;
113    bool fastLowerArguments() override;
114    unsigned fastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override;
115    unsigned fastEmitInst_ri(unsigned MachineInstOpcode,
116                             const TargetRegisterClass *RC,
117                             unsigned Op0, bool Op0IsKill,
118                             uint64_t Imm);
119    unsigned fastEmitInst_r(unsigned MachineInstOpcode,
120                            const TargetRegisterClass *RC,
121                            unsigned Op0, bool Op0IsKill);
122    unsigned fastEmitInst_rr(unsigned MachineInstOpcode,
123                             const TargetRegisterClass *RC,
124                             unsigned Op0, bool Op0IsKill,
125                             unsigned Op1, bool Op1IsKill);
126
127    bool fastLowerCall(CallLoweringInfo &CLI) override;
128
129  // Instruction selection routines.
130  private:
131    bool SelectLoad(const Instruction *I);
132    bool SelectStore(const Instruction *I);
133    bool SelectBranch(const Instruction *I);
134    bool SelectIndirectBr(const Instruction *I);
135    bool SelectFPExt(const Instruction *I);
136    bool SelectFPTrunc(const Instruction *I);
137    bool SelectIToFP(const Instruction *I, bool IsSigned);
138    bool SelectFPToI(const Instruction *I, bool IsSigned);
139    bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
140    bool SelectRet(const Instruction *I);
141    bool SelectTrunc(const Instruction *I);
142    bool SelectIntExt(const Instruction *I);
143
144  // Utility routines.
145  private:
146    bool isTypeLegal(Type *Ty, MVT &VT);
147    bool isLoadTypeLegal(Type *Ty, MVT &VT);
148    bool isValueAvailable(const Value *V) const;
149    bool isVSFRCRegister(unsigned Register) const {
150      return MRI.getRegClass(Register)->getID() == PPC::VSFRCRegClassID;
151    }
152    bool isVSSRCRegister(unsigned Register) const {
153      return MRI.getRegClass(Register)->getID() == PPC::VSSRCRegClassID;
154    }
155    bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value,
156                    bool isZExt, unsigned DestReg);
157    bool PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
158                     const TargetRegisterClass *RC, bool IsZExt = true,
159                     unsigned FP64LoadOpc = PPC::LFD);
160    bool PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr);
161    bool PPCComputeAddress(const Value *Obj, Address &Addr);
162    void PPCSimplifyAddress(Address &Addr, bool &UseOffset,
163                            unsigned &IndexReg);
164    bool PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
165                           unsigned DestReg, bool IsZExt);
166    unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
167    unsigned PPCMaterializeGV(const GlobalValue *GV, MVT VT);
168    unsigned PPCMaterializeInt(const ConstantInt *CI, MVT VT,
169                               bool UseSExt = true);
170    unsigned PPCMaterialize32BitInt(int64_t Imm,
171                                    const TargetRegisterClass *RC);
172    unsigned PPCMaterialize64BitInt(int64_t Imm,
173                                    const TargetRegisterClass *RC);
174    unsigned PPCMoveToIntReg(const Instruction *I, MVT VT,
175                             unsigned SrcReg, bool IsSigned);
176    unsigned PPCMoveToFPReg(MVT VT, unsigned SrcReg, bool IsSigned);
177
178  // Call handling routines.
179  private:
180    bool processCallArgs(SmallVectorImpl<Value*> &Args,
181                         SmallVectorImpl<unsigned> &ArgRegs,
182                         SmallVectorImpl<MVT> &ArgVTs,
183                         SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
184                         SmallVectorImpl<unsigned> &RegArgs,
185                         CallingConv::ID CC,
186                         unsigned &NumBytes,
187                         bool IsVarArg);
188    bool finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes);
189    LLVM_ATTRIBUTE_UNUSED CCAssignFn *usePPC32CCs(unsigned Flag);
190
191  private:
192  #include "PPCGenFastISel.inc"
193
194};
195
196} // end anonymous namespace
197
198#include "PPCGenCallingConv.inc"
199
200// Function whose sole purpose is to kill compiler warnings
201// stemming from unused functions included from PPCGenCallingConv.inc.
202CCAssignFn *PPCFastISel::usePPC32CCs(unsigned Flag) {
203  if (Flag == 1)
204    return CC_PPC32_SVR4;
205  else if (Flag == 2)
206    return CC_PPC32_SVR4_ByVal;
207  else if (Flag == 3)
208    return CC_PPC32_SVR4_VarArg;
209  else
210    return RetCC_PPC;
211}
212
213static Optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) {
214  switch (Pred) {
215    // These are not representable with any single compare.
216    case CmpInst::FCMP_FALSE:
217    case CmpInst::FCMP_TRUE:
218    // Major concern about the following 6 cases is NaN result. The comparison
219    // result consists of 4 bits, indicating lt, eq, gt and un (unordered),
220    // only one of which will be set. The result is generated by fcmpu
221    // instruction. However, bc instruction only inspects one of the first 3
222    // bits, so when un is set, bc instruction may jump to to an undesired
223    // place.
224    //
225    // More specifically, if we expect an unordered comparison and un is set, we
226    // expect to always go to true branch; in such case UEQ, UGT and ULT still
227    // give false, which are undesired; but UNE, UGE, ULE happen to give true,
228    // since they are tested by inspecting !eq, !lt, !gt, respectively.
229    //
230    // Similarly, for ordered comparison, when un is set, we always expect the
231    // result to be false. In such case OGT, OLT and OEQ is good, since they are
232    // actually testing GT, LT, and EQ respectively, which are false. OGE, OLE
233    // and ONE are tested through !lt, !gt and !eq, and these are true.
234    case CmpInst::FCMP_UEQ:
235    case CmpInst::FCMP_UGT:
236    case CmpInst::FCMP_ULT:
237    case CmpInst::FCMP_OGE:
238    case CmpInst::FCMP_OLE:
239    case CmpInst::FCMP_ONE:
240    default:
241      return Optional<PPC::Predicate>();
242
243    case CmpInst::FCMP_OEQ:
244    case CmpInst::ICMP_EQ:
245      return PPC::PRED_EQ;
246
247    case CmpInst::FCMP_OGT:
248    case CmpInst::ICMP_UGT:
249    case CmpInst::ICMP_SGT:
250      return PPC::PRED_GT;
251
252    case CmpInst::FCMP_UGE:
253    case CmpInst::ICMP_UGE:
254    case CmpInst::ICMP_SGE:
255      return PPC::PRED_GE;
256
257    case CmpInst::FCMP_OLT:
258    case CmpInst::ICMP_ULT:
259    case CmpInst::ICMP_SLT:
260      return PPC::PRED_LT;
261
262    case CmpInst::FCMP_ULE:
263    case CmpInst::ICMP_ULE:
264    case CmpInst::ICMP_SLE:
265      return PPC::PRED_LE;
266
267    case CmpInst::FCMP_UNE:
268    case CmpInst::ICMP_NE:
269      return PPC::PRED_NE;
270
271    case CmpInst::FCMP_ORD:
272      return PPC::PRED_NU;
273
274    case CmpInst::FCMP_UNO:
275      return PPC::PRED_UN;
276  }
277}
278
279// Determine whether the type Ty is simple enough to be handled by
280// fast-isel, and return its equivalent machine type in VT.
281// FIXME: Copied directly from ARM -- factor into base class?
282bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) {
283  EVT Evt = TLI.getValueType(DL, Ty, true);
284
285  // Only handle simple types.
286  if (Evt == MVT::Other || !Evt.isSimple()) return false;
287  VT = Evt.getSimpleVT();
288
289  // Handle all legal types, i.e. a register that will directly hold this
290  // value.
291  return TLI.isTypeLegal(VT);
292}
293
294// Determine whether the type Ty is simple enough to be handled by
295// fast-isel as a load target, and return its equivalent machine type in VT.
296bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
297  if (isTypeLegal(Ty, VT)) return true;
298
299  // If this is a type than can be sign or zero-extended to a basic operation
300  // go ahead and accept it now.
301  if (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) {
302    return true;
303  }
304
305  return false;
306}
307
308bool PPCFastISel::isValueAvailable(const Value *V) const {
309  if (!isa<Instruction>(V))
310    return true;
311
312  const auto *I = cast<Instruction>(V);
313  return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
314}
315
316// Given a value Obj, create an Address object Addr that represents its
317// address.  Return false if we can't handle it.
318bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
319  const User *U = nullptr;
320  unsigned Opcode = Instruction::UserOp1;
321  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
322    // Don't walk into other basic blocks unless the object is an alloca from
323    // another block, otherwise it may not have a virtual register assigned.
324    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
325        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
326      Opcode = I->getOpcode();
327      U = I;
328    }
329  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
330    Opcode = C->getOpcode();
331    U = C;
332  }
333
334  switch (Opcode) {
335    default:
336      break;
337    case Instruction::BitCast:
338      // Look through bitcasts.
339      return PPCComputeAddress(U->getOperand(0), Addr);
340    case Instruction::IntToPtr:
341      // Look past no-op inttoptrs.
342      if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
343          TLI.getPointerTy(DL))
344        return PPCComputeAddress(U->getOperand(0), Addr);
345      break;
346    case Instruction::PtrToInt:
347      // Look past no-op ptrtoints.
348      if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
349        return PPCComputeAddress(U->getOperand(0), Addr);
350      break;
351    case Instruction::GetElementPtr: {
352      Address SavedAddr = Addr;
353      long TmpOffset = Addr.Offset;
354
355      // Iterate through the GEP folding the constants into offsets where
356      // we can.
357      gep_type_iterator GTI = gep_type_begin(U);
358      for (User::const_op_iterator II = U->op_begin() + 1, IE = U->op_end();
359           II != IE; ++II, ++GTI) {
360        const Value *Op = *II;
361        if (StructType *STy = dyn_cast<StructType>(*GTI)) {
362          const StructLayout *SL = DL.getStructLayout(STy);
363          unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
364          TmpOffset += SL->getElementOffset(Idx);
365        } else {
366          uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
367          for (;;) {
368            if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
369              // Constant-offset addressing.
370              TmpOffset += CI->getSExtValue() * S;
371              break;
372            }
373            if (canFoldAddIntoGEP(U, Op)) {
374              // A compatible add with a constant operand. Fold the constant.
375              ConstantInt *CI =
376              cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
377              TmpOffset += CI->getSExtValue() * S;
378              // Iterate on the other operand.
379              Op = cast<AddOperator>(Op)->getOperand(0);
380              continue;
381            }
382            // Unsupported
383            goto unsupported_gep;
384          }
385        }
386      }
387
388      // Try to grab the base operand now.
389      Addr.Offset = TmpOffset;
390      if (PPCComputeAddress(U->getOperand(0), Addr)) return true;
391
392      // We failed, restore everything and try the other options.
393      Addr = SavedAddr;
394
395      unsupported_gep:
396      break;
397    }
398    case Instruction::Alloca: {
399      const AllocaInst *AI = cast<AllocaInst>(Obj);
400      DenseMap<const AllocaInst*, int>::iterator SI =
401        FuncInfo.StaticAllocaMap.find(AI);
402      if (SI != FuncInfo.StaticAllocaMap.end()) {
403        Addr.BaseType = Address::FrameIndexBase;
404        Addr.Base.FI = SI->second;
405        return true;
406      }
407      break;
408    }
409  }
410
411  // FIXME: References to parameters fall through to the behavior
412  // below.  They should be able to reference a frame index since
413  // they are stored to the stack, so we can get "ld rx, offset(r1)"
414  // instead of "addi ry, r1, offset / ld rx, 0(ry)".  Obj will
415  // just contain the parameter.  Try to handle this with a FI.
416
417  // Try to get this in a register if nothing else has worked.
418  if (Addr.Base.Reg == 0)
419    Addr.Base.Reg = getRegForValue(Obj);
420
421  // Prevent assignment of base register to X0, which is inappropriate
422  // for loads and stores alike.
423  if (Addr.Base.Reg != 0)
424    MRI.setRegClass(Addr.Base.Reg, &PPC::G8RC_and_G8RC_NOX0RegClass);
425
426  return Addr.Base.Reg != 0;
427}
428
429// Fix up some addresses that can't be used directly.  For example, if
430// an offset won't fit in an instruction field, we may need to move it
431// into an index register.
432void PPCFastISel::PPCSimplifyAddress(Address &Addr, bool &UseOffset,
433                                     unsigned &IndexReg) {
434
435  // Check whether the offset fits in the instruction field.
436  if (!isInt<16>(Addr.Offset))
437    UseOffset = false;
438
439  // If this is a stack pointer and the offset needs to be simplified then
440  // put the alloca address into a register, set the base type back to
441  // register and continue. This should almost never happen.
442  if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) {
443    unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
444    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8),
445            ResultReg).addFrameIndex(Addr.Base.FI).addImm(0);
446    Addr.Base.Reg = ResultReg;
447    Addr.BaseType = Address::RegBase;
448  }
449
450  if (!UseOffset) {
451    IntegerType *OffsetTy = Type::getInt64Ty(*Context);
452    const ConstantInt *Offset =
453      ConstantInt::getSigned(OffsetTy, (int64_t)(Addr.Offset));
454    IndexReg = PPCMaterializeInt(Offset, MVT::i64);
455    assert(IndexReg && "Unexpected error in PPCMaterializeInt!");
456  }
457}
458
459// Emit a load instruction if possible, returning true if we succeeded,
460// otherwise false.  See commentary below for how the register class of
461// the load is determined.
462bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
463                              const TargetRegisterClass *RC,
464                              bool IsZExt, unsigned FP64LoadOpc) {
465  unsigned Opc;
466  bool UseOffset = true;
467
468  // If ResultReg is given, it determines the register class of the load.
469  // Otherwise, RC is the register class to use.  If the result of the
470  // load isn't anticipated in this block, both may be zero, in which
471  // case we must make a conservative guess.  In particular, don't assign
472  // R0 or X0 to the result register, as the result may be used in a load,
473  // store, add-immediate, or isel that won't permit this.  (Though
474  // perhaps the spill and reload of live-exit values would handle this?)
475  const TargetRegisterClass *UseRC =
476    (ResultReg ? MRI.getRegClass(ResultReg) :
477     (RC ? RC :
478      (VT == MVT::f64 ? &PPC::F8RCRegClass :
479       (VT == MVT::f32 ? &PPC::F4RCRegClass :
480        (VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
481         &PPC::GPRC_and_GPRC_NOR0RegClass)))));
482
483  bool Is32BitInt = UseRC->hasSuperClassEq(&PPC::GPRCRegClass);
484
485  switch (VT.SimpleTy) {
486    default: // e.g., vector types not handled
487      return false;
488    case MVT::i8:
489      Opc = Is32BitInt ? PPC::LBZ : PPC::LBZ8;
490      break;
491    case MVT::i16:
492      Opc = (IsZExt ?
493             (Is32BitInt ? PPC::LHZ : PPC::LHZ8) :
494             (Is32BitInt ? PPC::LHA : PPC::LHA8));
495      break;
496    case MVT::i32:
497      Opc = (IsZExt ?
498             (Is32BitInt ? PPC::LWZ : PPC::LWZ8) :
499             (Is32BitInt ? PPC::LWA_32 : PPC::LWA));
500      if ((Opc == PPC::LWA || Opc == PPC::LWA_32) && ((Addr.Offset & 3) != 0))
501        UseOffset = false;
502      break;
503    case MVT::i64:
504      Opc = PPC::LD;
505      assert(UseRC->hasSuperClassEq(&PPC::G8RCRegClass) &&
506             "64-bit load with 32-bit target??");
507      UseOffset = ((Addr.Offset & 3) == 0);
508      break;
509    case MVT::f32:
510      Opc = PPC::LFS;
511      break;
512    case MVT::f64:
513      Opc = FP64LoadOpc;
514      break;
515  }
516
517  // If necessary, materialize the offset into a register and use
518  // the indexed form.  Also handle stack pointers with special needs.
519  unsigned IndexReg = 0;
520  PPCSimplifyAddress(Addr, UseOffset, IndexReg);
521
522  // If this is a potential VSX load with an offset of 0, a VSX indexed load can
523  // be used.
524  bool IsVSSRC = (ResultReg != 0) && isVSSRCRegister(ResultReg);
525  bool IsVSFRC = (ResultReg != 0) && isVSFRCRegister(ResultReg);
526  bool Is32VSXLoad = IsVSSRC && Opc == PPC::LFS;
527  bool Is64VSXLoad = IsVSSRC && Opc == PPC::LFD;
528  if ((Is32VSXLoad || Is64VSXLoad) &&
529      (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
530      (Addr.Offset == 0)) {
531    UseOffset = false;
532  }
533
534  if (ResultReg == 0)
535    ResultReg = createResultReg(UseRC);
536
537  // Note: If we still have a frame index here, we know the offset is
538  // in range, as otherwise PPCSimplifyAddress would have converted it
539  // into a RegBase.
540  if (Addr.BaseType == Address::FrameIndexBase) {
541    // VSX only provides an indexed load.
542    if (Is32VSXLoad || Is64VSXLoad) return false;
543
544    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
545        MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
546                                          Addr.Offset),
547        MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI),
548        MFI.getObjectAlignment(Addr.Base.FI));
549
550    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
551      .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO);
552
553  // Base reg with offset in range.
554  } else if (UseOffset) {
555    // VSX only provides an indexed load.
556    if (Is32VSXLoad || Is64VSXLoad) return false;
557
558    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
559      .addImm(Addr.Offset).addReg(Addr.Base.Reg);
560
561  // Indexed form.
562  } else {
563    // Get the RR opcode corresponding to the RI one.  FIXME: It would be
564    // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
565    // is hard to get at.
566    switch (Opc) {
567      default:        llvm_unreachable("Unexpected opcode!");
568      case PPC::LBZ:    Opc = PPC::LBZX;    break;
569      case PPC::LBZ8:   Opc = PPC::LBZX8;   break;
570      case PPC::LHZ:    Opc = PPC::LHZX;    break;
571      case PPC::LHZ8:   Opc = PPC::LHZX8;   break;
572      case PPC::LHA:    Opc = PPC::LHAX;    break;
573      case PPC::LHA8:   Opc = PPC::LHAX8;   break;
574      case PPC::LWZ:    Opc = PPC::LWZX;    break;
575      case PPC::LWZ8:   Opc = PPC::LWZX8;   break;
576      case PPC::LWA:    Opc = PPC::LWAX;    break;
577      case PPC::LWA_32: Opc = PPC::LWAX_32; break;
578      case PPC::LD:     Opc = PPC::LDX;     break;
579      case PPC::LFS:    Opc = IsVSSRC ? PPC::LXSSPX : PPC::LFSX; break;
580      case PPC::LFD:    Opc = IsVSFRC ? PPC::LXSDX : PPC::LFDX; break;
581    }
582    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
583      .addReg(Addr.Base.Reg).addReg(IndexReg);
584  }
585
586  return true;
587}
588
589// Attempt to fast-select a load instruction.
590bool PPCFastISel::SelectLoad(const Instruction *I) {
591  // FIXME: No atomic loads are supported.
592  if (cast<LoadInst>(I)->isAtomic())
593    return false;
594
595  // Verify we have a legal type before going any further.
596  MVT VT;
597  if (!isLoadTypeLegal(I->getType(), VT))
598    return false;
599
600  // See if we can handle this address.
601  Address Addr;
602  if (!PPCComputeAddress(I->getOperand(0), Addr))
603    return false;
604
605  // Look at the currently assigned register for this instruction
606  // to determine the required register class.  This is necessary
607  // to constrain RA from using R0/X0 when this is not legal.
608  unsigned AssignedReg = FuncInfo.ValueMap[I];
609  const TargetRegisterClass *RC =
610    AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;
611
612  unsigned ResultReg = 0;
613  if (!PPCEmitLoad(VT, ResultReg, Addr, RC))
614    return false;
615  updateValueMap(I, ResultReg);
616  return true;
617}
618
619// Emit a store instruction to store SrcReg at Addr.
620bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
621  assert(SrcReg && "Nothing to store!");
622  unsigned Opc;
623  bool UseOffset = true;
624
625  const TargetRegisterClass *RC = MRI.getRegClass(SrcReg);
626  bool Is32BitInt = RC->hasSuperClassEq(&PPC::GPRCRegClass);
627
628  switch (VT.SimpleTy) {
629    default: // e.g., vector types not handled
630      return false;
631    case MVT::i8:
632      Opc = Is32BitInt ? PPC::STB : PPC::STB8;
633      break;
634    case MVT::i16:
635      Opc = Is32BitInt ? PPC::STH : PPC::STH8;
636      break;
637    case MVT::i32:
638      assert(Is32BitInt && "Not GPRC for i32??");
639      Opc = PPC::STW;
640      break;
641    case MVT::i64:
642      Opc = PPC::STD;
643      UseOffset = ((Addr.Offset & 3) == 0);
644      break;
645    case MVT::f32:
646      Opc = PPC::STFS;
647      break;
648    case MVT::f64:
649      Opc = PPC::STFD;
650      break;
651  }
652
653  // If necessary, materialize the offset into a register and use
654  // the indexed form.  Also handle stack pointers with special needs.
655  unsigned IndexReg = 0;
656  PPCSimplifyAddress(Addr, UseOffset, IndexReg);
657
658  // If this is a potential VSX store with an offset of 0, a VSX indexed store
659  // can be used.
660  bool IsVSSRC = isVSSRCRegister(SrcReg);
661  bool IsVSFRC = isVSFRCRegister(SrcReg);
662  bool Is32VSXStore = IsVSSRC && Opc == PPC::STFS;
663  bool Is64VSXStore = IsVSFRC && Opc == PPC::STFD;
664  if ((Is32VSXStore || Is64VSXStore) &&
665      (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
666      (Addr.Offset == 0)) {
667    UseOffset = false;
668  }
669
670  // Note: If we still have a frame index here, we know the offset is
671  // in range, as otherwise PPCSimplifyAddress would have converted it
672  // into a RegBase.
673  if (Addr.BaseType == Address::FrameIndexBase) {
674    // VSX only provides an indexed store.
675    if (Is32VSXStore || Is64VSXStore) return false;
676
677    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
678        MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
679                                          Addr.Offset),
680        MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI),
681        MFI.getObjectAlignment(Addr.Base.FI));
682
683    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
684        .addReg(SrcReg)
685        .addImm(Addr.Offset)
686        .addFrameIndex(Addr.Base.FI)
687        .addMemOperand(MMO);
688
689  // Base reg with offset in range.
690  } else if (UseOffset) {
691    // VSX only provides an indexed store.
692    if (Is32VSXStore || Is64VSXStore) return false;
693
694    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
695      .addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg);
696
697  // Indexed form.
698  } else {
699    // Get the RR opcode corresponding to the RI one.  FIXME: It would be
700    // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
701    // is hard to get at.
702    switch (Opc) {
703      default:        llvm_unreachable("Unexpected opcode!");
704      case PPC::STB:  Opc = PPC::STBX;  break;
705      case PPC::STH : Opc = PPC::STHX;  break;
706      case PPC::STW : Opc = PPC::STWX;  break;
707      case PPC::STB8: Opc = PPC::STBX8; break;
708      case PPC::STH8: Opc = PPC::STHX8; break;
709      case PPC::STW8: Opc = PPC::STWX8; break;
710      case PPC::STD:  Opc = PPC::STDX;  break;
711      case PPC::STFS: Opc = IsVSSRC ? PPC::STXSSPX : PPC::STFSX; break;
712      case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break;
713    }
714
715    auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
716        .addReg(SrcReg);
717
718    // If we have an index register defined we use it in the store inst,
719    // otherwise we use X0 as base as it makes the vector instructions to
720    // use zero in the computation of the effective address regardless the
721    // content of the register.
722    if (IndexReg)
723      MIB.addReg(Addr.Base.Reg).addReg(IndexReg);
724    else
725      MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
726  }
727
728  return true;
729}
730
731// Attempt to fast-select a store instruction.
732bool PPCFastISel::SelectStore(const Instruction *I) {
733  Value *Op0 = I->getOperand(0);
734  unsigned SrcReg = 0;
735
736  // FIXME: No atomics loads are supported.
737  if (cast<StoreInst>(I)->isAtomic())
738    return false;
739
740  // Verify we have a legal type before going any further.
741  MVT VT;
742  if (!isLoadTypeLegal(Op0->getType(), VT))
743    return false;
744
745  // Get the value to be stored into a register.
746  SrcReg = getRegForValue(Op0);
747  if (SrcReg == 0)
748    return false;
749
750  // See if we can handle this address.
751  Address Addr;
752  if (!PPCComputeAddress(I->getOperand(1), Addr))
753    return false;
754
755  if (!PPCEmitStore(VT, SrcReg, Addr))
756    return false;
757
758  return true;
759}
760
761// Attempt to fast-select a branch instruction.
762bool PPCFastISel::SelectBranch(const Instruction *I) {
763  const BranchInst *BI = cast<BranchInst>(I);
764  MachineBasicBlock *BrBB = FuncInfo.MBB;
765  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
766  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
767
768  // For now, just try the simplest case where it's fed by a compare.
769  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
770    if (isValueAvailable(CI)) {
771      Optional<PPC::Predicate> OptPPCPred = getComparePred(CI->getPredicate());
772      if (!OptPPCPred)
773        return false;
774
775      PPC::Predicate PPCPred = OptPPCPred.getValue();
776
777      // Take advantage of fall-through opportunities.
778      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
779        std::swap(TBB, FBB);
780        PPCPred = PPC::InvertPredicate(PPCPred);
781      }
782
783      unsigned CondReg = createResultReg(&PPC::CRRCRegClass);
784
785      if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
786                      CondReg))
787        return false;
788
789      BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCC))
790        .addImm(PPCPred).addReg(CondReg).addMBB(TBB);
791      finishCondBranch(BI->getParent(), TBB, FBB);
792      return true;
793    }
794  } else if (const ConstantInt *CI =
795             dyn_cast<ConstantInt>(BI->getCondition())) {
796    uint64_t Imm = CI->getZExtValue();
797    MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
798    fastEmitBranch(Target, DbgLoc);
799    return true;
800  }
801
802  // FIXME: ARM looks for a case where the block containing the compare
803  // has been split from the block containing the branch.  If this happens,
804  // there is a vreg available containing the result of the compare.  I'm
805  // not sure we can do much, as we've lost the predicate information with
806  // the compare instruction -- we have a 4-bit CR but don't know which bit
807  // to test here.
808  return false;
809}
810
811// Attempt to emit a compare of the two source values.  Signed and unsigned
812// comparisons are supported.  Return false if we can't handle it.
813bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
814                             bool IsZExt, unsigned DestReg) {
815  Type *Ty = SrcValue1->getType();
816  EVT SrcEVT = TLI.getValueType(DL, Ty, true);
817  if (!SrcEVT.isSimple())
818    return false;
819  MVT SrcVT = SrcEVT.getSimpleVT();
820
821  if (SrcVT == MVT::i1 && PPCSubTarget->useCRBits())
822    return false;
823
824  // See if operand 2 is an immediate encodeable in the compare.
825  // FIXME: Operands are not in canonical order at -O0, so an immediate
826  // operand in position 1 is a lost opportunity for now.  We are
827  // similar to ARM in this regard.
828  long Imm = 0;
829  bool UseImm = false;
830
831  // Only 16-bit integer constants can be represented in compares for
832  // PowerPC.  Others will be materialized into a register.
833  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(SrcValue2)) {
834    if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
835        SrcVT == MVT::i8 || SrcVT == MVT::i1) {
836      const APInt &CIVal = ConstInt->getValue();
837      Imm = (IsZExt) ? (long)CIVal.getZExtValue() : (long)CIVal.getSExtValue();
838      if ((IsZExt && isUInt<16>(Imm)) || (!IsZExt && isInt<16>(Imm)))
839        UseImm = true;
840    }
841  }
842
843  unsigned CmpOpc;
844  bool NeedsExt = false;
845  switch (SrcVT.SimpleTy) {
846    default: return false;
847    case MVT::f32:
848      CmpOpc = PPC::FCMPUS;
849      break;
850    case MVT::f64:
851      CmpOpc = PPC::FCMPUD;
852      break;
853    case MVT::i1:
854    case MVT::i8:
855    case MVT::i16:
856      NeedsExt = true;
857      // Intentional fall-through.
858    case MVT::i32:
859      if (!UseImm)
860        CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW;
861      else
862        CmpOpc = IsZExt ? PPC::CMPLWI : PPC::CMPWI;
863      break;
864    case MVT::i64:
865      if (!UseImm)
866        CmpOpc = IsZExt ? PPC::CMPLD : PPC::CMPD;
867      else
868        CmpOpc = IsZExt ? PPC::CMPLDI : PPC::CMPDI;
869      break;
870  }
871
872  unsigned SrcReg1 = getRegForValue(SrcValue1);
873  if (SrcReg1 == 0)
874    return false;
875
876  unsigned SrcReg2 = 0;
877  if (!UseImm) {
878    SrcReg2 = getRegForValue(SrcValue2);
879    if (SrcReg2 == 0)
880      return false;
881  }
882
883  if (NeedsExt) {
884    unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
885    if (!PPCEmitIntExt(SrcVT, SrcReg1, MVT::i32, ExtReg, IsZExt))
886      return false;
887    SrcReg1 = ExtReg;
888
889    if (!UseImm) {
890      unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
891      if (!PPCEmitIntExt(SrcVT, SrcReg2, MVT::i32, ExtReg, IsZExt))
892        return false;
893      SrcReg2 = ExtReg;
894    }
895  }
896
897  if (!UseImm)
898    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg)
899      .addReg(SrcReg1).addReg(SrcReg2);
900  else
901    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg)
902      .addReg(SrcReg1).addImm(Imm);
903
904  return true;
905}
906
907// Attempt to fast-select a floating-point extend instruction.
908bool PPCFastISel::SelectFPExt(const Instruction *I) {
909  Value *Src  = I->getOperand(0);
910  EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
911  EVT DestVT = TLI.getValueType(DL, I->getType(), true);
912
913  if (SrcVT != MVT::f32 || DestVT != MVT::f64)
914    return false;
915
916  unsigned SrcReg = getRegForValue(Src);
917  if (!SrcReg)
918    return false;
919
920  // No code is generated for a FP extend.
921  updateValueMap(I, SrcReg);
922  return true;
923}
924
925// Attempt to fast-select a floating-point truncate instruction.
926bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
927  Value *Src  = I->getOperand(0);
928  EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
929  EVT DestVT = TLI.getValueType(DL, I->getType(), true);
930
931  if (SrcVT != MVT::f64 || DestVT != MVT::f32)
932    return false;
933
934  unsigned SrcReg = getRegForValue(Src);
935  if (!SrcReg)
936    return false;
937
938  // Round the result to single precision.
939  unsigned DestReg = createResultReg(&PPC::F4RCRegClass);
940  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP), DestReg)
941    .addReg(SrcReg);
942
943  updateValueMap(I, DestReg);
944  return true;
945}
946
947// Move an i32 or i64 value in a GPR to an f64 value in an FPR.
948// FIXME: When direct register moves are implemented (see PowerISA 2.07),
949// those should be used instead of moving via a stack slot when the
950// subtarget permits.
951// FIXME: The code here is sloppy for the 4-byte case.  Can use a 4-byte
952// stack slot and 4-byte store/load sequence.  Or just sext the 4-byte
953// case to 8 bytes which produces tighter code but wastes stack space.
954unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
955                                     bool IsSigned) {
956
957  // If necessary, extend 32-bit int to 64-bit.
958  if (SrcVT == MVT::i32) {
959    unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
960    if (!PPCEmitIntExt(MVT::i32, SrcReg, MVT::i64, TmpReg, !IsSigned))
961      return 0;
962    SrcReg = TmpReg;
963  }
964
965  // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
966  Address Addr;
967  Addr.BaseType = Address::FrameIndexBase;
968  Addr.Base.FI = MFI.CreateStackObject(8, 8, false);
969
970  // Store the value from the GPR.
971  if (!PPCEmitStore(MVT::i64, SrcReg, Addr))
972    return 0;
973
974  // Load the integer value into an FPR.  The kind of load used depends
975  // on a number of conditions.
976  unsigned LoadOpc = PPC::LFD;
977
978  if (SrcVT == MVT::i32) {
979    if (!IsSigned) {
980      LoadOpc = PPC::LFIWZX;
981      Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
982    } else if (PPCSubTarget->hasLFIWAX()) {
983      LoadOpc = PPC::LFIWAX;
984      Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
985    }
986  }
987
988  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
989  unsigned ResultReg = 0;
990  if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc))
991    return 0;
992
993  return ResultReg;
994}
995
996// Attempt to fast-select an integer-to-floating-point conversion.
997// FIXME: Once fast-isel has better support for VSX, conversions using
998//        direct moves should be implemented.
999bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
1000  MVT DstVT;
1001  Type *DstTy = I->getType();
1002  if (!isTypeLegal(DstTy, DstVT))
1003    return false;
1004
1005  if (DstVT != MVT::f32 && DstVT != MVT::f64)
1006    return false;
1007
1008  Value *Src = I->getOperand(0);
1009  EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true);
1010  if (!SrcEVT.isSimple())
1011    return false;
1012
1013  MVT SrcVT = SrcEVT.getSimpleVT();
1014
1015  if (SrcVT != MVT::i8  && SrcVT != MVT::i16 &&
1016      SrcVT != MVT::i32 && SrcVT != MVT::i64)
1017    return false;
1018
1019  unsigned SrcReg = getRegForValue(Src);
1020  if (SrcReg == 0)
1021    return false;
1022
1023  // We can only lower an unsigned convert if we have the newer
1024  // floating-point conversion operations.
1025  if (!IsSigned && !PPCSubTarget->hasFPCVT())
1026    return false;
1027
1028  // FIXME: For now we require the newer floating-point conversion operations
1029  // (which are present only on P7 and A2 server models) when converting
1030  // to single-precision float.  Otherwise we have to generate a lot of
1031  // fiddly code to avoid double rounding.  If necessary, the fiddly code
1032  // can be found in PPCTargetLowering::LowerINT_TO_FP().
1033  if (DstVT == MVT::f32 && !PPCSubTarget->hasFPCVT())
1034    return false;
1035
1036  // Extend the input if necessary.
1037  if (SrcVT == MVT::i8 || SrcVT == MVT::i16) {
1038    unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
1039    if (!PPCEmitIntExt(SrcVT, SrcReg, MVT::i64, TmpReg, !IsSigned))
1040      return false;
1041    SrcVT = MVT::i64;
1042    SrcReg = TmpReg;
1043  }
1044
1045  // Move the integer value to an FPR.
1046  unsigned FPReg = PPCMoveToFPReg(SrcVT, SrcReg, IsSigned);
1047  if (FPReg == 0)
1048    return false;
1049
1050  // Determine the opcode for the conversion.
1051  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
1052  unsigned DestReg = createResultReg(RC);
1053  unsigned Opc;
1054
1055  if (DstVT == MVT::f32)
1056    Opc = IsSigned ? PPC::FCFIDS : PPC::FCFIDUS;
1057  else
1058    Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU;
1059
1060  // Generate the convert.
1061  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
1062    .addReg(FPReg);
1063
1064  updateValueMap(I, DestReg);
1065  return true;
1066}
1067
1068// Move the floating-point value in SrcReg into an integer destination
1069// register, and return the register (or zero if we can't handle it).
1070// FIXME: When direct register moves are implemented (see PowerISA 2.07),
1071// those should be used instead of moving via a stack slot when the
1072// subtarget permits.
1073unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
1074                                      unsigned SrcReg, bool IsSigned) {
1075  // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
1076  // Note that if have STFIWX available, we could use a 4-byte stack
1077  // slot for i32, but this being fast-isel we'll just go with the
1078  // easiest code gen possible.
1079  Address Addr;
1080  Addr.BaseType = Address::FrameIndexBase;
1081  Addr.Base.FI = MFI.CreateStackObject(8, 8, false);
1082
1083  // Store the value from the FPR.
1084  if (!PPCEmitStore(MVT::f64, SrcReg, Addr))
1085    return 0;
1086
1087  // Reload it into a GPR.  If we want an i32 on big endian, modify the
1088  // address to have a 4-byte offset so we load from the right place.
1089  if (VT == MVT::i32)
1090    Addr.Offset = (PPCSubTarget->isLittleEndian()) ? 0 : 4;
1091
1092  // Look at the currently assigned register for this instruction
1093  // to determine the required register class.
1094  unsigned AssignedReg = FuncInfo.ValueMap[I];
1095  const TargetRegisterClass *RC =
1096    AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;
1097
1098  unsigned ResultReg = 0;
1099  if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned))
1100    return 0;
1101
1102  return ResultReg;
1103}
1104
1105// Attempt to fast-select a floating-point-to-integer conversion.
1106// FIXME: Once fast-isel has better support for VSX, conversions using
1107//        direct moves should be implemented.
1108bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
1109  MVT DstVT, SrcVT;
1110  Type *DstTy = I->getType();
1111  if (!isTypeLegal(DstTy, DstVT))
1112    return false;
1113
1114  if (DstVT != MVT::i32 && DstVT != MVT::i64)
1115    return false;
1116
1117  // If we don't have FCTIDUZ and we need it, punt to SelectionDAG.
1118  if (DstVT == MVT::i64 && !IsSigned && !PPCSubTarget->hasFPCVT())
1119    return false;
1120
1121  Value *Src = I->getOperand(0);
1122  Type *SrcTy = Src->getType();
1123  if (!isTypeLegal(SrcTy, SrcVT))
1124    return false;
1125
1126  if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
1127    return false;
1128
1129  unsigned SrcReg = getRegForValue(Src);
1130  if (SrcReg == 0)
1131    return false;
1132
1133  // Convert f32 to f64 if necessary.  This is just a meaningless copy
1134  // to get the register class right.
1135  const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg);
1136  if (InRC == &PPC::F4RCRegClass) {
1137    unsigned TmpReg = createResultReg(&PPC::F8RCRegClass);
1138    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1139            TII.get(TargetOpcode::COPY), TmpReg)
1140      .addReg(SrcReg);
1141    SrcReg = TmpReg;
1142  }
1143
1144  // Determine the opcode for the conversion, which takes place
1145  // entirely within FPRs.
1146  unsigned DestReg = createResultReg(&PPC::F8RCRegClass);
1147  unsigned Opc;
1148
1149  if (DstVT == MVT::i32)
1150    if (IsSigned)
1151      Opc = PPC::FCTIWZ;
1152    else
1153      Opc = PPCSubTarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
1154  else
1155    Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;
1156
1157  // Generate the convert.
1158  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
1159    .addReg(SrcReg);
1160
1161  // Now move the integer value from a float register to an integer register.
1162  unsigned IntReg = PPCMoveToIntReg(I, DstVT, DestReg, IsSigned);
1163  if (IntReg == 0)
1164    return false;
1165
1166  updateValueMap(I, IntReg);
1167  return true;
1168}
1169
1170// Attempt to fast-select a binary integer operation that isn't already
1171// handled automatically.
1172bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
1173  EVT DestVT = TLI.getValueType(DL, I->getType(), true);
1174
1175  // We can get here in the case when we have a binary operation on a non-legal
1176  // type and the target independent selector doesn't know how to handle it.
1177  if (DestVT != MVT::i16 && DestVT != MVT::i8)
1178    return false;
1179
1180  // Look at the currently assigned register for this instruction
1181  // to determine the required register class.  If there is no register,
1182  // make a conservative choice (don't assign R0).
1183  unsigned AssignedReg = FuncInfo.ValueMap[I];
1184  const TargetRegisterClass *RC =
1185    (AssignedReg ? MRI.getRegClass(AssignedReg) :
1186     &PPC::GPRC_and_GPRC_NOR0RegClass);
1187  bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
1188
1189  unsigned Opc;
1190  switch (ISDOpcode) {
1191    default: return false;
1192    case ISD::ADD:
1193      Opc = IsGPRC ? PPC::ADD4 : PPC::ADD8;
1194      break;
1195    case ISD::OR:
1196      Opc = IsGPRC ? PPC::OR : PPC::OR8;
1197      break;
1198    case ISD::SUB:
1199      Opc = IsGPRC ? PPC::SUBF : PPC::SUBF8;
1200      break;
1201  }
1202
1203  unsigned ResultReg = createResultReg(RC ? RC : &PPC::G8RCRegClass);
1204  unsigned SrcReg1 = getRegForValue(I->getOperand(0));
1205  if (SrcReg1 == 0) return false;
1206
1207  // Handle case of small immediate operand.
1208  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(1))) {
1209    const APInt &CIVal = ConstInt->getValue();
1210    int Imm = (int)CIVal.getSExtValue();
1211    bool UseImm = true;
1212    if (isInt<16>(Imm)) {
1213      switch (Opc) {
1214        default:
1215          llvm_unreachable("Missing case!");
1216        case PPC::ADD4:
1217          Opc = PPC::ADDI;
1218          MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
1219          break;
1220        case PPC::ADD8:
1221          Opc = PPC::ADDI8;
1222          MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
1223          break;
1224        case PPC::OR:
1225          Opc = PPC::ORI;
1226          break;
1227        case PPC::OR8:
1228          Opc = PPC::ORI8;
1229          break;
1230        case PPC::SUBF:
1231          if (Imm == -32768)
1232            UseImm = false;
1233          else {
1234            Opc = PPC::ADDI;
1235            MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
1236            Imm = -Imm;
1237          }
1238          break;
1239        case PPC::SUBF8:
1240          if (Imm == -32768)
1241            UseImm = false;
1242          else {
1243            Opc = PPC::ADDI8;
1244            MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
1245            Imm = -Imm;
1246          }
1247          break;
1248      }
1249
1250      if (UseImm) {
1251        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
1252                ResultReg)
1253            .addReg(SrcReg1)
1254            .addImm(Imm);
1255        updateValueMap(I, ResultReg);
1256        return true;
1257      }
1258    }
1259  }
1260
1261  // Reg-reg case.
1262  unsigned SrcReg2 = getRegForValue(I->getOperand(1));
1263  if (SrcReg2 == 0) return false;
1264
1265  // Reverse operands for subtract-from.
1266  if (ISDOpcode == ISD::SUB)
1267    std::swap(SrcReg1, SrcReg2);
1268
1269  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
1270    .addReg(SrcReg1).addReg(SrcReg2);
1271  updateValueMap(I, ResultReg);
1272  return true;
1273}
1274
1275// Handle arguments to a call that we're attempting to fast-select.
1276// Return false if the arguments are too complex for us at the moment.
1277bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
1278                                  SmallVectorImpl<unsigned> &ArgRegs,
1279                                  SmallVectorImpl<MVT> &ArgVTs,
1280                                  SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
1281                                  SmallVectorImpl<unsigned> &RegArgs,
1282                                  CallingConv::ID CC,
1283                                  unsigned &NumBytes,
1284                                  bool IsVarArg) {
1285  SmallVector<CCValAssign, 16> ArgLocs;
1286  CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, *Context);
1287
1288  // Reserve space for the linkage area on the stack.
1289  unsigned LinkageSize = PPCSubTarget->getFrameLowering()->getLinkageSize();
1290  CCInfo.AllocateStack(LinkageSize, 8);
1291
1292  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS);
1293
1294  // Bail out if we can't handle any of the arguments.
1295  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1296    CCValAssign &VA = ArgLocs[I];
1297    MVT ArgVT = ArgVTs[VA.getValNo()];
1298
1299    // Skip vector arguments for now, as well as long double and
1300    // uint128_t, and anything that isn't passed in a register.
1301    if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 || ArgVT == MVT::i1 ||
1302        !VA.isRegLoc() || VA.needsCustom())
1303      return false;
1304
1305    // Skip bit-converted arguments for now.
1306    if (VA.getLocInfo() == CCValAssign::BCvt)
1307      return false;
1308  }
1309
1310  // Get a count of how many bytes are to be pushed onto the stack.
1311  NumBytes = CCInfo.getNextStackOffset();
1312
1313  // The prolog code of the callee may store up to 8 GPR argument registers to
1314  // the stack, allowing va_start to index over them in memory if its varargs.
1315  // Because we cannot tell if this is needed on the caller side, we have to
1316  // conservatively assume that it is needed.  As such, make sure we have at
1317  // least enough stack space for the caller to store the 8 GPRs.
1318  // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
1319  NumBytes = std::max(NumBytes, LinkageSize + 64);
1320
1321  // Issue CALLSEQ_START.
1322  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1323          TII.get(TII.getCallFrameSetupOpcode()))
1324    .addImm(NumBytes);
1325
1326  // Prepare to assign register arguments.  Every argument uses up a
1327  // GPR protocol register even if it's passed in a floating-point
1328  // register (unless we're using the fast calling convention).
1329  unsigned NextGPR = PPC::X3;
1330  unsigned NextFPR = PPC::F1;
1331
1332  // Process arguments.
1333  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1334    CCValAssign &VA = ArgLocs[I];
1335    unsigned Arg = ArgRegs[VA.getValNo()];
1336    MVT ArgVT = ArgVTs[VA.getValNo()];
1337
1338    // Handle argument promotion and bitcasts.
1339    switch (VA.getLocInfo()) {
1340      default:
1341        llvm_unreachable("Unknown loc info!");
1342      case CCValAssign::Full:
1343        break;
1344      case CCValAssign::SExt: {
1345        MVT DestVT = VA.getLocVT();
1346        const TargetRegisterClass *RC =
1347          (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1348        unsigned TmpReg = createResultReg(RC);
1349        if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/false))
1350          llvm_unreachable("Failed to emit a sext!");
1351        ArgVT = DestVT;
1352        Arg = TmpReg;
1353        break;
1354      }
1355      case CCValAssign::AExt:
1356      case CCValAssign::ZExt: {
1357        MVT DestVT = VA.getLocVT();
1358        const TargetRegisterClass *RC =
1359          (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1360        unsigned TmpReg = createResultReg(RC);
1361        if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/true))
1362          llvm_unreachable("Failed to emit a zext!");
1363        ArgVT = DestVT;
1364        Arg = TmpReg;
1365        break;
1366      }
1367      case CCValAssign::BCvt: {
1368        // FIXME: Not yet handled.
1369        llvm_unreachable("Should have bailed before getting here!");
1370        break;
1371      }
1372    }
1373
1374    // Copy this argument to the appropriate register.
1375    unsigned ArgReg;
1376    if (ArgVT == MVT::f32 || ArgVT == MVT::f64) {
1377      ArgReg = NextFPR++;
1378      if (CC != CallingConv::Fast)
1379        ++NextGPR;
1380    } else
1381      ArgReg = NextGPR++;
1382
1383    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1384            TII.get(TargetOpcode::COPY), ArgReg).addReg(Arg);
1385    RegArgs.push_back(ArgReg);
1386  }
1387
1388  return true;
1389}
1390
1391// For a call that we've determined we can fast-select, finish the
1392// call sequence and generate a copy to obtain the return value (if any).
1393bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes) {
1394  CallingConv::ID CC = CLI.CallConv;
1395
1396  // Issue CallSEQ_END.
1397  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1398          TII.get(TII.getCallFrameDestroyOpcode()))
1399    .addImm(NumBytes).addImm(0);
1400
1401  // Next, generate a copy to obtain the return value.
1402  // FIXME: No multi-register return values yet, though I don't foresee
1403  // any real difficulties there.
1404  if (RetVT != MVT::isVoid) {
1405    SmallVector<CCValAssign, 16> RVLocs;
1406    CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
1407    CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
1408    CCValAssign &VA = RVLocs[0];
1409    assert(RVLocs.size() == 1 && "No support for multi-reg return values!");
1410    assert(VA.isRegLoc() && "Can only return in registers!");
1411
1412    MVT DestVT = VA.getValVT();
1413    MVT CopyVT = DestVT;
1414
1415    // Ints smaller than a register still arrive in a full 64-bit
1416    // register, so make sure we recognize this.
1417    if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32)
1418      CopyVT = MVT::i64;
1419
1420    unsigned SourcePhysReg = VA.getLocReg();
1421    unsigned ResultReg = 0;
1422
1423    if (RetVT == CopyVT) {
1424      const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT);
1425      ResultReg = createResultReg(CpyRC);
1426
1427      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1428              TII.get(TargetOpcode::COPY), ResultReg)
1429        .addReg(SourcePhysReg);
1430
1431    // If necessary, round the floating result to single precision.
1432    } else if (CopyVT == MVT::f64) {
1433      ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
1434      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP),
1435              ResultReg).addReg(SourcePhysReg);
1436
1437    // If only the low half of a general register is needed, generate
1438    // a GPRC copy instead of a G8RC copy.  (EXTRACT_SUBREG can't be
1439    // used along the fast-isel path (not lowered), and downstream logic
1440    // also doesn't like a direct subreg copy on a physical reg.)
1441    } else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) {
1442      ResultReg = createResultReg(&PPC::GPRCRegClass);
1443      // Convert physical register from G8RC to GPRC.
1444      SourcePhysReg -= PPC::X0 - PPC::R0;
1445      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1446              TII.get(TargetOpcode::COPY), ResultReg)
1447        .addReg(SourcePhysReg);
1448    }
1449
1450    assert(ResultReg && "ResultReg unset!");
1451    CLI.InRegs.push_back(SourcePhysReg);
1452    CLI.ResultReg = ResultReg;
1453    CLI.NumResultRegs = 1;
1454  }
1455
1456  return true;
1457}
1458
1459bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
1460  CallingConv::ID CC  = CLI.CallConv;
1461  bool IsTailCall     = CLI.IsTailCall;
1462  bool IsVarArg       = CLI.IsVarArg;
1463  const Value *Callee = CLI.Callee;
1464  const MCSymbol *Symbol = CLI.Symbol;
1465
1466  if (!Callee && !Symbol)
1467    return false;
1468
1469  // Allow SelectionDAG isel to handle tail calls.
1470  if (IsTailCall)
1471    return false;
1472
1473  // Let SDISel handle vararg functions.
1474  if (IsVarArg)
1475    return false;
1476
1477  // Handle simple calls for now, with legal return types and
1478  // those that can be extended.
1479  Type *RetTy = CLI.RetTy;
1480  MVT RetVT;
1481  if (RetTy->isVoidTy())
1482    RetVT = MVT::isVoid;
1483  else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
1484           RetVT != MVT::i8)
1485    return false;
1486  else if (RetVT == MVT::i1 && PPCSubTarget->useCRBits())
1487    // We can't handle boolean returns when CR bits are in use.
1488    return false;
1489
1490  // FIXME: No multi-register return values yet.
1491  if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 &&
1492      RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 &&
1493      RetVT != MVT::f64) {
1494    SmallVector<CCValAssign, 16> RVLocs;
1495    CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context);
1496    CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
1497    if (RVLocs.size() > 1)
1498      return false;
1499  }
1500
1501  // Bail early if more than 8 arguments, as we only currently
1502  // handle arguments passed in registers.
1503  unsigned NumArgs = CLI.OutVals.size();
1504  if (NumArgs > 8)
1505    return false;
1506
1507  // Set up the argument vectors.
1508  SmallVector<Value*, 8> Args;
1509  SmallVector<unsigned, 8> ArgRegs;
1510  SmallVector<MVT, 8> ArgVTs;
1511  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
1512
1513  Args.reserve(NumArgs);
1514  ArgRegs.reserve(NumArgs);
1515  ArgVTs.reserve(NumArgs);
1516  ArgFlags.reserve(NumArgs);
1517
1518  for (unsigned i = 0, ie = NumArgs; i != ie; ++i) {
1519    // Only handle easy calls for now.  It would be reasonably easy
1520    // to handle <= 8-byte structures passed ByVal in registers, but we
1521    // have to ensure they are right-justified in the register.
1522    ISD::ArgFlagsTy Flags = CLI.OutFlags[i];
1523    if (Flags.isInReg() || Flags.isSRet() || Flags.isNest() || Flags.isByVal())
1524      return false;
1525
1526    Value *ArgValue = CLI.OutVals[i];
1527    Type *ArgTy = ArgValue->getType();
1528    MVT ArgVT;
1529    if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8)
1530      return false;
1531
1532    if (ArgVT.isVector())
1533      return false;
1534
1535    unsigned Arg = getRegForValue(ArgValue);
1536    if (Arg == 0)
1537      return false;
1538
1539    Args.push_back(ArgValue);
1540    ArgRegs.push_back(Arg);
1541    ArgVTs.push_back(ArgVT);
1542    ArgFlags.push_back(Flags);
1543  }
1544
1545  // Process the arguments.
1546  SmallVector<unsigned, 8> RegArgs;
1547  unsigned NumBytes;
1548
1549  if (!processCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
1550                       RegArgs, CC, NumBytes, IsVarArg))
1551    return false;
1552
1553  MachineInstrBuilder MIB;
1554  // FIXME: No handling for function pointers yet.  This requires
1555  // implementing the function descriptor (OPD) setup.
1556  const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
1557  if (!GV) {
1558    // patchpoints are a special case; they always dispatch to a pointer value.
1559    // However, we don't actually want to generate the indirect call sequence
1560    // here (that will be generated, as necessary, during asm printing), and
1561    // the call we generate here will be erased by FastISel::selectPatchpoint,
1562    // so don't try very hard...
1563    if (CLI.IsPatchPoint)
1564      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::NOP));
1565    else
1566      return false;
1567  } else {
1568    // Build direct call with NOP for TOC restore.
1569    // FIXME: We can and should optimize away the NOP for local calls.
1570    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1571                  TII.get(PPC::BL8_NOP));
1572    // Add callee.
1573    MIB.addGlobalAddress(GV);
1574  }
1575
1576  // Add implicit physical register uses to the call.
1577  for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II)
1578    MIB.addReg(RegArgs[II], RegState::Implicit);
1579
1580  // Direct calls, in both the ELF V1 and V2 ABIs, need the TOC register live
1581  // into the call.
1582  PPCFuncInfo->setUsesTOCBasePtr();
1583  MIB.addReg(PPC::X2, RegState::Implicit);
1584
1585  // Add a register mask with the call-preserved registers.  Proper
1586  // defs for return values will be added by setPhysRegsDeadExcept().
1587  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
1588
1589  CLI.Call = MIB;
1590
1591  // Finish off the call including any return values.
1592  return finishCall(RetVT, CLI, NumBytes);
1593}
1594
1595// Attempt to fast-select a return instruction.
1596bool PPCFastISel::SelectRet(const Instruction *I) {
1597
1598  if (!FuncInfo.CanLowerReturn)
1599    return false;
1600
1601  if (TLI.supportSplitCSR(FuncInfo.MF))
1602    return false;
1603
1604  const ReturnInst *Ret = cast<ReturnInst>(I);
1605  const Function &F = *I->getParent()->getParent();
1606
1607  // Build a list of return value registers.
1608  SmallVector<unsigned, 4> RetRegs;
1609  CallingConv::ID CC = F.getCallingConv();
1610
1611  if (Ret->getNumOperands() > 0) {
1612    SmallVector<ISD::OutputArg, 4> Outs;
1613    GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
1614
1615    // Analyze operands of the call, assigning locations to each operand.
1616    SmallVector<CCValAssign, 16> ValLocs;
1617    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, *Context);
1618    CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS);
1619    const Value *RV = Ret->getOperand(0);
1620
1621    // FIXME: Only one output register for now.
1622    if (ValLocs.size() > 1)
1623      return false;
1624
1625    // Special case for returning a constant integer of any size - materialize
1626    // the constant as an i64 and copy it to the return register.
1627    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RV)) {
1628      CCValAssign &VA = ValLocs[0];
1629
1630      unsigned RetReg = VA.getLocReg();
1631      // We still need to worry about properly extending the sign. For example,
1632      // we could have only a single bit or a constant that needs zero
1633      // extension rather than sign extension. Make sure we pass the return
1634      // value extension property to integer materialization.
1635      unsigned SrcReg =
1636          PPCMaterializeInt(CI, MVT::i64, VA.getLocInfo() != CCValAssign::ZExt);
1637
1638      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1639            TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg);
1640
1641      RetRegs.push_back(RetReg);
1642
1643    } else {
1644      unsigned Reg = getRegForValue(RV);
1645
1646      if (Reg == 0)
1647        return false;
1648
1649      // Copy the result values into the output registers.
1650      for (unsigned i = 0; i < ValLocs.size(); ++i) {
1651
1652        CCValAssign &VA = ValLocs[i];
1653        assert(VA.isRegLoc() && "Can only return in registers!");
1654        RetRegs.push_back(VA.getLocReg());
1655        unsigned SrcReg = Reg + VA.getValNo();
1656
1657        EVT RVEVT = TLI.getValueType(DL, RV->getType());
1658        if (!RVEVT.isSimple())
1659          return false;
1660        MVT RVVT = RVEVT.getSimpleVT();
1661        MVT DestVT = VA.getLocVT();
1662
1663        if (RVVT != DestVT && RVVT != MVT::i8 &&
1664            RVVT != MVT::i16 && RVVT != MVT::i32)
1665          return false;
1666
1667        if (RVVT != DestVT) {
1668          switch (VA.getLocInfo()) {
1669            default:
1670              llvm_unreachable("Unknown loc info!");
1671            case CCValAssign::Full:
1672              llvm_unreachable("Full value assign but types don't match?");
1673            case CCValAssign::AExt:
1674            case CCValAssign::ZExt: {
1675              const TargetRegisterClass *RC =
1676                (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1677              unsigned TmpReg = createResultReg(RC);
1678              if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, true))
1679                return false;
1680              SrcReg = TmpReg;
1681              break;
1682            }
1683            case CCValAssign::SExt: {
1684              const TargetRegisterClass *RC =
1685                (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1686              unsigned TmpReg = createResultReg(RC);
1687              if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, false))
1688                return false;
1689              SrcReg = TmpReg;
1690              break;
1691            }
1692          }
1693        }
1694
1695        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1696                TII.get(TargetOpcode::COPY), RetRegs[i])
1697          .addReg(SrcReg);
1698      }
1699    }
1700  }
1701
1702  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1703                                    TII.get(PPC::BLR8));
1704
1705  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
1706    MIB.addReg(RetRegs[i], RegState::Implicit);
1707
1708  return true;
1709}
1710
1711// Attempt to emit an integer extend of SrcReg into DestReg.  Both
1712// signed and zero extensions are supported.  Return false if we
1713// can't handle it.
1714bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
1715                                unsigned DestReg, bool IsZExt) {
1716  if (DestVT != MVT::i32 && DestVT != MVT::i64)
1717    return false;
1718  if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32)
1719    return false;
1720
1721  // Signed extensions use EXTSB, EXTSH, EXTSW.
1722  if (!IsZExt) {
1723    unsigned Opc;
1724    if (SrcVT == MVT::i8)
1725      Opc = (DestVT == MVT::i32) ? PPC::EXTSB : PPC::EXTSB8_32_64;
1726    else if (SrcVT == MVT::i16)
1727      Opc = (DestVT == MVT::i32) ? PPC::EXTSH : PPC::EXTSH8_32_64;
1728    else {
1729      assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??");
1730      Opc = PPC::EXTSW_32_64;
1731    }
1732    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
1733      .addReg(SrcReg);
1734
1735  // Unsigned 32-bit extensions use RLWINM.
1736  } else if (DestVT == MVT::i32) {
1737    unsigned MB;
1738    if (SrcVT == MVT::i8)
1739      MB = 24;
1740    else {
1741      assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??");
1742      MB = 16;
1743    }
1744    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLWINM),
1745            DestReg)
1746      .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB).addImm(/*ME=*/31);
1747
1748  // Unsigned 64-bit extensions use RLDICL (with a 32-bit source).
1749  } else {
1750    unsigned MB;
1751    if (SrcVT == MVT::i8)
1752      MB = 56;
1753    else if (SrcVT == MVT::i16)
1754      MB = 48;
1755    else
1756      MB = 32;
1757    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1758            TII.get(PPC::RLDICL_32_64), DestReg)
1759      .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB);
1760  }
1761
1762  return true;
1763}
1764
1765// Attempt to fast-select an indirect branch instruction.
1766bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
1767  unsigned AddrReg = getRegForValue(I->getOperand(0));
1768  if (AddrReg == 0)
1769    return false;
1770
1771  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::MTCTR8))
1772    .addReg(AddrReg);
1773  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCTR8));
1774
1775  const IndirectBrInst *IB = cast<IndirectBrInst>(I);
1776  for (const BasicBlock *SuccBB : IB->successors())
1777    FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[SuccBB]);
1778
1779  return true;
1780}
1781
1782// Attempt to fast-select an integer truncate instruction.
1783bool PPCFastISel::SelectTrunc(const Instruction *I) {
1784  Value *Src  = I->getOperand(0);
1785  EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
1786  EVT DestVT = TLI.getValueType(DL, I->getType(), true);
1787
1788  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16)
1789    return false;
1790
1791  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
1792    return false;
1793
1794  unsigned SrcReg = getRegForValue(Src);
1795  if (!SrcReg)
1796    return false;
1797
1798  // The only interesting case is when we need to switch register classes.
1799  if (SrcVT == MVT::i64) {
1800    unsigned ResultReg = createResultReg(&PPC::GPRCRegClass);
1801    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1802            TII.get(TargetOpcode::COPY),
1803            ResultReg).addReg(SrcReg, 0, PPC::sub_32);
1804    SrcReg = ResultReg;
1805  }
1806
1807  updateValueMap(I, SrcReg);
1808  return true;
1809}
1810
1811// Attempt to fast-select an integer extend instruction.
1812bool PPCFastISel::SelectIntExt(const Instruction *I) {
1813  Type *DestTy = I->getType();
1814  Value *Src = I->getOperand(0);
1815  Type *SrcTy = Src->getType();
1816
1817  bool IsZExt = isa<ZExtInst>(I);
1818  unsigned SrcReg = getRegForValue(Src);
1819  if (!SrcReg) return false;
1820
1821  EVT SrcEVT, DestEVT;
1822  SrcEVT = TLI.getValueType(DL, SrcTy, true);
1823  DestEVT = TLI.getValueType(DL, DestTy, true);
1824  if (!SrcEVT.isSimple())
1825    return false;
1826  if (!DestEVT.isSimple())
1827    return false;
1828
1829  MVT SrcVT = SrcEVT.getSimpleVT();
1830  MVT DestVT = DestEVT.getSimpleVT();
1831
1832  // If we know the register class needed for the result of this
1833  // instruction, use it.  Otherwise pick the register class of the
1834  // correct size that does not contain X0/R0, since we don't know
1835  // whether downstream uses permit that assignment.
1836  unsigned AssignedReg = FuncInfo.ValueMap[I];
1837  const TargetRegisterClass *RC =
1838    (AssignedReg ? MRI.getRegClass(AssignedReg) :
1839     (DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
1840      &PPC::GPRC_and_GPRC_NOR0RegClass));
1841  unsigned ResultReg = createResultReg(RC);
1842
1843  if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt))
1844    return false;
1845
1846  updateValueMap(I, ResultReg);
1847  return true;
1848}
1849
1850// Attempt to fast-select an instruction that wasn't handled by
1851// the table-generated machinery.
1852bool PPCFastISel::fastSelectInstruction(const Instruction *I) {
1853
1854  switch (I->getOpcode()) {
1855    case Instruction::Load:
1856      return SelectLoad(I);
1857    case Instruction::Store:
1858      return SelectStore(I);
1859    case Instruction::Br:
1860      return SelectBranch(I);
1861    case Instruction::IndirectBr:
1862      return SelectIndirectBr(I);
1863    case Instruction::FPExt:
1864      return SelectFPExt(I);
1865    case Instruction::FPTrunc:
1866      return SelectFPTrunc(I);
1867    case Instruction::SIToFP:
1868      return SelectIToFP(I, /*IsSigned*/ true);
1869    case Instruction::UIToFP:
1870      return SelectIToFP(I, /*IsSigned*/ false);
1871    case Instruction::FPToSI:
1872      return SelectFPToI(I, /*IsSigned*/ true);
1873    case Instruction::FPToUI:
1874      return SelectFPToI(I, /*IsSigned*/ false);
1875    case Instruction::Add:
1876      return SelectBinaryIntOp(I, ISD::ADD);
1877    case Instruction::Or:
1878      return SelectBinaryIntOp(I, ISD::OR);
1879    case Instruction::Sub:
1880      return SelectBinaryIntOp(I, ISD::SUB);
1881    case Instruction::Call:
1882      return selectCall(I);
1883    case Instruction::Ret:
1884      return SelectRet(I);
1885    case Instruction::Trunc:
1886      return SelectTrunc(I);
1887    case Instruction::ZExt:
1888    case Instruction::SExt:
1889      return SelectIntExt(I);
1890    // Here add other flavors of Instruction::XXX that automated
1891    // cases don't catch.  For example, switches are terminators
1892    // that aren't yet handled.
1893    default:
1894      break;
1895  }
1896  return false;
1897}
1898
1899// Materialize a floating-point constant into a register, and return
1900// the register number (or zero if we failed to handle it).
1901unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
1902  // No plans to handle long double here.
1903  if (VT != MVT::f32 && VT != MVT::f64)
1904    return 0;
1905
1906  // All FP constants are loaded from the constant pool.
1907  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
1908  assert(Align > 0 && "Unexpectedly missing alignment information!");
1909  unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
1910  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
1911  CodeModel::Model CModel = TM.getCodeModel();
1912
1913  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
1914      MachinePointerInfo::getConstantPool(*FuncInfo.MF),
1915      MachineMemOperand::MOLoad, (VT == MVT::f32) ? 4 : 8, Align);
1916
1917  unsigned Opc = (VT == MVT::f32) ? PPC::LFS : PPC::LFD;
1918  unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
1919
1920  PPCFuncInfo->setUsesTOCBasePtr();
1921  // For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)).
1922  if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault) {
1923    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocCPT),
1924            TmpReg)
1925      .addConstantPoolIndex(Idx).addReg(PPC::X2);
1926    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
1927      .addImm(0).addReg(TmpReg).addMemOperand(MMO);
1928  } else {
1929    // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA(X2, Idx)).
1930    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA),
1931            TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx);
1932    // But for large code model, we must generate a LDtocL followed
1933    // by the LF[SD].
1934    if (CModel == CodeModel::Large) {
1935      unsigned TmpReg2 = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
1936      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
1937              TmpReg2).addConstantPoolIndex(Idx).addReg(TmpReg);
1938      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
1939        .addImm(0).addReg(TmpReg2);
1940    } else
1941      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
1942        .addConstantPoolIndex(Idx, 0, PPCII::MO_TOC_LO)
1943        .addReg(TmpReg)
1944        .addMemOperand(MMO);
1945  }
1946
1947  return DestReg;
1948}
1949
1950// Materialize the address of a global value into a register, and return
1951// the register number (or zero if we failed to handle it).
1952unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
1953  assert(VT == MVT::i64 && "Non-address!");
1954  const TargetRegisterClass *RC = &PPC::G8RC_and_G8RC_NOX0RegClass;
1955  unsigned DestReg = createResultReg(RC);
1956
1957  // Global values may be plain old object addresses, TLS object
1958  // addresses, constant pool entries, or jump tables.  How we generate
1959  // code for these may depend on small, medium, or large code model.
1960  CodeModel::Model CModel = TM.getCodeModel();
1961
1962  // FIXME: Jump tables are not yet required because fast-isel doesn't
1963  // handle switches; if that changes, we need them as well.  For now,
1964  // what follows assumes everything's a generic (or TLS) global address.
1965
1966  // FIXME: We don't yet handle the complexity of TLS.
1967  if (GV->isThreadLocal())
1968    return 0;
1969
1970  PPCFuncInfo->setUsesTOCBasePtr();
1971  // For small code model, generate a simple TOC load.
1972  if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault)
1973    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtoc),
1974            DestReg)
1975        .addGlobalAddress(GV)
1976        .addReg(PPC::X2);
1977  else {
1978    // If the address is an externally defined symbol, a symbol with common
1979    // or externally available linkage, a non-local function address, or a
1980    // jump table address (not yet needed), or if we are generating code
1981    // for large code model, we generate:
1982    //       LDtocL(GV, ADDIStocHA(%X2, GV))
1983    // Otherwise we generate:
1984    //       ADDItocL(ADDIStocHA(%X2, GV), GV)
1985    // Either way, start with the ADDIStocHA:
1986    unsigned HighPartReg = createResultReg(RC);
1987    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA),
1988            HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);
1989
1990    unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV);
1991    if (GVFlags & PPCII::MO_NLP_FLAG) {
1992      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
1993              DestReg).addGlobalAddress(GV).addReg(HighPartReg);
1994    } else {
1995      // Otherwise generate the ADDItocL.
1996      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDItocL),
1997              DestReg).addReg(HighPartReg).addGlobalAddress(GV);
1998    }
1999  }
2000
2001  return DestReg;
2002}
2003
2004// Materialize a 32-bit integer constant into a register, and return
2005// the register number (or zero if we failed to handle it).
2006unsigned PPCFastISel::PPCMaterialize32BitInt(int64_t Imm,
2007                                             const TargetRegisterClass *RC) {
2008  unsigned Lo = Imm & 0xFFFF;
2009  unsigned Hi = (Imm >> 16) & 0xFFFF;
2010
2011  unsigned ResultReg = createResultReg(RC);
2012  bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
2013
2014  if (isInt<16>(Imm))
2015    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2016            TII.get(IsGPRC ? PPC::LI : PPC::LI8), ResultReg)
2017      .addImm(Imm);
2018  else if (Lo) {
2019    // Both Lo and Hi have nonzero bits.
2020    unsigned TmpReg = createResultReg(RC);
2021    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2022            TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), TmpReg)
2023      .addImm(Hi);
2024    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2025            TII.get(IsGPRC ? PPC::ORI : PPC::ORI8), ResultReg)
2026      .addReg(TmpReg).addImm(Lo);
2027  } else
2028    // Just Hi bits.
2029    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2030            TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), ResultReg)
2031      .addImm(Hi);
2032
2033  return ResultReg;
2034}
2035
2036// Materialize a 64-bit integer constant into a register, and return
2037// the register number (or zero if we failed to handle it).
2038unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
2039                                             const TargetRegisterClass *RC) {
2040  unsigned Remainder = 0;
2041  unsigned Shift = 0;
2042
2043  // If the value doesn't fit in 32 bits, see if we can shift it
2044  // so that it fits in 32 bits.
2045  if (!isInt<32>(Imm)) {
2046    Shift = countTrailingZeros<uint64_t>(Imm);
2047    int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
2048
2049    if (isInt<32>(ImmSh))
2050      Imm = ImmSh;
2051    else {
2052      Remainder = Imm;
2053      Shift = 32;
2054      Imm >>= 32;
2055    }
2056  }
2057
2058  // Handle the high-order 32 bits (if shifted) or the whole 32 bits
2059  // (if not shifted).
2060  unsigned TmpReg1 = PPCMaterialize32BitInt(Imm, RC);
2061  if (!Shift)
2062    return TmpReg1;
2063
2064  // If upper 32 bits were not zero, we've built them and need to shift
2065  // them into place.
2066  unsigned TmpReg2;
2067  if (Imm) {
2068    TmpReg2 = createResultReg(RC);
2069    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLDICR),
2070            TmpReg2).addReg(TmpReg1).addImm(Shift).addImm(63 - Shift);
2071  } else
2072    TmpReg2 = TmpReg1;
2073
2074  unsigned TmpReg3, Hi, Lo;
2075  if ((Hi = (Remainder >> 16) & 0xFFFF)) {
2076    TmpReg3 = createResultReg(RC);
2077    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORIS8),
2078            TmpReg3).addReg(TmpReg2).addImm(Hi);
2079  } else
2080    TmpReg3 = TmpReg2;
2081
2082  if ((Lo = Remainder & 0xFFFF)) {
2083    unsigned ResultReg = createResultReg(RC);
2084    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORI8),
2085            ResultReg).addReg(TmpReg3).addImm(Lo);
2086    return ResultReg;
2087  }
2088
2089  return TmpReg3;
2090}
2091
2092// Materialize an integer constant into a register, and return
2093// the register number (or zero if we failed to handle it).
2094unsigned PPCFastISel::PPCMaterializeInt(const ConstantInt *CI, MVT VT,
2095                                        bool UseSExt) {
2096  // If we're using CR bit registers for i1 values, handle that as a special
2097  // case first.
2098  if (VT == MVT::i1 && PPCSubTarget->useCRBits()) {
2099    unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass);
2100    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2101            TII.get(CI->isZero() ? PPC::CRUNSET : PPC::CRSET), ImmReg);
2102    return ImmReg;
2103  }
2104
2105  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
2106      VT != MVT::i1)
2107    return 0;
2108
2109  const TargetRegisterClass *RC =
2110      ((VT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass);
2111  int64_t Imm = UseSExt ? CI->getSExtValue() : CI->getZExtValue();
2112
2113  // If the constant is in range, use a load-immediate.
2114  // Since LI will sign extend the constant we need to make sure that for
2115  // our zeroext constants that the sign extended constant fits into 16-bits -
2116  // a range of 0..0x7fff.
2117  if (isInt<16>(Imm)) {
2118    unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
2119    unsigned ImmReg = createResultReg(RC);
2120    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg)
2121        .addImm(Imm);
2122    return ImmReg;
2123  }
2124
2125  // Construct the constant piecewise.
2126  if (VT == MVT::i64)
2127    return PPCMaterialize64BitInt(Imm, RC);
2128  else if (VT == MVT::i32)
2129    return PPCMaterialize32BitInt(Imm, RC);
2130
2131  return 0;
2132}
2133
2134// Materialize a constant into a register, and return the register
2135// number (or zero if we failed to handle it).
2136unsigned PPCFastISel::fastMaterializeConstant(const Constant *C) {
2137  EVT CEVT = TLI.getValueType(DL, C->getType(), true);
2138
2139  // Only handle simple types.
2140  if (!CEVT.isSimple()) return 0;
2141  MVT VT = CEVT.getSimpleVT();
2142
2143  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
2144    return PPCMaterializeFP(CFP, VT);
2145  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
2146    return PPCMaterializeGV(GV, VT);
2147  else if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
2148    return PPCMaterializeInt(CI, VT, VT != MVT::i1);
2149
2150  return 0;
2151}
2152
2153// Materialize the address created by an alloca into a register, and
2154// return the register number (or zero if we failed to handle it).
2155unsigned PPCFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
2156  // Don't handle dynamic allocas.
2157  if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
2158
2159  MVT VT;
2160  if (!isLoadTypeLegal(AI->getType(), VT)) return 0;
2161
2162  DenseMap<const AllocaInst*, int>::iterator SI =
2163    FuncInfo.StaticAllocaMap.find(AI);
2164
2165  if (SI != FuncInfo.StaticAllocaMap.end()) {
2166    unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
2167    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8),
2168            ResultReg).addFrameIndex(SI->second).addImm(0);
2169    return ResultReg;
2170  }
2171
2172  return 0;
2173}
2174
2175// Fold loads into extends when possible.
2176// FIXME: We can have multiple redundant extend/trunc instructions
2177// following a load.  The folding only picks up one.  Extend this
2178// to check subsequent instructions for the same pattern and remove
2179// them.  Thus ResultReg should be the def reg for the last redundant
2180// instruction in a chain, and all intervening instructions can be
2181// removed from parent.  Change test/CodeGen/PowerPC/fast-isel-fold.ll
2182// to add ELF64-NOT: rldicl to the appropriate tests when this works.
2183bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
2184                                      const LoadInst *LI) {
2185  // Verify we have a legal type before going any further.
2186  MVT VT;
2187  if (!isLoadTypeLegal(LI->getType(), VT))
2188    return false;
2189
2190  // Combine load followed by zero- or sign-extend.
2191  bool IsZExt = false;
2192  switch(MI->getOpcode()) {
2193    default:
2194      return false;
2195
2196    case PPC::RLDICL:
2197    case PPC::RLDICL_32_64: {
2198      IsZExt = true;
2199      unsigned MB = MI->getOperand(3).getImm();
2200      if ((VT == MVT::i8 && MB <= 56) ||
2201          (VT == MVT::i16 && MB <= 48) ||
2202          (VT == MVT::i32 && MB <= 32))
2203        break;
2204      return false;
2205    }
2206
2207    case PPC::RLWINM:
2208    case PPC::RLWINM8: {
2209      IsZExt = true;
2210      unsigned MB = MI->getOperand(3).getImm();
2211      if ((VT == MVT::i8 && MB <= 24) ||
2212          (VT == MVT::i16 && MB <= 16))
2213        break;
2214      return false;
2215    }
2216
2217    case PPC::EXTSB:
2218    case PPC::EXTSB8:
2219    case PPC::EXTSB8_32_64:
2220      /* There is no sign-extending load-byte instruction. */
2221      return false;
2222
2223    case PPC::EXTSH:
2224    case PPC::EXTSH8:
2225    case PPC::EXTSH8_32_64: {
2226      if (VT != MVT::i16 && VT != MVT::i8)
2227        return false;
2228      break;
2229    }
2230
2231    case PPC::EXTSW:
2232    case PPC::EXTSW_32_64: {
2233      if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8)
2234        return false;
2235      break;
2236    }
2237  }
2238
2239  // See if we can handle this address.
2240  Address Addr;
2241  if (!PPCComputeAddress(LI->getOperand(0), Addr))
2242    return false;
2243
2244  unsigned ResultReg = MI->getOperand(0).getReg();
2245
2246  if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt))
2247    return false;
2248
2249  MI->eraseFromParent();
2250  return true;
2251}
2252
2253// Attempt to lower call arguments in a faster way than done by
2254// the selection DAG code.
2255bool PPCFastISel::fastLowerArguments() {
2256  // Defer to normal argument lowering for now.  It's reasonably
2257  // efficient.  Consider doing something like ARM to handle the
2258  // case where all args fit in registers, no varargs, no float
2259  // or vector args.
2260  return false;
2261}
2262
2263// Handle materializing integer constants into a register.  This is not
2264// automatically generated for PowerPC, so must be explicitly created here.
2265unsigned PPCFastISel::fastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
2266
2267  if (Opc != ISD::Constant)
2268    return 0;
2269
2270  // If we're using CR bit registers for i1 values, handle that as a special
2271  // case first.
2272  if (VT == MVT::i1 && PPCSubTarget->useCRBits()) {
2273    unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass);
2274    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2275            TII.get(Imm == 0 ? PPC::CRUNSET : PPC::CRSET), ImmReg);
2276    return ImmReg;
2277  }
2278
2279  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 &&
2280      VT != MVT::i8 && VT != MVT::i1)
2281    return 0;
2282
2283  const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
2284                                   &PPC::GPRCRegClass);
2285  if (VT == MVT::i64)
2286    return PPCMaterialize64BitInt(Imm, RC);
2287  else
2288    return PPCMaterialize32BitInt(Imm, RC);
2289}
2290
2291// Override for ADDI and ADDI8 to set the correct register class
2292// on RHS operand 0.  The automatic infrastructure naively assumes
2293// GPRC for i32 and G8RC for i64; the concept of "no R0" is lost
2294// for these cases.  At the moment, none of the other automatically
2295// generated RI instructions require special treatment.  However, once
2296// SelectSelect is implemented, "isel" requires similar handling.
2297//
2298// Also be conservative about the output register class.  Avoid
2299// assigning R0 or X0 to the output register for GPRC and G8RC
2300// register classes, as any such result could be used in ADDI, etc.,
2301// where those regs have another meaning.
2302unsigned PPCFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
2303                                      const TargetRegisterClass *RC,
2304                                      unsigned Op0, bool Op0IsKill,
2305                                      uint64_t Imm) {
2306  if (MachineInstOpcode == PPC::ADDI)
2307    MRI.setRegClass(Op0, &PPC::GPRC_and_GPRC_NOR0RegClass);
2308  else if (MachineInstOpcode == PPC::ADDI8)
2309    MRI.setRegClass(Op0, &PPC::G8RC_and_G8RC_NOX0RegClass);
2310
2311  const TargetRegisterClass *UseRC =
2312    (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2313     (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2314
2315  return FastISel::fastEmitInst_ri(MachineInstOpcode, UseRC,
2316                                   Op0, Op0IsKill, Imm);
2317}
2318
2319// Override for instructions with one register operand to avoid use of
2320// R0/X0.  The automatic infrastructure isn't aware of the context so
2321// we must be conservative.
2322unsigned PPCFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
2323                                     const TargetRegisterClass* RC,
2324                                     unsigned Op0, bool Op0IsKill) {
2325  const TargetRegisterClass *UseRC =
2326    (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2327     (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2328
2329  return FastISel::fastEmitInst_r(MachineInstOpcode, UseRC, Op0, Op0IsKill);
2330}
2331
2332// Override for instructions with two register operands to avoid use
2333// of R0/X0.  The automatic infrastructure isn't aware of the context
2334// so we must be conservative.
2335unsigned PPCFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
2336                                      const TargetRegisterClass* RC,
2337                                      unsigned Op0, bool Op0IsKill,
2338                                      unsigned Op1, bool Op1IsKill) {
2339  const TargetRegisterClass *UseRC =
2340    (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2341     (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2342
2343  return FastISel::fastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op0IsKill,
2344                                   Op1, Op1IsKill);
2345}
2346
2347namespace llvm {
2348  // Create the fast instruction selector for PowerPC64 ELF.
2349  FastISel *PPC::createFastISel(FunctionLoweringInfo &FuncInfo,
2350                                const TargetLibraryInfo *LibInfo) {
2351    // Only available on 64-bit ELF for now.
2352    const PPCSubtarget &Subtarget = FuncInfo.MF->getSubtarget<PPCSubtarget>();
2353    if (Subtarget.isPPC64() && Subtarget.isSVR4ABI())
2354      return new PPCFastISel(FuncInfo, LibInfo);
2355    return nullptr;
2356  }
2357}
2358