1//===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines a pattern matching instruction selector for PowerPC,
11// converting from a legalized dag to a PPC dag.
12//
13//===----------------------------------------------------------------------===//
14
15#include "PPC.h"
16#include "MCTargetDesc/PPCPredicates.h"
17#include "PPCMachineFunctionInfo.h"
18#include "PPCTargetMachine.h"
19#include "llvm/Analysis/BranchProbabilityInfo.h"
20#include "llvm/CodeGen/FunctionLoweringInfo.h"
21#include "llvm/CodeGen/MachineFunction.h"
22#include "llvm/CodeGen/MachineInstrBuilder.h"
23#include "llvm/CodeGen/MachineRegisterInfo.h"
24#include "llvm/CodeGen/SelectionDAG.h"
25#include "llvm/CodeGen/SelectionDAGISel.h"
26#include "llvm/IR/Constants.h"
27#include "llvm/IR/Function.h"
28#include "llvm/IR/GlobalAlias.h"
29#include "llvm/IR/GlobalValue.h"
30#include "llvm/IR/GlobalVariable.h"
31#include "llvm/IR/Intrinsics.h"
32#include "llvm/IR/Module.h"
33#include "llvm/Support/CommandLine.h"
34#include "llvm/Support/Debug.h"
35#include "llvm/Support/ErrorHandling.h"
36#include "llvm/Support/MathExtras.h"
37#include "llvm/Support/raw_ostream.h"
38#include "llvm/Target/TargetOptions.h"
39using namespace llvm;
40
41#define DEBUG_TYPE "ppc-codegen"
42
43// FIXME: Remove this once the bug has been fixed!
44cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
45cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);
46
47static cl::opt<bool>
48    UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
49                       cl::desc("use aggressive ppc isel for bit permutations"),
50                       cl::Hidden);
51static cl::opt<bool> BPermRewriterNoMasking(
52    "ppc-bit-perm-rewriter-stress-rotates",
53    cl::desc("stress rotate selection in aggressive ppc isel for "
54             "bit permutations"),
55    cl::Hidden);
56
57static cl::opt<bool> EnableBranchHint(
58  "ppc-use-branch-hint", cl::init(true),
59    cl::desc("Enable static hinting of branches on ppc"),
60    cl::Hidden);
61
62namespace llvm {
63  void initializePPCDAGToDAGISelPass(PassRegistry&);
64}
65
66namespace {
67  //===--------------------------------------------------------------------===//
68  /// PPCDAGToDAGISel - PPC specific code to select PPC machine
69  /// instructions for SelectionDAG operations.
70  ///
71  class PPCDAGToDAGISel : public SelectionDAGISel {
72    const PPCTargetMachine &TM;
73    const PPCSubtarget *PPCSubTarget;
74    const PPCTargetLowering *PPCLowering;
75    unsigned GlobalBaseReg;
76  public:
77    explicit PPCDAGToDAGISel(PPCTargetMachine &tm)
78        : SelectionDAGISel(tm), TM(tm) {
79      initializePPCDAGToDAGISelPass(*PassRegistry::getPassRegistry());
80    }
81
82    bool runOnMachineFunction(MachineFunction &MF) override {
83      // Make sure we re-emit a set of the global base reg if necessary
84      GlobalBaseReg = 0;
85      PPCSubTarget = &MF.getSubtarget<PPCSubtarget>();
86      PPCLowering = PPCSubTarget->getTargetLowering();
87      SelectionDAGISel::runOnMachineFunction(MF);
88
89      if (!PPCSubTarget->isSVR4ABI())
90        InsertVRSaveCode(MF);
91
92      return true;
93    }
94
95    void PreprocessISelDAG() override;
96    void PostprocessISelDAG() override;
97
98    /// getI32Imm - Return a target constant with the specified value, of type
99    /// i32.
100    inline SDValue getI32Imm(unsigned Imm, SDLoc dl) {
101      return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
102    }
103
104    /// getI64Imm - Return a target constant with the specified value, of type
105    /// i64.
106    inline SDValue getI64Imm(uint64_t Imm, SDLoc dl) {
107      return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
108    }
109
110    /// getSmallIPtrImm - Return a target constant of pointer type.
111    inline SDValue getSmallIPtrImm(unsigned Imm, SDLoc dl) {
112      return CurDAG->getTargetConstant(
113          Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
114    }
115
116    /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
117    /// rotate and mask opcode and mask operation.
118    static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
119                                unsigned &SH, unsigned &MB, unsigned &ME);
120
121    /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
122    /// base register.  Return the virtual register that holds this value.
123    SDNode *getGlobalBaseReg();
124
125    SDNode *getFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0);
126
127    // Select - Convert the specified operand from a target-independent to a
128    // target-specific node if it hasn't already been changed.
129    SDNode *Select(SDNode *N) override;
130
131    SDNode *SelectBitfieldInsert(SDNode *N);
132    SDNode *SelectBitPermutation(SDNode *N);
133
134    /// SelectCC - Select a comparison of the specified values with the
135    /// specified condition code, returning the CR# of the expression.
136    SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDLoc dl);
137
138    /// SelectAddrImm - Returns true if the address N can be represented by
139    /// a base register plus a signed 16-bit displacement [r+imm].
140    bool SelectAddrImm(SDValue N, SDValue &Disp,
141                       SDValue &Base) {
142      return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, false);
143    }
144
145    /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
146    /// immediate field.  Note that the operand at this point is already the
147    /// result of a prior SelectAddressRegImm call.
148    bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
149      if (N.getOpcode() == ISD::TargetConstant ||
150          N.getOpcode() == ISD::TargetGlobalAddress) {
151        Out = N;
152        return true;
153      }
154
155      return false;
156    }
157
158    /// SelectAddrIdx - Given the specified addressed, check to see if it can be
159    /// represented as an indexed [r+r] operation.  Returns false if it can
160    /// be represented by [r+imm], which are preferred.
161    bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
162      return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG);
163    }
164
165    /// SelectAddrIdxOnly - Given the specified addressed, force it to be
166    /// represented as an indexed [r+r] operation.
167    bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
168      return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
169    }
170
171    /// SelectAddrImmX4 - Returns true if the address N can be represented by
172    /// a base register plus a signed 16-bit displacement that is a multiple of 4.
173    /// Suitable for use by STD and friends.
174    bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
175      return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, true);
176    }
177
178    // Select an address into a single register.
179    bool SelectAddr(SDValue N, SDValue &Base) {
180      Base = N;
181      return true;
182    }
183
184    /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
185    /// inline asm expressions.  It is always correct to compute the value into
186    /// a register.  The case of adding a (possibly relocatable) constant to a
187    /// register can be improved, but it is wrong to substitute Reg+Reg for
188    /// Reg in an asm, because the load or store opcode would have to change.
189    bool SelectInlineAsmMemoryOperand(const SDValue &Op,
190                                      unsigned ConstraintID,
191                                      std::vector<SDValue> &OutOps) override {
192
193      switch(ConstraintID) {
194      default:
195        errs() << "ConstraintID: " << ConstraintID << "\n";
196        llvm_unreachable("Unexpected asm memory constraint");
197      case InlineAsm::Constraint_es:
198      case InlineAsm::Constraint_i:
199      case InlineAsm::Constraint_m:
200      case InlineAsm::Constraint_o:
201      case InlineAsm::Constraint_Q:
202      case InlineAsm::Constraint_Z:
203      case InlineAsm::Constraint_Zy:
204        // We need to make sure that this one operand does not end up in r0
205        // (because we might end up lowering this as 0(%op)).
206        const TargetRegisterInfo *TRI = PPCSubTarget->getRegisterInfo();
207        const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
208        SDLoc dl(Op);
209        SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
210        SDValue NewOp =
211          SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
212                                         dl, Op.getValueType(),
213                                         Op, RC), 0);
214
215        OutOps.push_back(NewOp);
216        return false;
217      }
218      return true;
219    }
220
221    void InsertVRSaveCode(MachineFunction &MF);
222
223    const char *getPassName() const override {
224      return "PowerPC DAG->DAG Pattern Instruction Selection";
225    }
226
227// Include the pieces autogenerated from the target description.
228#include "PPCGenDAGISel.inc"
229
230private:
231    SDNode *SelectSETCC(SDNode *N);
232
233    void PeepholePPC64();
234    void PeepholePPC64ZExt();
235    void PeepholeCROps();
236
237    SDValue combineToCMPB(SDNode *N);
238    void foldBoolExts(SDValue &Res, SDNode *&N);
239
240    bool AllUsersSelectZero(SDNode *N);
241    void SwapAllSelectUsers(SDNode *N);
242
243    SDNode *transferMemOperands(SDNode *N, SDNode *Result);
244  };
245}
246
247/// InsertVRSaveCode - Once the entire function has been instruction selected,
248/// all virtual registers are created and all machine instructions are built,
249/// check to see if we need to save/restore VRSAVE.  If so, do it.
250void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
251  // Check to see if this function uses vector registers, which means we have to
252  // save and restore the VRSAVE register and update it with the regs we use.
253  //
254  // In this case, there will be virtual registers of vector type created
255  // by the scheduler.  Detect them now.
256  bool HasVectorVReg = false;
257  for (unsigned i = 0, e = RegInfo->getNumVirtRegs(); i != e; ++i) {
258    unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
259    if (RegInfo->getRegClass(Reg) == &PPC::VRRCRegClass) {
260      HasVectorVReg = true;
261      break;
262    }
263  }
264  if (!HasVectorVReg) return;  // nothing to do.
265
266  // If we have a vector register, we want to emit code into the entry and exit
267  // blocks to save and restore the VRSAVE register.  We do this here (instead
268  // of marking all vector instructions as clobbering VRSAVE) for two reasons:
269  //
270  // 1. This (trivially) reduces the load on the register allocator, by not
271  //    having to represent the live range of the VRSAVE register.
272  // 2. This (more significantly) allows us to create a temporary virtual
273  //    register to hold the saved VRSAVE value, allowing this temporary to be
274  //    register allocated, instead of forcing it to be spilled to the stack.
275
276  // Create two vregs - one to hold the VRSAVE register that is live-in to the
277  // function and one for the value after having bits or'd into it.
278  unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
279  unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
280
281  const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
282  MachineBasicBlock &EntryBB = *Fn.begin();
283  DebugLoc dl;
284  // Emit the following code into the entry block:
285  // InVRSAVE = MFVRSAVE
286  // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE
287  // MTVRSAVE UpdatedVRSAVE
288  MachineBasicBlock::iterator IP = EntryBB.begin();  // Insert Point
289  BuildMI(EntryBB, IP, dl, TII.get(PPC::MFVRSAVE), InVRSAVE);
290  BuildMI(EntryBB, IP, dl, TII.get(PPC::UPDATE_VRSAVE),
291          UpdatedVRSAVE).addReg(InVRSAVE);
292  BuildMI(EntryBB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(UpdatedVRSAVE);
293
294  // Find all return blocks, outputting a restore in each epilog.
295  for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
296    if (BB->isReturnBlock()) {
297      IP = BB->end(); --IP;
298
299      // Skip over all terminator instructions, which are part of the return
300      // sequence.
301      MachineBasicBlock::iterator I2 = IP;
302      while (I2 != BB->begin() && (--I2)->isTerminator())
303        IP = I2;
304
305      // Emit: MTVRSAVE InVRSave
306      BuildMI(*BB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(InVRSAVE);
307    }
308  }
309}
310
311
312/// getGlobalBaseReg - Output the instructions required to put the
313/// base address to use for accessing globals into a register.
314///
315SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
316  if (!GlobalBaseReg) {
317    const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
318    // Insert the set of GlobalBaseReg into the first MBB of the function
319    MachineBasicBlock &FirstMBB = MF->front();
320    MachineBasicBlock::iterator MBBI = FirstMBB.begin();
321    const Module *M = MF->getFunction()->getParent();
322    DebugLoc dl;
323
324    if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
325      if (PPCSubTarget->isTargetELF()) {
326        GlobalBaseReg = PPC::R30;
327        if (M->getPICLevel() == PICLevel::Small) {
328          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
329          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
330          MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
331        } else {
332          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
333          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
334          unsigned TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
335          BuildMI(FirstMBB, MBBI, dl,
336                  TII.get(PPC::UpdateGBR), GlobalBaseReg)
337                  .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
338          MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
339        }
340      } else {
341        GlobalBaseReg =
342          RegInfo->createVirtualRegister(&PPC::GPRC_NOR0RegClass);
343        BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
344        BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
345      }
346    } else {
347      GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_NOX0RegClass);
348      BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
349      BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
350    }
351  }
352  return CurDAG->getRegister(GlobalBaseReg,
353                             PPCLowering->getPointerTy(CurDAG->getDataLayout()))
354      .getNode();
355}
356
357/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
358/// or 64-bit immediate, and if the value can be accurately represented as a
359/// sign extension from a 16-bit value.  If so, this returns true and the
360/// immediate.
361static bool isIntS16Immediate(SDNode *N, short &Imm) {
362  if (N->getOpcode() != ISD::Constant)
363    return false;
364
365  Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
366  if (N->getValueType(0) == MVT::i32)
367    return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
368  else
369    return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
370}
371
372static bool isIntS16Immediate(SDValue Op, short &Imm) {
373  return isIntS16Immediate(Op.getNode(), Imm);
374}
375
376
377/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
378/// operand. If so Imm will receive the 32-bit value.
379static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
380  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
381    Imm = cast<ConstantSDNode>(N)->getZExtValue();
382    return true;
383  }
384  return false;
385}
386
387/// isInt64Immediate - This method tests to see if the node is a 64-bit constant
388/// operand.  If so Imm will receive the 64-bit value.
389static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
390  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
391    Imm = cast<ConstantSDNode>(N)->getZExtValue();
392    return true;
393  }
394  return false;
395}
396
397// isInt32Immediate - This method tests to see if a constant operand.
398// If so Imm will receive the 32 bit value.
399static bool isInt32Immediate(SDValue N, unsigned &Imm) {
400  return isInt32Immediate(N.getNode(), Imm);
401}
402
403static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo,
404                              const SDValue &DestMBB) {
405  assert(isa<BasicBlockSDNode>(DestMBB));
406
407  if (!FuncInfo->BPI) return PPC::BR_NO_HINT;
408
409  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
410  const TerminatorInst *BBTerm = BB->getTerminator();
411
412  if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;
413
414  const BasicBlock *TBB = BBTerm->getSuccessor(0);
415  const BasicBlock *FBB = BBTerm->getSuccessor(1);
416
417  uint32_t TWeight = FuncInfo->BPI->getEdgeWeight(BB, TBB);
418  uint32_t FWeight = FuncInfo->BPI->getEdgeWeight(BB, FBB);
419
420  // We only want to handle cases which are easy to predict at static time, e.g.
421  // C++ throw statement, that is very likely not taken, or calling never
422  // returned function, e.g. stdlib exit(). So we set Threshold to filter
423  // unwanted cases.
424  //
425  // Below is LLVM branch weight table, we only want to handle case 1, 2
426  //
427  // Case                  Taken:Nontaken  Example
428  // 1. Unreachable        1048575:1       C++ throw, stdlib exit(),
429  // 2. Invoke-terminating 1:1048575
430  // 3. Coldblock          4:64            __builtin_expect
431  // 4. Loop Branch        124:4           For loop
432  // 5. PH/ZH/FPH          20:12
433  const uint32_t Threshold = 10000;
434
435  // Minimal weight should be at least 1
436  if (std::max(TWeight, FWeight) /
437      std::max(1u, std::min(TWeight, FWeight)) < Threshold)
438    return PPC::BR_NO_HINT;
439
440  DEBUG(dbgs() << "Use branch hint for '" << FuncInfo->Fn->getName() << "::"
441               << BB->getName() << "'\n"
442               << " -> " << TBB->getName() << ": " << TWeight << "\n"
443               << " -> " << FBB->getName() << ": " << FWeight << "\n");
444
445  const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB);
446
447  // If Dest BasicBlock is False-BasicBlock (FBB), swap branch weight,
448  // because we want 'TWeight' stands for 'branch weight' to Dest BasicBlock
449  if (BBDN->getBasicBlock()->getBasicBlock() != TBB)
450    std::swap(TWeight, FWeight);
451
452  return (TWeight > FWeight) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT;
453}
454
455// isOpcWithIntImmediate - This method tests to see if the node is a specific
456// opcode and that it has a immediate integer right operand.
457// If so Imm will receive the 32 bit value.
458static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
459  return N->getOpcode() == Opc
460         && isInt32Immediate(N->getOperand(1).getNode(), Imm);
461}
462
463SDNode *PPCDAGToDAGISel::getFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) {
464  SDLoc dl(SN);
465  int FI = cast<FrameIndexSDNode>(N)->getIndex();
466  SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
467  unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
468  if (SN->hasOneUse())
469    return CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI,
470                                getSmallIPtrImm(Offset, dl));
471  return CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
472                                getSmallIPtrImm(Offset, dl));
473}
474
475bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
476                                      bool isShiftMask, unsigned &SH,
477                                      unsigned &MB, unsigned &ME) {
478  // Don't even go down this path for i64, since different logic will be
479  // necessary for rldicl/rldicr/rldimi.
480  if (N->getValueType(0) != MVT::i32)
481    return false;
482
483  unsigned Shift  = 32;
484  unsigned Indeterminant = ~0;  // bit mask marking indeterminant results
485  unsigned Opcode = N->getOpcode();
486  if (N->getNumOperands() != 2 ||
487      !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
488    return false;
489
490  if (Opcode == ISD::SHL) {
491    // apply shift left to mask if it comes first
492    if (isShiftMask) Mask = Mask << Shift;
493    // determine which bits are made indeterminant by shift
494    Indeterminant = ~(0xFFFFFFFFu << Shift);
495  } else if (Opcode == ISD::SRL) {
496    // apply shift right to mask if it comes first
497    if (isShiftMask) Mask = Mask >> Shift;
498    // determine which bits are made indeterminant by shift
499    Indeterminant = ~(0xFFFFFFFFu >> Shift);
500    // adjust for the left rotate
501    Shift = 32 - Shift;
502  } else if (Opcode == ISD::ROTL) {
503    Indeterminant = 0;
504  } else {
505    return false;
506  }
507
508  // if the mask doesn't intersect any Indeterminant bits
509  if (Mask && !(Mask & Indeterminant)) {
510    SH = Shift & 31;
511    // make sure the mask is still a mask (wrap arounds may not be)
512    return isRunOfOnes(Mask, MB, ME);
513  }
514  return false;
515}
516
517/// SelectBitfieldInsert - turn an or of two masked values into
518/// the rotate left word immediate then mask insert (rlwimi) instruction.
519SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
520  SDValue Op0 = N->getOperand(0);
521  SDValue Op1 = N->getOperand(1);
522  SDLoc dl(N);
523
524  APInt LKZ, LKO, RKZ, RKO;
525  CurDAG->computeKnownBits(Op0, LKZ, LKO);
526  CurDAG->computeKnownBits(Op1, RKZ, RKO);
527
528  unsigned TargetMask = LKZ.getZExtValue();
529  unsigned InsertMask = RKZ.getZExtValue();
530
531  if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
532    unsigned Op0Opc = Op0.getOpcode();
533    unsigned Op1Opc = Op1.getOpcode();
534    unsigned Value, SH = 0;
535    TargetMask = ~TargetMask;
536    InsertMask = ~InsertMask;
537
538    // If the LHS has a foldable shift and the RHS does not, then swap it to the
539    // RHS so that we can fold the shift into the insert.
540    if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
541      if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
542          Op0.getOperand(0).getOpcode() == ISD::SRL) {
543        if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
544            Op1.getOperand(0).getOpcode() != ISD::SRL) {
545          std::swap(Op0, Op1);
546          std::swap(Op0Opc, Op1Opc);
547          std::swap(TargetMask, InsertMask);
548        }
549      }
550    } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
551      if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
552          Op1.getOperand(0).getOpcode() != ISD::SRL) {
553        std::swap(Op0, Op1);
554        std::swap(Op0Opc, Op1Opc);
555        std::swap(TargetMask, InsertMask);
556      }
557    }
558
559    unsigned MB, ME;
560    if (isRunOfOnes(InsertMask, MB, ME)) {
561      SDValue Tmp1, Tmp2;
562
563      if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
564          isInt32Immediate(Op1.getOperand(1), Value)) {
565        Op1 = Op1.getOperand(0);
566        SH  = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
567      }
568      if (Op1Opc == ISD::AND) {
569       // The AND mask might not be a constant, and we need to make sure that
570       // if we're going to fold the masking with the insert, all bits not
571       // know to be zero in the mask are known to be one.
572        APInt MKZ, MKO;
573        CurDAG->computeKnownBits(Op1.getOperand(1), MKZ, MKO);
574        bool CanFoldMask = InsertMask == MKO.getZExtValue();
575
576        unsigned SHOpc = Op1.getOperand(0).getOpcode();
577        if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
578            isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {
579          // Note that Value must be in range here (less than 32) because
580          // otherwise there would not be any bits set in InsertMask.
581          Op1 = Op1.getOperand(0).getOperand(0);
582          SH  = (SHOpc == ISD::SHL) ? Value : 32 - Value;
583        }
584      }
585
586      SH &= 31;
587      SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
588                          getI32Imm(ME, dl) };
589      return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops);
590    }
591  }
592  return nullptr;
593}
594
595// Predict the number of instructions that would be generated by calling
596// SelectInt64(N).
597static unsigned SelectInt64CountDirect(int64_t Imm) {
598  // Assume no remaining bits.
599  unsigned Remainder = 0;
600  // Assume no shift required.
601  unsigned Shift = 0;
602
603  // If it can't be represented as a 32 bit value.
604  if (!isInt<32>(Imm)) {
605    Shift = countTrailingZeros<uint64_t>(Imm);
606    int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
607
608    // If the shifted value fits 32 bits.
609    if (isInt<32>(ImmSh)) {
610      // Go with the shifted value.
611      Imm = ImmSh;
612    } else {
613      // Still stuck with a 64 bit value.
614      Remainder = Imm;
615      Shift = 32;
616      Imm >>= 32;
617    }
618  }
619
620  // Intermediate operand.
621  unsigned Result = 0;
622
623  // Handle first 32 bits.
624  unsigned Lo = Imm & 0xFFFF;
625
626  // Simple value.
627  if (isInt<16>(Imm)) {
628    // Just the Lo bits.
629    ++Result;
630  } else if (Lo) {
631    // Handle the Hi bits and Lo bits.
632    Result += 2;
633  } else {
634    // Just the Hi bits.
635    ++Result;
636  }
637
638  // If no shift, we're done.
639  if (!Shift) return Result;
640
641  // Shift for next step if the upper 32-bits were not zero.
642  if (Imm)
643    ++Result;
644
645  // Add in the last bits as required.
646  if ((Remainder >> 16) & 0xFFFF)
647    ++Result;
648  if (Remainder & 0xFFFF)
649    ++Result;
650
651  return Result;
652}
653
654static uint64_t Rot64(uint64_t Imm, unsigned R) {
655  return (Imm << R) | (Imm >> (64 - R));
656}
657
658static unsigned SelectInt64Count(int64_t Imm) {
659  unsigned Count = SelectInt64CountDirect(Imm);
660  if (Count == 1)
661    return Count;
662
663  for (unsigned r = 1; r < 63; ++r) {
664    uint64_t RImm = Rot64(Imm, r);
665    unsigned RCount = SelectInt64CountDirect(RImm) + 1;
666    Count = std::min(Count, RCount);
667
668    // See comments in SelectInt64 for an explanation of the logic below.
669    unsigned LS = findLastSet(RImm);
670    if (LS != r-1)
671      continue;
672
673    uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1));
674    uint64_t RImmWithOnes = RImm | OnesMask;
675
676    RCount = SelectInt64CountDirect(RImmWithOnes) + 1;
677    Count = std::min(Count, RCount);
678  }
679
680  return Count;
681}
682
683// Select a 64-bit constant. For cost-modeling purposes, SelectInt64Count
684// (above) needs to be kept in sync with this function.
685static SDNode *SelectInt64Direct(SelectionDAG *CurDAG, SDLoc dl, int64_t Imm) {
686  // Assume no remaining bits.
687  unsigned Remainder = 0;
688  // Assume no shift required.
689  unsigned Shift = 0;
690
691  // If it can't be represented as a 32 bit value.
692  if (!isInt<32>(Imm)) {
693    Shift = countTrailingZeros<uint64_t>(Imm);
694    int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
695
696    // If the shifted value fits 32 bits.
697    if (isInt<32>(ImmSh)) {
698      // Go with the shifted value.
699      Imm = ImmSh;
700    } else {
701      // Still stuck with a 64 bit value.
702      Remainder = Imm;
703      Shift = 32;
704      Imm >>= 32;
705    }
706  }
707
708  // Intermediate operand.
709  SDNode *Result;
710
711  // Handle first 32 bits.
712  unsigned Lo = Imm & 0xFFFF;
713  unsigned Hi = (Imm >> 16) & 0xFFFF;
714
715  auto getI32Imm = [CurDAG, dl](unsigned Imm) {
716      return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
717  };
718
719  // Simple value.
720  if (isInt<16>(Imm)) {
721    // Just the Lo bits.
722    Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(Lo));
723  } else if (Lo) {
724    // Handle the Hi bits.
725    unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8;
726    Result = CurDAG->getMachineNode(OpC, dl, MVT::i64, getI32Imm(Hi));
727    // And Lo bits.
728    Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
729                                    SDValue(Result, 0), getI32Imm(Lo));
730  } else {
731    // Just the Hi bits.
732    Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi));
733  }
734
735  // If no shift, we're done.
736  if (!Shift) return Result;
737
738  // Shift for next step if the upper 32-bits were not zero.
739  if (Imm) {
740    Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64,
741                                    SDValue(Result, 0),
742                                    getI32Imm(Shift),
743                                    getI32Imm(63 - Shift));
744  }
745
746  // Add in the last bits as required.
747  if ((Hi = (Remainder >> 16) & 0xFFFF)) {
748    Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
749                                    SDValue(Result, 0), getI32Imm(Hi));
750  }
751  if ((Lo = Remainder & 0xFFFF)) {
752    Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
753                                    SDValue(Result, 0), getI32Imm(Lo));
754  }
755
756  return Result;
757}
758
759static SDNode *SelectInt64(SelectionDAG *CurDAG, SDLoc dl, int64_t Imm) {
760  unsigned Count = SelectInt64CountDirect(Imm);
761  if (Count == 1)
762    return SelectInt64Direct(CurDAG, dl, Imm);
763
764  unsigned RMin = 0;
765
766  int64_t MatImm;
767  unsigned MaskEnd;
768
769  for (unsigned r = 1; r < 63; ++r) {
770    uint64_t RImm = Rot64(Imm, r);
771    unsigned RCount = SelectInt64CountDirect(RImm) + 1;
772    if (RCount < Count) {
773      Count = RCount;
774      RMin = r;
775      MatImm = RImm;
776      MaskEnd = 63;
777    }
778
779    // If the immediate to generate has many trailing zeros, it might be
780    // worthwhile to generate a rotated value with too many leading ones
781    // (because that's free with li/lis's sign-extension semantics), and then
782    // mask them off after rotation.
783
784    unsigned LS = findLastSet(RImm);
785    // We're adding (63-LS) higher-order ones, and we expect to mask them off
786    // after performing the inverse rotation by (64-r). So we need that:
787    //   63-LS == 64-r => LS == r-1
788    if (LS != r-1)
789      continue;
790
791    uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1));
792    uint64_t RImmWithOnes = RImm | OnesMask;
793
794    RCount = SelectInt64CountDirect(RImmWithOnes) + 1;
795    if (RCount < Count) {
796      Count = RCount;
797      RMin = r;
798      MatImm = RImmWithOnes;
799      MaskEnd = LS;
800    }
801  }
802
803  if (!RMin)
804    return SelectInt64Direct(CurDAG, dl, Imm);
805
806  auto getI32Imm = [CurDAG, dl](unsigned Imm) {
807      return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
808  };
809
810  SDValue Val = SDValue(SelectInt64Direct(CurDAG, dl, MatImm), 0);
811  return CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Val,
812                                getI32Imm(64 - RMin), getI32Imm(MaskEnd));
813}
814
815// Select a 64-bit constant.
816static SDNode *SelectInt64(SelectionDAG *CurDAG, SDNode *N) {
817  SDLoc dl(N);
818
819  // Get 64 bit value.
820  int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue();
821  return SelectInt64(CurDAG, dl, Imm);
822}
823
824namespace {
825class BitPermutationSelector {
826  struct ValueBit {
827    SDValue V;
828
829    // The bit number in the value, using a convention where bit 0 is the
830    // lowest-order bit.
831    unsigned Idx;
832
833    enum Kind {
834      ConstZero,
835      Variable
836    } K;
837
838    ValueBit(SDValue V, unsigned I, Kind K = Variable)
839      : V(V), Idx(I), K(K) {}
840    ValueBit(Kind K = Variable)
841      : V(SDValue(nullptr, 0)), Idx(UINT32_MAX), K(K) {}
842
843    bool isZero() const {
844      return K == ConstZero;
845    }
846
847    bool hasValue() const {
848      return K == Variable;
849    }
850
851    SDValue getValue() const {
852      assert(hasValue() && "Cannot get the value of a constant bit");
853      return V;
854    }
855
856    unsigned getValueBitIndex() const {
857      assert(hasValue() && "Cannot get the value bit index of a constant bit");
858      return Idx;
859    }
860  };
861
862  // A bit group has the same underlying value and the same rotate factor.
863  struct BitGroup {
864    SDValue V;
865    unsigned RLAmt;
866    unsigned StartIdx, EndIdx;
867
868    // This rotation amount assumes that the lower 32 bits of the quantity are
869    // replicated in the high 32 bits by the rotation operator (which is done
870    // by rlwinm and friends in 64-bit mode).
871    bool Repl32;
872    // Did converting to Repl32 == true change the rotation factor? If it did,
873    // it decreased it by 32.
874    bool Repl32CR;
875    // Was this group coalesced after setting Repl32 to true?
876    bool Repl32Coalesced;
877
878    BitGroup(SDValue V, unsigned R, unsigned S, unsigned E)
879      : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false),
880        Repl32Coalesced(false) {
881      DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R <<
882                      " [" << S << ", " << E << "]\n");
883    }
884  };
885
886  // Information on each (Value, RLAmt) pair (like the number of groups
887  // associated with each) used to choose the lowering method.
888  struct ValueRotInfo {
889    SDValue V;
890    unsigned RLAmt;
891    unsigned NumGroups;
892    unsigned FirstGroupStartIdx;
893    bool Repl32;
894
895    ValueRotInfo()
896      : RLAmt(UINT32_MAX), NumGroups(0), FirstGroupStartIdx(UINT32_MAX),
897        Repl32(false) {}
898
899    // For sorting (in reverse order) by NumGroups, and then by
900    // FirstGroupStartIdx.
901    bool operator < (const ValueRotInfo &Other) const {
902      // We need to sort so that the non-Repl32 come first because, when we're
903      // doing masking, the Repl32 bit groups might be subsumed into the 64-bit
904      // masking operation.
905      if (Repl32 < Other.Repl32)
906        return true;
907      else if (Repl32 > Other.Repl32)
908        return false;
909      else if (NumGroups > Other.NumGroups)
910        return true;
911      else if (NumGroups < Other.NumGroups)
912        return false;
913      else if (FirstGroupStartIdx < Other.FirstGroupStartIdx)
914        return true;
915      return false;
916    }
917  };
918
919  // Return true if something interesting was deduced, return false if we're
920  // providing only a generic representation of V (or something else likewise
921  // uninteresting for instruction selection).
922  bool getValueBits(SDValue V, SmallVector<ValueBit, 64> &Bits) {
923    switch (V.getOpcode()) {
924    default: break;
925    case ISD::ROTL:
926      if (isa<ConstantSDNode>(V.getOperand(1))) {
927        unsigned RotAmt = V.getConstantOperandVal(1);
928
929        SmallVector<ValueBit, 64> LHSBits(Bits.size());
930        getValueBits(V.getOperand(0), LHSBits);
931
932        for (unsigned i = 0; i < Bits.size(); ++i)
933          Bits[i] = LHSBits[i < RotAmt ? i + (Bits.size() - RotAmt) : i - RotAmt];
934
935        return true;
936      }
937      break;
938    case ISD::SHL:
939      if (isa<ConstantSDNode>(V.getOperand(1))) {
940        unsigned ShiftAmt = V.getConstantOperandVal(1);
941
942        SmallVector<ValueBit, 64> LHSBits(Bits.size());
943        getValueBits(V.getOperand(0), LHSBits);
944
945        for (unsigned i = ShiftAmt; i < Bits.size(); ++i)
946          Bits[i] = LHSBits[i - ShiftAmt];
947
948        for (unsigned i = 0; i < ShiftAmt; ++i)
949          Bits[i] = ValueBit(ValueBit::ConstZero);
950
951        return true;
952      }
953      break;
954    case ISD::SRL:
955      if (isa<ConstantSDNode>(V.getOperand(1))) {
956        unsigned ShiftAmt = V.getConstantOperandVal(1);
957
958        SmallVector<ValueBit, 64> LHSBits(Bits.size());
959        getValueBits(V.getOperand(0), LHSBits);
960
961        for (unsigned i = 0; i < Bits.size() - ShiftAmt; ++i)
962          Bits[i] = LHSBits[i + ShiftAmt];
963
964        for (unsigned i = Bits.size() - ShiftAmt; i < Bits.size(); ++i)
965          Bits[i] = ValueBit(ValueBit::ConstZero);
966
967        return true;
968      }
969      break;
970    case ISD::AND:
971      if (isa<ConstantSDNode>(V.getOperand(1))) {
972        uint64_t Mask = V.getConstantOperandVal(1);
973
974        SmallVector<ValueBit, 64> LHSBits(Bits.size());
975        bool LHSTrivial = getValueBits(V.getOperand(0), LHSBits);
976
977        for (unsigned i = 0; i < Bits.size(); ++i)
978          if (((Mask >> i) & 1) == 1)
979            Bits[i] = LHSBits[i];
980          else
981            Bits[i] = ValueBit(ValueBit::ConstZero);
982
983        // Mark this as interesting, only if the LHS was also interesting. This
984        // prevents the overall procedure from matching a single immediate 'and'
985        // (which is non-optimal because such an and might be folded with other
986        // things if we don't select it here).
987        return LHSTrivial;
988      }
989      break;
990    case ISD::OR: {
991      SmallVector<ValueBit, 64> LHSBits(Bits.size()), RHSBits(Bits.size());
992      getValueBits(V.getOperand(0), LHSBits);
993      getValueBits(V.getOperand(1), RHSBits);
994
995      bool AllDisjoint = true;
996      for (unsigned i = 0; i < Bits.size(); ++i)
997        if (LHSBits[i].isZero())
998          Bits[i] = RHSBits[i];
999        else if (RHSBits[i].isZero())
1000          Bits[i] = LHSBits[i];
1001        else {
1002          AllDisjoint = false;
1003          break;
1004        }
1005
1006      if (!AllDisjoint)
1007        break;
1008
1009      return true;
1010    }
1011    }
1012
1013    for (unsigned i = 0; i < Bits.size(); ++i)
1014      Bits[i] = ValueBit(V, i);
1015
1016    return false;
1017  }
1018
1019  // For each value (except the constant ones), compute the left-rotate amount
1020  // to get it from its original to final position.
1021  void computeRotationAmounts() {
1022    HasZeros = false;
1023    RLAmt.resize(Bits.size());
1024    for (unsigned i = 0; i < Bits.size(); ++i)
1025      if (Bits[i].hasValue()) {
1026        unsigned VBI = Bits[i].getValueBitIndex();
1027        if (i >= VBI)
1028          RLAmt[i] = i - VBI;
1029        else
1030          RLAmt[i] = Bits.size() - (VBI - i);
1031      } else if (Bits[i].isZero()) {
1032        HasZeros = true;
1033        RLAmt[i] = UINT32_MAX;
1034      } else {
1035        llvm_unreachable("Unknown value bit type");
1036      }
1037  }
1038
1039  // Collect groups of consecutive bits with the same underlying value and
1040  // rotation factor. If we're doing late masking, we ignore zeros, otherwise
1041  // they break up groups.
1042  void collectBitGroups(bool LateMask) {
1043    BitGroups.clear();
1044
1045    unsigned LastRLAmt = RLAmt[0];
1046    SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();
1047    unsigned LastGroupStartIdx = 0;
1048    for (unsigned i = 1; i < Bits.size(); ++i) {
1049      unsigned ThisRLAmt = RLAmt[i];
1050      SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();
1051      if (LateMask && !ThisValue) {
1052        ThisValue = LastValue;
1053        ThisRLAmt = LastRLAmt;
1054        // If we're doing late masking, then the first bit group always starts
1055        // at zero (even if the first bits were zero).
1056        if (BitGroups.empty())
1057          LastGroupStartIdx = 0;
1058      }
1059
1060      // If this bit has the same underlying value and the same rotate factor as
1061      // the last one, then they're part of the same group.
1062      if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
1063        continue;
1064
1065      if (LastValue.getNode())
1066        BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1067                                     i-1));
1068      LastRLAmt = ThisRLAmt;
1069      LastValue = ThisValue;
1070      LastGroupStartIdx = i;
1071    }
1072    if (LastValue.getNode())
1073      BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1074                                   Bits.size()-1));
1075
1076    if (BitGroups.empty())
1077      return;
1078
1079    // We might be able to combine the first and last groups.
1080    if (BitGroups.size() > 1) {
1081      // If the first and last groups are the same, then remove the first group
1082      // in favor of the last group, making the ending index of the last group
1083      // equal to the ending index of the to-be-removed first group.
1084      if (BitGroups[0].StartIdx == 0 &&
1085          BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 &&
1086          BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&
1087          BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {
1088        DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
1089        BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
1090        BitGroups.erase(BitGroups.begin());
1091      }
1092    }
1093  }
1094
1095  // Take all (SDValue, RLAmt) pairs and sort them by the number of groups
1096  // associated with each. If there is a degeneracy, pick the one that occurs
1097  // first (in the final value).
1098  void collectValueRotInfo() {
1099    ValueRots.clear();
1100
1101    for (auto &BG : BitGroups) {
1102      unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0);
1103      ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)];
1104      VRI.V = BG.V;
1105      VRI.RLAmt = BG.RLAmt;
1106      VRI.Repl32 = BG.Repl32;
1107      VRI.NumGroups += 1;
1108      VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx);
1109    }
1110
1111    // Now that we've collected the various ValueRotInfo instances, we need to
1112    // sort them.
1113    ValueRotsVec.clear();
1114    for (auto &I : ValueRots) {
1115      ValueRotsVec.push_back(I.second);
1116    }
1117    std::sort(ValueRotsVec.begin(), ValueRotsVec.end());
1118  }
1119
1120  // In 64-bit mode, rlwinm and friends have a rotation operator that
1121  // replicates the low-order 32 bits into the high-order 32-bits. The mask
1122  // indices of these instructions can only be in the lower 32 bits, so they
1123  // can only represent some 64-bit bit groups. However, when they can be used,
1124  // the 32-bit replication can be used to represent, as a single bit group,
1125  // otherwise separate bit groups. We'll convert to replicated-32-bit bit
1126  // groups when possible. Returns true if any of the bit groups were
1127  // converted.
1128  void assignRepl32BitGroups() {
1129    // If we have bits like this:
1130    //
1131    // Indices:    15 14 13 12 11 10 9 8  7  6  5  4  3  2  1  0
1132    // V bits: ... 7  6  5  4  3  2  1 0 31 30 29 28 27 26 25 24
1133    // Groups:    |      RLAmt = 8      |      RLAmt = 40       |
1134    //
1135    // But, making use of a 32-bit operation that replicates the low-order 32
1136    // bits into the high-order 32 bits, this can be one bit group with a RLAmt
1137    // of 8.
1138
1139    auto IsAllLow32 = [this](BitGroup & BG) {
1140      if (BG.StartIdx <= BG.EndIdx) {
1141        for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) {
1142          if (!Bits[i].hasValue())
1143            continue;
1144          if (Bits[i].getValueBitIndex() >= 32)
1145            return false;
1146        }
1147      } else {
1148        for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) {
1149          if (!Bits[i].hasValue())
1150            continue;
1151          if (Bits[i].getValueBitIndex() >= 32)
1152            return false;
1153        }
1154        for (unsigned i = 0; i <= BG.EndIdx; ++i) {
1155          if (!Bits[i].hasValue())
1156            continue;
1157          if (Bits[i].getValueBitIndex() >= 32)
1158            return false;
1159        }
1160      }
1161
1162      return true;
1163    };
1164
1165    for (auto &BG : BitGroups) {
1166      if (BG.StartIdx < 32 && BG.EndIdx < 32) {
1167        if (IsAllLow32(BG)) {
1168          if (BG.RLAmt >= 32) {
1169            BG.RLAmt -= 32;
1170            BG.Repl32CR = true;
1171          }
1172
1173          BG.Repl32 = true;
1174
1175          DEBUG(dbgs() << "\t32-bit replicated bit group for " <<
1176                          BG.V.getNode() << " RLAmt = " << BG.RLAmt <<
1177                          " [" << BG.StartIdx << ", " << BG.EndIdx << "]\n");
1178        }
1179      }
1180    }
1181
1182    // Now walk through the bit groups, consolidating where possible.
1183    for (auto I = BitGroups.begin(); I != BitGroups.end();) {
1184      // We might want to remove this bit group by merging it with the previous
1185      // group (which might be the ending group).
1186      auto IP = (I == BitGroups.begin()) ?
1187                std::prev(BitGroups.end()) : std::prev(I);
1188      if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt &&
1189          I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) {
1190
1191        DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for " <<
1192                        I->V.getNode() << " RLAmt = " << I->RLAmt <<
1193                        " [" << I->StartIdx << ", " << I->EndIdx <<
1194                        "] with group with range [" <<
1195                        IP->StartIdx << ", " << IP->EndIdx << "]\n");
1196
1197        IP->EndIdx = I->EndIdx;
1198        IP->Repl32CR = IP->Repl32CR || I->Repl32CR;
1199        IP->Repl32Coalesced = true;
1200        I = BitGroups.erase(I);
1201        continue;
1202      } else {
1203        // There is a special case worth handling: If there is a single group
1204        // covering the entire upper 32 bits, and it can be merged with both
1205        // the next and previous groups (which might be the same group), then
1206        // do so. If it is the same group (so there will be only one group in
1207        // total), then we need to reverse the order of the range so that it
1208        // covers the entire 64 bits.
1209        if (I->StartIdx == 32 && I->EndIdx == 63) {
1210          assert(std::next(I) == BitGroups.end() &&
1211                 "bit group ends at index 63 but there is another?");
1212          auto IN = BitGroups.begin();
1213
1214          if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V &&
1215              (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt &&
1216              IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP &&
1217              IsAllLow32(*I)) {
1218
1219            DEBUG(dbgs() << "\tcombining bit group for " <<
1220                            I->V.getNode() << " RLAmt = " << I->RLAmt <<
1221                            " [" << I->StartIdx << ", " << I->EndIdx <<
1222                            "] with 32-bit replicated groups with ranges [" <<
1223                            IP->StartIdx << ", " << IP->EndIdx << "] and [" <<
1224                            IN->StartIdx << ", " << IN->EndIdx << "]\n");
1225
1226            if (IP == IN) {
1227              // There is only one other group; change it to cover the whole
1228              // range (backward, so that it can still be Repl32 but cover the
1229              // whole 64-bit range).
1230              IP->StartIdx = 31;
1231              IP->EndIdx = 30;
1232              IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32;
1233              IP->Repl32Coalesced = true;
1234              I = BitGroups.erase(I);
1235            } else {
1236              // There are two separate groups, one before this group and one
1237              // after us (at the beginning). We're going to remove this group,
1238              // but also the group at the very beginning.
1239              IP->EndIdx = IN->EndIdx;
1240              IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32;
1241              IP->Repl32Coalesced = true;
1242              I = BitGroups.erase(I);
1243              BitGroups.erase(BitGroups.begin());
1244            }
1245
1246            // This must be the last group in the vector (and we might have
1247            // just invalidated the iterator above), so break here.
1248            break;
1249          }
1250        }
1251      }
1252
1253      ++I;
1254    }
1255  }
1256
1257  SDValue getI32Imm(unsigned Imm, SDLoc dl) {
1258    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1259  }
1260
1261  uint64_t getZerosMask() {
1262    uint64_t Mask = 0;
1263    for (unsigned i = 0; i < Bits.size(); ++i) {
1264      if (Bits[i].hasValue())
1265        continue;
1266      Mask |= (UINT64_C(1) << i);
1267    }
1268
1269    return ~Mask;
1270  }
1271
1272  // Depending on the number of groups for a particular value, it might be
1273  // better to rotate, mask explicitly (using andi/andis), and then or the
1274  // result. Select this part of the result first.
1275  void SelectAndParts32(SDLoc dl, SDValue &Res, unsigned *InstCnt) {
1276    if (BPermRewriterNoMasking)
1277      return;
1278
1279    for (ValueRotInfo &VRI : ValueRotsVec) {
1280      unsigned Mask = 0;
1281      for (unsigned i = 0; i < Bits.size(); ++i) {
1282        if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V)
1283          continue;
1284        if (RLAmt[i] != VRI.RLAmt)
1285          continue;
1286        Mask |= (1u << i);
1287      }
1288
1289      // Compute the masks for andi/andis that would be necessary.
1290      unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
1291      assert((ANDIMask != 0 || ANDISMask != 0) &&
1292             "No set bits in mask for value bit groups");
1293      bool NeedsRotate = VRI.RLAmt != 0;
1294
1295      // We're trying to minimize the number of instructions. If we have one
1296      // group, using one of andi/andis can break even.  If we have three
1297      // groups, we can use both andi and andis and break even (to use both
1298      // andi and andis we also need to or the results together). We need four
1299      // groups if we also need to rotate. To use andi/andis we need to do more
1300      // than break even because rotate-and-mask instructions tend to be easier
1301      // to schedule.
1302
1303      // FIXME: We've biased here against using andi/andis, which is right for
1304      // POWER cores, but not optimal everywhere. For example, on the A2,
1305      // andi/andis have single-cycle latency whereas the rotate-and-mask
1306      // instructions take two cycles, and it would be better to bias toward
1307      // andi/andis in break-even cases.
1308
1309      unsigned NumAndInsts = (unsigned) NeedsRotate +
1310                             (unsigned) (ANDIMask != 0) +
1311                             (unsigned) (ANDISMask != 0) +
1312                             (unsigned) (ANDIMask != 0 && ANDISMask != 0) +
1313                             (unsigned) (bool) Res;
1314
1315      DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode() <<
1316                      " RL: " << VRI.RLAmt << ":" <<
1317                      "\n\t\t\tisel using masking: " << NumAndInsts <<
1318                      " using rotates: " << VRI.NumGroups << "\n");
1319
1320      if (NumAndInsts >= VRI.NumGroups)
1321        continue;
1322
1323      DEBUG(dbgs() << "\t\t\t\tusing masking\n");
1324
1325      if (InstCnt) *InstCnt += NumAndInsts;
1326
1327      SDValue VRot;
1328      if (VRI.RLAmt) {
1329        SDValue Ops[] =
1330          { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),
1331            getI32Imm(31, dl) };
1332        VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
1333                                              Ops), 0);
1334      } else {
1335        VRot = VRI.V;
1336      }
1337
1338      SDValue ANDIVal, ANDISVal;
1339      if (ANDIMask != 0)
1340        ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32,
1341                            VRot, getI32Imm(ANDIMask, dl)), 0);
1342      if (ANDISMask != 0)
1343        ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32,
1344                             VRot, getI32Imm(ANDISMask, dl)), 0);
1345
1346      SDValue TotalVal;
1347      if (!ANDIVal)
1348        TotalVal = ANDISVal;
1349      else if (!ANDISVal)
1350        TotalVal = ANDIVal;
1351      else
1352        TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
1353                             ANDIVal, ANDISVal), 0);
1354
1355      if (!Res)
1356        Res = TotalVal;
1357      else
1358        Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
1359                        Res, TotalVal), 0);
1360
1361      // Now, remove all groups with this underlying value and rotation
1362      // factor.
1363      eraseMatchingBitGroups([VRI](const BitGroup &BG) {
1364        return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
1365      });
1366    }
1367  }
1368
1369  // Instruction selection for the 32-bit case.
1370  SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) {
1371    SDLoc dl(N);
1372    SDValue Res;
1373
1374    if (InstCnt) *InstCnt = 0;
1375
1376    // Take care of cases that should use andi/andis first.
1377    SelectAndParts32(dl, Res, InstCnt);
1378
1379    // If we've not yet selected a 'starting' instruction, and we have no zeros
1380    // to fill in, select the (Value, RLAmt) with the highest priority (largest
1381    // number of groups), and start with this rotated value.
1382    if ((!HasZeros || LateMask) && !Res) {
1383      ValueRotInfo &VRI = ValueRotsVec[0];
1384      if (VRI.RLAmt) {
1385        if (InstCnt) *InstCnt += 1;
1386        SDValue Ops[] =
1387          { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),
1388            getI32Imm(31, dl) };
1389        Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
1390                      0);
1391      } else {
1392        Res = VRI.V;
1393      }
1394
1395      // Now, remove all groups with this underlying value and rotation factor.
1396      eraseMatchingBitGroups([VRI](const BitGroup &BG) {
1397        return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
1398      });
1399    }
1400
1401    if (InstCnt) *InstCnt += BitGroups.size();
1402
1403    // Insert the other groups (one at a time).
1404    for (auto &BG : BitGroups) {
1405      if (!Res) {
1406        SDValue Ops[] =
1407          { BG.V, getI32Imm(BG.RLAmt, dl),
1408            getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
1409            getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
1410        Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
1411      } else {
1412        SDValue Ops[] =
1413          { Res, BG.V, getI32Imm(BG.RLAmt, dl),
1414              getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
1415            getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
1416        Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
1417      }
1418    }
1419
1420    if (LateMask) {
1421      unsigned Mask = (unsigned) getZerosMask();
1422
1423      unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
1424      assert((ANDIMask != 0 || ANDISMask != 0) &&
1425             "No set bits in zeros mask?");
1426
1427      if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
1428                               (unsigned) (ANDISMask != 0) +
1429                               (unsigned) (ANDIMask != 0 && ANDISMask != 0);
1430
1431      SDValue ANDIVal, ANDISVal;
1432      if (ANDIMask != 0)
1433        ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32,
1434                            Res, getI32Imm(ANDIMask, dl)), 0);
1435      if (ANDISMask != 0)
1436        ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32,
1437                             Res, getI32Imm(ANDISMask, dl)), 0);
1438
1439      if (!ANDIVal)
1440        Res = ANDISVal;
1441      else if (!ANDISVal)
1442        Res = ANDIVal;
1443      else
1444        Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
1445                        ANDIVal, ANDISVal), 0);
1446    }
1447
1448    return Res.getNode();
1449  }
1450
1451  unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32,
1452                                unsigned MaskStart, unsigned MaskEnd,
1453                                bool IsIns) {
1454    // In the notation used by the instructions, 'start' and 'end' are reversed
1455    // because bits are counted from high to low order.
1456    unsigned InstMaskStart = 64 - MaskEnd - 1,
1457             InstMaskEnd   = 64 - MaskStart - 1;
1458
1459    if (Repl32)
1460      return 1;
1461
1462    if ((!IsIns && (InstMaskEnd == 63 || InstMaskStart == 0)) ||
1463        InstMaskEnd == 63 - RLAmt)
1464      return 1;
1465
1466    return 2;
1467  }
1468
1469  // For 64-bit values, not all combinations of rotates and masks are
1470  // available. Produce one if it is available.
1471  SDValue SelectRotMask64(SDValue V, SDLoc dl, unsigned RLAmt, bool Repl32,
1472                          unsigned MaskStart, unsigned MaskEnd,
1473                          unsigned *InstCnt = nullptr) {
1474    // In the notation used by the instructions, 'start' and 'end' are reversed
1475    // because bits are counted from high to low order.
1476    unsigned InstMaskStart = 64 - MaskEnd - 1,
1477             InstMaskEnd   = 64 - MaskStart - 1;
1478
1479    if (InstCnt) *InstCnt += 1;
1480
1481    if (Repl32) {
1482      // This rotation amount assumes that the lower 32 bits of the quantity
1483      // are replicated in the high 32 bits by the rotation operator (which is
1484      // done by rlwinm and friends).
1485      assert(InstMaskStart >= 32 && "Mask cannot start out of range");
1486      assert(InstMaskEnd   >= 32 && "Mask cannot end out of range");
1487      SDValue Ops[] =
1488        { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart - 32, dl),
1489          getI32Imm(InstMaskEnd - 32, dl) };
1490      return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64,
1491                                            Ops), 0);
1492    }
1493
1494    if (InstMaskEnd == 63) {
1495      SDValue Ops[] =
1496        { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) };
1497      return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0);
1498    }
1499
1500    if (InstMaskStart == 0) {
1501      SDValue Ops[] =
1502        { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskEnd, dl) };
1503      return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0);
1504    }
1505
1506    if (InstMaskEnd == 63 - RLAmt) {
1507      SDValue Ops[] =
1508        { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) };
1509      return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0);
1510    }
1511
1512    // We cannot do this with a single instruction, so we'll use two. The
1513    // problem is that we're not free to choose both a rotation amount and mask
1514    // start and end independently. We can choose an arbitrary mask start and
1515    // end, but then the rotation amount is fixed. Rotation, however, can be
1516    // inverted, and so by applying an "inverse" rotation first, we can get the
1517    // desired result.
1518    if (InstCnt) *InstCnt += 1;
1519
1520    // The rotation mask for the second instruction must be MaskStart.
1521    unsigned RLAmt2 = MaskStart;
1522    // The first instruction must rotate V so that the overall rotation amount
1523    // is RLAmt.
1524    unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
1525    if (RLAmt1)
1526      V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
1527    return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd);
1528  }
1529
1530  // For 64-bit values, not all combinations of rotates and masks are
1531  // available. Produce a rotate-mask-and-insert if one is available.
1532  SDValue SelectRotMaskIns64(SDValue Base, SDValue V, SDLoc dl, unsigned RLAmt,
1533                             bool Repl32, unsigned MaskStart,
1534                             unsigned MaskEnd, unsigned *InstCnt = nullptr) {
1535    // In the notation used by the instructions, 'start' and 'end' are reversed
1536    // because bits are counted from high to low order.
1537    unsigned InstMaskStart = 64 - MaskEnd - 1,
1538             InstMaskEnd   = 64 - MaskStart - 1;
1539
1540    if (InstCnt) *InstCnt += 1;
1541
1542    if (Repl32) {
1543      // This rotation amount assumes that the lower 32 bits of the quantity
1544      // are replicated in the high 32 bits by the rotation operator (which is
1545      // done by rlwinm and friends).
1546      assert(InstMaskStart >= 32 && "Mask cannot start out of range");
1547      assert(InstMaskEnd   >= 32 && "Mask cannot end out of range");
1548      SDValue Ops[] =
1549        { Base, V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart - 32, dl),
1550          getI32Imm(InstMaskEnd - 32, dl) };
1551      return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64,
1552                                            Ops), 0);
1553    }
1554
1555    if (InstMaskEnd == 63 - RLAmt) {
1556      SDValue Ops[] =
1557        { Base, V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) };
1558      return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0);
1559    }
1560
1561    // We cannot do this with a single instruction, so we'll use two. The
1562    // problem is that we're not free to choose both a rotation amount and mask
1563    // start and end independently. We can choose an arbitrary mask start and
1564    // end, but then the rotation amount is fixed. Rotation, however, can be
1565    // inverted, and so by applying an "inverse" rotation first, we can get the
1566    // desired result.
1567    if (InstCnt) *InstCnt += 1;
1568
1569    // The rotation mask for the second instruction must be MaskStart.
1570    unsigned RLAmt2 = MaskStart;
1571    // The first instruction must rotate V so that the overall rotation amount
1572    // is RLAmt.
1573    unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
1574    if (RLAmt1)
1575      V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
1576    return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd);
1577  }
1578
1579  void SelectAndParts64(SDLoc dl, SDValue &Res, unsigned *InstCnt) {
1580    if (BPermRewriterNoMasking)
1581      return;
1582
1583    // The idea here is the same as in the 32-bit version, but with additional
1584    // complications from the fact that Repl32 might be true. Because we
1585    // aggressively convert bit groups to Repl32 form (which, for small
1586    // rotation factors, involves no other change), and then coalesce, it might
1587    // be the case that a single 64-bit masking operation could handle both
1588    // some Repl32 groups and some non-Repl32 groups. If converting to Repl32
1589    // form allowed coalescing, then we must use a 32-bit rotaton in order to
1590    // completely capture the new combined bit group.
1591
1592    for (ValueRotInfo &VRI : ValueRotsVec) {
1593      uint64_t Mask = 0;
1594
1595      // We need to add to the mask all bits from the associated bit groups.
1596      // If Repl32 is false, we need to add bits from bit groups that have
1597      // Repl32 true, but are trivially convertable to Repl32 false. Such a
1598      // group is trivially convertable if it overlaps only with the lower 32
1599      // bits, and the group has not been coalesced.
1600      auto MatchingBG = [VRI](const BitGroup &BG) {
1601        if (VRI.V != BG.V)
1602          return false;
1603
1604        unsigned EffRLAmt = BG.RLAmt;
1605        if (!VRI.Repl32 && BG.Repl32) {
1606          if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx &&
1607              !BG.Repl32Coalesced) {
1608            if (BG.Repl32CR)
1609              EffRLAmt += 32;
1610          } else {
1611            return false;
1612          }
1613        } else if (VRI.Repl32 != BG.Repl32) {
1614          return false;
1615        }
1616
1617        if (VRI.RLAmt != EffRLAmt)
1618          return false;
1619
1620        return true;
1621      };
1622
1623      for (auto &BG : BitGroups) {
1624        if (!MatchingBG(BG))
1625          continue;
1626
1627        if (BG.StartIdx <= BG.EndIdx) {
1628          for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i)
1629            Mask |= (UINT64_C(1) << i);
1630        } else {
1631          for (unsigned i = BG.StartIdx; i < Bits.size(); ++i)
1632            Mask |= (UINT64_C(1) << i);
1633          for (unsigned i = 0; i <= BG.EndIdx; ++i)
1634            Mask |= (UINT64_C(1) << i);
1635        }
1636      }
1637
1638      // We can use the 32-bit andi/andis technique if the mask does not
1639      // require any higher-order bits. This can save an instruction compared
1640      // to always using the general 64-bit technique.
1641      bool Use32BitInsts = isUInt<32>(Mask);
1642      // Compute the masks for andi/andis that would be necessary.
1643      unsigned ANDIMask = (Mask & UINT16_MAX),
1644               ANDISMask = (Mask >> 16) & UINT16_MAX;
1645
1646      bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask));
1647
1648      unsigned NumAndInsts = (unsigned) NeedsRotate +
1649                             (unsigned) (bool) Res;
1650      if (Use32BitInsts)
1651        NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) +
1652                       (unsigned) (ANDIMask != 0 && ANDISMask != 0);
1653      else
1654        NumAndInsts += SelectInt64Count(Mask) + /* and */ 1;
1655
1656      unsigned NumRLInsts = 0;
1657      bool FirstBG = true;
1658      for (auto &BG : BitGroups) {
1659        if (!MatchingBG(BG))
1660          continue;
1661        NumRLInsts +=
1662          SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,
1663                               !FirstBG);
1664        FirstBG = false;
1665      }
1666
1667      DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode() <<
1668                      " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":") <<
1669                      "\n\t\t\tisel using masking: " << NumAndInsts <<
1670                      " using rotates: " << NumRLInsts << "\n");
1671
1672      // When we'd use andi/andis, we bias toward using the rotates (andi only
1673      // has a record form, and is cracked on POWER cores). However, when using
1674      // general 64-bit constant formation, bias toward the constant form,
1675      // because that exposes more opportunities for CSE.
1676      if (NumAndInsts > NumRLInsts)
1677        continue;
1678      if (Use32BitInsts && NumAndInsts == NumRLInsts)
1679        continue;
1680
1681      DEBUG(dbgs() << "\t\t\t\tusing masking\n");
1682
1683      if (InstCnt) *InstCnt += NumAndInsts;
1684
1685      SDValue VRot;
1686      // We actually need to generate a rotation if we have a non-zero rotation
1687      // factor or, in the Repl32 case, if we care about any of the
1688      // higher-order replicated bits. In the latter case, we generate a mask
1689      // backward so that it actually includes the entire 64 bits.
1690      if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)))
1691        VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
1692                               VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63);
1693      else
1694        VRot = VRI.V;
1695
1696      SDValue TotalVal;
1697      if (Use32BitInsts) {
1698        assert((ANDIMask != 0 || ANDISMask != 0) &&
1699               "No set bits in mask when using 32-bit ands for 64-bit value");
1700
1701        SDValue ANDIVal, ANDISVal;
1702        if (ANDIMask != 0)
1703          ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64,
1704                              VRot, getI32Imm(ANDIMask, dl)), 0);
1705        if (ANDISMask != 0)
1706          ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64,
1707                               VRot, getI32Imm(ANDISMask, dl)), 0);
1708
1709        if (!ANDIVal)
1710          TotalVal = ANDISVal;
1711        else if (!ANDISVal)
1712          TotalVal = ANDIVal;
1713        else
1714          TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
1715                               ANDIVal, ANDISVal), 0);
1716      } else {
1717        TotalVal = SDValue(SelectInt64(CurDAG, dl, Mask), 0);
1718        TotalVal =
1719          SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
1720                                         VRot, TotalVal), 0);
1721     }
1722
1723      if (!Res)
1724        Res = TotalVal;
1725      else
1726        Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
1727                                             Res, TotalVal), 0);
1728
1729      // Now, remove all groups with this underlying value and rotation
1730      // factor.
1731      eraseMatchingBitGroups(MatchingBG);
1732    }
1733  }
1734
1735  // Instruction selection for the 64-bit case.
1736  SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) {
1737    SDLoc dl(N);
1738    SDValue Res;
1739
1740    if (InstCnt) *InstCnt = 0;
1741
1742    // Take care of cases that should use andi/andis first.
1743    SelectAndParts64(dl, Res, InstCnt);
1744
1745    // If we've not yet selected a 'starting' instruction, and we have no zeros
1746    // to fill in, select the (Value, RLAmt) with the highest priority (largest
1747    // number of groups), and start with this rotated value.
1748    if ((!HasZeros || LateMask) && !Res) {
1749      // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
1750      // groups will come first, and so the VRI representing the largest number
1751      // of groups might not be first (it might be the first Repl32 groups).
1752      unsigned MaxGroupsIdx = 0;
1753      if (!ValueRotsVec[0].Repl32) {
1754        for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i)
1755          if (ValueRotsVec[i].Repl32) {
1756            if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups)
1757              MaxGroupsIdx = i;
1758            break;
1759          }
1760      }
1761
1762      ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx];
1763      bool NeedsRotate = false;
1764      if (VRI.RLAmt) {
1765        NeedsRotate = true;
1766      } else if (VRI.Repl32) {
1767        for (auto &BG : BitGroups) {
1768          if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt ||
1769              BG.Repl32 != VRI.Repl32)
1770            continue;
1771
1772          // We don't need a rotate if the bit group is confined to the lower
1773          // 32 bits.
1774          if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx)
1775            continue;
1776
1777          NeedsRotate = true;
1778          break;
1779        }
1780      }
1781
1782      if (NeedsRotate)
1783        Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
1784                              VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63,
1785                              InstCnt);
1786      else
1787        Res = VRI.V;
1788
1789      // Now, remove all groups with this underlying value and rotation factor.
1790      if (Res)
1791        eraseMatchingBitGroups([VRI](const BitGroup &BG) {
1792          return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt &&
1793                 BG.Repl32 == VRI.Repl32;
1794        });
1795    }
1796
1797    // Because 64-bit rotates are more flexible than inserts, we might have a
1798    // preference regarding which one we do first (to save one instruction).
1799    if (!Res)
1800      for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) {
1801        if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
1802                                false) <
1803            SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
1804                                true)) {
1805          if (I != BitGroups.begin()) {
1806            BitGroup BG = *I;
1807            BitGroups.erase(I);
1808            BitGroups.insert(BitGroups.begin(), BG);
1809          }
1810
1811          break;
1812        }
1813      }
1814
1815    // Insert the other groups (one at a time).
1816    for (auto &BG : BitGroups) {
1817      if (!Res)
1818        Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx,
1819                              BG.EndIdx, InstCnt);
1820      else
1821        Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32,
1822                                 BG.StartIdx, BG.EndIdx, InstCnt);
1823    }
1824
1825    if (LateMask) {
1826      uint64_t Mask = getZerosMask();
1827
1828      // We can use the 32-bit andi/andis technique if the mask does not
1829      // require any higher-order bits. This can save an instruction compared
1830      // to always using the general 64-bit technique.
1831      bool Use32BitInsts = isUInt<32>(Mask);
1832      // Compute the masks for andi/andis that would be necessary.
1833      unsigned ANDIMask = (Mask & UINT16_MAX),
1834               ANDISMask = (Mask >> 16) & UINT16_MAX;
1835
1836      if (Use32BitInsts) {
1837        assert((ANDIMask != 0 || ANDISMask != 0) &&
1838               "No set bits in mask when using 32-bit ands for 64-bit value");
1839
1840        if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
1841                                 (unsigned) (ANDISMask != 0) +
1842                                 (unsigned) (ANDIMask != 0 && ANDISMask != 0);
1843
1844        SDValue ANDIVal, ANDISVal;
1845        if (ANDIMask != 0)
1846          ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64,
1847                              Res, getI32Imm(ANDIMask, dl)), 0);
1848        if (ANDISMask != 0)
1849          ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64,
1850                               Res, getI32Imm(ANDISMask, dl)), 0);
1851
1852        if (!ANDIVal)
1853          Res = ANDISVal;
1854        else if (!ANDISVal)
1855          Res = ANDIVal;
1856        else
1857          Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
1858                          ANDIVal, ANDISVal), 0);
1859      } else {
1860        if (InstCnt) *InstCnt += SelectInt64Count(Mask) + /* and */ 1;
1861
1862        SDValue MaskVal = SDValue(SelectInt64(CurDAG, dl, Mask), 0);
1863        Res =
1864          SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
1865                                         Res, MaskVal), 0);
1866      }
1867    }
1868
1869    return Res.getNode();
1870  }
1871
1872  SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) {
1873    // Fill in BitGroups.
1874    collectBitGroups(LateMask);
1875    if (BitGroups.empty())
1876      return nullptr;
1877
1878    // For 64-bit values, figure out when we can use 32-bit instructions.
1879    if (Bits.size() == 64)
1880      assignRepl32BitGroups();
1881
1882    // Fill in ValueRotsVec.
1883    collectValueRotInfo();
1884
1885    if (Bits.size() == 32) {
1886      return Select32(N, LateMask, InstCnt);
1887    } else {
1888      assert(Bits.size() == 64 && "Not 64 bits here?");
1889      return Select64(N, LateMask, InstCnt);
1890    }
1891
1892    return nullptr;
1893  }
1894
1895  void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
1896    BitGroups.erase(std::remove_if(BitGroups.begin(), BitGroups.end(), F),
1897                    BitGroups.end());
1898  }
1899
1900  SmallVector<ValueBit, 64> Bits;
1901
1902  bool HasZeros;
1903  SmallVector<unsigned, 64> RLAmt;
1904
1905  SmallVector<BitGroup, 16> BitGroups;
1906
1907  DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots;
1908  SmallVector<ValueRotInfo, 16> ValueRotsVec;
1909
1910  SelectionDAG *CurDAG;
1911
1912public:
1913  BitPermutationSelector(SelectionDAG *DAG)
1914    : CurDAG(DAG) {}
1915
1916  // Here we try to match complex bit permutations into a set of
1917  // rotate-and-shift/shift/and/or instructions, using a set of heuristics
1918  // known to produce optimial code for common cases (like i32 byte swapping).
1919  SDNode *Select(SDNode *N) {
1920    Bits.resize(N->getValueType(0).getSizeInBits());
1921    if (!getValueBits(SDValue(N, 0), Bits))
1922      return nullptr;
1923
1924    DEBUG(dbgs() << "Considering bit-permutation-based instruction"
1925                    " selection for:    ");
1926    DEBUG(N->dump(CurDAG));
1927
1928    // Fill it RLAmt and set HasZeros.
1929    computeRotationAmounts();
1930
1931    if (!HasZeros)
1932      return Select(N, false);
1933
1934    // We currently have two techniques for handling results with zeros: early
1935    // masking (the default) and late masking. Late masking is sometimes more
1936    // efficient, but because the structure of the bit groups is different, it
1937    // is hard to tell without generating both and comparing the results. With
1938    // late masking, we ignore zeros in the resulting value when inserting each
1939    // set of bit groups, and then mask in the zeros at the end. With early
1940    // masking, we only insert the non-zero parts of the result at every step.
1941
1942    unsigned InstCnt, InstCntLateMask;
1943    DEBUG(dbgs() << "\tEarly masking:\n");
1944    SDNode *RN = Select(N, false, &InstCnt);
1945    DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n");
1946
1947    DEBUG(dbgs() << "\tLate masking:\n");
1948    SDNode *RNLM = Select(N, true, &InstCntLateMask);
1949    DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask <<
1950                    " instructions\n");
1951
1952    if (InstCnt <= InstCntLateMask) {
1953      DEBUG(dbgs() << "\tUsing early-masking for isel\n");
1954      return RN;
1955    }
1956
1957    DEBUG(dbgs() << "\tUsing late-masking for isel\n");
1958    return RNLM;
1959  }
1960};
1961} // anonymous namespace
1962
1963SDNode *PPCDAGToDAGISel::SelectBitPermutation(SDNode *N) {
1964  if (N->getValueType(0) != MVT::i32 &&
1965      N->getValueType(0) != MVT::i64)
1966    return nullptr;
1967
1968  if (!UseBitPermRewriter)
1969    return nullptr;
1970
1971  switch (N->getOpcode()) {
1972  default: break;
1973  case ISD::ROTL:
1974  case ISD::SHL:
1975  case ISD::SRL:
1976  case ISD::AND:
1977  case ISD::OR: {
1978    BitPermutationSelector BPS(CurDAG);
1979    return BPS.Select(N);
1980  }
1981  }
1982
1983  return nullptr;
1984}
1985
1986/// SelectCC - Select a comparison of the specified values with the specified
1987/// condition code, returning the CR# of the expression.
1988SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
1989                                    ISD::CondCode CC, SDLoc dl) {
1990  // Always select the LHS.
1991  unsigned Opc;
1992
1993  if (LHS.getValueType() == MVT::i32) {
1994    unsigned Imm;
1995    if (CC == ISD::SETEQ || CC == ISD::SETNE) {
1996      if (isInt32Immediate(RHS, Imm)) {
1997        // SETEQ/SETNE comparison with 16-bit immediate, fold it.
1998        if (isUInt<16>(Imm))
1999          return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
2000                                                getI32Imm(Imm & 0xFFFF, dl)),
2001                         0);
2002        // If this is a 16-bit signed immediate, fold it.
2003        if (isInt<16>((int)Imm))
2004          return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
2005                                                getI32Imm(Imm & 0xFFFF, dl)),
2006                         0);
2007
2008        // For non-equality comparisons, the default code would materialize the
2009        // constant, then compare against it, like this:
2010        //   lis r2, 4660
2011        //   ori r2, r2, 22136
2012        //   cmpw cr0, r3, r2
2013        // Since we are just comparing for equality, we can emit this instead:
2014        //   xoris r0,r3,0x1234
2015        //   cmplwi cr0,r0,0x5678
2016        //   beq cr0,L6
2017        SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
2018                                           getI32Imm(Imm >> 16, dl)), 0);
2019        return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
2020                                              getI32Imm(Imm & 0xFFFF, dl)), 0);
2021      }
2022      Opc = PPC::CMPLW;
2023    } else if (ISD::isUnsignedIntSetCC(CC)) {
2024      if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))
2025        return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
2026                                              getI32Imm(Imm & 0xFFFF, dl)), 0);
2027      Opc = PPC::CMPLW;
2028    } else {
2029      short SImm;
2030      if (isIntS16Immediate(RHS, SImm))
2031        return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
2032                                              getI32Imm((int)SImm & 0xFFFF,
2033                                                        dl)),
2034                         0);
2035      Opc = PPC::CMPW;
2036    }
2037  } else if (LHS.getValueType() == MVT::i64) {
2038    uint64_t Imm;
2039    if (CC == ISD::SETEQ || CC == ISD::SETNE) {
2040      if (isInt64Immediate(RHS.getNode(), Imm)) {
2041        // SETEQ/SETNE comparison with 16-bit immediate, fold it.
2042        if (isUInt<16>(Imm))
2043          return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
2044                                                getI32Imm(Imm & 0xFFFF, dl)),
2045                         0);
2046        // If this is a 16-bit signed immediate, fold it.
2047        if (isInt<16>(Imm))
2048          return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
2049                                                getI32Imm(Imm & 0xFFFF, dl)),
2050                         0);
2051
2052        // For non-equality comparisons, the default code would materialize the
2053        // constant, then compare against it, like this:
2054        //   lis r2, 4660
2055        //   ori r2, r2, 22136
2056        //   cmpd cr0, r3, r2
2057        // Since we are just comparing for equality, we can emit this instead:
2058        //   xoris r0,r3,0x1234
2059        //   cmpldi cr0,r0,0x5678
2060        //   beq cr0,L6
2061        if (isUInt<32>(Imm)) {
2062          SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
2063                                             getI64Imm(Imm >> 16, dl)), 0);
2064          return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
2065                                                getI64Imm(Imm & 0xFFFF, dl)),
2066                         0);
2067        }
2068      }
2069      Opc = PPC::CMPLD;
2070    } else if (ISD::isUnsignedIntSetCC(CC)) {
2071      if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
2072        return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
2073                                              getI64Imm(Imm & 0xFFFF, dl)), 0);
2074      Opc = PPC::CMPLD;
2075    } else {
2076      short SImm;
2077      if (isIntS16Immediate(RHS, SImm))
2078        return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
2079                                              getI64Imm(SImm & 0xFFFF, dl)),
2080                         0);
2081      Opc = PPC::CMPD;
2082    }
2083  } else if (LHS.getValueType() == MVT::f32) {
2084    Opc = PPC::FCMPUS;
2085  } else {
2086    assert(LHS.getValueType() == MVT::f64 && "Unknown vt!");
2087    Opc = PPCSubTarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
2088  }
2089  return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
2090}
2091
2092static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC) {
2093  switch (CC) {
2094  case ISD::SETUEQ:
2095  case ISD::SETONE:
2096  case ISD::SETOLE:
2097  case ISD::SETOGE:
2098    llvm_unreachable("Should be lowered by legalize!");
2099  default: llvm_unreachable("Unknown condition!");
2100  case ISD::SETOEQ:
2101  case ISD::SETEQ:  return PPC::PRED_EQ;
2102  case ISD::SETUNE:
2103  case ISD::SETNE:  return PPC::PRED_NE;
2104  case ISD::SETOLT:
2105  case ISD::SETLT:  return PPC::PRED_LT;
2106  case ISD::SETULE:
2107  case ISD::SETLE:  return PPC::PRED_LE;
2108  case ISD::SETOGT:
2109  case ISD::SETGT:  return PPC::PRED_GT;
2110  case ISD::SETUGE:
2111  case ISD::SETGE:  return PPC::PRED_GE;
2112  case ISD::SETO:   return PPC::PRED_NU;
2113  case ISD::SETUO:  return PPC::PRED_UN;
2114    // These two are invalid for floating point.  Assume we have int.
2115  case ISD::SETULT: return PPC::PRED_LT;
2116  case ISD::SETUGT: return PPC::PRED_GT;
2117  }
2118}
2119
2120/// getCRIdxForSetCC - Return the index of the condition register field
2121/// associated with the SetCC condition, and whether or not the field is
2122/// treated as inverted.  That is, lt = 0; ge = 0 inverted.
2123static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
2124  Invert = false;
2125  switch (CC) {
2126  default: llvm_unreachable("Unknown condition!");
2127  case ISD::SETOLT:
2128  case ISD::SETLT:  return 0;                  // Bit #0 = SETOLT
2129  case ISD::SETOGT:
2130  case ISD::SETGT:  return 1;                  // Bit #1 = SETOGT
2131  case ISD::SETOEQ:
2132  case ISD::SETEQ:  return 2;                  // Bit #2 = SETOEQ
2133  case ISD::SETUO:  return 3;                  // Bit #3 = SETUO
2134  case ISD::SETUGE:
2135  case ISD::SETGE:  Invert = true; return 0;   // !Bit #0 = SETUGE
2136  case ISD::SETULE:
2137  case ISD::SETLE:  Invert = true; return 1;   // !Bit #1 = SETULE
2138  case ISD::SETUNE:
2139  case ISD::SETNE:  Invert = true; return 2;   // !Bit #2 = SETUNE
2140  case ISD::SETO:   Invert = true; return 3;   // !Bit #3 = SETO
2141  case ISD::SETUEQ:
2142  case ISD::SETOGE:
2143  case ISD::SETOLE:
2144  case ISD::SETONE:
2145    llvm_unreachable("Invalid branch code: should be expanded by legalize");
2146  // These are invalid for floating point.  Assume integer.
2147  case ISD::SETULT: return 0;
2148  case ISD::SETUGT: return 1;
2149  }
2150}
2151
2152// getVCmpInst: return the vector compare instruction for the specified
2153// vector type and condition code. Since this is for altivec specific code,
2154// only support the altivec types (v16i8, v8i16, v4i32, v2i64, and v4f32).
2155static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
2156                                bool HasVSX, bool &Swap, bool &Negate) {
2157  Swap = false;
2158  Negate = false;
2159
2160  if (VecVT.isFloatingPoint()) {
2161    /* Handle some cases by swapping input operands.  */
2162    switch (CC) {
2163      case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;
2164      case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
2165      case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;
2166      case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;
2167      case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
2168      case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;
2169      default: break;
2170    }
2171    /* Handle some cases by negating the result.  */
2172    switch (CC) {
2173      case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
2174      case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;
2175      case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;
2176      case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;
2177      default: break;
2178    }
2179    /* We have instructions implementing the remaining cases.  */
2180    switch (CC) {
2181      case ISD::SETEQ:
2182      case ISD::SETOEQ:
2183        if (VecVT == MVT::v4f32)
2184          return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
2185        else if (VecVT == MVT::v2f64)
2186          return PPC::XVCMPEQDP;
2187        break;
2188      case ISD::SETGT:
2189      case ISD::SETOGT:
2190        if (VecVT == MVT::v4f32)
2191          return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
2192        else if (VecVT == MVT::v2f64)
2193          return PPC::XVCMPGTDP;
2194        break;
2195      case ISD::SETGE:
2196      case ISD::SETOGE:
2197        if (VecVT == MVT::v4f32)
2198          return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
2199        else if (VecVT == MVT::v2f64)
2200          return PPC::XVCMPGEDP;
2201        break;
2202      default:
2203        break;
2204    }
2205    llvm_unreachable("Invalid floating-point vector compare condition");
2206  } else {
2207    /* Handle some cases by swapping input operands.  */
2208    switch (CC) {
2209      case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;
2210      case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
2211      case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
2212      case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;
2213      default: break;
2214    }
2215    /* Handle some cases by negating the result.  */
2216    switch (CC) {
2217      case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
2218      case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;
2219      case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;
2220      case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;
2221      default: break;
2222    }
2223    /* We have instructions implementing the remaining cases.  */
2224    switch (CC) {
2225      case ISD::SETEQ:
2226      case ISD::SETUEQ:
2227        if (VecVT == MVT::v16i8)
2228          return PPC::VCMPEQUB;
2229        else if (VecVT == MVT::v8i16)
2230          return PPC::VCMPEQUH;
2231        else if (VecVT == MVT::v4i32)
2232          return PPC::VCMPEQUW;
2233        else if (VecVT == MVT::v2i64)
2234          return PPC::VCMPEQUD;
2235        break;
2236      case ISD::SETGT:
2237        if (VecVT == MVT::v16i8)
2238          return PPC::VCMPGTSB;
2239        else if (VecVT == MVT::v8i16)
2240          return PPC::VCMPGTSH;
2241        else if (VecVT == MVT::v4i32)
2242          return PPC::VCMPGTSW;
2243        else if (VecVT == MVT::v2i64)
2244          return PPC::VCMPGTSD;
2245        break;
2246      case ISD::SETUGT:
2247        if (VecVT == MVT::v16i8)
2248          return PPC::VCMPGTUB;
2249        else if (VecVT == MVT::v8i16)
2250          return PPC::VCMPGTUH;
2251        else if (VecVT == MVT::v4i32)
2252          return PPC::VCMPGTUW;
2253        else if (VecVT == MVT::v2i64)
2254          return PPC::VCMPGTUD;
2255        break;
2256      default:
2257        break;
2258    }
2259    llvm_unreachable("Invalid integer vector compare condition");
2260  }
2261}
2262
2263SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
2264  SDLoc dl(N);
2265  unsigned Imm;
2266  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2267  EVT PtrVT =
2268      CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
2269  bool isPPC64 = (PtrVT == MVT::i64);
2270
2271  if (!PPCSubTarget->useCRBits() &&
2272      isInt32Immediate(N->getOperand(1), Imm)) {
2273    // We can codegen setcc op, imm very efficiently compared to a brcond.
2274    // Check for those cases here.
2275    // setcc op, 0
2276    if (Imm == 0) {
2277      SDValue Op = N->getOperand(0);
2278      switch (CC) {
2279      default: break;
2280      case ISD::SETEQ: {
2281        Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
2282        SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl),
2283                          getI32Imm(31, dl) };
2284        return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
2285      }
2286      case ISD::SETNE: {
2287        if (isPPC64) break;
2288        SDValue AD =
2289          SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
2290                                         Op, getI32Imm(~0U, dl)), 0);
2291        return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op,
2292                                    AD.getValue(1));
2293      }
2294      case ISD::SETLT: {
2295        SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
2296                          getI32Imm(31, dl) };
2297        return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
2298      }
2299      case ISD::SETGT: {
2300        SDValue T =
2301          SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
2302        T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
2303        SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl),
2304                          getI32Imm(31, dl) };
2305        return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
2306      }
2307      }
2308    } else if (Imm == ~0U) {        // setcc op, -1
2309      SDValue Op = N->getOperand(0);
2310      switch (CC) {
2311      default: break;
2312      case ISD::SETEQ:
2313        if (isPPC64) break;
2314        Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
2315                                            Op, getI32Imm(1, dl)), 0);
2316        return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
2317                              SDValue(CurDAG->getMachineNode(PPC::LI, dl,
2318                                                             MVT::i32,
2319                                                             getI32Imm(0, dl)),
2320                                      0), Op.getValue(1));
2321      case ISD::SETNE: {
2322        if (isPPC64) break;
2323        Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
2324        SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
2325                                            Op, getI32Imm(~0U, dl));
2326        return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0),
2327                                    Op, SDValue(AD, 1));
2328      }
2329      case ISD::SETLT: {
2330        SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
2331                                                    getI32Imm(1, dl)), 0);
2332        SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
2333                                                    Op), 0);
2334        SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl),
2335                          getI32Imm(31, dl) };
2336        return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
2337      }
2338      case ISD::SETGT: {
2339        SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
2340                          getI32Imm(31, dl) };
2341        Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
2342        return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op,
2343                                    getI32Imm(1, dl));
2344      }
2345      }
2346    }
2347  }
2348
2349  SDValue LHS = N->getOperand(0);
2350  SDValue RHS = N->getOperand(1);
2351
2352  // Altivec Vector compare instructions do not set any CR register by default and
2353  // vector compare operations return the same type as the operands.
2354  if (LHS.getValueType().isVector()) {
2355    if (PPCSubTarget->hasQPX())
2356      return nullptr;
2357
2358    EVT VecVT = LHS.getValueType();
2359    bool Swap, Negate;
2360    unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC,
2361                                        PPCSubTarget->hasVSX(), Swap, Negate);
2362    if (Swap)
2363      std::swap(LHS, RHS);
2364
2365    EVT ResVT = VecVT.changeVectorElementTypeToInteger();
2366    if (Negate) {
2367      SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0);
2368      return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR :
2369                                                              PPC::VNOR,
2370                                  ResVT, VCmp, VCmp);
2371    }
2372
2373    return CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS);
2374  }
2375
2376  if (PPCSubTarget->useCRBits())
2377    return nullptr;
2378
2379  bool Inv;
2380  unsigned Idx = getCRIdxForSetCC(CC, Inv);
2381  SDValue CCReg = SelectCC(LHS, RHS, CC, dl);
2382  SDValue IntCR;
2383
2384  // Force the ccreg into CR7.
2385  SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);
2386
2387  SDValue InFlag(nullptr, 0);  // Null incoming flag value.
2388  CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
2389                               InFlag).getValue(1);
2390
2391  IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
2392                                         CCReg), 0);
2393
2394  SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl),
2395                      getI32Imm(31, dl), getI32Imm(31, dl) };
2396  if (!Inv)
2397    return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
2398
2399  // Get the specified bit.
2400  SDValue Tmp =
2401    SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
2402  return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl));
2403}
2404
2405SDNode *PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
2406  // Transfer memoperands.
2407  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2408  MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
2409  cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
2410  return Result;
2411}
2412
2413
2414// Select - Convert the specified operand from a target-independent to a
2415// target-specific node if it hasn't already been changed.
2416SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
2417  SDLoc dl(N);
2418  if (N->isMachineOpcode()) {
2419    N->setNodeId(-1);
2420    return nullptr;   // Already selected.
2421  }
2422
2423  // In case any misguided DAG-level optimizations form an ADD with a
2424  // TargetConstant operand, crash here instead of miscompiling (by selecting
2425  // an r+r add instead of some kind of r+i add).
2426  if (N->getOpcode() == ISD::ADD &&
2427      N->getOperand(1).getOpcode() == ISD::TargetConstant)
2428    llvm_unreachable("Invalid ADD with TargetConstant operand");
2429
2430  // Try matching complex bit permutations before doing anything else.
2431  if (SDNode *NN = SelectBitPermutation(N))
2432    return NN;
2433
2434  switch (N->getOpcode()) {
2435  default: break;
2436
2437  case ISD::Constant: {
2438    if (N->getValueType(0) == MVT::i64)
2439      return SelectInt64(CurDAG, N);
2440    break;
2441  }
2442
2443  case ISD::SETCC: {
2444    SDNode *SN = SelectSETCC(N);
2445    if (SN)
2446      return SN;
2447    break;
2448  }
2449  case PPCISD::GlobalBaseReg:
2450    return getGlobalBaseReg();
2451
2452  case ISD::FrameIndex:
2453    return getFrameIndex(N, N);
2454
2455  case PPCISD::MFOCRF: {
2456    SDValue InFlag = N->getOperand(1);
2457    return CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
2458                                  N->getOperand(0), InFlag);
2459  }
2460
2461  case PPCISD::READ_TIME_BASE: {
2462    return CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32,
2463                                  MVT::Other, N->getOperand(0));
2464  }
2465
2466  case PPCISD::SRA_ADDZE: {
2467    SDValue N0 = N->getOperand(0);
2468    SDValue ShiftAmt =
2469      CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))->
2470                                  getConstantIntValue(), dl,
2471                                  N->getValueType(0));
2472    if (N->getValueType(0) == MVT::i64) {
2473      SDNode *Op =
2474        CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue,
2475                               N0, ShiftAmt);
2476      return CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64,
2477                                  SDValue(Op, 0), SDValue(Op, 1));
2478    } else {
2479      assert(N->getValueType(0) == MVT::i32 &&
2480             "Expecting i64 or i32 in PPCISD::SRA_ADDZE");
2481      SDNode *Op =
2482        CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
2483                               N0, ShiftAmt);
2484      return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
2485                                  SDValue(Op, 0), SDValue(Op, 1));
2486    }
2487  }
2488
2489  case ISD::LOAD: {
2490    // Handle preincrement loads.
2491    LoadSDNode *LD = cast<LoadSDNode>(N);
2492    EVT LoadedVT = LD->getMemoryVT();
2493
2494    // Normal loads are handled by code generated from the .td file.
2495    if (LD->getAddressingMode() != ISD::PRE_INC)
2496      break;
2497
2498    SDValue Offset = LD->getOffset();
2499    if (Offset.getOpcode() == ISD::TargetConstant ||
2500        Offset.getOpcode() == ISD::TargetGlobalAddress) {
2501
2502      unsigned Opcode;
2503      bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
2504      if (LD->getValueType(0) != MVT::i64) {
2505        // Handle PPC32 integer and normal FP loads.
2506        assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
2507        switch (LoadedVT.getSimpleVT().SimpleTy) {
2508          default: llvm_unreachable("Invalid PPC load type!");
2509          case MVT::f64: Opcode = PPC::LFDU; break;
2510          case MVT::f32: Opcode = PPC::LFSU; break;
2511          case MVT::i32: Opcode = PPC::LWZU; break;
2512          case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
2513          case MVT::i1:
2514          case MVT::i8:  Opcode = PPC::LBZU; break;
2515        }
2516      } else {
2517        assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
2518        assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
2519        switch (LoadedVT.getSimpleVT().SimpleTy) {
2520          default: llvm_unreachable("Invalid PPC load type!");
2521          case MVT::i64: Opcode = PPC::LDU; break;
2522          case MVT::i32: Opcode = PPC::LWZU8; break;
2523          case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
2524          case MVT::i1:
2525          case MVT::i8:  Opcode = PPC::LBZU8; break;
2526        }
2527      }
2528
2529      SDValue Chain = LD->getChain();
2530      SDValue Base = LD->getBasePtr();
2531      SDValue Ops[] = { Offset, Base, Chain };
2532      return transferMemOperands(
2533          N, CurDAG->getMachineNode(
2534                 Opcode, dl, LD->getValueType(0),
2535                 PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other,
2536                 Ops));
2537    } else {
2538      unsigned Opcode;
2539      bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
2540      if (LD->getValueType(0) != MVT::i64) {
2541        // Handle PPC32 integer and normal FP loads.
2542        assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
2543        switch (LoadedVT.getSimpleVT().SimpleTy) {
2544          default: llvm_unreachable("Invalid PPC load type!");
2545          case MVT::v4f64: Opcode = PPC::QVLFDUX; break; // QPX
2546          case MVT::v4f32: Opcode = PPC::QVLFSUX; break; // QPX
2547          case MVT::f64: Opcode = PPC::LFDUX; break;
2548          case MVT::f32: Opcode = PPC::LFSUX; break;
2549          case MVT::i32: Opcode = PPC::LWZUX; break;
2550          case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break;
2551          case MVT::i1:
2552          case MVT::i8:  Opcode = PPC::LBZUX; break;
2553        }
2554      } else {
2555        assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
2556        assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&
2557               "Invalid sext update load");
2558        switch (LoadedVT.getSimpleVT().SimpleTy) {
2559          default: llvm_unreachable("Invalid PPC load type!");
2560          case MVT::i64: Opcode = PPC::LDUX; break;
2561          case MVT::i32: Opcode = isSExt ? PPC::LWAUX  : PPC::LWZUX8; break;
2562          case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break;
2563          case MVT::i1:
2564          case MVT::i8:  Opcode = PPC::LBZUX8; break;
2565        }
2566      }
2567
2568      SDValue Chain = LD->getChain();
2569      SDValue Base = LD->getBasePtr();
2570      SDValue Ops[] = { Base, Offset, Chain };
2571      return transferMemOperands(
2572          N, CurDAG->getMachineNode(
2573                 Opcode, dl, LD->getValueType(0),
2574                 PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other,
2575                 Ops));
2576    }
2577  }
2578
2579  case ISD::AND: {
2580    unsigned Imm, Imm2, SH, MB, ME;
2581    uint64_t Imm64;
2582
2583    // If this is an and of a value rotated between 0 and 31 bits and then and'd
2584    // with a mask, emit rlwinm
2585    if (isInt32Immediate(N->getOperand(1), Imm) &&
2586        isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) {
2587      SDValue Val = N->getOperand(0).getOperand(0);
2588      SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
2589                        getI32Imm(ME, dl) };
2590      return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
2591    }
2592    // If this is just a masked value where the input is not handled above, and
2593    // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
2594    if (isInt32Immediate(N->getOperand(1), Imm) &&
2595        isRunOfOnes(Imm, MB, ME) &&
2596        N->getOperand(0).getOpcode() != ISD::ROTL) {
2597      SDValue Val = N->getOperand(0);
2598      SDValue Ops[] = { Val, getI32Imm(0, dl), getI32Imm(MB, dl),
2599                        getI32Imm(ME, dl) };
2600      return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
2601    }
2602    // If this is a 64-bit zero-extension mask, emit rldicl.
2603    if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
2604        isMask_64(Imm64)) {
2605      SDValue Val = N->getOperand(0);
2606      MB = 64 - countTrailingOnes(Imm64);
2607      SH = 0;
2608
2609      // If the operand is a logical right shift, we can fold it into this
2610      // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
2611      // for n <= mb. The right shift is really a left rotate followed by a
2612      // mask, and this mask is a more-restrictive sub-mask of the mask implied
2613      // by the shift.
2614      if (Val.getOpcode() == ISD::SRL &&
2615          isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
2616        assert(Imm < 64 && "Illegal shift amount");
2617        Val = Val.getOperand(0);
2618        SH = 64 - Imm;
2619      }
2620
2621      SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
2622      return CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
2623    }
2624    // AND X, 0 -> 0, not "rlwinm 32".
2625    if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) {
2626      ReplaceUses(SDValue(N, 0), N->getOperand(1));
2627      return nullptr;
2628    }
2629    // ISD::OR doesn't get all the bitfield insertion fun.
2630    // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
2631    // bitfield insert.
2632    if (isInt32Immediate(N->getOperand(1), Imm) &&
2633        N->getOperand(0).getOpcode() == ISD::OR &&
2634        isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) {
2635      // The idea here is to check whether this is equivalent to:
2636      //   (c1 & m) | (x & ~m)
2637      // where m is a run-of-ones mask. The logic here is that, for each bit in
2638      // c1 and c2:
2639      //  - if both are 1, then the output will be 1.
2640      //  - if both are 0, then the output will be 0.
2641      //  - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
2642      //    come from x.
2643      //  - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
2644      //    be 0.
2645      //  If that last condition is never the case, then we can form m from the
2646      //  bits that are the same between c1 and c2.
2647      unsigned MB, ME;
2648      if (isRunOfOnes(~(Imm^Imm2), MB, ME) && !(~Imm & Imm2)) {
2649        SDValue Ops[] = { N->getOperand(0).getOperand(0),
2650                            N->getOperand(0).getOperand(1),
2651                            getI32Imm(0, dl), getI32Imm(MB, dl),
2652                            getI32Imm(ME, dl) };
2653        return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops);
2654      }
2655    }
2656
2657    // Other cases are autogenerated.
2658    break;
2659  }
2660  case ISD::OR: {
2661    if (N->getValueType(0) == MVT::i32)
2662      if (SDNode *I = SelectBitfieldInsert(N))
2663        return I;
2664
2665    short Imm;
2666    if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
2667        isIntS16Immediate(N->getOperand(1), Imm)) {
2668      APInt LHSKnownZero, LHSKnownOne;
2669      CurDAG->computeKnownBits(N->getOperand(0), LHSKnownZero, LHSKnownOne);
2670
2671      // If this is equivalent to an add, then we can fold it with the
2672      // FrameIndex calculation.
2673      if ((LHSKnownZero.getZExtValue()|~(uint64_t)Imm) == ~0ULL)
2674        return getFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
2675    }
2676
2677    // Other cases are autogenerated.
2678    break;
2679  }
2680  case ISD::ADD: {
2681    short Imm;
2682    if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
2683        isIntS16Immediate(N->getOperand(1), Imm))
2684      return getFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
2685
2686    break;
2687  }
2688  case ISD::SHL: {
2689    unsigned Imm, SH, MB, ME;
2690    if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
2691        isRotateAndMask(N, Imm, true, SH, MB, ME)) {
2692      SDValue Ops[] = { N->getOperand(0).getOperand(0),
2693                          getI32Imm(SH, dl), getI32Imm(MB, dl),
2694                          getI32Imm(ME, dl) };
2695      return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
2696    }
2697
2698    // Other cases are autogenerated.
2699    break;
2700  }
2701  case ISD::SRL: {
2702    unsigned Imm, SH, MB, ME;
2703    if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
2704        isRotateAndMask(N, Imm, true, SH, MB, ME)) {
2705      SDValue Ops[] = { N->getOperand(0).getOperand(0),
2706                          getI32Imm(SH, dl), getI32Imm(MB, dl),
2707                          getI32Imm(ME, dl) };
2708      return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
2709    }
2710
2711    // Other cases are autogenerated.
2712    break;
2713  }
2714  // FIXME: Remove this once the ANDI glue bug is fixed:
2715  case PPCISD::ANDIo_1_EQ_BIT:
2716  case PPCISD::ANDIo_1_GT_BIT: {
2717    if (!ANDIGlueBug)
2718      break;
2719
2720    EVT InVT = N->getOperand(0).getValueType();
2721    assert((InVT == MVT::i64 || InVT == MVT::i32) &&
2722           "Invalid input type for ANDIo_1_EQ_BIT");
2723
2724    unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDIo8 : PPC::ANDIo;
2725    SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,
2726                                        N->getOperand(0),
2727                                        CurDAG->getTargetConstant(1, dl, InVT)),
2728                 0);
2729    SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
2730    SDValue SRIdxVal =
2731      CurDAG->getTargetConstant(N->getOpcode() == PPCISD::ANDIo_1_EQ_BIT ?
2732                                PPC::sub_eq : PPC::sub_gt, dl, MVT::i32);
2733
2734    return CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1,
2735                                CR0Reg, SRIdxVal,
2736                                SDValue(AndI.getNode(), 1) /* glue */);
2737  }
2738  case ISD::SELECT_CC: {
2739    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
2740    EVT PtrVT =
2741        CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
2742    bool isPPC64 = (PtrVT == MVT::i64);
2743
2744    // If this is a select of i1 operands, we'll pattern match it.
2745    if (PPCSubTarget->useCRBits() &&
2746        N->getOperand(0).getValueType() == MVT::i1)
2747      break;
2748
2749    // Handle the setcc cases here.  select_cc lhs, 0, 1, 0, cc
2750    if (!isPPC64)
2751      if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
2752        if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
2753          if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
2754            if (N1C->isNullValue() && N3C->isNullValue() &&
2755                N2C->getZExtValue() == 1ULL && CC == ISD::SETNE &&
2756                // FIXME: Implement this optzn for PPC64.
2757                N->getValueType(0) == MVT::i32) {
2758              SDNode *Tmp =
2759                CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
2760                                       N->getOperand(0), getI32Imm(~0U, dl));
2761              return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32,
2762                                          SDValue(Tmp, 0), N->getOperand(0),
2763                                          SDValue(Tmp, 1));
2764            }
2765
2766    SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
2767
2768    if (N->getValueType(0) == MVT::i1) {
2769      // An i1 select is: (c & t) | (!c & f).
2770      bool Inv;
2771      unsigned Idx = getCRIdxForSetCC(CC, Inv);
2772
2773      unsigned SRI;
2774      switch (Idx) {
2775      default: llvm_unreachable("Invalid CC index");
2776      case 0: SRI = PPC::sub_lt; break;
2777      case 1: SRI = PPC::sub_gt; break;
2778      case 2: SRI = PPC::sub_eq; break;
2779      case 3: SRI = PPC::sub_un; break;
2780      }
2781
2782      SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg);
2783
2784      SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1,
2785                                              CCBit, CCBit), 0);
2786      SDValue C =    Inv ? NotCCBit : CCBit,
2787              NotC = Inv ? CCBit    : NotCCBit;
2788
2789      SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
2790                                           C, N->getOperand(2)), 0);
2791      SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
2792                                              NotC, N->getOperand(3)), 0);
2793
2794      return CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF);
2795    }
2796
2797    unsigned BROpc = getPredicateForSetCC(CC);
2798
2799    unsigned SelectCCOp;
2800    if (N->getValueType(0) == MVT::i32)
2801      SelectCCOp = PPC::SELECT_CC_I4;
2802    else if (N->getValueType(0) == MVT::i64)
2803      SelectCCOp = PPC::SELECT_CC_I8;
2804    else if (N->getValueType(0) == MVT::f32)
2805      if (PPCSubTarget->hasP8Vector())
2806        SelectCCOp = PPC::SELECT_CC_VSSRC;
2807      else
2808        SelectCCOp = PPC::SELECT_CC_F4;
2809    else if (N->getValueType(0) == MVT::f64)
2810      if (PPCSubTarget->hasVSX())
2811        SelectCCOp = PPC::SELECT_CC_VSFRC;
2812      else
2813        SelectCCOp = PPC::SELECT_CC_F8;
2814    else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f64)
2815      SelectCCOp = PPC::SELECT_CC_QFRC;
2816    else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f32)
2817      SelectCCOp = PPC::SELECT_CC_QSRC;
2818    else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4i1)
2819      SelectCCOp = PPC::SELECT_CC_QBRC;
2820    else if (N->getValueType(0) == MVT::v2f64 ||
2821             N->getValueType(0) == MVT::v2i64)
2822      SelectCCOp = PPC::SELECT_CC_VSRC;
2823    else
2824      SelectCCOp = PPC::SELECT_CC_VRRC;
2825
2826    SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
2827                        getI32Imm(BROpc, dl) };
2828    return CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);
2829  }
2830  case ISD::VSELECT:
2831    if (PPCSubTarget->hasVSX()) {
2832      SDValue Ops[] = { N->getOperand(2), N->getOperand(1), N->getOperand(0) };
2833      return CurDAG->SelectNodeTo(N, PPC::XXSEL, N->getValueType(0), Ops);
2834    }
2835
2836    break;
2837  case ISD::VECTOR_SHUFFLE:
2838    if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
2839                                  N->getValueType(0) == MVT::v2i64)) {
2840      ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
2841
2842      SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),
2843              Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1);
2844      unsigned DM[2];
2845
2846      for (int i = 0; i < 2; ++i)
2847        if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2)
2848          DM[i] = 0;
2849        else
2850          DM[i] = 1;
2851
2852      if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&
2853          Op1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
2854          isa<LoadSDNode>(Op1.getOperand(0))) {
2855        LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0));
2856        SDValue Base, Offset;
2857
2858        if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() &&
2859            (LD->getMemoryVT() == MVT::f64 ||
2860             LD->getMemoryVT() == MVT::i64) &&
2861            SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
2862          SDValue Chain = LD->getChain();
2863          SDValue Ops[] = { Base, Offset, Chain };
2864          return CurDAG->SelectNodeTo(N, PPC::LXVDSX,
2865                                      N->getValueType(0), Ops);
2866        }
2867      }
2868
2869      // For little endian, we must swap the input operands and adjust
2870      // the mask elements (reverse and invert them).
2871      if (PPCSubTarget->isLittleEndian()) {
2872        std::swap(Op1, Op2);
2873        unsigned tmp = DM[0];
2874        DM[0] = 1 - DM[1];
2875        DM[1] = 1 - tmp;
2876      }
2877
2878      SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl,
2879                                              MVT::i32);
2880      SDValue Ops[] = { Op1, Op2, DMV };
2881      return CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops);
2882    }
2883
2884    break;
2885  case PPCISD::BDNZ:
2886  case PPCISD::BDZ: {
2887    bool IsPPC64 = PPCSubTarget->isPPC64();
2888    SDValue Ops[] = { N->getOperand(1), N->getOperand(0) };
2889    return CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ ?
2890                                   (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
2891                                   (IsPPC64 ? PPC::BDZ8 : PPC::BDZ),
2892                                MVT::Other, Ops);
2893  }
2894  case PPCISD::COND_BRANCH: {
2895    // Op #0 is the Chain.
2896    // Op #1 is the PPC::PRED_* number.
2897    // Op #2 is the CR#
2898    // Op #3 is the Dest MBB
2899    // Op #4 is the Flag.
2900    // Prevent PPC::PRED_* from being selected into LI.
2901    unsigned PCC = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2902    if (EnableBranchHint)
2903      PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(3));
2904
2905    SDValue Pred = getI32Imm(PCC, dl);
2906    SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
2907      N->getOperand(0), N->getOperand(4) };
2908    return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
2909  }
2910  case ISD::BR_CC: {
2911    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
2912    unsigned PCC = getPredicateForSetCC(CC);
2913
2914    if (N->getOperand(2).getValueType() == MVT::i1) {
2915      unsigned Opc;
2916      bool Swap;
2917      switch (PCC) {
2918      default: llvm_unreachable("Unexpected Boolean-operand predicate");
2919      case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true;  break;
2920      case PPC::PRED_LE: Opc = PPC::CRORC;  Swap = true;  break;
2921      case PPC::PRED_EQ: Opc = PPC::CREQV;  Swap = false; break;
2922      case PPC::PRED_GE: Opc = PPC::CRORC;  Swap = false; break;
2923      case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break;
2924      case PPC::PRED_NE: Opc = PPC::CRXOR;  Swap = false; break;
2925      }
2926
2927      SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,
2928                                             N->getOperand(Swap ? 3 : 2),
2929                                             N->getOperand(Swap ? 2 : 3)), 0);
2930      return CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other,
2931                                  BitComp, N->getOperand(4), N->getOperand(0));
2932    }
2933
2934    if (EnableBranchHint)
2935      PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(4));
2936
2937    SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
2938    SDValue Ops[] = { getI32Imm(PCC, dl), CondCode,
2939                        N->getOperand(4), N->getOperand(0) };
2940    return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
2941  }
2942  case ISD::BRIND: {
2943    // FIXME: Should custom lower this.
2944    SDValue Chain = N->getOperand(0);
2945    SDValue Target = N->getOperand(1);
2946    unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
2947    unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
2948    Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target,
2949                                           Chain), 0);
2950    return CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
2951  }
2952  case PPCISD::TOC_ENTRY: {
2953    assert ((PPCSubTarget->isPPC64() || PPCSubTarget->isSVR4ABI()) &&
2954            "Only supported for 64-bit ABI and 32-bit SVR4");
2955    if (PPCSubTarget->isSVR4ABI() && !PPCSubTarget->isPPC64()) {
2956      SDValue GA = N->getOperand(0);
2957      return transferMemOperands(N, CurDAG->getMachineNode(PPC::LWZtoc, dl,
2958                                      MVT::i32, GA, N->getOperand(1)));
2959    }
2960
2961    // For medium and large code model, we generate two instructions as
2962    // described below.  Otherwise we allow SelectCodeCommon to handle this,
2963    // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA.
2964    CodeModel::Model CModel = TM.getCodeModel();
2965    if (CModel != CodeModel::Medium && CModel != CodeModel::Large)
2966      break;
2967
2968    // The first source operand is a TargetGlobalAddress or a TargetJumpTable.
2969    // If it must be toc-referenced according to PPCSubTarget, we generate:
2970    //   LDtocL(<ga:@sym>, ADDIStocHA(%X2, <ga:@sym>))
2971    // Otherwise we generate:
2972    //   ADDItocL(ADDIStocHA(%X2, <ga:@sym>), <ga:@sym>)
2973    SDValue GA = N->getOperand(0);
2974    SDValue TOCbase = N->getOperand(1);
2975    SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
2976                                         TOCbase, GA);
2977
2978    if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA) ||
2979        CModel == CodeModel::Large)
2980      return transferMemOperands(N, CurDAG->getMachineNode(PPC::LDtocL, dl,
2981                                      MVT::i64, GA, SDValue(Tmp, 0)));
2982
2983    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
2984      const GlobalValue *GV = G->getGlobal();
2985      unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV);
2986      if (GVFlags & PPCII::MO_NLP_FLAG) {
2987        return transferMemOperands(N, CurDAG->getMachineNode(PPC::LDtocL, dl,
2988                                        MVT::i64, GA, SDValue(Tmp, 0)));
2989      }
2990    }
2991
2992    return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
2993                                  SDValue(Tmp, 0), GA);
2994  }
2995  case PPCISD::PPC32_PICGOT: {
2996    // Generate a PIC-safe GOT reference.
2997    assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() &&
2998      "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
2999    return CurDAG->SelectNodeTo(
3000        N, PPC::PPC32PICGOT, PPCLowering->getPointerTy(CurDAG->getDataLayout()),
3001        MVT::i32);
3002  }
3003  case PPCISD::VADD_SPLAT: {
3004    // This expands into one of three sequences, depending on whether
3005    // the first operand is odd or even, positive or negative.
3006    assert(isa<ConstantSDNode>(N->getOperand(0)) &&
3007           isa<ConstantSDNode>(N->getOperand(1)) &&
3008           "Invalid operand on VADD_SPLAT!");
3009
3010    int Elt     = N->getConstantOperandVal(0);
3011    int EltSize = N->getConstantOperandVal(1);
3012    unsigned Opc1, Opc2, Opc3;
3013    EVT VT;
3014
3015    if (EltSize == 1) {
3016      Opc1 = PPC::VSPLTISB;
3017      Opc2 = PPC::VADDUBM;
3018      Opc3 = PPC::VSUBUBM;
3019      VT = MVT::v16i8;
3020    } else if (EltSize == 2) {
3021      Opc1 = PPC::VSPLTISH;
3022      Opc2 = PPC::VADDUHM;
3023      Opc3 = PPC::VSUBUHM;
3024      VT = MVT::v8i16;
3025    } else {
3026      assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
3027      Opc1 = PPC::VSPLTISW;
3028      Opc2 = PPC::VADDUWM;
3029      Opc3 = PPC::VSUBUWM;
3030      VT = MVT::v4i32;
3031    }
3032
3033    if ((Elt & 1) == 0) {
3034      // Elt is even, in the range [-32,-18] + [16,30].
3035      //
3036      // Convert: VADD_SPLAT elt, size
3037      // Into:    tmp = VSPLTIS[BHW] elt
3038      //          VADDU[BHW]M tmp, tmp
3039      // Where:   [BHW] = B for size = 1, H for size = 2, W for size = 4
3040      SDValue EltVal = getI32Imm(Elt >> 1, dl);
3041      SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
3042      SDValue TmpVal = SDValue(Tmp, 0);
3043      return CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal);
3044
3045    } else if (Elt > 0) {
3046      // Elt is odd and positive, in the range [17,31].
3047      //
3048      // Convert: VADD_SPLAT elt, size
3049      // Into:    tmp1 = VSPLTIS[BHW] elt-16
3050      //          tmp2 = VSPLTIS[BHW] -16
3051      //          VSUBU[BHW]M tmp1, tmp2
3052      SDValue EltVal = getI32Imm(Elt - 16, dl);
3053      SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
3054      EltVal = getI32Imm(-16, dl);
3055      SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
3056      return CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
3057                                    SDValue(Tmp2, 0));
3058
3059    } else {
3060      // Elt is odd and negative, in the range [-31,-17].
3061      //
3062      // Convert: VADD_SPLAT elt, size
3063      // Into:    tmp1 = VSPLTIS[BHW] elt+16
3064      //          tmp2 = VSPLTIS[BHW] -16
3065      //          VADDU[BHW]M tmp1, tmp2
3066      SDValue EltVal = getI32Imm(Elt + 16, dl);
3067      SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
3068      EltVal = getI32Imm(-16, dl);
3069      SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
3070      return CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
3071                                    SDValue(Tmp2, 0));
3072    }
3073  }
3074  }
3075
3076  return SelectCode(N);
3077}
3078
3079// If the target supports the cmpb instruction, do the idiom recognition here.
3080// We don't do this as a DAG combine because we don't want to do it as nodes
3081// are being combined (because we might miss part of the eventual idiom). We
3082// don't want to do it during instruction selection because we want to reuse
3083// the logic for lowering the masking operations already part of the
3084// instruction selector.
3085SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
3086  SDLoc dl(N);
3087
3088  assert(N->getOpcode() == ISD::OR &&
3089         "Only OR nodes are supported for CMPB");
3090
3091  SDValue Res;
3092  if (!PPCSubTarget->hasCMPB())
3093    return Res;
3094
3095  if (N->getValueType(0) != MVT::i32 &&
3096      N->getValueType(0) != MVT::i64)
3097    return Res;
3098
3099  EVT VT = N->getValueType(0);
3100
3101  SDValue RHS, LHS;
3102  bool BytesFound[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
3103  uint64_t Mask = 0, Alt = 0;
3104
3105  auto IsByteSelectCC = [this](SDValue O, unsigned &b,
3106                               uint64_t &Mask, uint64_t &Alt,
3107                               SDValue &LHS, SDValue &RHS) {
3108    if (O.getOpcode() != ISD::SELECT_CC)
3109      return false;
3110    ISD::CondCode CC = cast<CondCodeSDNode>(O.getOperand(4))->get();
3111
3112    if (!isa<ConstantSDNode>(O.getOperand(2)) ||
3113        !isa<ConstantSDNode>(O.getOperand(3)))
3114      return false;
3115
3116    uint64_t PM = O.getConstantOperandVal(2);
3117    uint64_t PAlt = O.getConstantOperandVal(3);
3118    for (b = 0; b < 8; ++b) {
3119      uint64_t Mask = UINT64_C(0xFF) << (8*b);
3120      if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt)
3121        break;
3122    }
3123
3124    if (b == 8)
3125      return false;
3126    Mask |= PM;
3127    Alt  |= PAlt;
3128
3129    if (!isa<ConstantSDNode>(O.getOperand(1)) ||
3130        O.getConstantOperandVal(1) != 0) {
3131      SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1);
3132      if (Op0.getOpcode() == ISD::TRUNCATE)
3133        Op0 = Op0.getOperand(0);
3134      if (Op1.getOpcode() == ISD::TRUNCATE)
3135        Op1 = Op1.getOperand(0);
3136
3137      if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL &&
3138          Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ &&
3139          isa<ConstantSDNode>(Op0.getOperand(1))) {
3140
3141        unsigned Bits = Op0.getValueType().getSizeInBits();
3142        if (b != Bits/8-1)
3143          return false;
3144        if (Op0.getConstantOperandVal(1) != Bits-8)
3145          return false;
3146
3147        LHS = Op0.getOperand(0);
3148        RHS = Op1.getOperand(0);
3149        return true;
3150      }
3151
3152      // When we have small integers (i16 to be specific), the form present
3153      // post-legalization uses SETULT in the SELECT_CC for the
3154      // higher-order byte, depending on the fact that the
3155      // even-higher-order bytes are known to all be zero, for example:
3156      //   select_cc (xor $lhs, $rhs), 256, 65280, 0, setult
3157      // (so when the second byte is the same, because all higher-order
3158      // bits from bytes 3 and 4 are known to be zero, the result of the
3159      // xor can be at most 255)
3160      if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT &&
3161          isa<ConstantSDNode>(O.getOperand(1))) {
3162
3163        uint64_t ULim = O.getConstantOperandVal(1);
3164        if (ULim != (UINT64_C(1) << b*8))
3165          return false;
3166
3167        // Now we need to make sure that the upper bytes are known to be
3168        // zero.
3169        unsigned Bits = Op0.getValueType().getSizeInBits();
3170        if (!CurDAG->MaskedValueIsZero(Op0,
3171              APInt::getHighBitsSet(Bits, Bits - (b+1)*8)))
3172          return false;
3173
3174        LHS = Op0.getOperand(0);
3175        RHS = Op0.getOperand(1);
3176        return true;
3177      }
3178
3179      return false;
3180    }
3181
3182    if (CC != ISD::SETEQ)
3183      return false;
3184
3185    SDValue Op = O.getOperand(0);
3186    if (Op.getOpcode() == ISD::AND) {
3187      if (!isa<ConstantSDNode>(Op.getOperand(1)))
3188        return false;
3189      if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b)))
3190        return false;
3191
3192      SDValue XOR = Op.getOperand(0);
3193      if (XOR.getOpcode() == ISD::TRUNCATE)
3194        XOR = XOR.getOperand(0);
3195      if (XOR.getOpcode() != ISD::XOR)
3196        return false;
3197
3198      LHS = XOR.getOperand(0);
3199      RHS = XOR.getOperand(1);
3200      return true;
3201    } else if (Op.getOpcode() == ISD::SRL) {
3202      if (!isa<ConstantSDNode>(Op.getOperand(1)))
3203        return false;
3204      unsigned Bits = Op.getValueType().getSizeInBits();
3205      if (b != Bits/8-1)
3206        return false;
3207      if (Op.getConstantOperandVal(1) != Bits-8)
3208        return false;
3209
3210      SDValue XOR = Op.getOperand(0);
3211      if (XOR.getOpcode() == ISD::TRUNCATE)
3212        XOR = XOR.getOperand(0);
3213      if (XOR.getOpcode() != ISD::XOR)
3214        return false;
3215
3216      LHS = XOR.getOperand(0);
3217      RHS = XOR.getOperand(1);
3218      return true;
3219    }
3220
3221    return false;
3222  };
3223
3224  SmallVector<SDValue, 8> Queue(1, SDValue(N, 0));
3225  while (!Queue.empty()) {
3226    SDValue V = Queue.pop_back_val();
3227
3228    for (const SDValue &O : V.getNode()->ops()) {
3229      unsigned b;
3230      uint64_t M = 0, A = 0;
3231      SDValue OLHS, ORHS;
3232      if (O.getOpcode() == ISD::OR) {
3233        Queue.push_back(O);
3234      } else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) {
3235        if (!LHS) {
3236          LHS = OLHS;
3237          RHS = ORHS;
3238          BytesFound[b] = true;
3239          Mask |= M;
3240          Alt  |= A;
3241        } else if ((LHS == ORHS && RHS == OLHS) ||
3242                   (RHS == ORHS && LHS == OLHS)) {
3243          BytesFound[b] = true;
3244          Mask |= M;
3245          Alt  |= A;
3246        } else {
3247          return Res;
3248        }
3249      } else {
3250        return Res;
3251      }
3252    }
3253  }
3254
3255  unsigned LastB = 0, BCnt = 0;
3256  for (unsigned i = 0; i < 8; ++i)
3257    if (BytesFound[LastB]) {
3258      ++BCnt;
3259      LastB = i;
3260    }
3261
3262  if (!LastB || BCnt < 2)
3263    return Res;
3264
3265  // Because we'll be zero-extending the output anyway if don't have a specific
3266  // value for each input byte (via the Mask), we can 'anyext' the inputs.
3267  if (LHS.getValueType() != VT) {
3268    LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT);
3269    RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT);
3270  }
3271
3272  Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS);
3273
3274  bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1);
3275  if (NonTrivialMask && !Alt) {
3276    // Res = Mask & CMPB
3277    Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
3278                          CurDAG->getConstant(Mask, dl, VT));
3279  } else if (Alt) {
3280    // Res = (CMPB & Mask) | (~CMPB & Alt)
3281    // Which, as suggested here:
3282    //   https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
3283    // can be written as:
3284    // Res = Alt ^ ((Alt ^ Mask) & CMPB)
3285    // useful because the (Alt ^ Mask) can be pre-computed.
3286    Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
3287                          CurDAG->getConstant(Mask ^ Alt, dl, VT));
3288    Res = CurDAG->getNode(ISD::XOR, dl, VT, Res,
3289                          CurDAG->getConstant(Alt, dl, VT));
3290  }
3291
3292  return Res;
3293}
3294
3295// When CR bit registers are enabled, an extension of an i1 variable to a i32
3296// or i64 value is lowered in terms of a SELECT_I[48] operation, and thus
3297// involves constant materialization of a 0 or a 1 or both. If the result of
3298// the extension is then operated upon by some operator that can be constant
3299// folded with a constant 0 or 1, and that constant can be materialized using
3300// only one instruction (like a zero or one), then we should fold in those
3301// operations with the select.
3302void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
3303  if (!PPCSubTarget->useCRBits())
3304    return;
3305
3306  if (N->getOpcode() != ISD::ZERO_EXTEND &&
3307      N->getOpcode() != ISD::SIGN_EXTEND &&
3308      N->getOpcode() != ISD::ANY_EXTEND)
3309    return;
3310
3311  if (N->getOperand(0).getValueType() != MVT::i1)
3312    return;
3313
3314  if (!N->hasOneUse())
3315    return;
3316
3317  SDLoc dl(N);
3318  EVT VT = N->getValueType(0);
3319  SDValue Cond = N->getOperand(0);
3320  SDValue ConstTrue =
3321    CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, dl, VT);
3322  SDValue ConstFalse = CurDAG->getConstant(0, dl, VT);
3323
3324  do {
3325    SDNode *User = *N->use_begin();
3326    if (User->getNumOperands() != 2)
3327      break;
3328
3329    auto TryFold = [this, N, User, dl](SDValue Val) {
3330      SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1);
3331      SDValue O0 = UserO0.getNode() == N ? Val : UserO0;
3332      SDValue O1 = UserO1.getNode() == N ? Val : UserO1;
3333
3334      return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl,
3335                                            User->getValueType(0),
3336                                            O0.getNode(), O1.getNode());
3337    };
3338
3339    SDValue TrueRes = TryFold(ConstTrue);
3340    if (!TrueRes)
3341      break;
3342    SDValue FalseRes = TryFold(ConstFalse);
3343    if (!FalseRes)
3344      break;
3345
3346    // For us to materialize these using one instruction, we must be able to
3347    // represent them as signed 16-bit integers.
3348    uint64_t True  = cast<ConstantSDNode>(TrueRes)->getZExtValue(),
3349             False = cast<ConstantSDNode>(FalseRes)->getZExtValue();
3350    if (!isInt<16>(True) || !isInt<16>(False))
3351      break;
3352
3353    // We can replace User with a new SELECT node, and try again to see if we
3354    // can fold the select with its user.
3355    Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes);
3356    N = User;
3357    ConstTrue = TrueRes;
3358    ConstFalse = FalseRes;
3359  } while (N->hasOneUse());
3360}
3361
3362void PPCDAGToDAGISel::PreprocessISelDAG() {
3363  SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
3364  ++Position;
3365
3366  bool MadeChange = false;
3367  while (Position != CurDAG->allnodes_begin()) {
3368    SDNode *N = &*--Position;
3369    if (N->use_empty())
3370      continue;
3371
3372    SDValue Res;
3373    switch (N->getOpcode()) {
3374    default: break;
3375    case ISD::OR:
3376      Res = combineToCMPB(N);
3377      break;
3378    }
3379
3380    if (!Res)
3381      foldBoolExts(Res, N);
3382
3383    if (Res) {
3384      DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld:    ");
3385      DEBUG(N->dump(CurDAG));
3386      DEBUG(dbgs() << "\nNew: ");
3387      DEBUG(Res.getNode()->dump(CurDAG));
3388      DEBUG(dbgs() << "\n");
3389
3390      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
3391      MadeChange = true;
3392    }
3393  }
3394
3395  if (MadeChange)
3396    CurDAG->RemoveDeadNodes();
3397}
3398
3399/// PostprocessISelDAG - Perform some late peephole optimizations
3400/// on the DAG representation.
3401void PPCDAGToDAGISel::PostprocessISelDAG() {
3402
3403  // Skip peepholes at -O0.
3404  if (TM.getOptLevel() == CodeGenOpt::None)
3405    return;
3406
3407  PeepholePPC64();
3408  PeepholeCROps();
3409  PeepholePPC64ZExt();
3410}
3411
3412// Check if all users of this node will become isel where the second operand
3413// is the constant zero. If this is so, and if we can negate the condition,
3414// then we can flip the true and false operands. This will allow the zero to
3415// be folded with the isel so that we don't need to materialize a register
3416// containing zero.
3417bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
3418  // If we're not using isel, then this does not matter.
3419  if (!PPCSubTarget->hasISEL())
3420    return false;
3421
3422  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
3423       UI != UE; ++UI) {
3424    SDNode *User = *UI;
3425    if (!User->isMachineOpcode())
3426      return false;
3427    if (User->getMachineOpcode() != PPC::SELECT_I4 &&
3428        User->getMachineOpcode() != PPC::SELECT_I8)
3429      return false;
3430
3431    SDNode *Op2 = User->getOperand(2).getNode();
3432    if (!Op2->isMachineOpcode())
3433      return false;
3434
3435    if (Op2->getMachineOpcode() != PPC::LI &&
3436        Op2->getMachineOpcode() != PPC::LI8)
3437      return false;
3438
3439    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op2->getOperand(0));
3440    if (!C)
3441      return false;
3442
3443    if (!C->isNullValue())
3444      return false;
3445  }
3446
3447  return true;
3448}
3449
3450void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) {
3451  SmallVector<SDNode *, 4> ToReplace;
3452  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
3453       UI != UE; ++UI) {
3454    SDNode *User = *UI;
3455    assert((User->getMachineOpcode() == PPC::SELECT_I4 ||
3456            User->getMachineOpcode() == PPC::SELECT_I8) &&
3457           "Must have all select users");
3458    ToReplace.push_back(User);
3459  }
3460
3461  for (SmallVector<SDNode *, 4>::iterator UI = ToReplace.begin(),
3462       UE = ToReplace.end(); UI != UE; ++UI) {
3463    SDNode *User = *UI;
3464    SDNode *ResNode =
3465      CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User),
3466                             User->getValueType(0), User->getOperand(0),
3467                             User->getOperand(2),
3468                             User->getOperand(1));
3469
3470      DEBUG(dbgs() << "CR Peephole replacing:\nOld:    ");
3471      DEBUG(User->dump(CurDAG));
3472      DEBUG(dbgs() << "\nNew: ");
3473      DEBUG(ResNode->dump(CurDAG));
3474      DEBUG(dbgs() << "\n");
3475
3476      ReplaceUses(User, ResNode);
3477  }
3478}
3479
3480void PPCDAGToDAGISel::PeepholeCROps() {
3481  bool IsModified;
3482  do {
3483    IsModified = false;
3484    for (SDNode &Node : CurDAG->allnodes()) {
3485      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
3486      if (!MachineNode || MachineNode->use_empty())
3487        continue;
3488      SDNode *ResNode = MachineNode;
3489
3490      bool Op1Set   = false, Op1Unset = false,
3491           Op1Not   = false,
3492           Op2Set   = false, Op2Unset = false,
3493           Op2Not   = false;
3494
3495      unsigned Opcode = MachineNode->getMachineOpcode();
3496      switch (Opcode) {
3497      default: break;
3498      case PPC::CRAND:
3499      case PPC::CRNAND:
3500      case PPC::CROR:
3501      case PPC::CRXOR:
3502      case PPC::CRNOR:
3503      case PPC::CREQV:
3504      case PPC::CRANDC:
3505      case PPC::CRORC: {
3506        SDValue Op = MachineNode->getOperand(1);
3507        if (Op.isMachineOpcode()) {
3508          if (Op.getMachineOpcode() == PPC::CRSET)
3509            Op2Set = true;
3510          else if (Op.getMachineOpcode() == PPC::CRUNSET)
3511            Op2Unset = true;
3512          else if (Op.getMachineOpcode() == PPC::CRNOR &&
3513                   Op.getOperand(0) == Op.getOperand(1))
3514            Op2Not = true;
3515        }
3516        }  // fallthrough
3517      case PPC::BC:
3518      case PPC::BCn:
3519      case PPC::SELECT_I4:
3520      case PPC::SELECT_I8:
3521      case PPC::SELECT_F4:
3522      case PPC::SELECT_F8:
3523      case PPC::SELECT_QFRC:
3524      case PPC::SELECT_QSRC:
3525      case PPC::SELECT_QBRC:
3526      case PPC::SELECT_VRRC:
3527      case PPC::SELECT_VSFRC:
3528      case PPC::SELECT_VSSRC:
3529      case PPC::SELECT_VSRC: {
3530        SDValue Op = MachineNode->getOperand(0);
3531        if (Op.isMachineOpcode()) {
3532          if (Op.getMachineOpcode() == PPC::CRSET)
3533            Op1Set = true;
3534          else if (Op.getMachineOpcode() == PPC::CRUNSET)
3535            Op1Unset = true;
3536          else if (Op.getMachineOpcode() == PPC::CRNOR &&
3537                   Op.getOperand(0) == Op.getOperand(1))
3538            Op1Not = true;
3539        }
3540        }
3541        break;
3542      }
3543
3544      bool SelectSwap = false;
3545      switch (Opcode) {
3546      default: break;
3547      case PPC::CRAND:
3548        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
3549          // x & x = x
3550          ResNode = MachineNode->getOperand(0).getNode();
3551        else if (Op1Set)
3552          // 1 & y = y
3553          ResNode = MachineNode->getOperand(1).getNode();
3554        else if (Op2Set)
3555          // x & 1 = x
3556          ResNode = MachineNode->getOperand(0).getNode();
3557        else if (Op1Unset || Op2Unset)
3558          // x & 0 = 0 & y = 0
3559          ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
3560                                           MVT::i1);
3561        else if (Op1Not)
3562          // ~x & y = andc(y, x)
3563          ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
3564                                           MVT::i1, MachineNode->getOperand(1),
3565                                           MachineNode->getOperand(0).
3566                                             getOperand(0));
3567        else if (Op2Not)
3568          // x & ~y = andc(x, y)
3569          ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
3570                                           MVT::i1, MachineNode->getOperand(0),
3571                                           MachineNode->getOperand(1).
3572                                             getOperand(0));
3573        else if (AllUsersSelectZero(MachineNode))
3574          ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
3575                                           MVT::i1, MachineNode->getOperand(0),
3576                                           MachineNode->getOperand(1)),
3577          SelectSwap = true;
3578        break;
3579      case PPC::CRNAND:
3580        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
3581          // nand(x, x) -> nor(x, x)
3582          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3583                                           MVT::i1, MachineNode->getOperand(0),
3584                                           MachineNode->getOperand(0));
3585        else if (Op1Set)
3586          // nand(1, y) -> nor(y, y)
3587          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3588                                           MVT::i1, MachineNode->getOperand(1),
3589                                           MachineNode->getOperand(1));
3590        else if (Op2Set)
3591          // nand(x, 1) -> nor(x, x)
3592          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3593                                           MVT::i1, MachineNode->getOperand(0),
3594                                           MachineNode->getOperand(0));
3595        else if (Op1Unset || Op2Unset)
3596          // nand(x, 0) = nand(0, y) = 1
3597          ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
3598                                           MVT::i1);
3599        else if (Op1Not)
3600          // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y)
3601          ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
3602                                           MVT::i1, MachineNode->getOperand(0).
3603                                                      getOperand(0),
3604                                           MachineNode->getOperand(1));
3605        else if (Op2Not)
3606          // nand(x, ~y) = ~x | y = orc(y, x)
3607          ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
3608                                           MVT::i1, MachineNode->getOperand(1).
3609                                                      getOperand(0),
3610                                           MachineNode->getOperand(0));
3611        else if (AllUsersSelectZero(MachineNode))
3612          ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
3613                                           MVT::i1, MachineNode->getOperand(0),
3614                                           MachineNode->getOperand(1)),
3615          SelectSwap = true;
3616        break;
3617      case PPC::CROR:
3618        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
3619          // x | x = x
3620          ResNode = MachineNode->getOperand(0).getNode();
3621        else if (Op1Set || Op2Set)
3622          // x | 1 = 1 | y = 1
3623          ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
3624                                           MVT::i1);
3625        else if (Op1Unset)
3626          // 0 | y = y
3627          ResNode = MachineNode->getOperand(1).getNode();
3628        else if (Op2Unset)
3629          // x | 0 = x
3630          ResNode = MachineNode->getOperand(0).getNode();
3631        else if (Op1Not)
3632          // ~x | y = orc(y, x)
3633          ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
3634                                           MVT::i1, MachineNode->getOperand(1),
3635                                           MachineNode->getOperand(0).
3636                                             getOperand(0));
3637        else if (Op2Not)
3638          // x | ~y = orc(x, y)
3639          ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
3640                                           MVT::i1, MachineNode->getOperand(0),
3641                                           MachineNode->getOperand(1).
3642                                             getOperand(0));
3643        else if (AllUsersSelectZero(MachineNode))
3644          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3645                                           MVT::i1, MachineNode->getOperand(0),
3646                                           MachineNode->getOperand(1)),
3647          SelectSwap = true;
3648        break;
3649      case PPC::CRXOR:
3650        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
3651          // xor(x, x) = 0
3652          ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
3653                                           MVT::i1);
3654        else if (Op1Set)
3655          // xor(1, y) -> nor(y, y)
3656          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3657                                           MVT::i1, MachineNode->getOperand(1),
3658                                           MachineNode->getOperand(1));
3659        else if (Op2Set)
3660          // xor(x, 1) -> nor(x, x)
3661          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3662                                           MVT::i1, MachineNode->getOperand(0),
3663                                           MachineNode->getOperand(0));
3664        else if (Op1Unset)
3665          // xor(0, y) = y
3666          ResNode = MachineNode->getOperand(1).getNode();
3667        else if (Op2Unset)
3668          // xor(x, 0) = x
3669          ResNode = MachineNode->getOperand(0).getNode();
3670        else if (Op1Not)
3671          // xor(~x, y) = eqv(x, y)
3672          ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
3673                                           MVT::i1, MachineNode->getOperand(0).
3674                                                      getOperand(0),
3675                                           MachineNode->getOperand(1));
3676        else if (Op2Not)
3677          // xor(x, ~y) = eqv(x, y)
3678          ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
3679                                           MVT::i1, MachineNode->getOperand(0),
3680                                           MachineNode->getOperand(1).
3681                                             getOperand(0));
3682        else if (AllUsersSelectZero(MachineNode))
3683          ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
3684                                           MVT::i1, MachineNode->getOperand(0),
3685                                           MachineNode->getOperand(1)),
3686          SelectSwap = true;
3687        break;
3688      case PPC::CRNOR:
3689        if (Op1Set || Op2Set)
3690          // nor(1, y) -> 0
3691          ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
3692                                           MVT::i1);
3693        else if (Op1Unset)
3694          // nor(0, y) = ~y -> nor(y, y)
3695          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3696                                           MVT::i1, MachineNode->getOperand(1),
3697                                           MachineNode->getOperand(1));
3698        else if (Op2Unset)
3699          // nor(x, 0) = ~x
3700          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3701                                           MVT::i1, MachineNode->getOperand(0),
3702                                           MachineNode->getOperand(0));
3703        else if (Op1Not)
3704          // nor(~x, y) = andc(x, y)
3705          ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
3706                                           MVT::i1, MachineNode->getOperand(0).
3707                                                      getOperand(0),
3708                                           MachineNode->getOperand(1));
3709        else if (Op2Not)
3710          // nor(x, ~y) = andc(y, x)
3711          ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
3712                                           MVT::i1, MachineNode->getOperand(1).
3713                                                      getOperand(0),
3714                                           MachineNode->getOperand(0));
3715        else if (AllUsersSelectZero(MachineNode))
3716          ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
3717                                           MVT::i1, MachineNode->getOperand(0),
3718                                           MachineNode->getOperand(1)),
3719          SelectSwap = true;
3720        break;
3721      case PPC::CREQV:
3722        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
3723          // eqv(x, x) = 1
3724          ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
3725                                           MVT::i1);
3726        else if (Op1Set)
3727          // eqv(1, y) = y
3728          ResNode = MachineNode->getOperand(1).getNode();
3729        else if (Op2Set)
3730          // eqv(x, 1) = x
3731          ResNode = MachineNode->getOperand(0).getNode();
3732        else if (Op1Unset)
3733          // eqv(0, y) = ~y -> nor(y, y)
3734          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3735                                           MVT::i1, MachineNode->getOperand(1),
3736                                           MachineNode->getOperand(1));
3737        else if (Op2Unset)
3738          // eqv(x, 0) = ~x
3739          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3740                                           MVT::i1, MachineNode->getOperand(0),
3741                                           MachineNode->getOperand(0));
3742        else if (Op1Not)
3743          // eqv(~x, y) = xor(x, y)
3744          ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
3745                                           MVT::i1, MachineNode->getOperand(0).
3746                                                      getOperand(0),
3747                                           MachineNode->getOperand(1));
3748        else if (Op2Not)
3749          // eqv(x, ~y) = xor(x, y)
3750          ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
3751                                           MVT::i1, MachineNode->getOperand(0),
3752                                           MachineNode->getOperand(1).
3753                                             getOperand(0));
3754        else if (AllUsersSelectZero(MachineNode))
3755          ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
3756                                           MVT::i1, MachineNode->getOperand(0),
3757                                           MachineNode->getOperand(1)),
3758          SelectSwap = true;
3759        break;
3760      case PPC::CRANDC:
3761        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
3762          // andc(x, x) = 0
3763          ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
3764                                           MVT::i1);
3765        else if (Op1Set)
3766          // andc(1, y) = ~y
3767          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3768                                           MVT::i1, MachineNode->getOperand(1),
3769                                           MachineNode->getOperand(1));
3770        else if (Op1Unset || Op2Set)
3771          // andc(0, y) = andc(x, 1) = 0
3772          ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
3773                                           MVT::i1);
3774        else if (Op2Unset)
3775          // andc(x, 0) = x
3776          ResNode = MachineNode->getOperand(0).getNode();
3777        else if (Op1Not)
3778          // andc(~x, y) = ~(x | y) = nor(x, y)
3779          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3780                                           MVT::i1, MachineNode->getOperand(0).
3781                                                      getOperand(0),
3782                                           MachineNode->getOperand(1));
3783        else if (Op2Not)
3784          // andc(x, ~y) = x & y
3785          ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
3786                                           MVT::i1, MachineNode->getOperand(0),
3787                                           MachineNode->getOperand(1).
3788                                             getOperand(0));
3789        else if (AllUsersSelectZero(MachineNode))
3790          ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
3791                                           MVT::i1, MachineNode->getOperand(1),
3792                                           MachineNode->getOperand(0)),
3793          SelectSwap = true;
3794        break;
3795      case PPC::CRORC:
3796        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
3797          // orc(x, x) = 1
3798          ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
3799                                           MVT::i1);
3800        else if (Op1Set || Op2Unset)
3801          // orc(1, y) = orc(x, 0) = 1
3802          ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
3803                                           MVT::i1);
3804        else if (Op2Set)
3805          // orc(x, 1) = x
3806          ResNode = MachineNode->getOperand(0).getNode();
3807        else if (Op1Unset)
3808          // orc(0, y) = ~y
3809          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
3810                                           MVT::i1, MachineNode->getOperand(1),
3811                                           MachineNode->getOperand(1));
3812        else if (Op1Not)
3813          // orc(~x, y) = ~(x & y) = nand(x, y)
3814          ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
3815                                           MVT::i1, MachineNode->getOperand(0).
3816                                                      getOperand(0),
3817                                           MachineNode->getOperand(1));
3818        else if (Op2Not)
3819          // orc(x, ~y) = x | y
3820          ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
3821                                           MVT::i1, MachineNode->getOperand(0),
3822                                           MachineNode->getOperand(1).
3823                                             getOperand(0));
3824        else if (AllUsersSelectZero(MachineNode))
3825          ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
3826                                           MVT::i1, MachineNode->getOperand(1),
3827                                           MachineNode->getOperand(0)),
3828          SelectSwap = true;
3829        break;
3830      case PPC::SELECT_I4:
3831      case PPC::SELECT_I8:
3832      case PPC::SELECT_F4:
3833      case PPC::SELECT_F8:
3834      case PPC::SELECT_QFRC:
3835      case PPC::SELECT_QSRC:
3836      case PPC::SELECT_QBRC:
3837      case PPC::SELECT_VRRC:
3838      case PPC::SELECT_VSFRC:
3839      case PPC::SELECT_VSSRC:
3840      case PPC::SELECT_VSRC:
3841        if (Op1Set)
3842          ResNode = MachineNode->getOperand(1).getNode();
3843        else if (Op1Unset)
3844          ResNode = MachineNode->getOperand(2).getNode();
3845        else if (Op1Not)
3846          ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(),
3847                                           SDLoc(MachineNode),
3848                                           MachineNode->getValueType(0),
3849                                           MachineNode->getOperand(0).
3850                                             getOperand(0),
3851                                           MachineNode->getOperand(2),
3852                                           MachineNode->getOperand(1));
3853        break;
3854      case PPC::BC:
3855      case PPC::BCn:
3856        if (Op1Not)
3857          ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn :
3858                                                               PPC::BC,
3859                                           SDLoc(MachineNode),
3860                                           MVT::Other,
3861                                           MachineNode->getOperand(0).
3862                                             getOperand(0),
3863                                           MachineNode->getOperand(1),
3864                                           MachineNode->getOperand(2));
3865        // FIXME: Handle Op1Set, Op1Unset here too.
3866        break;
3867      }
3868
3869      // If we're inverting this node because it is used only by selects that
3870      // we'd like to swap, then swap the selects before the node replacement.
3871      if (SelectSwap)
3872        SwapAllSelectUsers(MachineNode);
3873
3874      if (ResNode != MachineNode) {
3875        DEBUG(dbgs() << "CR Peephole replacing:\nOld:    ");
3876        DEBUG(MachineNode->dump(CurDAG));
3877        DEBUG(dbgs() << "\nNew: ");
3878        DEBUG(ResNode->dump(CurDAG));
3879        DEBUG(dbgs() << "\n");
3880
3881        ReplaceUses(MachineNode, ResNode);
3882        IsModified = true;
3883      }
3884    }
3885    if (IsModified)
3886      CurDAG->RemoveDeadNodes();
3887  } while (IsModified);
3888}
3889
3890// Gather the set of 32-bit operations that are known to have their
3891// higher-order 32 bits zero, where ToPromote contains all such operations.
3892static bool PeepholePPC64ZExtGather(SDValue Op32,
3893                                    SmallPtrSetImpl<SDNode *> &ToPromote) {
3894  if (!Op32.isMachineOpcode())
3895    return false;
3896
3897  // First, check for the "frontier" instructions (those that will clear the
3898  // higher-order 32 bits.
3899
3900  // For RLWINM and RLWNM, we need to make sure that the mask does not wrap
3901  // around. If it does not, then these instructions will clear the
3902  // higher-order bits.
3903  if ((Op32.getMachineOpcode() == PPC::RLWINM ||
3904       Op32.getMachineOpcode() == PPC::RLWNM) &&
3905      Op32.getConstantOperandVal(2) <= Op32.getConstantOperandVal(3)) {
3906    ToPromote.insert(Op32.getNode());
3907    return true;
3908  }
3909
3910  // SLW and SRW always clear the higher-order bits.
3911  if (Op32.getMachineOpcode() == PPC::SLW ||
3912      Op32.getMachineOpcode() == PPC::SRW) {
3913    ToPromote.insert(Op32.getNode());
3914    return true;
3915  }
3916
3917  // For LI and LIS, we need the immediate to be positive (so that it is not
3918  // sign extended).
3919  if (Op32.getMachineOpcode() == PPC::LI ||
3920      Op32.getMachineOpcode() == PPC::LIS) {
3921    if (!isUInt<15>(Op32.getConstantOperandVal(0)))
3922      return false;
3923
3924    ToPromote.insert(Op32.getNode());
3925    return true;
3926  }
3927
3928  // LHBRX and LWBRX always clear the higher-order bits.
3929  if (Op32.getMachineOpcode() == PPC::LHBRX ||
3930      Op32.getMachineOpcode() == PPC::LWBRX) {
3931    ToPromote.insert(Op32.getNode());
3932    return true;
3933  }
3934
3935  // CNTLZW always produces a 64-bit value in [0,32], and so is zero extended.
3936  if (Op32.getMachineOpcode() == PPC::CNTLZW) {
3937    ToPromote.insert(Op32.getNode());
3938    return true;
3939  }
3940
3941  // Next, check for those instructions we can look through.
3942
3943  // Assuming the mask does not wrap around, then the higher-order bits are
3944  // taken directly from the first operand.
3945  if (Op32.getMachineOpcode() == PPC::RLWIMI &&
3946      Op32.getConstantOperandVal(3) <= Op32.getConstantOperandVal(4)) {
3947    SmallPtrSet<SDNode *, 16> ToPromote1;
3948    if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
3949      return false;
3950
3951    ToPromote.insert(Op32.getNode());
3952    ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
3953    return true;
3954  }
3955
3956  // For OR, the higher-order bits are zero if that is true for both operands.
3957  // For SELECT_I4, the same is true (but the relevant operand numbers are
3958  // shifted by 1).
3959  if (Op32.getMachineOpcode() == PPC::OR ||
3960      Op32.getMachineOpcode() == PPC::SELECT_I4) {
3961    unsigned B = Op32.getMachineOpcode() == PPC::SELECT_I4 ? 1 : 0;
3962    SmallPtrSet<SDNode *, 16> ToPromote1;
3963    if (!PeepholePPC64ZExtGather(Op32.getOperand(B+0), ToPromote1))
3964      return false;
3965    if (!PeepholePPC64ZExtGather(Op32.getOperand(B+1), ToPromote1))
3966      return false;
3967
3968    ToPromote.insert(Op32.getNode());
3969    ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
3970    return true;
3971  }
3972
3973  // For ORI and ORIS, we need the higher-order bits of the first operand to be
3974  // zero, and also for the constant to be positive (so that it is not sign
3975  // extended).
3976  if (Op32.getMachineOpcode() == PPC::ORI ||
3977      Op32.getMachineOpcode() == PPC::ORIS) {
3978    SmallPtrSet<SDNode *, 16> ToPromote1;
3979    if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
3980      return false;
3981    if (!isUInt<15>(Op32.getConstantOperandVal(1)))
3982      return false;
3983
3984    ToPromote.insert(Op32.getNode());
3985    ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
3986    return true;
3987  }
3988
3989  // The higher-order bits of AND are zero if that is true for at least one of
3990  // the operands.
3991  if (Op32.getMachineOpcode() == PPC::AND) {
3992    SmallPtrSet<SDNode *, 16> ToPromote1, ToPromote2;
3993    bool Op0OK =
3994      PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
3995    bool Op1OK =
3996      PeepholePPC64ZExtGather(Op32.getOperand(1), ToPromote2);
3997    if (!Op0OK && !Op1OK)
3998      return false;
3999
4000    ToPromote.insert(Op32.getNode());
4001
4002    if (Op0OK)
4003      ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
4004
4005    if (Op1OK)
4006      ToPromote.insert(ToPromote2.begin(), ToPromote2.end());
4007
4008    return true;
4009  }
4010
4011  // For ANDI and ANDIS, the higher-order bits are zero if either that is true
4012  // of the first operand, or if the second operand is positive (so that it is
4013  // not sign extended).
4014  if (Op32.getMachineOpcode() == PPC::ANDIo ||
4015      Op32.getMachineOpcode() == PPC::ANDISo) {
4016    SmallPtrSet<SDNode *, 16> ToPromote1;
4017    bool Op0OK =
4018      PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
4019    bool Op1OK = isUInt<15>(Op32.getConstantOperandVal(1));
4020    if (!Op0OK && !Op1OK)
4021      return false;
4022
4023    ToPromote.insert(Op32.getNode());
4024
4025    if (Op0OK)
4026      ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
4027
4028    return true;
4029  }
4030
4031  return false;
4032}
4033
4034void PPCDAGToDAGISel::PeepholePPC64ZExt() {
4035  if (!PPCSubTarget->isPPC64())
4036    return;
4037
4038  // When we zero-extend from i32 to i64, we use a pattern like this:
4039  // def : Pat<(i64 (zext i32:$in)),
4040  //           (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
4041  //                   0, 32)>;
4042  // There are several 32-bit shift/rotate instructions, however, that will
4043  // clear the higher-order bits of their output, rendering the RLDICL
4044  // unnecessary. When that happens, we remove it here, and redefine the
4045  // relevant 32-bit operation to be a 64-bit operation.
4046
4047  SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
4048  ++Position;
4049
4050  bool MadeChange = false;
4051  while (Position != CurDAG->allnodes_begin()) {
4052    SDNode *N = &*--Position;
4053    // Skip dead nodes and any non-machine opcodes.
4054    if (N->use_empty() || !N->isMachineOpcode())
4055      continue;
4056
4057    if (N->getMachineOpcode() != PPC::RLDICL)
4058      continue;
4059
4060    if (N->getConstantOperandVal(1) != 0 ||
4061        N->getConstantOperandVal(2) != 32)
4062      continue;
4063
4064    SDValue ISR = N->getOperand(0);
4065    if (!ISR.isMachineOpcode() ||
4066        ISR.getMachineOpcode() != TargetOpcode::INSERT_SUBREG)
4067      continue;
4068
4069    if (!ISR.hasOneUse())
4070      continue;
4071
4072    if (ISR.getConstantOperandVal(2) != PPC::sub_32)
4073      continue;
4074
4075    SDValue IDef = ISR.getOperand(0);
4076    if (!IDef.isMachineOpcode() ||
4077        IDef.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF)
4078      continue;
4079
4080    // We now know that we're looking at a canonical i32 -> i64 zext. See if we
4081    // can get rid of it.
4082
4083    SDValue Op32 = ISR->getOperand(1);
4084    if (!Op32.isMachineOpcode())
4085      continue;
4086
4087    // There are some 32-bit instructions that always clear the high-order 32
4088    // bits, there are also some instructions (like AND) that we can look
4089    // through.
4090    SmallPtrSet<SDNode *, 16> ToPromote;
4091    if (!PeepholePPC64ZExtGather(Op32, ToPromote))
4092      continue;
4093
4094    // If the ToPromote set contains nodes that have uses outside of the set
4095    // (except for the original INSERT_SUBREG), then abort the transformation.
4096    bool OutsideUse = false;
4097    for (SDNode *PN : ToPromote) {
4098      for (SDNode *UN : PN->uses()) {
4099        if (!ToPromote.count(UN) && UN != ISR.getNode()) {
4100          OutsideUse = true;
4101          break;
4102        }
4103      }
4104
4105      if (OutsideUse)
4106        break;
4107    }
4108    if (OutsideUse)
4109      continue;
4110
4111    MadeChange = true;
4112
4113    // We now know that this zero extension can be removed by promoting to
4114    // nodes in ToPromote to 64-bit operations, where for operations in the
4115    // frontier of the set, we need to insert INSERT_SUBREGs for their
4116    // operands.
4117    for (SDNode *PN : ToPromote) {
4118      unsigned NewOpcode;
4119      switch (PN->getMachineOpcode()) {
4120      default:
4121        llvm_unreachable("Don't know the 64-bit variant of this instruction");
4122      case PPC::RLWINM:    NewOpcode = PPC::RLWINM8; break;
4123      case PPC::RLWNM:     NewOpcode = PPC::RLWNM8; break;
4124      case PPC::SLW:       NewOpcode = PPC::SLW8; break;
4125      case PPC::SRW:       NewOpcode = PPC::SRW8; break;
4126      case PPC::LI:        NewOpcode = PPC::LI8; break;
4127      case PPC::LIS:       NewOpcode = PPC::LIS8; break;
4128      case PPC::LHBRX:     NewOpcode = PPC::LHBRX8; break;
4129      case PPC::LWBRX:     NewOpcode = PPC::LWBRX8; break;
4130      case PPC::CNTLZW:    NewOpcode = PPC::CNTLZW8; break;
4131      case PPC::RLWIMI:    NewOpcode = PPC::RLWIMI8; break;
4132      case PPC::OR:        NewOpcode = PPC::OR8; break;
4133      case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break;
4134      case PPC::ORI:       NewOpcode = PPC::ORI8; break;
4135      case PPC::ORIS:      NewOpcode = PPC::ORIS8; break;
4136      case PPC::AND:       NewOpcode = PPC::AND8; break;
4137      case PPC::ANDIo:     NewOpcode = PPC::ANDIo8; break;
4138      case PPC::ANDISo:    NewOpcode = PPC::ANDISo8; break;
4139      }
4140
4141      // Note: During the replacement process, the nodes will be in an
4142      // inconsistent state (some instructions will have operands with values
4143      // of the wrong type). Once done, however, everything should be right
4144      // again.
4145
4146      SmallVector<SDValue, 4> Ops;
4147      for (const SDValue &V : PN->ops()) {
4148        if (!ToPromote.count(V.getNode()) && V.getValueType() == MVT::i32 &&
4149            !isa<ConstantSDNode>(V)) {
4150          SDValue ReplOpOps[] = { ISR.getOperand(0), V, ISR.getOperand(2) };
4151          SDNode *ReplOp =
4152            CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(V),
4153                                   ISR.getNode()->getVTList(), ReplOpOps);
4154          Ops.push_back(SDValue(ReplOp, 0));
4155        } else {
4156          Ops.push_back(V);
4157        }
4158      }
4159
4160      // Because all to-be-promoted nodes only have users that are other
4161      // promoted nodes (or the original INSERT_SUBREG), we can safely replace
4162      // the i32 result value type with i64.
4163
4164      SmallVector<EVT, 2> NewVTs;
4165      SDVTList VTs = PN->getVTList();
4166      for (unsigned i = 0, ie = VTs.NumVTs; i != ie; ++i)
4167        if (VTs.VTs[i] == MVT::i32)
4168          NewVTs.push_back(MVT::i64);
4169        else
4170          NewVTs.push_back(VTs.VTs[i]);
4171
4172      DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld:    ");
4173      DEBUG(PN->dump(CurDAG));
4174
4175      CurDAG->SelectNodeTo(PN, NewOpcode, CurDAG->getVTList(NewVTs), Ops);
4176
4177      DEBUG(dbgs() << "\nNew: ");
4178      DEBUG(PN->dump(CurDAG));
4179      DEBUG(dbgs() << "\n");
4180    }
4181
4182    // Now we replace the original zero extend and its associated INSERT_SUBREG
4183    // with the value feeding the INSERT_SUBREG (which has now been promoted to
4184    // return an i64).
4185
4186    DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld:    ");
4187    DEBUG(N->dump(CurDAG));
4188    DEBUG(dbgs() << "\nNew: ");
4189    DEBUG(Op32.getNode()->dump(CurDAG));
4190    DEBUG(dbgs() << "\n");
4191
4192    ReplaceUses(N, Op32.getNode());
4193  }
4194
4195  if (MadeChange)
4196    CurDAG->RemoveDeadNodes();
4197}
4198
4199void PPCDAGToDAGISel::PeepholePPC64() {
4200  // These optimizations are currently supported only for 64-bit SVR4.
4201  if (PPCSubTarget->isDarwin() || !PPCSubTarget->isPPC64())
4202    return;
4203
4204  SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
4205  ++Position;
4206
4207  while (Position != CurDAG->allnodes_begin()) {
4208    SDNode *N = &*--Position;
4209    // Skip dead nodes and any non-machine opcodes.
4210    if (N->use_empty() || !N->isMachineOpcode())
4211      continue;
4212
4213    unsigned FirstOp;
4214    unsigned StorageOpcode = N->getMachineOpcode();
4215
4216    switch (StorageOpcode) {
4217    default: continue;
4218
4219    case PPC::LBZ:
4220    case PPC::LBZ8:
4221    case PPC::LD:
4222    case PPC::LFD:
4223    case PPC::LFS:
4224    case PPC::LHA:
4225    case PPC::LHA8:
4226    case PPC::LHZ:
4227    case PPC::LHZ8:
4228    case PPC::LWA:
4229    case PPC::LWZ:
4230    case PPC::LWZ8:
4231      FirstOp = 0;
4232      break;
4233
4234    case PPC::STB:
4235    case PPC::STB8:
4236    case PPC::STD:
4237    case PPC::STFD:
4238    case PPC::STFS:
4239    case PPC::STH:
4240    case PPC::STH8:
4241    case PPC::STW:
4242    case PPC::STW8:
4243      FirstOp = 1;
4244      break;
4245    }
4246
4247    // If this is a load or store with a zero offset, or within the alignment,
4248    // we may be able to fold an add-immediate into the memory operation.
4249    // The check against alignment is below, as it can't occur until we check
4250    // the arguments to N
4251    if (!isa<ConstantSDNode>(N->getOperand(FirstOp)))
4252      continue;
4253
4254    SDValue Base = N->getOperand(FirstOp + 1);
4255    if (!Base.isMachineOpcode())
4256      continue;
4257
4258    // On targets with fusion, we don't want this to fire and remove a fusion
4259    // opportunity, unless a) it results in another fusion opportunity or
4260    // b) optimizing for size.
4261    if (PPCSubTarget->hasFusion() &&
4262        (!MF->getFunction()->optForSize() && !Base.hasOneUse()))
4263      continue;
4264
4265    unsigned Flags = 0;
4266    bool ReplaceFlags = true;
4267
4268    // When the feeding operation is an add-immediate of some sort,
4269    // determine whether we need to add relocation information to the
4270    // target flags on the immediate operand when we fold it into the
4271    // load instruction.
4272    //
4273    // For something like ADDItocL, the relocation information is
4274    // inferred from the opcode; when we process it in the AsmPrinter,
4275    // we add the necessary relocation there.  A load, though, can receive
4276    // relocation from various flavors of ADDIxxx, so we need to carry
4277    // the relocation information in the target flags.
4278    switch (Base.getMachineOpcode()) {
4279    default: continue;
4280
4281    case PPC::ADDI8:
4282    case PPC::ADDI:
4283      // In some cases (such as TLS) the relocation information
4284      // is already in place on the operand, so copying the operand
4285      // is sufficient.
4286      ReplaceFlags = false;
4287      // For these cases, the immediate may not be divisible by 4, in
4288      // which case the fold is illegal for DS-form instructions.  (The
4289      // other cases provide aligned addresses and are always safe.)
4290      if ((StorageOpcode == PPC::LWA ||
4291           StorageOpcode == PPC::LD  ||
4292           StorageOpcode == PPC::STD) &&
4293          (!isa<ConstantSDNode>(Base.getOperand(1)) ||
4294           Base.getConstantOperandVal(1) % 4 != 0))
4295        continue;
4296      break;
4297    case PPC::ADDIdtprelL:
4298      Flags = PPCII::MO_DTPREL_LO;
4299      break;
4300    case PPC::ADDItlsldL:
4301      Flags = PPCII::MO_TLSLD_LO;
4302      break;
4303    case PPC::ADDItocL:
4304      Flags = PPCII::MO_TOC_LO;
4305      break;
4306    }
4307
4308    SDValue ImmOpnd = Base.getOperand(1);
4309    int MaxDisplacement = 0;
4310    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
4311      const GlobalValue *GV = GA->getGlobal();
4312      MaxDisplacement = GV->getAlignment() - 1;
4313    }
4314
4315    int Offset = N->getConstantOperandVal(FirstOp);
4316    if (Offset < 0 || Offset > MaxDisplacement)
4317      continue;
4318
4319    // We found an opportunity.  Reverse the operands from the add
4320    // immediate and substitute them into the load or store.  If
4321    // needed, update the target flags for the immediate operand to
4322    // reflect the necessary relocation information.
4323    DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase:    ");
4324    DEBUG(Base->dump(CurDAG));
4325    DEBUG(dbgs() << "\nN: ");
4326    DEBUG(N->dump(CurDAG));
4327    DEBUG(dbgs() << "\n");
4328
4329    // If the relocation information isn't already present on the
4330    // immediate operand, add it now.
4331    if (ReplaceFlags) {
4332      if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
4333        SDLoc dl(GA);
4334        const GlobalValue *GV = GA->getGlobal();
4335        // We can't perform this optimization for data whose alignment
4336        // is insufficient for the instruction encoding.
4337        if (GV->getAlignment() < 4 &&
4338            (StorageOpcode == PPC::LD || StorageOpcode == PPC::STD ||
4339             StorageOpcode == PPC::LWA || (Offset % 4) != 0)) {
4340          DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
4341          continue;
4342        }
4343        ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags);
4344      } else if (ConstantPoolSDNode *CP =
4345                 dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
4346        const Constant *C = CP->getConstVal();
4347        ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64,
4348                                                CP->getAlignment(),
4349                                                Offset, Flags);
4350      }
4351    }
4352
4353    if (FirstOp == 1) // Store
4354      (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
4355                                       Base.getOperand(0), N->getOperand(3));
4356    else // Load
4357      (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
4358                                       N->getOperand(2));
4359
4360    // The add-immediate may now be dead, in which case remove it.
4361    if (Base.getNode()->use_empty())
4362      CurDAG->RemoveDeadNode(Base.getNode());
4363  }
4364}
4365
4366
4367/// createPPCISelDag - This pass converts a legalized DAG into a
4368/// PowerPC-specific DAG, ready for instruction scheduling.
4369///
4370FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM) {
4371  return new PPCDAGToDAGISel(TM);
4372}
4373
4374static void initializePassOnce(PassRegistry &Registry) {
4375  const char *Name = "PowerPC DAG->DAG Pattern Instruction Selection";
4376  PassInfo *PI = new PassInfo(Name, "ppc-codegen", &SelectionDAGISel::ID,
4377                              nullptr, false, false);
4378  Registry.registerPass(*PI, true);
4379}
4380
4381void llvm::initializePPCDAGToDAGISelPass(PassRegistry &Registry) {
4382  CALL_ONCE_INITIALIZATION(initializePassOnce);
4383}
4384
4385