PPCISelLowering.cpp revision e7c768ea24027938b52abd6ee94755b6d17f3da3
//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "PPCTargetMachine.h"
#include "PPCPerfectShuffle.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
PPCTargetLowering::PPCTargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {

  // Fold away setcc operations if possible.
  setSetCCIsExpensive();
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmpLongJmp(true);

  // Set up the register classes.
  addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
  addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
  addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);

  setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
  setOperationAction(ISD::ConstantFP, MVT::f32, Expand);

  // PowerPC has no intrinsics for these particular operations
  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
  setOperationAction(ISD::MEMSET, MVT::Other, Expand);
  setOperationAction(ISD::MEMCPY, MVT::Other, Expand);

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
  setOperationAction(ISD::SEXTLOAD, MVT::i1, Expand);
  setOperationAction(ISD::SEXTLOAD, MVT::i8, Expand);

  // PowerPC has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // Use the hardware square root instruction if the subtarget has it;
  // otherwise expand FSQRT.
  if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);
  }

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // PowerPC does not have BSWAP, CTPOP or CTTZ
  setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
  setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ , MVT::i32  , Expand);

  // PowerPC does not have ROTR
  setOperationAction(ISD::ROTR, MVT::i32   , Expand);

  // PowerPC does not have SELECT
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit
  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND, which requires SetCC
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

  // PowerPC does not have [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);

  // PowerPC does not have truncstore for i1.
  setOperationAction(ISD::TRUNCSTORE, MVT::i1, Promote);

  // Support label based line numbers.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!TM.getSubtarget<PPCSubtarget>().isDarwin())
    setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET               , MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  if (TM.getSubtarget<PPCSubtarget>().is64Bit()) {
    // 64-bit implementations also have instructions for converting between
    // i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

    // FIXME: disable this lowered code.  This generates 64-bit register values,
    // and we don't model the fact that the top part is clobbered by calls.  We
    // need to flag these together so that the value isn't live across a call.
    //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);

    // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  if (TM.getSubtarget<PPCSubtarget>().has64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly
    addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL, MVT::i64, Custom);
    setOperationAction(ISD::SRL, MVT::i64, Custom);
    setOperationAction(ISD::SRA, MVT::i64, Custom);
  }

  if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
    // First set operation action for all vector types to expand.  Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
      setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND   , (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND   , (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::OR    , (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR    , (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::XOR   , (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR   , (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::LOAD  , (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD  , (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::STORE, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::STORE, (MVT::ValueType)VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Expand);

      setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Expand);
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM; others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    setOperationAction(ISD::AND   , MVT::v4i32, Legal);
    setOperationAction(ISD::OR    , MVT::v4i32, Legal);
    setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
    setOperationAction(ISD::STORE , MVT::v4i32, Legal);

    addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
  }

  setSetCCResultContents(ZeroOrOneSetCCResult);
  setStackPointerRegisterToSaveRestore(PPC::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
  setTargetDAGCombine(ISD::STORE);

  computeRegisterProperties();
}

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case PPCISD::FSEL:          return "PPCISD::FSEL";
  case PPCISD::FCFID:         return "PPCISD::FCFID";
  case PPCISD::FCTIDZ:        return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ:        return "PPCISD::FCTIWZ";
  case PPCISD::STFIWX:        return "PPCISD::STFIWX";
  case PPCISD::VMADDFP:       return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP:      return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM:         return "PPCISD::VPERM";
  case PPCISD::Hi:            return "PPCISD::Hi";
  case PPCISD::Lo:            return "PPCISD::Lo";
  case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL:           return "PPCISD::SRL";
  case PPCISD::SRA:           return "PPCISD::SRA";
  case PPCISD::SHL:           return "PPCISD::SHL";
  case PPCISD::EXTSW_32:      return "PPCISD::EXTSW_32";
  case PPCISD::STD_32:        return "PPCISD::STD_32";
  case PPCISD::CALL:          return "PPCISD::CALL";
  case PPCISD::RET_FLAG:      return "PPCISD::RET_FLAG";
  case PPCISD::MFCR:          return "PPCISD::MFCR";
  case PPCISD::VCMP:          return "PPCISD::VCMP";
  case PPCISD::VCMPo:         return "PPCISD::VCMPo";
  }
}

//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDOperand Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0);
  else if (Op.getOpcode() == ISD::EXTLOAD || Op.getOpcode() == ISD::LOAD) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->get()))
        return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0);
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(SDOperand Op, unsigned Val) {
  return Op.getOpcode() == ISD::UNDEF ||
         cast<ConstantSDNode>(Op)->getValue() == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
bool PPC::isVPKUHUMShuffleMask(SDNode *N, bool isUnary) {
  if (!isUnary) {
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getOperand(i),  i*2+1))
        return false;
  } else {
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getOperand(i),  i*2+1) ||
          !isConstantOrUndef(N->getOperand(i+8),  i*2+1))
        return false;
  }
  return true;
}

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
bool PPC::isVPKUWUMShuffleMask(SDNode *N, bool isUnary) {
  if (!isUnary) {
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getOperand(i  ),  i*2+2) ||
          !isConstantOrUndef(N->getOperand(i+1),  i*2+3))
        return false;
  } else {
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getOperand(i  ),  i*2+2) ||
          !isConstantOrUndef(N->getOperand(i+1),  i*2+3) ||
          !isConstantOrUndef(N->getOperand(i+8),  i*2+2) ||
          !isConstantOrUndef(N->getOperand(i+9),  i*2+3))
        return false;
  }
  return true;
}

/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(SDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

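  // A vmrg* mask interleaves units from the two inputs.  E.g. for the
  // vmrghw case (UnitSize==4, LHSStart==0, RHSStart==16), the expected byte
  // mask is <0,1,2,3, 16,17,18,19, 4,5,6,7, 20,21,22,23>.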
  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getOperand(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getOperand(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1, 2 or 4 bytes).
bool PPC::isVMRGLShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {
  if (!isUnary)
    return isVMerge(N, UnitSize, 8, 24);
  return isVMerge(N, UnitSize, 8, 8);
}

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1, 2 or 4 bytes).
bool PPC::isVMRGHShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {
  if (!isUnary)
    return isVMerge(N, UnitSize, 0, 16);
  return isVMerge(N, UnitSize, 0, 0);
}


/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");
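
  // A vsldoi mask is a run of 16 consecutive byte indices: e.g.
  // <3,4,5,...,18> selects bytes 3-18 of the concatenated inputs and is a
  // shift amount of 3.  For the unary form the indices wrap mod 16.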
  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && N->getOperand(i).getOpcode() == ISD::UNDEF; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = cast<ConstantSDNode>(N->getOperand(i))->getValue();
  if (ShiftAmt < i) return -1;
  ShiftAmt -= i;

  if (!isUnary) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(N->getOperand(i), ShiftAmt+i))
        return -1;
  } else {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(N->getOperand(i), (ShiftAmt+i) & 15))
        return -1;
  }

  return ShiftAmt;
}

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.
bool PPC::isSplatShuffleMask(SDNode *N, unsigned EltSize) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         N->getNumOperands() == 16 &&
         (EltSize == 1 || EltSize == 2 || EltSize == 4));

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = 0;
  SDOperand Elt = N->getOperand(0);
  if (ConstantSDNode *EltV = dyn_cast<ConstantSDNode>(Elt))
    ElementBase = EltV->getValue();
  else
    return false;   // FIXME: Handle UNDEF elements too!

  if (cast<ConstantSDNode>(Elt)->getValue() >= 16)
    return false;

  // Check that the bytes of the first element are consecutive.
  for (unsigned i = 1; i != EltSize; ++i) {
    if (!isa<ConstantSDNode>(N->getOperand(i)) ||
        cast<ConstantSDNode>(N->getOperand(i))->getValue() != i+ElementBase)
      return false;
  }

  assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(N->getOperand(i)) &&
           "Invalid VECTOR_SHUFFLE mask!");
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getOperand(i+j) != N->getOperand(j))
        return false;
  }

  return true;
}

/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
  assert(isSplatShuffleMask(N, EltSize));
  return cast<ConstantSDNode>(N->getOperand(0))->getValue() / EltSize;
}

/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted.  The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
SDOperand PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  SDOperand OpVal(0, 0);

  // If ByteSize of the splat is bigger than the element size of the
  // build_vector, then we have a case where we are checking for a splat where
  // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
    SDOperand UniquedVals[4];
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

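    // E.g. for a v8i16 build_vector tested with ByteSize == 4, Multiple is
    // 2: each {even,odd} pair of halfwords must agree with every other pair
    // for the whole vector to be a single word splat.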
    // See if all of the elements in the buildvector agree across.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
      // If the element isn't a constant, bail fully out.
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDOperand();

      if (UniquedVals[i&(Multiple-1)].Val == 0)
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
        return SDOperand();  // no match.
    }

    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    // either constant or undef values that are identical for each chunk.  See
    // if these chunks can form into a larger vspltis*.

    // Check to see if all of the leading entries are either 0 or -1.  If
    // neither, then this won't fit into the immediate field.
    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (UniquedVals[i].Val == 0) continue;  // Must have been undefs.

      LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
      LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
    }
    // Finally, check the least significant entry.
    if (LeadingZero) {
      if (UniquedVals[Multiple-1].Val == 0)
        return DAG.getTargetConstant(0, MVT::i32);  // 0,0,0,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getValue();
      if (Val < 16)
        return DAG.getTargetConstant(Val, MVT::i32);  // 0,0,0,4 -> vspltisw(4)
    }
    if (LeadingOnes) {
      if (UniquedVals[Multiple-1].Val == 0)
        return DAG.getTargetConstant(~0U, MVT::i32);  // -1,-1,-1,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSignExtended();
      if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
        return DAG.getTargetConstant(Val, MVT::i32);
    }

    return SDOperand();
  }

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.Val == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return SDOperand();
  }

  if (OpVal.Val == 0) return SDOperand();  // All UNDEF: use implicit def.

  unsigned ValSizeInBytes = 0;
  uint64_t Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
    Value = CN->getValue();
    ValSizeInBytes = MVT::getSizeInBits(CN->getValueType(0))/8;
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = FloatToBits(CN->getValue());
    ValSizeInBytes = 4;
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat.  The only such value that would fit in the immediate field is
  // zero, and we prefer to use vxor for it.
  if (ValSizeInBytes < ByteSize) return SDOperand();

  // If the element value is larger than the splat value, cut it in half and
  // check to see if the two halves are equal.  Continue doing this until we
  // get to ByteSize.  This allows us to handle 0x01010101 as 0x01.
  while (ValSizeInBytes > ByteSize) {
    ValSizeInBytes >>= 1;

    // If the top half equals the bottom half, we're still ok.
    if (((Value >> (ValSizeInBytes*8)) & ((1ULL << (8*ValSizeInBytes))-1)) !=
         (Value                        & ((1ULL << (8*ValSizeInBytes))-1)))
      return SDOperand();
  }

  // Properly sign extend the value.
  int ShAmt = (4-ByteSize)*8;
  int MaskVal = ((int)Value << ShAmt) >> ShAmt;

  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDOperand();

  // Finally, if this value fits in a 5 bit sext field, return it
  if (((MaskVal << (32-5)) >> (32-5)) == MaskVal)
    return DAG.getTargetConstant(MaskVal, MVT::i32);
  return SDOperand();
}

//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//

static SDOperand LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->get();
  SDOperand CPI = DAG.getTargetConstantPool(C, MVT::i32, CP->getAlignment());
  SDOperand Zero = DAG.getConstant(0, MVT::i32);

  const TargetMachine &TM = DAG.getTarget();

  // If this is a non-darwin platform, we don't support non-static relo models
  // yet.
  if (TM.getRelocationModel() == Reloc::Static ||
      !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    // Generate non-pic code that has direct accesses to the constant pool.
    // The address of the global is just (hi(&g)+lo(&g)).
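    // PPCISD::Hi/Lo materialize the two 16-bit halves of the symbol address
    // (e.g. ha16()/lo16() on Darwin); the ADD below combines them.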
    SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, CPI, Zero);
    SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, CPI, Zero);
    return DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
  }

  SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, CPI, Zero);
  if (TM.getRelocationModel() == Reloc::PIC) {
    // With PIC, the first instruction is actually "GR+hi(&G)".
    Hi = DAG.getNode(ISD::ADD, MVT::i32,
                     DAG.getNode(PPCISD::GlobalBaseReg, MVT::i32), Hi);
  }

  SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, CPI, Zero);
  Lo = DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
  return Lo;
}

static SDOperand LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDOperand GA = DAG.getTargetGlobalAddress(GV, MVT::i32, GSDN->getOffset());
  SDOperand Zero = DAG.getConstant(0, MVT::i32);

  const TargetMachine &TM = DAG.getTarget();

  // If this is a non-darwin platform, we don't support non-static relo models
  // yet.
  if (TM.getRelocationModel() == Reloc::Static ||
      !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    // Generate non-pic code that has direct accesses to globals.
    // The address of the global is just (hi(&g)+lo(&g)).
    SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, GA, Zero);
    SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, GA, Zero);
    return DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
  }

  SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, GA, Zero);
  if (TM.getRelocationModel() == Reloc::PIC) {
    // With PIC, the first instruction is actually "GR+hi(&G)".
    Hi = DAG.getNode(ISD::ADD, MVT::i32,
                     DAG.getNode(PPCISD::GlobalBaseReg, MVT::i32), Hi);
  }

  SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, GA, Zero);
  Lo = DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);

  if (!GV->hasWeakLinkage() && !GV->hasLinkOnceLinkage() &&
      (!GV->isExternal() || GV->hasNotBeenReadFromBytecode()))
    return Lo;

  // If the global is weak or external, we have to go through the lazy
  // resolution stub.
  return DAG.getLoad(MVT::i32, DAG.getEntryNode(), Lo, DAG.getSrcValue(0));
}

static SDOperand LowerSETCC(SDOperand Op, SelectionDAG &DAG) {
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();

  // If we're comparing for equality to zero, expose the fact that this is
  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
  // fold the new nodes.
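  // I.e. (seteq X, 0) is computed as (ctlz X) >> log2(bitwidth): ctlz
  // yields the full bit width only when X is zero, so the shifted result
  // is 1 for X==0 and 0 otherwise.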
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
    if (C->isNullValue() && CC == ISD::SETEQ) {
      MVT::ValueType VT = Op.getOperand(0).getValueType();
      SDOperand Zext = Op.getOperand(0);
      if (VT < MVT::i32) {
        VT = MVT::i32;
        Zext = DAG.getNode(ISD::ZERO_EXTEND, VT, Op.getOperand(0));
      }
      unsigned Log2b = Log2_32(MVT::getSizeInBits(VT));
      SDOperand Clz = DAG.getNode(ISD::CTLZ, VT, Zext);
      SDOperand Scc = DAG.getNode(ISD::SRL, VT, Clz,
                                  DAG.getConstant(Log2b, MVT::i32));
      return DAG.getNode(ISD::TRUNCATE, MVT::i32, Scc);
    }
    // Leave comparisons against 0 and -1 alone for now, since they're usually
    // optimized.  FIXME: revisit this when we can custom lower all setcc
    // optimizations.
    if (C->isAllOnesValue() || C->isNullValue())
      return SDOperand();
  }

  // If we have an integer seteq/setne, turn it into a compare against zero
  // by subtracting the rhs from the lhs, which is faster than setting a
  // condition register, reading it back out, and masking the correct bit.
  MVT::ValueType LHSVT = Op.getOperand(0).getValueType();
  if (MVT::isInteger(LHSVT) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    MVT::ValueType VT = Op.getValueType();
    SDOperand Sub = DAG.getNode(ISD::SUB, LHSVT, Op.getOperand(0),
                                Op.getOperand(1));
    return DAG.getSetCC(VT, Sub, DAG.getConstant(0, LHSVT), CC);
  }
  return SDOperand();
}

static SDOperand LowerVASTART(SDOperand Op, SelectionDAG &DAG,
                              unsigned VarArgsFrameIndex) {
  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
  return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR,
                     Op.getOperand(1), Op.getOperand(2));
}

static SDOperand LowerRET(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Copy;
  switch(Op.getNumOperands()) {
  default:
    assert(0 && "Do not know how to return this many arguments!");
    abort();
  case 1:
    return SDOperand(); // ret void is legal
  case 2: {
    MVT::ValueType ArgVT = Op.getOperand(1).getValueType();
    unsigned ArgReg;
    if (MVT::isVector(ArgVT))
      ArgReg = PPC::V2;
    else if (MVT::isInteger(ArgVT))
      ArgReg = PPC::R3;
    else {
      assert(MVT::isFloatingPoint(ArgVT));
      ArgReg = PPC::F1;
    }

    Copy = DAG.getCopyToReg(Op.getOperand(0), ArgReg, Op.getOperand(1),
                            SDOperand());

    // If we haven't noted that R3/F1 is live out, do so now.
    if (DAG.getMachineFunction().liveout_empty())
      DAG.getMachineFunction().addLiveOut(ArgReg);
    break;
  }
  case 3:
    Copy = DAG.getCopyToReg(Op.getOperand(0), PPC::R3, Op.getOperand(2),
                            SDOperand());
    Copy = DAG.getCopyToReg(Copy, PPC::R4, Op.getOperand(1), Copy.getValue(1));
    // If we haven't noted that R3+R4 are live out, do so now.
    if (DAG.getMachineFunction().liveout_empty()) {
      DAG.getMachineFunction().addLiveOut(PPC::R3);
      DAG.getMachineFunction().addLiveOut(PPC::R4);
    }
    break;
  }
  return DAG.getNode(PPCISD::RET_FLAG, MVT::Other, Copy, Copy.getValue(1));
}

/// LowerSELECT_CC - Lower floating point select_cc's into an fsel instruction
/// when possible.
static SDOperand LowerSELECT_CC(SDOperand Op, SelectionDAG &DAG) {
  // Not FP? Not a fsel.
  if (!MVT::isFloatingPoint(Op.getOperand(0).getValueType()) ||
      !MVT::isFloatingPoint(Op.getOperand(2).getValueType()))
    return SDOperand();

  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();

  // Cannot handle SETEQ/SETNE.
  if (CC == ISD::SETEQ || CC == ISD::SETNE) return SDOperand();

  MVT::ValueType ResVT = Op.getValueType();
  MVT::ValueType CmpVT = Op.getOperand(0).getValueType();
  SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);
  SDOperand TV  = Op.getOperand(2), FV  = Op.getOperand(3);
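
  // fsel computes (A >= 0.0) ? B : C, so each ordered comparison below is
  // reduced to a sign test of LHS-RHS (or RHS-LHS, with TV/FV swapped).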

  // If the RHS of the comparison is a 0.0, we don't need to do the
  // subtraction at all.
  if (isFloatingPointZero(RHS))
    switch (CC) {
    default: break;       // SETUO etc aren't handled by fsel.
    case ISD::SETULT:
    case ISD::SETLT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
    case ISD::SETUGE:
    case ISD::SETGE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, ResVT, LHS, TV, FV);
    case ISD::SETUGT:
    case ISD::SETGT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setgt
    case ISD::SETULE:
    case ISD::SETLE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, ResVT,
                         DAG.getNode(ISD::FNEG, MVT::f64, LHS), TV, FV);
    }

  SDOperand Cmp;
  switch (CC) {
  default: break;       // SETUO etc aren't handled by fsel.
  case ISD::SETULT:
  case ISD::SETLT:
    Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
  case ISD::SETUGE:
  case ISD::SETGE:
    Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
  case ISD::SETUGT:
  case ISD::SETGT:
    Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
  case ISD::SETULE:
  case ISD::SETLE:
    Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
  }
  return SDOperand();
}

static SDOperand LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
  assert(MVT::isFloatingPoint(Op.getOperand(0).getValueType()));
  SDOperand Src = Op.getOperand(0);
  if (Src.getValueType() == MVT::f32)
    Src = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Src);

  SDOperand Tmp;
  switch (Op.getValueType()) {
  default: assert(0 && "Unhandled FP_TO_SINT type in custom expander!");
  case MVT::i32:
    Tmp = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Src);
    break;
  case MVT::i64:
    Tmp = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Src);
    break;
  }

  // Convert the FP value to an int value through memory.
  SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::i64, Tmp);
  if (Op.getValueType() == MVT::i32)
    Bits = DAG.getNode(ISD::TRUNCATE, MVT::i32, Bits);
  return Bits;
}

static SDOperand LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
  if (Op.getOperand(0).getValueType() == MVT::i64) {
    SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0));
    SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Bits);
    if (Op.getValueType() == MVT::f32)
      FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP);
    return FP;
  }

  assert(Op.getOperand(0).getValueType() == MVT::i32 &&
         "Unhandled SINT_TO_FP type in custom expander!");
  // Since we only generate this in 64-bit mode, we can take advantage of
  // 64-bit registers.  In particular, sign extend the input value into the
  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack,
  // then lfd it and fcfid it.
  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = FrameInfo->CreateStackObject(8, 8);
  SDOperand FIdx = DAG.getFrameIndex(FrameIdx, MVT::i32);

  SDOperand Ext64 = DAG.getNode(PPCISD::EXTSW_32, MVT::i32,
                                Op.getOperand(0));

  // STD the extended value into the stack slot.
  SDOperand Store = DAG.getNode(PPCISD::STD_32, MVT::Other,
                                DAG.getEntryNode(), Ext64, FIdx,
                                DAG.getSrcValue(NULL));
  // Load the value as a double.
  SDOperand Ld = DAG.getLoad(MVT::f64, Store, FIdx, DAG.getSrcValue(NULL));

  // FCFID it and return it.
  SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Ld);
  if (Op.getValueType() == MVT::f32)
    FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP);
  return FP;
}

static SDOperand LowerSHL(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getValueType() == MVT::i64 &&
         Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SHL!");
  // The generic code does a fine job expanding shift by a constant.
  if (isa<ConstantSDNode>(Op.getOperand(1))) return SDOperand();

  // Otherwise, expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
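  // PPC's 32-bit shift instructions take a 6-bit amount and yield 0 for
  // amounts in [32,63].  For Amt < 32, the Tmp6 term vanishes and
  // OutHi = (Hi << Amt) | (Lo >> (32-Amt)); for Amt >= 32, the Lo bits
  // instead reach OutHi through Tmp6 = Lo << (Amt-32).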
  SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
                             DAG.getConstant(0, MVT::i32));
  SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
                             DAG.getConstant(1, MVT::i32));
  SDOperand Amt = Op.getOperand(1);

  SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
                               DAG.getConstant(32, MVT::i32), Amt);
  SDOperand Tmp2 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Amt);
  SDOperand Tmp3 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Tmp1);
  SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
  SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
                               DAG.getConstant(-32U, MVT::i32));
  SDOperand Tmp6 = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Tmp5);
  SDOperand OutHi = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6);
  SDOperand OutLo = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Amt);
  return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
}

static SDOperand LowerSRL(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getValueType() == MVT::i64 &&
         Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SRL!");
  // The generic code does a fine job expanding shift by a constant.
  if (isa<ConstantSDNode>(Op.getOperand(1))) return SDOperand();

  // Otherwise, expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
                             DAG.getConstant(0, MVT::i32));
  SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
                             DAG.getConstant(1, MVT::i32));
  SDOperand Amt = Op.getOperand(1);

  SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
                               DAG.getConstant(32, MVT::i32), Amt);
  SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt);
  SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1);
  SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
  SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
                               DAG.getConstant(-32U, MVT::i32));
  SDOperand Tmp6 = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Tmp5);
  SDOperand OutLo = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6);
  SDOperand OutHi = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Amt);
  return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
}

static SDOperand LowerSRA(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getValueType() == MVT::i64 &&
         Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SRA!");
  // The generic code does a fine job expanding shift by a constant.
  if (isa<ConstantSDNode>(Op.getOperand(1))) return SDOperand();

  // Otherwise, expand into a bunch of logical ops, followed by a select_cc.
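  // Unlike SHL/SRL, the OR trick cannot pick the low word here: for
  // oversized amounts sraw fills with copies of the sign bit rather than
  // zeros, so the low word is instead chosen with a select_cc on Amt-32.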
  SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
                             DAG.getConstant(0, MVT::i32));
  SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
                             DAG.getConstant(1, MVT::i32));
  SDOperand Amt = Op.getOperand(1);

  SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
                               DAG.getConstant(32, MVT::i32), Amt);
  SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt);
  SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1);
  SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
  SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
                               DAG.getConstant(-32U, MVT::i32));
  SDOperand Tmp6 = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Tmp5);
  SDOperand OutHi = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Amt);
  SDOperand OutLo = DAG.getSelectCC(Tmp5, DAG.getConstant(0, MVT::i32),
                                    Tmp4, Tmp6, ISD::SETLE);
  return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
}

//===----------------------------------------------------------------------===//
// Vector related lowering.
//

// If this is a vector of constants or undefs, get the bits.  A bit in
// UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
// zero.  Return true if this is not an array of constants, false if it is.
//
static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
                                       uint64_t UndefBits[2]) {
  // Start with zero'd results.
  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;

  unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    SDOperand OpVal = BV->getOperand(i);

    unsigned PartNo = i >= e/2;     // In the upper 64 bits?
    unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.

    uint64_t EltBits = 0;
    if (OpVal.getOpcode() == ISD::UNDEF) {
      uint64_t EltUndefBits = ~0U >> (32-EltBitSize);
      UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
      continue;
    } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      EltBits = CN->getValue() & (~0U >> (32-EltBitSize));
    } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
      assert(CN->getValueType(0) == MVT::f32 &&
             "Only one legal FP vector type!");
      EltBits = FloatToBits(CN->getValue());
    } else {
      // Nonconstant element.
      return true;
    }

    VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
  }

  //printf("%llx %llx  %llx %llx\n",
  //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
  return false;
}

// If this is a splat (repetition) of a value across the whole vector, return
// the smallest size that splats it.  For example, "0x01010101010101..." is a
// splat of 0x01, 0x0101, and 0x01010101.  We return SplatBits = 0x01 and
// SplatSize = 1 byte.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            unsigned &SplatBits, unsigned &SplatUndef,
                            unsigned &SplatSize) {

  // Don't let undefs prevent splats from matching.  See if the top 64-bits are
  // the same as the lower 64-bits, ignoring undefs.
  if ((Bits128[0] & ~Undef128[1]) != (Bits128[1] & ~Undef128[0]))
    return false;  // Can't be a splat if two pieces don't match.

  uint64_t Bits64  = Bits128[0] | Bits128[1];
  uint64_t Undef64 = Undef128[0] & Undef128[1];

  // Check that the top 32-bits are the same as the lower 32-bits, ignoring
  // undefs.
  if ((Bits64 & (~Undef64 >> 32)) != ((Bits64 >> 32) & ~Undef64))
    return false;  // Can't be a splat if two pieces don't match.

  uint32_t Bits32  = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);

  // If the top 16-bits are different from the lower 16-bits, ignoring
  // undefs, we have an i32 splat.
  if ((Bits32 & (~Undef32 >> 16)) != ((Bits32 >> 16) & ~Undef32)) {
    SplatBits = Bits32;
    SplatUndef = Undef32;
    SplatSize = 4;
    return true;
  }

  uint16_t Bits16  = uint16_t(Bits32)  | uint16_t(Bits32 >> 16);
  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);

  // If the top 8-bits are different from the lower 8-bits, ignoring
  // undefs, we have an i16 splat.
  if ((Bits16 & (uint16_t(~Undef16) >> 8)) != ((Bits16 >> 8) & ~Undef16)) {
    SplatBits = Bits16;
    SplatUndef = Undef16;
    SplatSize = 2;
    return true;
  }

  // Otherwise, we have an 8-bit splat.
  SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
  SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
  SplatSize = 1;
  return true;
}

/// BuildSplatI - Build a canonical splati of Val with an element size of
/// SplatSize.  Cast the result to VT.
static SDOperand BuildSplatI(int Val, unsigned SplatSize, MVT::ValueType VT,
                             SelectionDAG &DAG) {
  assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");

  // Force vspltis[hw] -1 to vspltisb -1.
  if (Val == -1) SplatSize = 1;

  static const MVT::ValueType VTys[] = { // canonical VT to use for each size.
    MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
  };
  MVT::ValueType CanonicalVT = VTys[SplatSize-1];

  // Build a canonical splat for this value.
  SDOperand Elt = DAG.getConstant(Val, MVT::getVectorBaseType(CanonicalVT));
  std::vector<SDOperand> Ops(MVT::getVectorNumElements(CanonicalVT), Elt);
  SDOperand Res = DAG.getNode(ISD::BUILD_VECTOR, CanonicalVT, Ops);
  return DAG.getNode(ISD::BIT_CONVERT, VT, Res);
}

/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
/// specified intrinsic ID.
static SDOperand BuildIntrinsicOp(unsigned IID, SDOperand LHS, SDOperand RHS,
                                  SelectionDAG &DAG,
                                  MVT::ValueType DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = LHS.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT,
                     DAG.getConstant(IID, MVT::i32), LHS, RHS);
}

/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
/// specified intrinsic ID.
static SDOperand BuildIntrinsicOp(unsigned IID, SDOperand Op0, SDOperand Op1,
                                  SDOperand Op2, SelectionDAG &DAG,
                                  MVT::ValueType DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = Op0.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT,
                     DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
}


/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
/// amount.  The result has the specified value type.
static SDOperand BuildVSLDOI(SDOperand LHS, SDOperand RHS, unsigned Amt,
                             MVT::ValueType VT, SelectionDAG &DAG) {
  // Force LHS/RHS to be the right type.
  LHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, LHS);
  RHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, RHS);

  std::vector<SDOperand> Ops;
  for (unsigned i = 0; i != 16; ++i)
    Ops.push_back(DAG.getConstant(i+Amt, MVT::i32));
  SDOperand T = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v16i8, LHS, RHS,
                            DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops));
  return DAG.getNode(ISD::BIT_CONVERT, VT, T);
}

// If this is a case we can't handle, return null and let the default
// expansion code take care of it.  If we CAN select this case, and if it
// selects to a single instruction, return Op.  Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  // If this is a vector of constants or undefs, get the bits.  A bit in
  // UndefBits is set if the corresponding element of the vector is an
  // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
  // zero.
  uint64_t VectorBits[2];
  uint64_t UndefBits[2];
  if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits))
    return SDOperand();   // Not a constant vector.

  // If this is a splat (repetition) of a value across the whole vector, return
  // the smallest size that splats it.  For example, "0x01010101010101..." is a
  // splat of 0x01, 0x0101, and 0x01010101.  We return SplatBits = 0x01 and
  // SplatSize = 1 byte.
  unsigned SplatBits, SplatUndef, SplatSize;
  if (isConstantSplat(VectorBits, UndefBits, SplatBits, SplatUndef, SplatSize)){
    bool HasAnyUndefs = (UndefBits[0] | UndefBits[1]) != 0;

    // First, handle single instruction cases.

    // All zeros?
    if (SplatBits == 0) {
      // Canonicalize all zero vectors to be v4i32.
      if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
        SDOperand Z = DAG.getConstant(0, MVT::i32);
        Z = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Z, Z, Z, Z);
        Op = DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Z);
      }
      return Op;
    }

    // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
    int32_t SextVal= int32_t(SplatBits << (32-8*SplatSize)) >> (32-8*SplatSize);
    if (SextVal >= -16 && SextVal <= 15)
      return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG);

    // Two instruction sequences.

    // If this value is in the range [-32,30] and is even, use:
    //    tmp = VSPLTI[bhw], result = add tmp, tmp
    if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
      Op = BuildSplatI(SextVal >> 1, SplatSize, Op.getValueType(), DAG);
      return DAG.getNode(ISD::ADD, Op.getValueType(), Op, Op);
    }

    // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
    // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
    // for fneg/fabs.
    if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
      // Make -1 and vspltisw -1:
      SDOperand OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG);

      // Make the VSLW intrinsic, computing 0x8000_0000.
      SDOperand Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
                                       OnesV, DAG);

      // xor by OnesV to invert it.
      Res = DAG.getNode(ISD::XOR, MVT::v4i32, Res, OnesV);
      return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
    }

    // Check to see if this is a wide variety of vsplti*, binop self cases.
    unsigned SplatBitSize = SplatSize*8;
    static const char SplatCsts[] = {
      -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
      -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
    };
    for (unsigned idx = 0; idx < sizeof(SplatCsts)/sizeof(SplatCsts[0]); ++idx){
      // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
      // cases which are ambiguous (e.g. formation of 0x8000_0000).
      int i = SplatCsts[idx];

      // Figure out what shift amount will be used by altivec if shifted by i
      // in this splat size.
      unsigned TypeShiftAmt = i & (SplatBitSize-1);

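      // Each of the shift-by-self tricks below shifts the splat by itself,
      // so every element of value i is shifted by TypeShiftAmt (i mod the
      // element bit width); e.g. vspltisb -1 followed by vslb v,v yields a
      // splat of 0x80.
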
      // vsplti + shl self.
      if (SextVal == (i << (int)TypeShiftAmt)) {
        Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG);
        static const unsigned IIDs[] = { // Intrinsic to use for each size.
          Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
          Intrinsic::ppc_altivec_vslw
        };
        return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);
      }

      // vsplti + srl self.
      if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
        Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG);
        static const unsigned IIDs[] = { // Intrinsic to use for each size.
          Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
          Intrinsic::ppc_altivec_vsrw
        };
        return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);
      }

      // vsplti + sra self.
      if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
        Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG);
        static const unsigned IIDs[] = { // Intrinsic to use for each size.
          Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
          Intrinsic::ppc_altivec_vsraw
        };
        return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);
      }

      // vsplti + rol self.
      if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
                           ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
        Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG);
        static const unsigned IIDs[] = { // Intrinsic to use for each size.
          Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
          Intrinsic::ppc_altivec_vrlw
        };
        return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);
      }

      // t = vsplti c, result = vsldoi t, t, 1
      if (SextVal == ((i << 8) | (i >> (TypeShiftAmt-8)))) {
        SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);
        return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG);
      }
      // t = vsplti c, result = vsldoi t, t, 2
      if (SextVal == ((i << 16) | (i >> (TypeShiftAmt-16)))) {
        SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);
        return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG);
      }
      // t = vsplti c, result = vsldoi t, t, 3
      if (SextVal == ((i << 24) | (i >> (TypeShiftAmt-24)))) {
        SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);
        return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG);
      }
    }

    // Three instruction sequences.

    // Odd, in range [17,31]:  (vsplti C)-(vsplti -16).
    if (SextVal >= 0 && SextVal <= 31) {
      SDOperand LHS = BuildSplatI(SextVal-16, SplatSize, Op.getValueType(),DAG);
      SDOperand RHS = BuildSplatI(-16, SplatSize, Op.getValueType(), DAG);
      return DAG.getNode(ISD::SUB, Op.getValueType(), LHS, RHS);
    }
    // Odd, in range [-31,-17]:  (vsplti C)+(vsplti -16).
    if (SextVal >= -31 && SextVal <= 0) {
      SDOperand LHS = BuildSplatI(SextVal+16, SplatSize, Op.getValueType(),DAG);
      SDOperand RHS = BuildSplatI(-16, SplatSize, Op.getValueType(), DAG);
      return DAG.getNode(ISD::ADD, Op.getValueType(), LHS, RHS);
    }
  }

  return SDOperand();
}
1261
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle.
static SDOperand GeneratePerfectShuffle(unsigned PFEntry, SDOperand LHS,
                                        SDOperand RHS, SelectionDAG &DAG) {
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
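  // Each PerfectShuffleTable entry packs four fields into 32 bits: the cost
  // of the sequence in bits [31:30], the operation number in bits [29:26],
  // and the 13-bit left/right operand IDs in bits [25:13] and [12:0].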

  enum {
    OP_COPY = 0,   // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
    OP_VMRGHW,
    OP_VMRGLW,
    OP_VSPLTISW0,
    OP_VSPLTISW1,
    OP_VSPLTISW2,
    OP_VSPLTISW3,
    OP_VSLDOI4,
    OP_VSLDOI8,
    OP_VSLDOI12
  };

  if (OpNum == OP_COPY) {
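    // The 13-bit IDs encode the four source words as base-9 digits, where
    // digit 8 means undef: (1*9+2)*9+3 encodes <0,1,2,3> (i.e. LHS unmodified)
    // and ((4*9+5)*9+6)*9+7 encodes <4,5,6,7> (i.e. RHS unmodified).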
    if (LHSID == (1*9+2)*9+3) return LHS;
    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
    return RHS;
  }

  SDOperand OpLHS, OpRHS;
  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG);
  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG);

  unsigned ShufIdxs[16];
  switch (OpNum) {
  default: assert(0 && "Unknown i32 permute!");
  case OP_VMRGHW:
    ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
    ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
    ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
    ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
    break;
  case OP_VMRGLW:
    ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
    ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
    ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
    ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
    break;
  case OP_VSPLTISW0:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+0;
    break;
  case OP_VSPLTISW1:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+4;
    break;
  case OP_VSPLTISW2:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+8;
    break;
  case OP_VSPLTISW3:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+12;
    break;
  case OP_VSLDOI4:
    return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG);
  case OP_VSLDOI8:
    return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG);
  case OP_VSLDOI12:
    return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG);
  }
  std::vector<SDOperand> Ops;
  for (unsigned i = 0; i != 16; ++i)
    Ops.push_back(DAG.getConstant(ShufIdxs[i], MVT::i32));

  return DAG.getNode(ISD::VECTOR_SHUFFLE, OpLHS.getValueType(), OpLHS, OpRHS,
                     DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops));
}

/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
/// is a shuffle we can handle in a single instruction, return it.  Otherwise,
/// return the code it can be lowered into.  Worst case, it can always be
/// lowered into a vperm.
static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
  SDOperand V1 = Op.getOperand(0);
  SDOperand V2 = Op.getOperand(1);
  SDOperand PermMask = Op.getOperand(2);

  // Cases that are handled by instructions that take permute immediates
  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
  // selected by the instruction selector.
  if (V2.getOpcode() == ISD::UNDEF) {
    if (PPC::isSplatShuffleMask(PermMask.Val, 1) ||
        PPC::isSplatShuffleMask(PermMask.Val, 2) ||
        PPC::isSplatShuffleMask(PermMask.Val, 4) ||
        PPC::isVPKUWUMShuffleMask(PermMask.Val, true) ||
        PPC::isVPKUHUMShuffleMask(PermMask.Val, true) ||
        PPC::isVSLDOIShuffleMask(PermMask.Val, true) != -1 ||
        PPC::isVMRGLShuffleMask(PermMask.Val, 1, true) ||
        PPC::isVMRGLShuffleMask(PermMask.Val, 2, true) ||
        PPC::isVMRGLShuffleMask(PermMask.Val, 4, true) ||
        PPC::isVMRGHShuffleMask(PermMask.Val, 1, true) ||
        PPC::isVMRGHShuffleMask(PermMask.Val, 2, true) ||
        PPC::isVMRGHShuffleMask(PermMask.Val, 4, true)) {
      return Op;
    }
  }

  // Altivec has a variety of "shuffle immediates" that take two vector inputs
  // and produce a fixed permutation.  If any of these match, do not lower to
  // VPERM.
  if (PPC::isVPKUWUMShuffleMask(PermMask.Val, false) ||
      PPC::isVPKUHUMShuffleMask(PermMask.Val, false) ||
      PPC::isVSLDOIShuffleMask(PermMask.Val, false) != -1 ||
      PPC::isVMRGLShuffleMask(PermMask.Val, 1, false) ||
      PPC::isVMRGLShuffleMask(PermMask.Val, 2, false) ||
      PPC::isVMRGLShuffleMask(PermMask.Val, 4, false) ||
      PPC::isVMRGHShuffleMask(PermMask.Val, 1, false) ||
      PPC::isVMRGHShuffleMask(PermMask.Val, 2, false) ||
      PPC::isVMRGHShuffleMask(PermMask.Val, 4, false))
    return Op;

  // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
  // perfect shuffle table to emit an optimal matching sequence.
  unsigned PFIndexes[4];
  bool isFourElementShuffle = true;
  for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
    unsigned EltNo = 8;   // Start out undef.
    for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
      if (PermMask.getOperand(i*4+j).getOpcode() == ISD::UNDEF)
        continue;   // Undef, ignore it.

      unsigned ByteSource =
        cast<ConstantSDNode>(PermMask.getOperand(i*4+j))->getValue();
      if ((ByteSource & 3) != j) {
        isFourElementShuffle = false;
        break;
      }

      if (EltNo == 8) {
        EltNo = ByteSource/4;
      } else if (EltNo != ByteSource/4) {
        isFourElementShuffle = false;
        break;
      }
    }
    PFIndexes[i] = EltNo;
  }

  // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
  // perfect shuffle vector to determine if it is cost effective to do this as
  // discrete instructions, or whether we should use a vperm.
  if (isFourElementShuffle) {
    // Compute the index in the perfect shuffle table.
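    // Each PFIndexes[i] is a base-9 digit in [0,8] (8 = undef), so the mask
    // is read as a four-digit base-9 number; e.g. the identity shuffle
    // <0,1,2,3> maps to index 0*729 + 1*81 + 2*9 + 3 = 102.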
    unsigned PFTableIndex =
      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];

    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
    unsigned Cost = (PFEntry >> 30);

    // Determining when to avoid vperm is tricky.  Many things affect the cost
    // of vperm, particularly how many times the perm mask needs to be computed.
    // For example, if the perm mask can be hoisted out of a loop or is already
    // used (perhaps because there are multiple permutes with the same shuffle
    // mask?) the vperm has a cost of 1.  OTOH, hoisting the permute mask out of
    // the loop requires an extra register.
    //
    // As a compromise, we only emit discrete instructions if the shuffle can be
    // generated in 3 or fewer operations.  When we have loop information
    // available, if this block is within a loop, we should avoid using vperm
    // for 3-operation perms and use a constant pool load instead.
    if (Cost < 3)
      return GeneratePerfectShuffle(PFEntry, V1, V2, DAG);
  }

  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
  // vector that will get spilled to the constant pool.
  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;

  // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
  // that it is in input element units, not in bytes.  Convert now.
  MVT::ValueType EltVT = MVT::getVectorBaseType(V1.getValueType());
  unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;

  std::vector<SDOperand> ResultMask;
  for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
    unsigned SrcElt;
    if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
      SrcElt = 0;
    else
      SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();

    for (unsigned j = 0; j != BytesPerElement; ++j)
      ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
                                           MVT::i8));
  }
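  // For example, on v4i32 a mask entry of 5 expands to the byte indices
  // <20,21,22,23>, which is the form vperm expects.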

  SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, ResultMask);
  return DAG.getNode(PPCISD::VPERM, V1.getValueType(), V1, V2, VPermMask);
}

/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
/// lower, do it, otherwise return null.
static SDOperand LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue();

  // If this is a lowered altivec predicate compare, CompareOpc is set to the
  // opcode number of the comparison.
  int CompareOpc = -1;
  bool isDot = false;
  switch (IntNo) {
  default: return SDOperand();    // Don't custom lower most intrinsics.
  // Comparison predicates.
  case Intrinsic::ppc_altivec_vcmpbfp_p:  CompareOpc = 966; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc =   6; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc =  70; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;

  // Normal comparisons.
  case Intrinsic::ppc_altivec_vcmpbfp:    CompareOpc = 966; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpeqfp:   CompareOpc = 198; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpequb:   CompareOpc =   6; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpequh:   CompareOpc =  70; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpequw:   CompareOpc = 134; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgefp:   CompareOpc = 454; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtfp:   CompareOpc = 710; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtsb:   CompareOpc = 774; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtsh:   CompareOpc = 838; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtsw:   CompareOpc = 902; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtub:   CompareOpc = 518; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtuh:   CompareOpc = 582; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtuw:   CompareOpc = 646; isDot = 0; break;
  }

  assert(CompareOpc > 0 && "We only lower altivec predicate compares so far!");

  // If this is a non-dot comparison, make the VCMP node.
  if (!isDot) {
    SDOperand Tmp = DAG.getNode(PPCISD::VCMP, Op.getOperand(2).getValueType(),
                                Op.getOperand(1), Op.getOperand(2),
                                DAG.getConstant(CompareOpc, MVT::i32));
    return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Tmp);
  }

  // Create the PPCISD altivec 'dot' comparison node.
  std::vector<SDOperand> Ops;
  std::vector<MVT::ValueType> VTs;
  Ops.push_back(Op.getOperand(2));  // LHS
  Ops.push_back(Op.getOperand(3));  // RHS
  Ops.push_back(DAG.getConstant(CompareOpc, MVT::i32));
  VTs.push_back(Op.getOperand(2).getValueType());
  VTs.push_back(MVT::Flag);
  SDOperand CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops);

  // Now that we have the comparison, emit a copy from the CR to a GPR.
  // This is flagged to the above dot comparison.
  SDOperand Flags = DAG.getNode(PPCISD::MFCR, MVT::i32,
                                DAG.getRegister(PPC::CR6, MVT::i32),
                                CompNode.getValue(1));

  // Unpack the result based on how the target uses it.
  unsigned BitNo;   // Bit # of CR6.
  bool InvertBit;   // Invert result?
  switch (cast<ConstantSDNode>(Op.getOperand(1))->getValue()) {
  default:  // Can't happen, don't crash on invalid number though.
  case 0:   // Return the value of the EQ bit of CR6.
    BitNo = 0; InvertBit = false;
    break;
  case 1:   // Return the inverted value of the EQ bit of CR6.
    BitNo = 0; InvertBit = true;
    break;
  case 2:   // Return the value of the LT bit of CR6.
    BitNo = 2; InvertBit = false;
    break;
  case 3:   // Return the inverted value of the LT bit of CR6.
    BitNo = 2; InvertBit = true;
    break;
  }
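
  // After the mfcr, the CR6 field (CR bits 24-27) occupies bits 7..4 of the
  // GPR, counting from the LSB, so the shift amount 8-(3-BitNo) brings the
  // requested bit (EQ at GPR bit 5, LT at GPR bit 7) down to bit 0.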
  // Shift the bit into the low position.
  Flags = DAG.getNode(ISD::SRL, MVT::i32, Flags,
                      DAG.getConstant(8-(3-BitNo), MVT::i32));
  // Isolate the bit.
  Flags = DAG.getNode(ISD::AND, MVT::i32, Flags,
                      DAG.getConstant(1, MVT::i32));

  // If we are supposed to, toggle the bit.
  if (InvertBit)
    Flags = DAG.getNode(ISD::XOR, MVT::i32, Flags,
                        DAG.getConstant(1, MVT::i32));
  return Flags;
}

static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  // Create a stack slot that is 16-byte aligned.
  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = FrameInfo->CreateStackObject(16, 16);
  SDOperand FIdx = DAG.getFrameIndex(FrameIdx, MVT::i32);

  // Store the input value into Value#0 of the stack slot.
  SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, DAG.getEntryNode(),
                                Op.getOperand(0), FIdx, DAG.getSrcValue(NULL));
  // Load it out.
  return DAG.getLoad(Op.getValueType(), Store, FIdx, DAG.getSrcValue(NULL));
}

static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getValueType() == MVT::v4i32 && "Unknown mul to lower!");
  SDOperand LHS = Op.getOperand(0);
  SDOperand RHS = Op.getOperand(1);

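  // Write each 32-bit element as x = xh*2^16 + xl.  Then, modulo 2^32,
  // x*y = xl*yl + ((xh*yl + xl*yh) << 16): vmulouh produces the xl*yl
  // products and vmsumuhm against the rotated RHS produces xh*yl + xl*yh.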
  SDOperand Zero  = BuildSplatI(  0, 1, MVT::v4i32, DAG);
  SDOperand Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG);  // +16 as shift amt.
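  // (vspltisw can only materialize constants in [-16,15], and the vector
  // shift/rotate instructions only honor the low 5 bits of each element, so
  // the -16 splat behaves exactly like a shift amount of +16.)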

  SDOperand RHSSwap =   // = vrlw RHS, 16
    BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG);

  // Shrinkify inputs to v8i16.
  LHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, LHS);
  RHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, RHS);
  RHSSwap = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, RHSSwap);

  // Low parts multiplied together, generating 32-bit results (we ignore the
  // top parts).
  SDOperand LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
                                      LHS, RHS, DAG, MVT::v4i32);

  SDOperand HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
                                      LHS, RHSSwap, Zero, DAG, MVT::v4i32);
  // Shift the high parts up 16 bits.
  HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, Neg16, DAG);
  return DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd);
}

/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
  switch (Op.getOpcode()) {
  default: assert(0 && "Wasn't expecting to be able to lower this!");
  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
  case ISD::SETCC:              return LowerSETCC(Op, DAG);
  case ISD::VASTART:            return LowerVASTART(Op, DAG, VarArgsFrameIndex);
  case ISD::RET:                return LowerRET(Op, DAG);

  case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
  case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG);
  case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);

  // Lower 64-bit shifts.
  case ISD::SHL:                return LowerSHL(Op, DAG);
  case ISD::SRL:                return LowerSRL(Op, DAG);
  case ISD::SRA:                return LowerSRA(Op, DAG);

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::MUL:                return LowerMUL(Op, DAG);
  }
  return SDOperand();
}

//===----------------------------------------------------------------------===//
//  Other Lowering Code
//===----------------------------------------------------------------------===//

std::vector<SDOperand>
PPCTargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
  // FIXME: Add a description of the PPC stack frame format here, or at least
  // point at some docs.
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineBasicBlock& BB = MF.front();
  SSARegMap *RegMap = MF.getSSARegMap();
  std::vector<SDOperand> ArgValues;

  unsigned ArgOffset = 24;
  unsigned GPR_remaining = 8;
  unsigned FPR_remaining = 13;
  unsigned GPR_idx = 0, FPR_idx = 0;
  static const unsigned GPR[] = {
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const unsigned FPR[] = {
    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
    PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
  };

  // Add DAG nodes to load the arguments...  On entry to a function on PPC,
  // the arguments start at offset 24, although they are likely to be passed
  // in registers.
  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
    SDOperand newroot, argt;
    unsigned ObjSize;
    bool needsLoad = false;
    bool ArgLive = !I->use_empty();
    MVT::ValueType ObjectVT = getValueType(I->getType());

    switch (ObjectVT) {
    default: assert(0 && "Unhandled argument type!");
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      ObjSize = 4;
      if (!ArgLive) break;
      if (GPR_remaining > 0) {
        unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
        MF.addLiveIn(GPR[GPR_idx], VReg);
        argt = newroot = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32);
        if (ObjectVT != MVT::i32) {
          unsigned AssertOp = I->getType()->isSigned() ? ISD::AssertSext
                                                       : ISD::AssertZext;
          argt = DAG.getNode(AssertOp, MVT::i32, argt,
                             DAG.getValueType(ObjectVT));
          argt = DAG.getNode(ISD::TRUNCATE, ObjectVT, argt);
        }
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i64:
      ObjSize = 8;
      if (!ArgLive) break;
      if (GPR_remaining > 0) {
        SDOperand argHi, argLo;
        unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
        MF.addLiveIn(GPR[GPR_idx], VReg);
        argHi = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32);
        // If we have two or more remaining argument registers, then both halves
        // of the i64 can be sourced from there.  Otherwise, the lower half will
        // have to come off the stack.  This can happen when an i64 is preceded
        // by 28 bytes of arguments.
        if (GPR_remaining > 1) {
          unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
          MF.addLiveIn(GPR[GPR_idx+1], VReg);
          argLo = DAG.getCopyFromReg(argHi, VReg, MVT::i32);
        } else {
          int FI = MFI->CreateFixedObject(4, ArgOffset+4);
          SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
          argLo = DAG.getLoad(MVT::i32, DAG.getEntryNode(), FIN,
                              DAG.getSrcValue(NULL));
        }
        // Combine the two halves into the i64 argument value.
        argt = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, argLo, argHi);
        newroot = argLo;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      ObjSize = (ObjectVT == MVT::f64) ? 8 : 4;
      if (!ArgLive) {
        if (FPR_remaining > 0) {
          --FPR_remaining;
          ++FPR_idx;
        }
        break;
      }
      if (FPR_remaining > 0) {
        unsigned VReg;
        if (ObjectVT == MVT::f32)
          VReg = RegMap->createVirtualRegister(&PPC::F4RCRegClass);
        else
          VReg = RegMap->createVirtualRegister(&PPC::F8RCRegClass);
        MF.addLiveIn(FPR[FPR_idx], VReg);
        argt = newroot = DAG.getCopyFromReg(DAG.getRoot(), VReg, ObjectVT);
        --FPR_remaining;
        ++FPR_idx;
      } else {
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined
    // above that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      unsigned SubregOffset = 0;
      if (ObjectVT == MVT::i8 || ObjectVT == MVT::i1) SubregOffset = 3;
      if (ObjectVT == MVT::i16) SubregOffset = 2;
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
      FIN = DAG.getNode(ISD::ADD, MVT::i32, FIN,
                        DAG.getConstant(SubregOffset, MVT::i32));
      argt = newroot = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN,
                                   DAG.getSrcValue(NULL));
    }

    // Every 4 bytes of argument space consumes one of the GPRs available for
    // argument passing.
    if (GPR_remaining > 0) {
      unsigned delta = (GPR_remaining > 1 && ObjSize == 8) ? 2 : 1;
      GPR_remaining -= delta;
      GPR_idx += delta;
    }
    ArgOffset += ObjSize;
    if (newroot.Val)
      DAG.setRoot(newroot.getValue(1));

    ArgValues.push_back(argt);
  }

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (F.isVarArg()) {
    VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset);
    SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    std::vector<SDOperand> MemOps;
    for (; GPR_remaining > 0; --GPR_remaining, ++GPR_idx) {
      unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
      MF.addLiveIn(GPR[GPR_idx], VReg);
      SDOperand Val = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32);
      SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Val.getValue(1),
                                    Val, FIN, DAG.getSrcValue(NULL));
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store.
      SDOperand PtrOff = DAG.getConstant(4, getPointerTy());
      FIN = DAG.getNode(ISD::ADD, MVT::i32, FIN, PtrOff);
    }
    if (!MemOps.empty()) {
      MemOps.push_back(DAG.getRoot());
      DAG.setRoot(DAG.getNode(ISD::TokenFactor, MVT::Other, MemOps));
    }
  }

  return ArgValues;
}

std::pair<SDOperand, SDOperand>
PPCTargetLowering::LowerCallTo(SDOperand Chain,
                               const Type *RetTy, bool isVarArg,
                               unsigned CallingConv, bool isTailCall,
                               SDOperand Callee, ArgListTy &Args,
                               SelectionDAG &DAG) {
  // args_to_use accumulates the outgoing arguments for the PPCISD::CALL node;
  // SelectExpr uses it to place the arguments in the appropriate registers.
  std::vector<SDOperand> args_to_use;

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area and the parameter passing area.
  unsigned NumBytes = 24;
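  // Note: under the Darwin 32-bit ABI, the 24-byte linkage area holds the
  // saved SP, CR, and LR plus reserved words, and the parameter area begins
  // immediately after it, at offset 24 from the stack pointer.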

  if (Args.empty()) {
    Chain = DAG.getCALLSEQ_START(Chain,
                                 DAG.getConstant(NumBytes, getPointerTy()));
  } else {
    for (unsigned i = 0, e = Args.size(); i != e; ++i) {
      switch (getValueType(Args[i].second)) {
      default: assert(0 && "Unknown value type!");
      case MVT::i1:
      case MVT::i8:
      case MVT::i16:
      case MVT::i32:
      case MVT::f32:
        NumBytes += 4;
        break;
      case MVT::i64:
      case MVT::f64:
        NumBytes += 8;
        break;
      }
    }

    // To be safe, we always reserve the full 24 bytes of linkage area plus
    // 32 bytes of argument space, since called code may assume it is present
    // (the ABI requires this much to support varargs).
    if (NumBytes < 56) NumBytes = 56;

    // Adjust the stack pointer for the new arguments...
    // These operations are automatically eliminated by the prolog/epilog pass.
    Chain = DAG.getCALLSEQ_START(Chain,
                                 DAG.getConstant(NumBytes, getPointerTy()));

    // Set up a copy of the stack pointer for use loading and storing any
    // arguments that may not fit in the registers available for argument
    // passing.
    SDOperand StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

    // Figure out which arguments are going to go in registers, and which in
    // memory.  Also, if this is a vararg function, floating point operations
    // must be stored to our stack, and loaded into integer regs as well, if
    // any integer regs are available for argument passing.
    unsigned ArgOffset = 24;
    unsigned GPR_remaining = 8;
    unsigned FPR_remaining = 13;

    std::vector<SDOperand> MemOps;
    for (unsigned i = 0, e = Args.size(); i != e; ++i) {
      // PtrOff will be used to store the current argument to the stack if a
      // register cannot be found for it.
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
      MVT::ValueType ArgVT = getValueType(Args[i].second);

      switch (ArgVT) {
      default: assert(0 && "Unexpected ValueType for argument!");
      case MVT::i1:
      case MVT::i8:
      case MVT::i16:
        // Promote the integer to 32 bits.  If the input type is signed, use a
        // sign extend; otherwise use a zero extend.
        if (Args[i].second->isSigned())
          Args[i].first = DAG.getNode(ISD::SIGN_EXTEND, MVT::i32,
                                      Args[i].first);
        else
          Args[i].first = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32,
                                      Args[i].first);
        // FALL THROUGH
      case MVT::i32:
        if (GPR_remaining > 0) {
          args_to_use.push_back(Args[i].first);
          --GPR_remaining;
        } else {
          MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                       Args[i].first, PtrOff,
                                       DAG.getSrcValue(NULL)));
        }
        ArgOffset += 4;
        break;
      case MVT::i64:
        // If we have one free GPR left, we can place the upper half of the i64
        // in it, and store the other half to the stack.  If we have two or more
        // free GPRs, then we can pass both halves of the i64 in registers.
        if (GPR_remaining > 0) {
          SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
                                     Args[i].first, DAG.getConstant(1, MVT::i32));
          SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
                                     Args[i].first, DAG.getConstant(0, MVT::i32));
          args_to_use.push_back(Hi);
          --GPR_remaining;
          if (GPR_remaining > 0) {
            args_to_use.push_back(Lo);
            --GPR_remaining;
          } else {
            SDOperand ConstFour = DAG.getConstant(4, getPointerTy());
            PtrOff = DAG.getNode(ISD::ADD, MVT::i32, PtrOff, ConstFour);
            MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                         Lo, PtrOff, DAG.getSrcValue(NULL)));
          }
        } else {
          MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                       Args[i].first, PtrOff,
                                       DAG.getSrcValue(NULL)));
        }
        ArgOffset += 8;
        break;
      case MVT::f32:
      case MVT::f64:
        if (FPR_remaining > 0) {
          args_to_use.push_back(Args[i].first);
          --FPR_remaining;
          if (isVarArg) {
            SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                          Args[i].first, PtrOff,
                                          DAG.getSrcValue(NULL));
            MemOps.push_back(Store);
            // Float varargs are always shadowed in available integer registers.
            if (GPR_remaining > 0) {
              SDOperand Load = DAG.getLoad(MVT::i32, Store, PtrOff,
                                           DAG.getSrcValue(NULL));
              MemOps.push_back(Load.getValue(1));
              args_to_use.push_back(Load);
              --GPR_remaining;
            }
            if (GPR_remaining > 0 && MVT::f64 == ArgVT) {
              SDOperand ConstFour = DAG.getConstant(4, getPointerTy());
              PtrOff = DAG.getNode(ISD::ADD, MVT::i32, PtrOff, ConstFour);
              SDOperand Load = DAG.getLoad(MVT::i32, Store, PtrOff,
                                           DAG.getSrcValue(NULL));
              MemOps.push_back(Load.getValue(1));
              args_to_use.push_back(Load);
              --GPR_remaining;
            }
          } else {
            // If we have any FPRs remaining, we may also have GPRs remaining.
            // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
            // GPRs.
            if (GPR_remaining > 0) {
              args_to_use.push_back(DAG.getNode(ISD::UNDEF, MVT::i32));
              --GPR_remaining;
            }
            if (GPR_remaining > 0 && MVT::f64 == ArgVT) {
              args_to_use.push_back(DAG.getNode(ISD::UNDEF, MVT::i32));
              --GPR_remaining;
            }
          }
        } else {
          MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                       Args[i].first, PtrOff,
                                       DAG.getSrcValue(NULL)));
        }
        ArgOffset += (ArgVT == MVT::f32) ? 4 : 8;
        break;
      }
    }
    if (!MemOps.empty())
      Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, MemOps);
  }

  std::vector<MVT::ValueType> RetVals;
  MVT::ValueType RetTyVT = getValueType(RetTy);
  MVT::ValueType ActualRetTyVT = RetTyVT;
  if (RetTyVT >= MVT::i1 && RetTyVT <= MVT::i16)
    ActualRetTyVT = MVT::i32;   // Promote result to i32.

  if (RetTyVT == MVT::i64) {
    RetVals.push_back(MVT::i32);
    RetVals.push_back(MVT::i32);
  } else if (RetTyVT != MVT::isVoid) {
    RetVals.push_back(ActualRetTyVT);
  }
  RetVals.push_back(MVT::Other);

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32);

  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);
  Ops.insert(Ops.end(), args_to_use.begin(), args_to_use.end());
  SDOperand TheCall = DAG.getNode(PPCISD::CALL, RetVals, Ops);
  Chain = TheCall.getValue(TheCall.Val->getNumValues()-1);
  Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
                      DAG.getConstant(NumBytes, getPointerTy()));
  SDOperand RetVal = TheCall;

  // If the result is a small value, add a note so that we keep track of the
  // information about whether it is sign or zero extended.
  if (RetTyVT != ActualRetTyVT) {
    RetVal = DAG.getNode(RetTy->isSigned() ? ISD::AssertSext : ISD::AssertZext,
                         MVT::i32, RetVal, DAG.getValueType(RetTyVT));
    RetVal = DAG.getNode(ISD::TRUNCATE, RetTyVT, RetVal);
  } else if (RetTyVT == MVT::i64) {
    RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, RetVal, RetVal.getValue(1));
  }

  return std::make_pair(RetVal, Chain);
}

MachineBasicBlock *
PPCTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB) {
  assert((MI->getOpcode() == PPC::SELECT_CC_Int ||
          MI->getOpcode() == PPC::SELECT_CC_F4 ||
          MI->getOpcode() == PPC::SELECT_CC_F8 ||
          MI->getOpcode() == PPC::SELECT_CC_VRRC) &&
         "Unexpected instr type to insert");

  // To "insert" a SELECT_CC instruction, we actually have to insert the
  // diamond control-flow pattern.  The incoming instruction knows the
  // destination vreg to set, the condition code register to branch on, the
  // true/false values to select between, and a branch opcode to use.
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  ilist<MachineBasicBlock>::iterator It = BB;
  ++It;
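  // It now points just past BB; the new blocks are inserted there so they
  // appear immediately after BB in the function's block list.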

  //  thisMBB:
  //  ...
  //   TrueVal = ...
  //   cmpTY ccX, r1, r2
  //   bCC sinkMBB
  //   fallthrough --> copy0MBB
  MachineBasicBlock *thisMBB = BB;
  MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
  MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
  BuildMI(BB, MI->getOperand(4).getImmedValue(), 2)
    .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
  MachineFunction *F = BB->getParent();
  F->getBasicBlockList().insert(It, copy0MBB);
  F->getBasicBlockList().insert(It, sinkMBB);
  // Update machine-CFG edges by first adding all successors of the current
  // block to the new block which will contain the Phi node for the select.
  for (MachineBasicBlock::succ_iterator i = BB->succ_begin(),
       e = BB->succ_end(); i != e; ++i)
    sinkMBB->addSuccessor(*i);
  // Next, remove all successors of the current block, and add the true
  // and fallthrough blocks as its successors.
  while (!BB->succ_empty())
    BB->removeSuccessor(BB->succ_begin());
  BB->addSuccessor(copy0MBB);
  BB->addSuccessor(sinkMBB);

  //  copy0MBB:
  //   %FalseValue = ...
  //   # fallthrough to sinkMBB
  BB = copy0MBB;

  // Update machine-CFG edges.
  BB->addSuccessor(sinkMBB);

  //  sinkMBB:
  //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
  //  ...
  BB = sinkMBB;
  BuildMI(BB, PPC::PHI, 4, MI->getOperand(0).getReg())
    .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
    .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);

  delete MI;   // The pseudo instruction is gone now.
  return BB;
}

//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//

SDOperand PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  TargetMachine &TM = getTargetMachine();
  SelectionDAG &DAG = DCI.DAG;
  switch (N->getOpcode()) {
  default: break;
  case ISD::SINT_TO_FP:
    if (TM.getSubtarget<PPCSubtarget>().is64Bit()) {
      if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
        // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
        // We allow the src/dst to be either f32/f64, but the intermediate
        // type must be i64.
        if (N->getOperand(0).getValueType() == MVT::i64) {
          SDOperand Val = N->getOperand(0).getOperand(0);
          if (Val.getValueType() == MVT::f32) {
            Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
            DCI.AddToWorklist(Val.Val);
          }

          Val = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Val);
          DCI.AddToWorklist(Val.Val);
          Val = DAG.getNode(PPCISD::FCFID, MVT::f64, Val);
          DCI.AddToWorklist(Val.Val);
          if (N->getValueType(0) == MVT::f32) {
            Val = DAG.getNode(ISD::FP_ROUND, MVT::f32, Val);
            DCI.AddToWorklist(Val.Val);
          }
          return Val;
        } else if (N->getOperand(0).getValueType() == MVT::i32) {
          // FIXME: If the intermediate type is i32, we could avoid the
          // load/store here too.
        }
      }
    }
    break;
  case ISD::STORE:
    // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
    if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
        N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
        N->getOperand(1).getValueType() == MVT::i32) {
      SDOperand Val = N->getOperand(1).getOperand(0);
      if (Val.getValueType() == MVT::f32) {
        Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
        DCI.AddToWorklist(Val.Val);
      }
      Val = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Val);
      DCI.AddToWorklist(Val.Val);

      Val = DAG.getNode(PPCISD::STFIWX, MVT::Other, N->getOperand(0), Val,
                        N->getOperand(2), N->getOperand(3));
      DCI.AddToWorklist(Val.Val);
      return Val;
    }
    break;
  case PPCISD::VCMP: {
    // If a VCMPo node already exists with exactly the same operands as this
    // node, use its result instead of this node (VCMPo computes both a CR6 and
    // a normal output).
    //
    if (!N->getOperand(0).hasOneUse() &&
        !N->getOperand(1).hasOneUse() &&
        !N->getOperand(2).hasOneUse()) {
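      // (We only bother scanning when every operand has additional uses; if
      // any operand is used solely by this node, no identical VCMPo can
      // exist elsewhere.)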

      // Scan all of the users of the LHS, looking for VCMPo's that match.
      SDNode *VCMPoNode = 0;

      SDNode *LHSN = N->getOperand(0).Val;
      for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
           UI != E; ++UI)
        if ((*UI)->getOpcode() == PPCISD::VCMPo &&
            (*UI)->getOperand(1) == N->getOperand(1) &&
            (*UI)->getOperand(2) == N->getOperand(2) &&
            (*UI)->getOperand(0) == N->getOperand(0)) {
          VCMPoNode = *UI;
          break;
        }

      // If there are non-zero uses of the flag value, use the VCMPo node!
      if (VCMPoNode && !VCMPoNode->hasNUsesOfValue(0, 1))
        return SDOperand(VCMPoNode, 0);
    }
    break;
  }
  }

  return SDOperand();
}

//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
                                                       uint64_t Mask,
                                                       uint64_t &KnownZero,
                                                       uint64_t &KnownOne,
                                                       unsigned Depth) const {
  KnownZero = 0;
  KnownOne = 0;
  switch (Op.getOpcode()) {
  default: break;
  case ISD::INTRINSIC_WO_CHAIN: {
    switch (cast<ConstantSDNode>(Op.getOperand(0))->getValue()) {
    default: break;
    case Intrinsic::ppc_altivec_vcmpbfp_p:
    case Intrinsic::ppc_altivec_vcmpeqfp_p:
    case Intrinsic::ppc_altivec_vcmpequb_p:
    case Intrinsic::ppc_altivec_vcmpequh_p:
    case Intrinsic::ppc_altivec_vcmpequw_p:
    case Intrinsic::ppc_altivec_vcmpgefp_p:
    case Intrinsic::ppc_altivec_vcmpgtfp_p:
    case Intrinsic::ppc_altivec_vcmpgtsb_p:
    case Intrinsic::ppc_altivec_vcmpgtsh_p:
    case Intrinsic::ppc_altivec_vcmpgtsw_p:
    case Intrinsic::ppc_altivec_vcmpgtub_p:
    case Intrinsic::ppc_altivec_vcmpgtuh_p:
    case Intrinsic::ppc_altivec_vcmpgtuw_p:
      KnownZero = ~1U;  // All bits but the low one are known to be zero.
      break;
    }
  }
  }
}

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
PPCTargetLowering::ConstraintType
PPCTargetLowering::getConstraintType(char ConstraintLetter) const {
  switch (ConstraintLetter) {
  default: break;
  case 'b':
  case 'r':
  case 'f':
  case 'v':
  case 'y':
    return C_RegisterClass;
  }
  return TargetLowering::getConstraintType(ConstraintLetter);
}

std::vector<unsigned> PPCTargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                  MVT::ValueType VT) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {      // GCC RS6000 Constraint Letters
    default: break;  // Unknown constraint letter.
    case 'b':
      return make_vector<unsigned>(/*no R0*/ PPC::R1 , PPC::R2 , PPC::R3 ,
                                   PPC::R4 , PPC::R5 , PPC::R6 , PPC::R7 ,
                                   PPC::R8 , PPC::R9 , PPC::R10, PPC::R11,
                                   PPC::R12, PPC::R13, PPC::R14, PPC::R15,
                                   PPC::R16, PPC::R17, PPC::R18, PPC::R19,
                                   PPC::R20, PPC::R21, PPC::R22, PPC::R23,
                                   PPC::R24, PPC::R25, PPC::R26, PPC::R27,
                                   PPC::R28, PPC::R29, PPC::R30, PPC::R31,
                                   0);
    case 'r':
      return make_vector<unsigned>(PPC::R0 , PPC::R1 , PPC::R2 , PPC::R3 ,
                                   PPC::R4 , PPC::R5 , PPC::R6 , PPC::R7 ,
                                   PPC::R8 , PPC::R9 , PPC::R10, PPC::R11,
                                   PPC::R12, PPC::R13, PPC::R14, PPC::R15,
                                   PPC::R16, PPC::R17, PPC::R18, PPC::R19,
                                   PPC::R20, PPC::R21, PPC::R22, PPC::R23,
                                   PPC::R24, PPC::R25, PPC::R26, PPC::R27,
                                   PPC::R28, PPC::R29, PPC::R30, PPC::R31,
                                   0);
    case 'f':
      return make_vector<unsigned>(PPC::F0 , PPC::F1 , PPC::F2 , PPC::F3 ,
                                   PPC::F4 , PPC::F5 , PPC::F6 , PPC::F7 ,
                                   PPC::F8 , PPC::F9 , PPC::F10, PPC::F11,
                                   PPC::F12, PPC::F13, PPC::F14, PPC::F15,
                                   PPC::F16, PPC::F17, PPC::F18, PPC::F19,
                                   PPC::F20, PPC::F21, PPC::F22, PPC::F23,
                                   PPC::F24, PPC::F25, PPC::F26, PPC::F27,
                                   PPC::F28, PPC::F29, PPC::F30, PPC::F31,
                                   0);
    case 'v':
      return make_vector<unsigned>(PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 ,
                                   PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
                                   PPC::V8 , PPC::V9 , PPC::V10, PPC::V11,
                                   PPC::V12, PPC::V13, PPC::V14, PPC::V15,
                                   PPC::V16, PPC::V17, PPC::V18, PPC::V19,
                                   PPC::V20, PPC::V21, PPC::V22, PPC::V23,
                                   PPC::V24, PPC::V25, PPC::V26, PPC::V27,
                                   PPC::V28, PPC::V29, PPC::V30, PPC::V31,
                                   0);
    case 'y':
      return make_vector<unsigned>(PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3,
                                   PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7,
                                   0);
    }
  }

  return std::vector<unsigned>();
}

/// isOperandValidForConstraint - Return true if the specified operand is
/// valid for the given constraint letter on this target.
bool PPCTargetLowering::
isOperandValidForConstraint(SDOperand Op, char Letter) {
  switch (Letter) {
  default: break;
  case 'I':
  case 'J':
  case 'K':
  case 'L':
  case 'M':
  case 'N':
  case 'O':
  case 'P': {
    if (!isa<ConstantSDNode>(Op)) return false;  // Must be an immediate.
    unsigned Value = cast<ConstantSDNode>(Op)->getValue();
    switch (Letter) {
    default: assert(0 && "Unknown constraint letter!");
    case 'I':  // "I" is a signed 16-bit constant.
      return (short)Value == (int)Value;
    case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
    case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
      return (short)Value == 0;
    case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
      return (Value >> 16) == 0;
    case 'M':  // "M" is a constant that is greater than 31.
      return Value > 31;
    case 'N':  // "N" is a positive constant that is an exact power of two.
      return (int)Value > 0 && isPowerOf2_32(Value);
    case 'O':  // "O" is the constant zero.
      return Value == 0;
    case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
      return (short)-Value == (int)-Value;
    }
    break;
  }
  }

  // Handle standard constraint letters.
  return TargetLowering::isOperandValidForConstraint(Op, Letter);
}

/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode.
bool PPCTargetLowering::isLegalAddressImmediate(int64_t V) const {
  // PPC allows a sign-extended 16-bit immediate field, i.e. [-32768, 32767].
  return V >= -(1 << 15) && V < (1 << 15);
}