PPCISelLowering.cpp revision d0608e191ff9c00af68985f246410c219d1bec57
1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file was developed by Chris Lattner and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the PPCISelLowering class.
11//
12//===----------------------------------------------------------------------===//
13
14#include "PPCISelLowering.h"
15#include "PPCTargetMachine.h"
16#include "llvm/ADT/VectorExtras.h"
17#include "llvm/Analysis/ScalarEvolutionExpressions.h"
18#include "llvm/CodeGen/MachineFrameInfo.h"
19#include "llvm/CodeGen/MachineFunction.h"
20#include "llvm/CodeGen/MachineInstrBuilder.h"
21#include "llvm/CodeGen/SelectionDAG.h"
22#include "llvm/CodeGen/SSARegMap.h"
23#include "llvm/Constants.h"
24#include "llvm/Function.h"
25#include "llvm/Intrinsics.h"
26#include "llvm/Support/MathExtras.h"
27#include "llvm/Target/TargetOptions.h"
28using namespace llvm;
29
30PPCTargetLowering::PPCTargetLowering(TargetMachine &TM)
31  : TargetLowering(TM) {
32
33  // Fold away setcc operations if possible.
34  setSetCCIsExpensive();
35  setPow2DivIsCheap();
36
37  // Use _setjmp/_longjmp instead of setjmp/longjmp.
38  setUseUnderscoreSetJmpLongJmp(true);
39
40  // Set up the register classes.
41  addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
42  addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
43  addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);
44
45  setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
46  setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
47
48  // PowerPC has no intrinsics for these particular operations
49  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
50  setOperationAction(ISD::MEMSET, MVT::Other, Expand);
51  setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
52
53  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
54  setOperationAction(ISD::SEXTLOAD, MVT::i1, Expand);
55  setOperationAction(ISD::SEXTLOAD, MVT::i8, Expand);
56
57  // PowerPC has no SREM/UREM instructions
58  setOperationAction(ISD::SREM, MVT::i32, Expand);
59  setOperationAction(ISD::UREM, MVT::i32, Expand);
60
61  // We don't support sin/cos/sqrt/fmod
62  setOperationAction(ISD::FSIN , MVT::f64, Expand);
63  setOperationAction(ISD::FCOS , MVT::f64, Expand);
64  setOperationAction(ISD::FREM , MVT::f64, Expand);
65  setOperationAction(ISD::FSIN , MVT::f32, Expand);
66  setOperationAction(ISD::FCOS , MVT::f32, Expand);
67  setOperationAction(ISD::FREM , MVT::f32, Expand);
68
69  // If we don't have a hardware square-root instruction, expand FSQRT.
70  if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
71    setOperationAction(ISD::FSQRT, MVT::f64, Expand);
72    setOperationAction(ISD::FSQRT, MVT::f32, Expand);
73  }
74
75  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
76  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
77
78  // PowerPC does not have BSWAP, CTPOP or CTTZ
79  setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
80  setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
81  setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
82
83  // PowerPC does not have ROTR
84  setOperationAction(ISD::ROTR, MVT::i32   , Expand);
85
86  // PowerPC does not have Select
87  setOperationAction(ISD::SELECT, MVT::i32, Expand);
88  setOperationAction(ISD::SELECT, MVT::f32, Expand);
89  setOperationAction(ISD::SELECT, MVT::f64, Expand);
90
91  // PowerPC wants to turn select_cc of FP into fsel when possible.
92  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
93  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
94
95  // PowerPC wants to optimize integer setcc a bit
96  setOperationAction(ISD::SETCC, MVT::i32, Custom);
97
98  // PowerPC does not have BRCOND which requires SetCC
99  setOperationAction(ISD::BRCOND, MVT::Other, Expand);
100
101  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
102  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
103
104  // PowerPC does not have [U|S]INT_TO_FP
105  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
106  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
107
108  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
109  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
110
111  // PowerPC does not have truncstore for i1.
112  setOperationAction(ISD::TRUNCSTORE, MVT::i1, Promote);
113
114  // Support label based line numbers.
115  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
116  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
117  // FIXME - use subtarget debug flags
118  if (!TM.getSubtarget<PPCSubtarget>().isDarwin())
119    setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);
120
121  // We want to legalize GlobalAddress and ConstantPool nodes into the
122  // appropriate instructions to materialize the address.
123  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
124  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
125
126  // RET must be custom lowered, to meet ABI requirements
127  setOperationAction(ISD::RET               , MVT::Other, Custom);
128
129  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
130  setOperationAction(ISD::VASTART           , MVT::Other, Custom);
131
132  // Use the default implementation.
133  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
134  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
135  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
136  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
137  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
138  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
139
140  // We want to custom lower some of our intrinsics.
141  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
142
143  if (TM.getSubtarget<PPCSubtarget>().is64Bit()) {
144    // They also have instructions for converting between i64 and fp.
145    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
146    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
147
148    // FIXME: disable this lowered code.  This generates 64-bit register values,
149    // and we don't model the fact that the top part is clobbered by calls.  We
150    // need to flag these together so that the value isn't live across a call.
151    //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
152
153    // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
154    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
155  } else {
156    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
157    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
158  }
159
160  if (TM.getSubtarget<PPCSubtarget>().has64BitRegs()) {
161    // 64 bit PowerPC implementations can support i64 types directly
162    addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
163    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
164    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
165  } else {
166    // 32 bit PowerPC wants to expand i64 shifts itself.
167    setOperationAction(ISD::SHL, MVT::i64, Custom);
168    setOperationAction(ISD::SRL, MVT::i64, Custom);
169    setOperationAction(ISD::SRA, MVT::i64, Custom);
170  }
171
172  if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
173    // First set operation action for all vector types to expand. Then we
174    // will selectively turn on ones that can be effectively codegen'd.
175    for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
176         VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
177      // add/sub/and/or/xor are legal for all supported vector VT's.
178      setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
179      setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
180      setOperationAction(ISD::AND , (MVT::ValueType)VT, Legal);
181      setOperationAction(ISD::OR  , (MVT::ValueType)VT, Legal);
182      setOperationAction(ISD::XOR , (MVT::ValueType)VT, Legal);
183
184      // We promote all shuffles to v16i8.
185      setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Promote);
186      AddPromotedToType(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, MVT::v16i8);
187
188      setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
189      setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
190      setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
191      setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
192      setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
193      setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
194      setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
195      setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Expand);
196
197      setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Expand);
198    }
199
200    // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
201    // with merges, splats, etc.
202    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
203
204    addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass);
205    addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass);
206    addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass);
207    addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass);
208
209    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
210
211    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
212    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
213
214    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
215    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
216    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
217    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
218  }
219
220  setSetCCResultContents(ZeroOrOneSetCCResult);
221  setStackPointerRegisterToSaveRestore(PPC::R1);
222
223  // We have target-specific dag combine patterns for the following nodes:
224  setTargetDAGCombine(ISD::SINT_TO_FP);
225  setTargetDAGCombine(ISD::STORE);
226
227  computeRegisterProperties();
228}
229
230const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
231  switch (Opcode) {
232  default: return 0;
233  case PPCISD::FSEL:          return "PPCISD::FSEL";
234  case PPCISD::FCFID:         return "PPCISD::FCFID";
235  case PPCISD::FCTIDZ:        return "PPCISD::FCTIDZ";
236  case PPCISD::FCTIWZ:        return "PPCISD::FCTIWZ";
237  case PPCISD::STFIWX:        return "PPCISD::STFIWX";
238  case PPCISD::VMADDFP:       return "PPCISD::VMADDFP";
239  case PPCISD::VNMSUBFP:      return "PPCISD::VNMSUBFP";
240  case PPCISD::VPERM:         return "PPCISD::VPERM";
241  case PPCISD::Hi:            return "PPCISD::Hi";
242  case PPCISD::Lo:            return "PPCISD::Lo";
243  case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
244  case PPCISD::SRL:           return "PPCISD::SRL";
245  case PPCISD::SRA:           return "PPCISD::SRA";
246  case PPCISD::SHL:           return "PPCISD::SHL";
247  case PPCISD::EXTSW_32:      return "PPCISD::EXTSW_32";
248  case PPCISD::STD_32:        return "PPCISD::STD_32";
249  case PPCISD::CALL:          return "PPCISD::CALL";
250  case PPCISD::RET_FLAG:      return "PPCISD::RET_FLAG";
251  case PPCISD::MFCR:          return "PPCISD::MFCR";
252  case PPCISD::VCMP:          return "PPCISD::VCMP";
253  case PPCISD::VCMPo:         return "PPCISD::VCMPo";
254  }
255}
256
257/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
258static bool isFloatingPointZero(SDOperand Op) {
259  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
260    return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0);
261  else if (Op.getOpcode() == ISD::EXTLOAD || Op.getOpcode() == ISD::LOAD) {
262    // Maybe this has already been legalized into the constant pool?
263    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
264      if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->get()))
265        return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0);
266  }
267  return false;
268}
269
270/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
271/// true if Op is undef or if it matches the specified value.
272static bool isConstantOrUndef(SDOperand Op, unsigned Val) {
273  return Op.getOpcode() == ISD::UNDEF ||
274         cast<ConstantSDNode>(Op)->getValue() == Val;
275}
276
277/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
278/// VPKUHUM instruction.
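/// vpkuhum packs two v8i16 inputs by keeping the low-order byte of each
/// halfword; expressed as a v16i8 shuffle mask that is <1,3,5,...,31>, which
/// is exactly what the loop below checks (undef elements are allowed).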
279bool PPC::isVPKUHUMShuffleMask(SDNode *N) {
280  for (unsigned i = 0; i != 16; ++i)
281    if (!isConstantOrUndef(N->getOperand(i),  i*2+1))
282      return false;
283  return true;
284}
285
286/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
287/// VPKUWUM instruction.
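/// vpkuwum keeps the low-order halfword of each word of its two inputs; as a
/// byte shuffle mask that is <2,3, 6,7, 10,11, ..., 30,31>, checked two bytes
/// at a time below.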
288bool PPC::isVPKUWUMShuffleMask(SDNode *N) {
289  for (unsigned i = 0; i != 16; i += 2)
290    if (!isConstantOrUndef(N->getOperand(i  ),  i*2+2) ||
291        !isConstantOrUndef(N->getOperand(i+1),  i*2+3))
292      return false;
293  return true;
294}
295
296/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
297/// amount, otherwise return -1.
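/// vsldoi concatenates its two inputs and extracts 16 consecutive bytes
/// starting at the shift amount, so a matching mask is <N, N+1, ..., N+15>
/// for some N, with undef elements permitted.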
298int PPC::isVSLDOIShuffleMask(SDNode *N) {
299  assert(N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");
300  // Find the first non-undef value in the shuffle mask.
301  unsigned i;
302  for (i = 0; i != 16 && N->getOperand(i).getOpcode() == ISD::UNDEF; ++i)
303    /*search*/;
304
305  if (i == 16) return -1;  // all undef.
306
307  // Otherwise, check to see if the rest of the elements are consecutively
308  // numbered from this value.
309  unsigned ShiftAmt = cast<ConstantSDNode>(N->getOperand(i))->getValue();
310  if (ShiftAmt < i) return -1;
311  ShiftAmt -= i;
312
313  // Check the rest of the elements to see if they are consecutive.
314  for (++i; i != 16; ++i)
315    if (!isConstantOrUndef(N->getOperand(i), ShiftAmt+i))
316      return -1;
317
318  return ShiftAmt;
319}
320
321/// isVSLDOIRotateShuffleMask - If this is a vsldoi rotate shuffle mask,
322/// return the shift amount, otherwise return -1.  Note that vsldoi(x,x) will
323/// result in the shuffle being changed to shuffle(x,undef, ...) with
324/// transformed byte numbers.
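/// The check is the same as isVSLDOIShuffleMask above, except that byte
/// indices wrap modulo 16, since both shuffle inputs are the same vector.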
325int PPC::isVSLDOIRotateShuffleMask(SDNode *N) {
326  assert(N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");
327  // Find the first non-undef value in the shuffle mask.
328  unsigned i;
329  for (i = 0; i != 16 && N->getOperand(i).getOpcode() == ISD::UNDEF; ++i)
330    /*search*/;
331
332  if (i == 16) return -1;  // all undef.
333
334  // Otherwise, check to see if the rest of the elements are consecutively
335  // numbered from this value.
336  unsigned ShiftAmt = cast<ConstantSDNode>(N->getOperand(i))->getValue();
337  if (ShiftAmt < i) return -1;
338  ShiftAmt -= i;
339
340  // Check the rest of the elements to see if they are consecutive.
341  for (++i; i != 16; ++i)
342    if (!isConstantOrUndef(N->getOperand(i), (ShiftAmt+i) & 15))
343      return -1;
344
345  return ShiftAmt;
346}
347
348/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
349/// specifies a splat of a single element that is suitable for input to
350/// VSPLTB/VSPLTH/VSPLTW.
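/// For example, with EltSize == 4 a splat of word element 2 of the input has
/// the byte mask <8,9,10,11, 8,9,10,11, 8,9,10,11, 8,9,10,11>.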
351bool PPC::isSplatShuffleMask(SDNode *N, unsigned EltSize) {
352  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
353         N->getNumOperands() == 16 &&
354         (EltSize == 1 || EltSize == 2 || EltSize == 4));
355
356  // This is a splat operation if each element of the permute is the same, and
357  // if the value doesn't reference the second vector.
358  unsigned ElementBase = 0;
359  SDOperand Elt = N->getOperand(0);
360  if (ConstantSDNode *EltV = dyn_cast<ConstantSDNode>(Elt))
361    ElementBase = EltV->getValue();
362  else
363    return false;   // FIXME: Handle UNDEF elements too!
364
365  if (cast<ConstantSDNode>(Elt)->getValue() >= 16)
366    return false;
367
368  // Check that they are consecutive.
369  for (unsigned i = 1; i != EltSize; ++i) {
370    if (!isa<ConstantSDNode>(N->getOperand(i)) ||
371        cast<ConstantSDNode>(N->getOperand(i))->getValue() != i+ElementBase)
372      return false;
373  }
374
375  assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
376  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
377    assert(isa<ConstantSDNode>(N->getOperand(i)) &&
378           "Invalid VECTOR_SHUFFLE mask!");
379    for (unsigned j = 0; j != EltSize; ++j)
380      if (N->getOperand(i+j) != N->getOperand(j))
381        return false;
382  }
383
384  return true;
385}
386
387/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
388/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
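/// The first mask byte is the byte index of the splatted element; dividing by
/// EltSize converts it to the element index that vsplt[bhw] takes.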
389unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
390  assert(isSplatShuffleMask(N, EltSize));
391  return cast<ConstantSDNode>(N->getOperand(0))->getValue() / EltSize;
392}
393
394/// isVecSplatImm - Return true if this is a build_vector of constants which
395/// can be formed by using a vspltis[bhw] instruction.  The ByteSize field
396/// indicates the number of bytes of each element [124] -> [bhw].
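/// For example, a build_vector of i16 elements that are all 0xFFFF matches
/// with ByteSize == 1 and *Val == -1 (i.e. vspltisb -1), because the repeated
/// halves reduce to a single sign-extendable byte.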
397bool PPC::isVecSplatImm(SDNode *N, unsigned ByteSize, char *Val) {
398  SDOperand OpVal(0, 0);
399  // Check to see if this buildvec has a single non-undef value in its elements.
400  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
401    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
402    if (OpVal.Val == 0)
403      OpVal = N->getOperand(i);
404    else if (OpVal != N->getOperand(i))
405      return false;
406  }
407
408  if (OpVal.Val == 0) return false;  // All UNDEF: use implicit def.
409
410  unsigned ValSizeInBytes = 0;
411  uint64_t Value = 0;
412  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
413    Value = CN->getValue();
414    ValSizeInBytes = MVT::getSizeInBits(CN->getValueType(0))/8;
415  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
416    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
417    Value = FloatToBits(CN->getValue());
418    ValSizeInBytes = 4;
419  }
420
421  // If the splat value is larger than the element value, then we can never do
422  // this splat.  The only value whose replicated bits would fit into our
423  // immediate field is zero, and we prefer to use vxor for that.
424  if (ValSizeInBytes < ByteSize) return false;
425
426  // If the element value is larger than the splat value, cut it in half and
427  // check to see if the two halves are equal.  Continue doing this until we
428  // get to ByteSize.  This allows us to handle 0x01010101 as 0x01.
429  while (ValSizeInBytes > ByteSize) {
430    ValSizeInBytes >>= 1;
431
432    // If the top half equals the bottom half, we're still ok.
433    if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
434         (Value                        & ((1 << (8*ValSizeInBytes))-1)))
435      return false;
436  }
437
438  // Properly sign extend the value.
439  int ShAmt = (4-ByteSize)*8;
440  int MaskVal = ((int)Value << ShAmt) >> ShAmt;
441
442  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
443  if (MaskVal == 0) return false;
444
445  if (Val) *Val = MaskVal;
446
447  // Finally, if this value fits in a 5 bit sext field, return true.
448  return ((MaskVal << (32-5)) >> (32-5)) == MaskVal;
449}
450
451
452/// LowerOperation - Provide custom lowering hooks for some operations.
453///
454SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
455  switch (Op.getOpcode()) {
456  default: assert(0 && "Wasn't expecting to be able to lower this!");
457  case ISD::FP_TO_SINT: {
458    assert(MVT::isFloatingPoint(Op.getOperand(0).getValueType()));
459    SDOperand Src = Op.getOperand(0);
460    if (Src.getValueType() == MVT::f32)
461      Src = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Src);
462
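    // fctiwz/fctidz produce the converted integer in a floating-point
    // register; the BIT_CONVERT below then moves those bits to an integer
    // value, which goes through memory as noted further down.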
463    SDOperand Tmp;
464    switch (Op.getValueType()) {
465    default: assert(0 && "Unhandled FP_TO_SINT type in custom expander!");
466    case MVT::i32:
467      Tmp = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Src);
468      break;
469    case MVT::i64:
470      Tmp = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Src);
471      break;
472    }
473
474    // Convert the FP value to an int value through memory.
475    SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::i64, Tmp);
476    if (Op.getValueType() == MVT::i32)
477      Bits = DAG.getNode(ISD::TRUNCATE, MVT::i32, Bits);
478    return Bits;
479  }
480  case ISD::SINT_TO_FP:
481    if (Op.getOperand(0).getValueType() == MVT::i64) {
482      SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0));
483      SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Bits);
484      if (Op.getValueType() == MVT::f32)
485        FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP);
486      return FP;
487    } else {
488      assert(Op.getOperand(0).getValueType() == MVT::i32 &&
489             "Unhandled SINT_TO_FP type in custom expander!");
490      // Since we only generate this in 64-bit mode, we can take advantage of
491      // 64-bit registers.  In particular, sign extend the input value into the
492      // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
493      // then lfd it and fcfid it.
494      MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
495      int FrameIdx = FrameInfo->CreateStackObject(8, 8);
496      SDOperand FIdx = DAG.getFrameIndex(FrameIdx, MVT::i32);
497
498      SDOperand Ext64 = DAG.getNode(PPCISD::EXTSW_32, MVT::i32,
499                                    Op.getOperand(0));
500
501      // STD the extended value into the stack slot.
502      SDOperand Store = DAG.getNode(PPCISD::STD_32, MVT::Other,
503                                    DAG.getEntryNode(), Ext64, FIdx,
504                                    DAG.getSrcValue(NULL));
505      // Load the value as a double.
506      SDOperand Ld = DAG.getLoad(MVT::f64, Store, FIdx, DAG.getSrcValue(NULL));
507
508      // FCFID it and return it.
509      SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Ld);
510      if (Op.getValueType() == MVT::f32)
511        FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP);
512      return FP;
513    }
514    break;
515
516  case ISD::SELECT_CC: {
517    // Turn FP only select_cc's into fsel instructions.
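    // PPCISD::FSEL(C, T, F) yields T when C >= 0.0 and F otherwise.  Each
    // ordered comparison below is reduced to that form by comparing against
    // zero directly (when RHS is 0.0) or by materializing LHS-RHS, negating
    // and/or swapping the true/false operands as needed.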
518    if (!MVT::isFloatingPoint(Op.getOperand(0).getValueType()) ||
519        !MVT::isFloatingPoint(Op.getOperand(2).getValueType()))
520      break;
521
522    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
523
524    // Cannot handle SETEQ/SETNE.
525    if (CC == ISD::SETEQ || CC == ISD::SETNE) break;
526
527    MVT::ValueType ResVT = Op.getValueType();
528    MVT::ValueType CmpVT = Op.getOperand(0).getValueType();
529    SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);
530    SDOperand TV  = Op.getOperand(2), FV  = Op.getOperand(3);
531
532    // If the RHS of the comparison is a 0.0, we don't need to do the
533    // subtraction at all.
534    if (isFloatingPointZero(RHS))
535      switch (CC) {
536      default: break;       // SETUO etc aren't handled by fsel.
537      case ISD::SETULT:
538      case ISD::SETLT:
539        std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
540      case ISD::SETUGE:
541      case ISD::SETGE:
542        if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
543          LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
544        return DAG.getNode(PPCISD::FSEL, ResVT, LHS, TV, FV);
545      case ISD::SETUGT:
546      case ISD::SETGT:
547        std::swap(TV, FV);  // fsel is natively setge, swap operands for setgt
548      case ISD::SETULE:
549      case ISD::SETLE:
550        if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
551          LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
552        return DAG.getNode(PPCISD::FSEL, ResVT,
553                           DAG.getNode(ISD::FNEG, MVT::f64, LHS), TV, FV);
554      }
555
556    SDOperand Cmp;
557    switch (CC) {
558    default: break;       // SETUO etc aren't handled by fsel.
559    case ISD::SETULT:
560    case ISD::SETLT:
561      Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
562      if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
563        Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
564      return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
565    case ISD::SETUGE:
566    case ISD::SETGE:
567      Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
568      if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
569        Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
570      return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
571    case ISD::SETUGT:
572    case ISD::SETGT:
573      Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
574      if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
575        Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
576      return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
577    case ISD::SETULE:
578    case ISD::SETLE:
579      Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
580      if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
581        Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
582      return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
583    }
584    break;
585  }
586  case ISD::SHL: {
587    assert(Op.getValueType() == MVT::i64 &&
588           Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SHL!");
589    // The generic code does a fine job expanding shift by a constant.
590    if (isa<ConstantSDNode>(Op.getOperand(1))) break;
591
592    // Otherwise, expand into a bunch of logical ops.  Note that these ops
593    // depend on the PPC behavior for oversized shift amounts.
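    // For any Amt in [0,64) this computes OutLo = Lo << Amt and
    //   OutHi = (Hi << Amt) | (Lo >> (32-Amt)) | (Lo << (Amt-32)),
    // which is correct because PPC's 32-bit shifts yield 0 whenever the 6-bit
    // shift amount is 32 or more.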
594    SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
595                               DAG.getConstant(0, MVT::i32));
596    SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
597                               DAG.getConstant(1, MVT::i32));
598    SDOperand Amt = Op.getOperand(1);
599
600    SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
601                                 DAG.getConstant(32, MVT::i32), Amt);
602    SDOperand Tmp2 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Amt);
603    SDOperand Tmp3 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Tmp1);
604    SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
605    SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
606                                 DAG.getConstant(-32U, MVT::i32));
607    SDOperand Tmp6 = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Tmp5);
608    SDOperand OutHi = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6);
609    SDOperand OutLo = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Amt);
610    return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
611  }
612  case ISD::SRL: {
613    assert(Op.getValueType() == MVT::i64 &&
614           Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SRL!");
615    // The generic code does a fine job expanding shift by a constant.
616    if (isa<ConstantSDNode>(Op.getOperand(1))) break;
617
618    // Otherwise, expand into a bunch of logical ops.  Note that these ops
619    // depend on the PPC behavior for oversized shift amounts.
620    SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
621                               DAG.getConstant(0, MVT::i32));
622    SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
623                               DAG.getConstant(1, MVT::i32));
624    SDOperand Amt = Op.getOperand(1);
625
626    SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
627                                 DAG.getConstant(32, MVT::i32), Amt);
628    SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt);
629    SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1);
630    SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
631    SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
632                                 DAG.getConstant(-32U, MVT::i32));
633    SDOperand Tmp6 = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Tmp5);
634    SDOperand OutLo = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6);
635    SDOperand OutHi = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Amt);
636    return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
637  }
638  case ISD::SRA: {
639    assert(Op.getValueType() == MVT::i64 &&
640           Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SRA!");
641    // The generic code does a fine job expanding shift by a constant.
642    if (isa<ConstantSDNode>(Op.getOperand(1))) break;
643
644    // Otherwise, expand into a bunch of logical ops, followed by a select_cc.
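    // The high word is simply Hi >>s Amt (sraw sign-fills for amounts >= 32).
    // The low word is the usual (Lo >> Amt) | (Hi << (32-Amt)) for Amt <= 32,
    // but for larger amounts it must be Hi >>s (Amt-32), so a select on
    // whether Amt-32 is positive picks between the two forms.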
645    SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
646                               DAG.getConstant(0, MVT::i32));
647    SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
648                               DAG.getConstant(1, MVT::i32));
649    SDOperand Amt = Op.getOperand(1);
650
651    SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
652                                 DAG.getConstant(32, MVT::i32), Amt);
653    SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt);
654    SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1);
655    SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
656    SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
657                                 DAG.getConstant(-32U, MVT::i32));
658    SDOperand Tmp6 = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Tmp5);
659    SDOperand OutHi = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Amt);
660    SDOperand OutLo = DAG.getSelectCC(Tmp5, DAG.getConstant(0, MVT::i32),
661                                      Tmp4, Tmp6, ISD::SETLE);
662    return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
663  }
664  case ISD::ConstantPool: {
665    ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
666    Constant *C = CP->get();
667    SDOperand CPI = DAG.getTargetConstantPool(C, MVT::i32, CP->getAlignment());
668    SDOperand Zero = DAG.getConstant(0, MVT::i32);
669
670    if (getTargetMachine().getRelocationModel() == Reloc::Static) {
671      // Generate non-pic code that has direct accesses to the constant pool.
672      // The address of the global is just (hi(&g)+lo(&g)).
673      SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, CPI, Zero);
674      SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, CPI, Zero);
675      return DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
676    }
677
678    // Only lower ConstantPool on Darwin.
679    if (!getTargetMachine().getSubtarget<PPCSubtarget>().isDarwin()) break;
680    SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, CPI, Zero);
681    if (getTargetMachine().getRelocationModel() == Reloc::PIC) {
682      // With PIC, the first instruction is actually "GR+hi(&G)".
683      Hi = DAG.getNode(ISD::ADD, MVT::i32,
684                       DAG.getNode(PPCISD::GlobalBaseReg, MVT::i32), Hi);
685    }
686
687    SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, CPI, Zero);
688    Lo = DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
689    return Lo;
690  }
691  case ISD::GlobalAddress: {
692    GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
693    GlobalValue *GV = GSDN->getGlobal();
694    SDOperand GA = DAG.getTargetGlobalAddress(GV, MVT::i32, GSDN->getOffset());
695    SDOperand Zero = DAG.getConstant(0, MVT::i32);
696
697    if (getTargetMachine().getRelocationModel() == Reloc::Static) {
698      // Generate non-pic code that has direct accesses to globals.
699      // The address of the global is just (hi(&g)+lo(&g)).
700      SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, GA, Zero);
701      SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, GA, Zero);
702      return DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
703    }
704
705    // Only lower GlobalAddress on Darwin.
706    if (!getTargetMachine().getSubtarget<PPCSubtarget>().isDarwin()) break;
707
708    SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, GA, Zero);
709    if (getTargetMachine().getRelocationModel() == Reloc::PIC) {
710      // With PIC, the first instruction is actually "GR+hi(&G)".
711      Hi = DAG.getNode(ISD::ADD, MVT::i32,
712                       DAG.getNode(PPCISD::GlobalBaseReg, MVT::i32), Hi);
713    }
714
715    SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, GA, Zero);
716    Lo = DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
717
718    if (!GV->hasWeakLinkage() && !GV->hasLinkOnceLinkage() &&
719        (!GV->isExternal() || GV->hasNotBeenReadFromBytecode()))
720      return Lo;
721
722    // If the global is weak or external, we have to go through the lazy
723    // resolution stub.
724    return DAG.getLoad(MVT::i32, DAG.getEntryNode(), Lo, DAG.getSrcValue(0));
725  }
726  case ISD::SETCC: {
727    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
728
729    // If we're comparing for equality to zero, expose the fact that this is
730    // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
731    // fold the new nodes.
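    // e.g. (i32 X == 0) becomes (ctlz X) >> 5: ctlz returns 32 only when X is
    // zero, and 32 >> 5 == 1, while every smaller count shifts down to 0.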
732    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
733      if (C->isNullValue() && CC == ISD::SETEQ) {
734        MVT::ValueType VT = Op.getOperand(0).getValueType();
735        SDOperand Zext = Op.getOperand(0);
736        if (VT < MVT::i32) {
737          VT = MVT::i32;
738          Zext = DAG.getNode(ISD::ZERO_EXTEND, VT, Op.getOperand(0));
739        }
740        unsigned Log2b = Log2_32(MVT::getSizeInBits(VT));
741        SDOperand Clz = DAG.getNode(ISD::CTLZ, VT, Zext);
742        SDOperand Scc = DAG.getNode(ISD::SRL, VT, Clz,
743                                    DAG.getConstant(Log2b, getShiftAmountTy()));
744        return DAG.getNode(ISD::TRUNCATE, getSetCCResultTy(), Scc);
745      }
746      // Leave comparisons against 0 and -1 alone for now, since they're usually
747      // optimized.  FIXME: revisit this when we can custom lower all setcc
748      // optimizations.
749      if (C->isAllOnesValue() || C->isNullValue())
750        break;
751    }
752
753    // If we have an integer seteq/setne, turn it into a compare against zero
754    // by subtracting the rhs from the lhs, which is faster than setting a
755    // condition register, reading it back out, and masking the correct bit.
756    MVT::ValueType LHSVT = Op.getOperand(0).getValueType();
757    if (MVT::isInteger(LHSVT) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
758      MVT::ValueType VT = Op.getValueType();
759      SDOperand Sub = DAG.getNode(ISD::SUB, LHSVT, Op.getOperand(0),
760                                  Op.getOperand(1));
761      return DAG.getSetCC(VT, Sub, DAG.getConstant(0, LHSVT), CC);
762    }
763    break;
764  }
765  case ISD::VASTART: {
766    // vastart just stores the address of the VarArgsFrameIndex slot into the
767    // memory location argument.
768    // FIXME: Replace MVT::i32 with PointerTy
769    SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
770    return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR,
771                       Op.getOperand(1), Op.getOperand(2));
772  }
773  case ISD::RET: {
774    SDOperand Copy;
775
776    switch(Op.getNumOperands()) {
777    default:
778      assert(0 && "Do not know how to return this many arguments!");
779      abort();
780    case 1:
781      return SDOperand(); // ret void is legal
782    case 2: {
783      MVT::ValueType ArgVT = Op.getOperand(1).getValueType();
784      unsigned ArgReg = MVT::isInteger(ArgVT) ? PPC::R3 : PPC::F1;
785      Copy = DAG.getCopyToReg(Op.getOperand(0), ArgReg, Op.getOperand(1),
786                              SDOperand());
787      break;
788    }
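    // An i64 return value is split across R3 and R4; on big-endian PowerPC
    // the ABI places the most-significant word in R3.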
789    case 3:
790      Copy = DAG.getCopyToReg(Op.getOperand(0), PPC::R3, Op.getOperand(2),
791                              SDOperand());
792      Copy = DAG.getCopyToReg(Copy, PPC::R4, Op.getOperand(1),Copy.getValue(1));
793      break;
794    }
795    return DAG.getNode(PPCISD::RET_FLAG, MVT::Other, Copy, Copy.getValue(1));
796  }
797  case ISD::SCALAR_TO_VECTOR: {
798    // Create a stack slot that is 16-byte aligned.
799    MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
800    int FrameIdx = FrameInfo->CreateStackObject(16, 16);
801    SDOperand FIdx = DAG.getFrameIndex(FrameIdx, MVT::i32);
802
803    // Store the input value into Value#0 of the stack slot.
804    SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, DAG.getEntryNode(),
805                                  Op.getOperand(0), FIdx,DAG.getSrcValue(NULL));
806    // Load it out.
807    return DAG.getLoad(Op.getValueType(), Store, FIdx, DAG.getSrcValue(NULL));
808  }
809  case ISD::BUILD_VECTOR:
810    // If this is a case we can't handle, return null and let the default
811    // expansion code take care of it.  If we CAN select this case, return Op.
812
813    // See if this is all zeros.
814    // FIXME: We should handle splat(-0.0), and other cases here.
815    if (ISD::isBuildVectorAllZeros(Op.Val))
816      return Op;
817
818    if (PPC::isVecSplatImm(Op.Val, 1) ||    // vspltisb
819        PPC::isVecSplatImm(Op.Val, 2) ||    // vspltish
820        PPC::isVecSplatImm(Op.Val, 4))      // vspltisw
821      return Op;
822
823    return SDOperand();
824
825  case ISD::VECTOR_SHUFFLE: {
826    SDOperand V1 = Op.getOperand(0);
827    SDOperand V2 = Op.getOperand(1);
828    SDOperand PermMask = Op.getOperand(2);
829
830    // Cases that are handled by instructions that take permute immediates
831    // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
832    // selected by the instruction selector.
833    if (V2.getOpcode() == ISD::UNDEF &&
834        (PPC::isSplatShuffleMask(PermMask.Val, 1) ||
835         PPC::isSplatShuffleMask(PermMask.Val, 2) ||
836         PPC::isSplatShuffleMask(PermMask.Val, 4)))
837      return Op;
838
839    if (PPC::isVPKUWUMShuffleMask(PermMask.Val) ||
840        PPC::isVPKUHUMShuffleMask(PermMask.Val) ||
841        PPC::isVSLDOIShuffleMask(PermMask.Val) != -1 ||
842        PPC::isVSLDOIRotateShuffleMask(PermMask.Val) != -1)
843      return Op;
844
845    // TODO: Handle more cases, and also handle cases that are cheaper to do as
846    // multiple such instructions than as a constant pool load/vperm pair.
847
848    // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
849    // vector that will get spilled to the constant pool.
850    if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
851
852    // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
853    // that it is in input element units, not in bytes.  Convert now.
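    // e.g. for a v4i32 shuffle, element index 5 (element 1 of the second
    // input) expands to the vperm byte indices 20, 21, 22, 23.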
854    MVT::ValueType EltVT = MVT::getVectorBaseType(V1.getValueType());
855    unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
856
857    std::vector<SDOperand> ResultMask;
858    for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
859      unsigned SrcElt =cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
860
861      for (unsigned j = 0; j != BytesPerElement; ++j)
862        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
863                                             MVT::i8));
864    }
865
866    SDOperand VPermMask =DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, ResultMask);
867    return DAG.getNode(PPCISD::VPERM, V1.getValueType(), V1, V2, VPermMask);
868  }
869  case ISD::INTRINSIC_WO_CHAIN: {
870    unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
871
872    // If this is a lowered altivec predicate compare, CompareOpc is set to the
873    // opcode number of the comparison.
874    int CompareOpc = -1;
875    bool isDot = false;
876    switch (IntNo) {
877    default: return SDOperand();    // Don't custom lower most intrinsics.
878    // Comparison predicates.
879    case Intrinsic::ppc_altivec_vcmpbfp_p:  CompareOpc = 966; isDot = 1; break;
880    case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
881    case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc =   6; isDot = 1; break;
882    case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc =  70; isDot = 1; break;
883    case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
884    case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
885    case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
886    case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
887    case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
888    case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
889    case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
890    case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
891    case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
892
893    // Normal Comparisons.
894    case Intrinsic::ppc_altivec_vcmpbfp:    CompareOpc = 966; isDot = 0; break;
895    case Intrinsic::ppc_altivec_vcmpeqfp:   CompareOpc = 198; isDot = 0; break;
896    case Intrinsic::ppc_altivec_vcmpequb:   CompareOpc =   6; isDot = 0; break;
897    case Intrinsic::ppc_altivec_vcmpequh:   CompareOpc =  70; isDot = 0; break;
898    case Intrinsic::ppc_altivec_vcmpequw:   CompareOpc = 134; isDot = 0; break;
899    case Intrinsic::ppc_altivec_vcmpgefp:   CompareOpc = 454; isDot = 0; break;
900    case Intrinsic::ppc_altivec_vcmpgtfp:   CompareOpc = 710; isDot = 0; break;
901    case Intrinsic::ppc_altivec_vcmpgtsb:   CompareOpc = 774; isDot = 0; break;
902    case Intrinsic::ppc_altivec_vcmpgtsh:   CompareOpc = 838; isDot = 0; break;
903    case Intrinsic::ppc_altivec_vcmpgtsw:   CompareOpc = 902; isDot = 0; break;
904    case Intrinsic::ppc_altivec_vcmpgtub:   CompareOpc = 518; isDot = 0; break;
905    case Intrinsic::ppc_altivec_vcmpgtuh:   CompareOpc = 582; isDot = 0; break;
906    case Intrinsic::ppc_altivec_vcmpgtuw:   CompareOpc = 646; isDot = 0; break;
907    }
908
909    assert(CompareOpc>0 && "We only lower altivec predicate compares so far!");
910
911    // If this is a non-dot comparison, make the VCMP node.
912    if (!isDot)
913      return DAG.getNode(PPCISD::VCMP, Op.getOperand(2).getValueType(),
914                         Op.getOperand(1), Op.getOperand(2),
915                         DAG.getConstant(CompareOpc, MVT::i32));
916
917    // Create the PPCISD altivec 'dot' comparison node.
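    // The dot (record) form of the compare sets CR6; the extra MVT::Flag
    // result is what ties the MFCR node below to this compare.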
918    std::vector<SDOperand> Ops;
919    std::vector<MVT::ValueType> VTs;
920    Ops.push_back(Op.getOperand(2));  // LHS
921    Ops.push_back(Op.getOperand(3));  // RHS
922    Ops.push_back(DAG.getConstant(CompareOpc, MVT::i32));
923    VTs.push_back(Op.getOperand(2).getValueType());
924    VTs.push_back(MVT::Flag);
925    SDOperand CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops);
926
927    // Now that we have the comparison, emit a copy from the CR to a GPR.
928    // This is flagged to the above dot comparison.
929    SDOperand Flags = DAG.getNode(PPCISD::MFCR, MVT::i32,
930                                  DAG.getRegister(PPC::CR6, MVT::i32),
931                                  CompNode.getValue(1));
932
933    // Unpack the result based on how the target uses it.
934    unsigned BitNo;   // Bit # of CR6.
935    bool InvertBit;   // Invert result?
936    switch (cast<ConstantSDNode>(Op.getOperand(1))->getValue()) {
937    default:  // Can't happen, don't crash on invalid number though.
938    case 0:   // Return the value of the EQ bit of CR6.
939      BitNo = 0; InvertBit = false;
940      break;
941    case 1:   // Return the inverted value of the EQ bit of CR6.
942      BitNo = 0; InvertBit = true;
943      break;
944    case 2:   // Return the value of the LT bit of CR6.
945      BitNo = 2; InvertBit = false;
946      break;
947    case 3:   // Return the inverted value of the LT bit of CR6.
948      BitNo = 2; InvertBit = true;
949      break;
950    }
951
952    // Shift the bit into the low position.
953    Flags = DAG.getNode(ISD::SRL, MVT::i32, Flags,
954                        DAG.getConstant(8-(3-BitNo), MVT::i32));
955    // Isolate the bit.
956    Flags = DAG.getNode(ISD::AND, MVT::i32, Flags,
957                        DAG.getConstant(1, MVT::i32));
958
959    // If we are supposed to, toggle the bit.
960    if (InvertBit)
961      Flags = DAG.getNode(ISD::XOR, MVT::i32, Flags,
962                          DAG.getConstant(1, MVT::i32));
963    return Flags;
964  }
965  }
966  return SDOperand();
967}
968
969std::vector<SDOperand>
970PPCTargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
971  //
972  // FIXME: add a proper description of the PPC stack frame format.
973  //
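  // As implemented here: the caller reserves a 24-byte linkage area, so
  // incoming arguments start at stack offset 24.  Up to 8 integer words are
  // passed in R3-R10 and up to 13 FP values in F1-F13; every 4 bytes of
  // argument space also consumes one of the GPRs, whether or not it was used.
  // Arguments that do not fit in registers are loaded from their fixed stack
  // slots, and for varargs the remaining GPRs are spilled to the stack below.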
974  MachineFunction &MF = DAG.getMachineFunction();
975  MachineFrameInfo *MFI = MF.getFrameInfo();
976  MachineBasicBlock& BB = MF.front();
977  SSARegMap *RegMap = MF.getSSARegMap();
978  std::vector<SDOperand> ArgValues;
979
980  unsigned ArgOffset = 24;
981  unsigned GPR_remaining = 8;
982  unsigned FPR_remaining = 13;
983  unsigned GPR_idx = 0, FPR_idx = 0;
984  static const unsigned GPR[] = {
985    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
986    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
987  };
988  static const unsigned FPR[] = {
989    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
990    PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
991  };
992
993  // Add DAG nodes to load the arguments...  On entry to a function on PPC,
994  // the arguments start at offset 24, although they are likely to be passed
995  // in registers.
996  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
997    SDOperand newroot, argt;
998    unsigned ObjSize;
999    bool needsLoad = false;
1000    bool ArgLive = !I->use_empty();
1001    MVT::ValueType ObjectVT = getValueType(I->getType());
1002
1003    switch (ObjectVT) {
1004    default: assert(0 && "Unhandled argument type!");
1005    case MVT::i1:
1006    case MVT::i8:
1007    case MVT::i16:
1008    case MVT::i32:
1009      ObjSize = 4;
1010      if (!ArgLive) break;
1011      if (GPR_remaining > 0) {
1012        unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
1013        MF.addLiveIn(GPR[GPR_idx], VReg);
1014        argt = newroot = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32);
1015        if (ObjectVT != MVT::i32) {
1016          unsigned AssertOp = I->getType()->isSigned() ? ISD::AssertSext
1017                                                       : ISD::AssertZext;
1018          argt = DAG.getNode(AssertOp, MVT::i32, argt,
1019                             DAG.getValueType(ObjectVT));
1020          argt = DAG.getNode(ISD::TRUNCATE, ObjectVT, argt);
1021        }
1022      } else {
1023        needsLoad = true;
1024      }
1025      break;
1026    case MVT::i64:
1027      ObjSize = 8;
1028      if (!ArgLive) break;
1029      if (GPR_remaining > 0) {
1030        SDOperand argHi, argLo;
1031        unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
1032        MF.addLiveIn(GPR[GPR_idx], VReg);
1033        argHi = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32);
1034        // If we have two or more remaining argument registers, then both halves
1035        // of the i64 can be sourced from there.  Otherwise, the lower half will
1036        // have to come off the stack.  This can happen when an i64 is preceded
1037        // by 28 bytes of arguments.
1038        if (GPR_remaining > 1) {
1039          unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
1040          MF.addLiveIn(GPR[GPR_idx+1], VReg);
1041          argLo = DAG.getCopyFromReg(argHi, VReg, MVT::i32);
1042        } else {
1043          int FI = MFI->CreateFixedObject(4, ArgOffset+4);
1044          SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
1045          argLo = DAG.getLoad(MVT::i32, DAG.getEntryNode(), FIN,
1046                              DAG.getSrcValue(NULL));
1047        }
1048        // Rebuild the i64 argument value from its two 32-bit halves.
1049        argt = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, argLo, argHi);
1050        newroot = argLo;
1051      } else {
1052        needsLoad = true;
1053      }
1054      break;
1055    case MVT::f32:
1056    case MVT::f64:
1057      ObjSize = (ObjectVT == MVT::f64) ? 8 : 4;
1058      if (!ArgLive) {
1059        if (FPR_remaining > 0) {
1060          --FPR_remaining;
1061          ++FPR_idx;
1062        }
1063        break;
1064      }
1065      if (FPR_remaining > 0) {
1066        unsigned VReg;
1067        if (ObjectVT == MVT::f32)
1068          VReg = RegMap->createVirtualRegister(&PPC::F4RCRegClass);
1069        else
1070          VReg = RegMap->createVirtualRegister(&PPC::F8RCRegClass);
1071        MF.addLiveIn(FPR[FPR_idx], VReg);
1072        argt = newroot = DAG.getCopyFromReg(DAG.getRoot(), VReg, ObjectVT);
1073        --FPR_remaining;
1074        ++FPR_idx;
1075      } else {
1076        needsLoad = true;
1077      }
1078      break;
1079    }
1080
1081    // We need to load the argument to a virtual register if we determined above
1082    // that we ran out of physical registers of the appropriate type
1083    if (needsLoad) {
1084      unsigned SubregOffset = 0;
1085      if (ObjectVT == MVT::i8 || ObjectVT == MVT::i1) SubregOffset = 3;
1086      if (ObjectVT == MVT::i16) SubregOffset = 2;
1087      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1088      SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
1089      FIN = DAG.getNode(ISD::ADD, MVT::i32, FIN,
1090                        DAG.getConstant(SubregOffset, MVT::i32));
1091      argt = newroot = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN,
1092                                   DAG.getSrcValue(NULL));
1093    }
1094
1095    // Every 4 bytes of argument space consumes one of the GPRs available for
1096    // argument passing.
1097    if (GPR_remaining > 0) {
1098      unsigned delta = (GPR_remaining > 1 && ObjSize == 8) ? 2 : 1;
1099      GPR_remaining -= delta;
1100      GPR_idx += delta;
1101    }
1102    ArgOffset += ObjSize;
1103    if (newroot.Val)
1104      DAG.setRoot(newroot.getValue(1));
1105
1106    ArgValues.push_back(argt);
1107  }
1108
1109  // If the function takes variable number of arguments, make a frame index for
1110  // the start of the first vararg value... for expansion of llvm.va_start.
1111  if (F.isVarArg()) {
1112    VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset);
1113    SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
1114    // If this function is vararg, store any remaining integer argument regs
1115    // to their spots on the stack so that they may be loaded by dereferencing the
1116    // result of va_next.
1117    std::vector<SDOperand> MemOps;
1118    for (; GPR_remaining > 0; --GPR_remaining, ++GPR_idx) {
1119      unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
1120      MF.addLiveIn(GPR[GPR_idx], VReg);
1121      SDOperand Val = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32);
1122      SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Val.getValue(1),
1123                                    Val, FIN, DAG.getSrcValue(NULL));
1124      MemOps.push_back(Store);
1125      // Increment the address by four for the next argument to store
1126      SDOperand PtrOff = DAG.getConstant(4, getPointerTy());
1127      FIN = DAG.getNode(ISD::ADD, MVT::i32, FIN, PtrOff);
1128    }
1129    if (!MemOps.empty()) {
1130      MemOps.push_back(DAG.getRoot());
1131      DAG.setRoot(DAG.getNode(ISD::TokenFactor, MVT::Other, MemOps));
1132    }
1133  }
1134
1135  // Finally, inform the code generator which regs we return values in.
1136  switch (getValueType(F.getReturnType())) {
1137    default: assert(0 && "Unknown type!");
1138    case MVT::isVoid: break;
1139    case MVT::i1:
1140    case MVT::i8:
1141    case MVT::i16:
1142    case MVT::i32:
1143      MF.addLiveOut(PPC::R3);
1144      break;
1145    case MVT::i64:
1146      MF.addLiveOut(PPC::R3);
1147      MF.addLiveOut(PPC::R4);
1148      break;
1149    case MVT::f32:
1150    case MVT::f64:
1151      MF.addLiveOut(PPC::F1);
1152      break;
1153  }
1154
1155  return ArgValues;
1156}
1157
1158std::pair<SDOperand, SDOperand>
1159PPCTargetLowering::LowerCallTo(SDOperand Chain,
1160                               const Type *RetTy, bool isVarArg,
1161                               unsigned CallingConv, bool isTailCall,
1162                               SDOperand Callee, ArgListTy &Args,
1163                               SelectionDAG &DAG) {
1164  // args_to_use accumulates the outgoing args that the PPCISD::CALL case in
1165  // SelectExpr uses to put the arguments in the appropriate registers.
1166  std::vector<SDOperand> args_to_use;
1167
1168  // Count how many bytes are to be pushed on the stack, including the linkage
1169  // area, and parameter passing area.
1170  unsigned NumBytes = 24;
1171
1172  if (Args.empty()) {
1173    Chain = DAG.getCALLSEQ_START(Chain,
1174                                 DAG.getConstant(NumBytes, getPointerTy()));
1175  } else {
1176    for (unsigned i = 0, e = Args.size(); i != e; ++i) {
1177      switch (getValueType(Args[i].second)) {
1178      default: assert(0 && "Unknown value type!");
1179      case MVT::i1:
1180      case MVT::i8:
1181      case MVT::i16:
1182      case MVT::i32:
1183      case MVT::f32:
1184        NumBytes += 4;
1185        break;
1186      case MVT::i64:
1187      case MVT::f64:
1188        NumBytes += 8;
1189        break;
1190      }
1191    }
1192
1193    // Just to be safe, always reserve the full 24-byte linkage area plus the
1194    // 32-byte minimum parameter save area (8 GPR slots), 56 bytes in all, in
1195    // case any called code gets funky on us.  (Required by the ABI for varargs.)
1196    if (NumBytes < 56) NumBytes = 56;
1197
1198    // Adjust the stack pointer for the new arguments...
1199    // These operations are automatically eliminated by the prolog/epilog pass
1200    Chain = DAG.getCALLSEQ_START(Chain,
1201                                 DAG.getConstant(NumBytes, getPointerTy()));
1202
1203    // Set up a copy of the stack pointer for use loading and storing any
1204    // arguments that may not fit in the registers available for argument
1205    // passing.
1206    SDOperand StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
1207
1208    // Figure out which arguments are going to go in registers, and which in
1209    // memory.  Also, if this is a vararg function, floating point operations
1210    // must be stored to our stack, and loaded into integer regs as well, if
1211    // any integer regs are available for argument passing.
1212    unsigned ArgOffset = 24;
1213    unsigned GPR_remaining = 8;
1214    unsigned FPR_remaining = 13;
1215
1216    std::vector<SDOperand> MemOps;
1217    for (unsigned i = 0, e = Args.size(); i != e; ++i) {
1218      // PtrOff will be used to store the current argument to the stack if a
1219      // register cannot be found for it.
1220      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
1221      PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
1222      MVT::ValueType ArgVT = getValueType(Args[i].second);
1223
1224      switch (ArgVT) {
1225      default: assert(0 && "Unexpected ValueType for argument!");
1226      case MVT::i1:
1227      case MVT::i8:
1228      case MVT::i16:
1229        // Promote the integer to 32 bits.  If the input type is signed use a
1230        // sign extend, otherwise use a zero extend.
1231        if (Args[i].second->isSigned())
1232          Args[i].first =DAG.getNode(ISD::SIGN_EXTEND, MVT::i32, Args[i].first);
1233        else
1234          Args[i].first =DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Args[i].first);
1235        // FALL THROUGH
1236      case MVT::i32:
1237        if (GPR_remaining > 0) {
1238          args_to_use.push_back(Args[i].first);
1239          --GPR_remaining;
1240        } else {
1241          MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
1242                                       Args[i].first, PtrOff,
1243                                       DAG.getSrcValue(NULL)));
1244        }
1245        ArgOffset += 4;
1246        break;
1247      case MVT::i64:
1248        // If we have one free GPR left, we can place the upper half of the i64
1249        // in it, and store the other half to the stack.  If we have two or more
1250        // free GPRs, then we can pass both halves of the i64 in registers.
1251        if (GPR_remaining > 0) {
1252          SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
1253                                     Args[i].first, DAG.getConstant(1, MVT::i32));
1254          SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
1255                                     Args[i].first, DAG.getConstant(0, MVT::i32));
1256          args_to_use.push_back(Hi);
1257          --GPR_remaining;
1258          if (GPR_remaining > 0) {
1259            args_to_use.push_back(Lo);
1260            --GPR_remaining;
1261          } else {
1262            SDOperand ConstFour = DAG.getConstant(4, getPointerTy());
1263            PtrOff = DAG.getNode(ISD::ADD, MVT::i32, PtrOff, ConstFour);
1264            MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
1265                                         Lo, PtrOff, DAG.getSrcValue(NULL)));
1266          }
1267        } else {
1268          MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
1269                                       Args[i].first, PtrOff,
1270                                       DAG.getSrcValue(NULL)));
1271        }
1272        ArgOffset += 8;
1273        break;
1274      case MVT::f32:
1275      case MVT::f64:
1276        if (FPR_remaining > 0) {
1277          args_to_use.push_back(Args[i].first);
1278          --FPR_remaining;
1279          if (isVarArg) {
1280            SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Chain,
1281                                          Args[i].first, PtrOff,
1282                                          DAG.getSrcValue(NULL));
1283            MemOps.push_back(Store);
1284            // Float varargs are always shadowed in available integer registers
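            // (A varargs callee cannot know at the call site whether it will
            // read the argument as floating point or integer, so the value is
            // written to the argument area and also reloaded into the GPRs
            // that shadow that slot.)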
1285            if (GPR_remaining > 0) {
1286              SDOperand Load = DAG.getLoad(MVT::i32, Store, PtrOff,
1287                                           DAG.getSrcValue(NULL));
1288              MemOps.push_back(Load.getValue(1));
1289              args_to_use.push_back(Load);
1290              --GPR_remaining;
1291            }
1292            if (GPR_remaining > 0 && MVT::f64 == ArgVT) {
1293              SDOperand ConstFour = DAG.getConstant(4, getPointerTy());
1294              PtrOff = DAG.getNode(ISD::ADD, MVT::i32, PtrOff, ConstFour);
1295              SDOperand Load = DAG.getLoad(MVT::i32, Store, PtrOff,
1296                                           DAG.getSrcValue(NULL));
1297              MemOps.push_back(Load.getValue(1));
1298              args_to_use.push_back(Load);
1299              --GPR_remaining;
1300            }
1301          } else {
1302            // If we have any FPRs remaining, we may also have GPRs remaining.
1303            // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
1304            // GPRs.
1305            if (GPR_remaining > 0) {
1306              args_to_use.push_back(DAG.getNode(ISD::UNDEF, MVT::i32));
1307              --GPR_remaining;
1308            }
1309            if (GPR_remaining > 0 && MVT::f64 == ArgVT) {
1310              args_to_use.push_back(DAG.getNode(ISD::UNDEF, MVT::i32));
1311              --GPR_remaining;
1312            }
1313          }
1314        } else {
1315          MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
1316                                       Args[i].first, PtrOff,
1317                                       DAG.getSrcValue(NULL)));
1318        }
1319        ArgOffset += (ArgVT == MVT::f32) ? 4 : 8;
1320        break;
1321      }
1322    }
1323    if (!MemOps.empty())
1324      Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, MemOps);
1325  }
1326
1327  std::vector<MVT::ValueType> RetVals;
1328  MVT::ValueType RetTyVT = getValueType(RetTy);
1329  MVT::ValueType ActualRetTyVT = RetTyVT;
1330  if (RetTyVT >= MVT::i1 && RetTyVT <= MVT::i16)
1331    ActualRetTyVT = MVT::i32;   // Promote result to i32.
1332
1333  if (RetTyVT == MVT::i64) {
1334    RetVals.push_back(MVT::i32);
1335    RetVals.push_back(MVT::i32);
1336  } else if (RetTyVT != MVT::isVoid) {
1337    RetVals.push_back(ActualRetTyVT);
1338  }
1339  RetVals.push_back(MVT::Other);
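  // The trailing MVT::Other value is the output chain of the call node.  Note
  // that an i64 result is modeled as two i32 values here, since it comes back
  // split across a register pair (r3/r4) on 32-bit PowerPC.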
1340
1341  // If the callee is a GlobalAddress node (quite common, every direct call is)
1342  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
1343  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
1344    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32);
1345
1346  std::vector<SDOperand> Ops;
1347  Ops.push_back(Chain);
1348  Ops.push_back(Callee);
1349  Ops.insert(Ops.end(), args_to_use.begin(), args_to_use.end());
1350  SDOperand TheCall = DAG.getNode(PPCISD::CALL, RetVals, Ops);
1351  Chain = TheCall.getValue(TheCall.Val->getNumValues()-1);
1352  Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
1353                      DAG.getConstant(NumBytes, getPointerTy()));
1354  SDOperand RetVal = TheCall;
1355
1356  // If the result is a small value, add a note so that we keep track of
1357  // whether it is sign or zero extended.
1358  if (RetTyVT != ActualRetTyVT) {
1359    RetVal = DAG.getNode(RetTy->isSigned() ? ISD::AssertSext : ISD::AssertZext,
1360                         MVT::i32, RetVal, DAG.getValueType(RetTyVT));
1361    RetVal = DAG.getNode(ISD::TRUNCATE, RetTyVT, RetVal);
1362  } else if (RetTyVT == MVT::i64) {
1363    RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, RetVal, RetVal.getValue(1));
1364  }
1365
1366  return std::make_pair(RetVal, Chain);
1367}
1368
1369MachineBasicBlock *
1370PPCTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
1371                                           MachineBasicBlock *BB) {
1372  assert((MI->getOpcode() == PPC::SELECT_CC_Int ||
1373          MI->getOpcode() == PPC::SELECT_CC_F4 ||
1374          MI->getOpcode() == PPC::SELECT_CC_F8) &&
1375         "Unexpected instr type to insert");
1376
1377  // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
1378  // control-flow pattern.  The incoming instruction knows the destination vreg
1379  // to set, the condition code register to branch on, the true/false values to
1380  // select between, and a branch opcode to use.
1381  const BasicBlock *LLVM_BB = BB->getBasicBlock();
1382  ilist<MachineBasicBlock>::iterator It = BB;
1383  ++It;
1384
1385  //  thisMBB:
1386  //  ...
1387  //   TrueVal = ...
1388  //   cmpTY ccX, r1, r2
1389  //   bCC copy1MBB
1390  //   fallthrough --> copy0MBB
1391  MachineBasicBlock *thisMBB = BB;
1392  MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
1393  MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
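  // The SELECT_CC pseudo's operands, as consumed here and by the PHI below,
  // are: 0 = destination vreg, 1 = condition register to branch on,
  // 2 = value selected when the branch is taken (from thisMBB),
  // 3 = value selected on the fallthrough path (from copy0MBB),
  // 4 = the PPC branch opcode, as an immediate.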
1394  BuildMI(BB, MI->getOperand(4).getImmedValue(), 2)
1395    .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
1396  MachineFunction *F = BB->getParent();
1397  F->getBasicBlockList().insert(It, copy0MBB);
1398  F->getBasicBlockList().insert(It, sinkMBB);
1399  // Update machine-CFG edges by first adding all successors of the current
1400  // block to the new block, which will contain the PHI node for the select.
1401  for (MachineBasicBlock::succ_iterator i = BB->succ_begin(),
1402      e = BB->succ_end(); i != e; ++i)
1403    sinkMBB->addSuccessor(*i);
1404  // Next, remove all successors of the current block, and add the true
1405  // and fallthrough blocks as its successors.
1406  while (!BB->succ_empty())
1407    BB->removeSuccessor(BB->succ_begin());
1408  BB->addSuccessor(copy0MBB);
1409  BB->addSuccessor(sinkMBB);
1410
1411  //  copy0MBB:
1412  //   %FalseValue = ...
1413  //   # fallthrough to sinkMBB
1414  BB = copy0MBB;
1415
1416  // Update machine-CFG edges
1417  BB->addSuccessor(sinkMBB);
1418
1419  //  sinkMBB:
1420  //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
1421  //  ...
1422  BB = sinkMBB;
1423  BuildMI(BB, PPC::PHI, 4, MI->getOperand(0).getReg())
1424    .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
1425    .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
1426
1427  delete MI;   // The pseudo instruction is gone now.
1428  return BB;
1429}
1430
1431SDOperand PPCTargetLowering::PerformDAGCombine(SDNode *N,
1432                                               DAGCombinerInfo &DCI) const {
1433  TargetMachine &TM = getTargetMachine();
1434  SelectionDAG &DAG = DCI.DAG;
1435  switch (N->getOpcode()) {
1436  default: break;
1437  case ISD::SINT_TO_FP:
1438    if (TM.getSubtarget<PPCSubtarget>().is64Bit()) {
1439      if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
1440        // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
1441        // We allow the src/dst to be either f32/f64, but the intermediate
1442        // type must be i64.
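        // For example, the C expression (double)(long long)X can then be
        // selected as fctidz followed by fcfid, keeping the value in FP
        // registers instead of bouncing it through memory.  (Illustrative;
        // fctidz/fcfid exist only on 64-bit implementations, hence the
        // is64Bit() guard above.)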
1443        if (N->getOperand(0).getValueType() == MVT::i64) {
1444          SDOperand Val = N->getOperand(0).getOperand(0);
1445          if (Val.getValueType() == MVT::f32) {
1446            Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
1447            DCI.AddToWorklist(Val.Val);
1448          }
1449
1450          Val = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Val);
1451          DCI.AddToWorklist(Val.Val);
1452          Val = DAG.getNode(PPCISD::FCFID, MVT::f64, Val);
1453          DCI.AddToWorklist(Val.Val);
1454          if (N->getValueType(0) == MVT::f32) {
1455            Val = DAG.getNode(ISD::FP_ROUND, MVT::f32, Val);
1456            DCI.AddToWorklist(Val.Val);
1457          }
1458          return Val;
1459        } else if (N->getOperand(0).getValueType() == MVT::i32) {
1460          // If the intermediate type is i32, we can avoid the load/store here
1461          // too.
1462        }
1463      }
1464    }
1465    break;
1466  case ISD::STORE:
1467    // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
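    // fctiwz converts the FP value to a 32-bit integer held in an FPR, and
    // stfiwx stores that integer word directly to memory, so the value never
    // needs to be moved into a GPR through a stack temporary.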
1468    if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
1469        N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
1470        N->getOperand(1).getValueType() == MVT::i32) {
1471      SDOperand Val = N->getOperand(1).getOperand(0);
1472      if (Val.getValueType() == MVT::f32) {
1473        Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
1474        DCI.AddToWorklist(Val.Val);
1475      }
1476      Val = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Val);
1477      DCI.AddToWorklist(Val.Val);
1478
1479      Val = DAG.getNode(PPCISD::STFIWX, MVT::Other, N->getOperand(0), Val,
1480                        N->getOperand(2), N->getOperand(3));
1481      DCI.AddToWorklist(Val.Val);
1482      return Val;
1483    }
1484    break;
1485  case PPCISD::VCMP: {
1486    // If a VCMPo node already exists with exactly the same operands as this
1487    // node, use its result instead of this node (VCMPo computes both a CR6 and
1488    // a normal output).
1489    //
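    // VCMPo is the record form of the AltiVec compare: it produces the same
    // vector result and additionally sets CR6.  The hasOneUse() checks below
    // make the scan run only when every operand has additional users, since
    // an identical VCMPo would have to share all three operands.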
1490    if (!N->getOperand(0).hasOneUse() &&
1491        !N->getOperand(1).hasOneUse() &&
1492        !N->getOperand(2).hasOneUse()) {
1493
1494      // Scan all of the users of the LHS, looking for VCMPo's that match.
1495      SDNode *VCMPoNode = 0;
1496
1497      SDNode *LHSN = N->getOperand(0).Val;
1498      for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
1499           UI != E; ++UI)
1500        if ((*UI)->getOpcode() == PPCISD::VCMPo &&
1501            (*UI)->getOperand(1) == N->getOperand(1) &&
1502            (*UI)->getOperand(2) == N->getOperand(2) &&
1503            (*UI)->getOperand(0) == N->getOperand(0)) {
1504          VCMPoNode = *UI;
1505          break;
1506        }
1507
1508      // If there are non-zero uses of the flag value, use the VCMPo node!
1509      if (VCMPoNode && !VCMPoNode->hasNUsesOfValue(0, 1))
1510        return SDOperand(VCMPoNode, 0);
1511    }
1512    break;
1513  }
1514  }
1515
1516  return SDOperand();
1517}
1518
1519void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
1520                                                       uint64_t Mask,
1521                                                       uint64_t &KnownZero,
1522                                                       uint64_t &KnownOne,
1523                                                       unsigned Depth) const {
1524  KnownZero = 0;
1525  KnownOne = 0;
1526  switch (Op.getOpcode()) {
1527  default: break;
1528  case ISD::INTRINSIC_WO_CHAIN: {
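    // The AltiVec predicate-form compare intrinsics handled below produce an
    // i32 result that is either 0 or 1, so every bit other than bit 0 can be
    // reported as known-zero.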
1529    switch (cast<ConstantSDNode>(Op.getOperand(0))->getValue()) {
1530    default: break;
1531    case Intrinsic::ppc_altivec_vcmpbfp_p:
1532    case Intrinsic::ppc_altivec_vcmpeqfp_p:
1533    case Intrinsic::ppc_altivec_vcmpequb_p:
1534    case Intrinsic::ppc_altivec_vcmpequh_p:
1535    case Intrinsic::ppc_altivec_vcmpequw_p:
1536    case Intrinsic::ppc_altivec_vcmpgefp_p:
1537    case Intrinsic::ppc_altivec_vcmpgtfp_p:
1538    case Intrinsic::ppc_altivec_vcmpgtsb_p:
1539    case Intrinsic::ppc_altivec_vcmpgtsh_p:
1540    case Intrinsic::ppc_altivec_vcmpgtsw_p:
1541    case Intrinsic::ppc_altivec_vcmpgtub_p:
1542    case Intrinsic::ppc_altivec_vcmpgtuh_p:
1543    case Intrinsic::ppc_altivec_vcmpgtuw_p:
1544      KnownZero = ~1U;  // All bits but the low one are known to be zero.
1545      break;
1546    }
1547  }
1548  }
1549}
1550
1551
1552/// getConstraintType - Given a constraint letter, return the type of
1553/// constraint it is for this target.
1554PPCTargetLowering::ConstraintType
1555PPCTargetLowering::getConstraintType(char ConstraintLetter) const {
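  // These single-letter constraints follow the GCC RS6000 conventions: 'b' is
  // a base register (any GPR except R0), 'r' a general-purpose register, 'f' a
  // floating-point register, 'v' an AltiVec vector register, and 'y' a
  // condition register.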
1556  switch (ConstraintLetter) {
1557  default: break;
1558  case 'b':
1559  case 'r':
1560  case 'f':
1561  case 'v':
1562  case 'y':
1563    return C_RegisterClass;
1564  }
1565  return TargetLowering::getConstraintType(ConstraintLetter);
1566}
1567
1568
1569std::vector<unsigned> PPCTargetLowering::
1570getRegClassForInlineAsmConstraint(const std::string &Constraint,
1571                                  MVT::ValueType VT) const {
1572  if (Constraint.size() == 1) {
1573    switch (Constraint[0]) {      // GCC RS6000 Constraint Letters
1574    default: break;  // Unknown constraint letter
1575    case 'b':
1576      return make_vector<unsigned>(/*no R0*/ PPC::R1 , PPC::R2 , PPC::R3 ,
1577                                   PPC::R4 , PPC::R5 , PPC::R6 , PPC::R7 ,
1578                                   PPC::R8 , PPC::R9 , PPC::R10, PPC::R11,
1579                                   PPC::R12, PPC::R13, PPC::R14, PPC::R15,
1580                                   PPC::R16, PPC::R17, PPC::R18, PPC::R19,
1581                                   PPC::R20, PPC::R21, PPC::R22, PPC::R23,
1582                                   PPC::R24, PPC::R25, PPC::R26, PPC::R27,
1583                                   PPC::R28, PPC::R29, PPC::R30, PPC::R31,
1584                                   0);
1585    case 'r':
1586      return make_vector<unsigned>(PPC::R0 , PPC::R1 , PPC::R2 , PPC::R3 ,
1587                                   PPC::R4 , PPC::R5 , PPC::R6 , PPC::R7 ,
1588                                   PPC::R8 , PPC::R9 , PPC::R10, PPC::R11,
1589                                   PPC::R12, PPC::R13, PPC::R14, PPC::R15,
1590                                   PPC::R16, PPC::R17, PPC::R18, PPC::R19,
1591                                   PPC::R20, PPC::R21, PPC::R22, PPC::R23,
1592                                   PPC::R24, PPC::R25, PPC::R26, PPC::R27,
1593                                   PPC::R28, PPC::R29, PPC::R30, PPC::R31,
1594                                   0);
1595    case 'f':
1596      return make_vector<unsigned>(PPC::F0 , PPC::F1 , PPC::F2 , PPC::F3 ,
1597                                   PPC::F4 , PPC::F5 , PPC::F6 , PPC::F7 ,
1598                                   PPC::F8 , PPC::F9 , PPC::F10, PPC::F11,
1599                                   PPC::F12, PPC::F13, PPC::F14, PPC::F15,
1600                                   PPC::F16, PPC::F17, PPC::F18, PPC::F19,
1601                                   PPC::F20, PPC::F21, PPC::F22, PPC::F23,
1602                                   PPC::F24, PPC::F25, PPC::F26, PPC::F27,
1603                                   PPC::F28, PPC::F29, PPC::F30, PPC::F31,
1604                                   0);
1605    case 'v':
1606      return make_vector<unsigned>(PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 ,
1607                                   PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
1608                                   PPC::V8 , PPC::V9 , PPC::V10, PPC::V11,
1609                                   PPC::V12, PPC::V13, PPC::V14, PPC::V15,
1610                                   PPC::V16, PPC::V17, PPC::V18, PPC::V19,
1611                                   PPC::V20, PPC::V21, PPC::V22, PPC::V23,
1612                                   PPC::V24, PPC::V25, PPC::V26, PPC::V27,
1613                                   PPC::V28, PPC::V29, PPC::V30, PPC::V31,
1614                                   0);
1615    case 'y':
1616      return make_vector<unsigned>(PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3,
1617                                   PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7,
1618                                   0);
1619    }
1620  }
1621
1622  return std::vector<unsigned>();
1623}
1624
1625  /// isOperandValidForConstraint - Return true if Op is a valid operand for inline asm constraint Letter.
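/// For instance (hypothetical inline asm, for illustration only), in
///   asm("addi %0,%1,%2" : "=r"(dst) : "r"(src), "I"(42));
/// the literal 42 satisfies "I" because it fits in a signed 16-bit field, so
/// this routine returns true for Letter == 'I'.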
1626bool PPCTargetLowering::
1627isOperandValidForConstraint(SDOperand Op, char Letter) {
1628  switch (Letter) {
1629  default: break;
1630  case 'I':
1631  case 'J':
1632  case 'K':
1633  case 'L':
1634  case 'M':
1635  case 'N':
1636  case 'O':
1637  case 'P': {
1638    if (!isa<ConstantSDNode>(Op)) return false;  // Must be an immediate.
1639    unsigned Value = cast<ConstantSDNode>(Op)->getValue();
1640    switch (Letter) {
1641    default: assert(0 && "Unknown constraint letter!");
1642    case 'I':  // "I" is a signed 16-bit constant.
1643      return (short)Value == (int)Value;
1644    case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
1645    case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
1646      return (short)Value == 0;
1647    case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
1648      return (Value >> 16) == 0;
1649    case 'M':  // "M" is a constant that is greater than 31.
1650      return Value > 31;
1651    case 'N':  // "N" is a positive constant that is an exact power of two.
1652      return (int)Value > 0 && isPowerOf2_32(Value);
1653    case 'O':  // "O" is the constant zero.
1654      return Value == 0;
1655    case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
1656      return (short)-Value == (int)-Value;
1657    }
1658    break;
1659  }
1660  }
1661
1662  // Handle standard constraint letters.
1663  return TargetLowering::isOperandValidForConstraint(Op, Letter);
1664}
1665
1666/// isLegalAddressImmediate - Return true if the integer value can be used
1667/// as the offset of the target addressing mode.
1668bool PPCTargetLowering::isLegalAddressImmediate(int64_t V) const {
1669  // PPC allows a sign-extended 16-bit immediate field.
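  // (For example, the displacement in a D-form access such as "lwz r3, d(r4)"
  // must fit in a signed 16-bit field: -32768 <= d <= 32767.)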
1670  return (V >= -(1 << 15) && V <= (1 << 15)-1);
1671}
1672