// PPCISelLowering.cpp revision 9b3bd467d02e73a02afed6d50aaaa149a6a69701
//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCTargetLowering class.
//
//===----------------------------------------------------------------------===//
13
14#include "PPCISelLowering.h"
15#include "PPCTargetMachine.h"
16#include "llvm/ADT/VectorExtras.h"
17#include "llvm/Analysis/ScalarEvolutionExpressions.h"
18#include "llvm/CodeGen/MachineFrameInfo.h"
19#include "llvm/CodeGen/MachineFunction.h"
20#include "llvm/CodeGen/MachineInstrBuilder.h"
21#include "llvm/CodeGen/SelectionDAG.h"
22#include "llvm/CodeGen/SSARegMap.h"
23#include "llvm/Constants.h"
24#include "llvm/Function.h"
25#include "llvm/Support/MathExtras.h"
26#include "llvm/Target/TargetOptions.h"
27using namespace llvm;
28
29PPCTargetLowering::PPCTargetLowering(TargetMachine &TM)
30  : TargetLowering(TM) {
31
32  // Fold away setcc operations if possible.
33  setSetCCIsExpensive();
34  setPow2DivIsCheap();
35
36  // Use _setjmp/_longjmp instead of setjmp/longjmp.
37  setUseUnderscoreSetJmpLongJmp(true);
38
39  // Set up the register classes.
40  addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
41  addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
42  addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);
43
44  setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
45  setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
46
47  // PowerPC has no intrinsics for these particular operations
48  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
49  setOperationAction(ISD::MEMSET, MVT::Other, Expand);
50  setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
51
52  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
53  setOperationAction(ISD::SEXTLOAD, MVT::i1, Expand);
54  setOperationAction(ISD::SEXTLOAD, MVT::i8, Expand);
55
56  // PowerPC has no SREM/UREM instructions
57  setOperationAction(ISD::SREM, MVT::i32, Expand);
58  setOperationAction(ISD::UREM, MVT::i32, Expand);
59
60  // We don't support sin/cos/sqrt/fmod
61  setOperationAction(ISD::FSIN , MVT::f64, Expand);
62  setOperationAction(ISD::FCOS , MVT::f64, Expand);
63  setOperationAction(ISD::FREM , MVT::f64, Expand);
64  setOperationAction(ISD::FSIN , MVT::f32, Expand);
65  setOperationAction(ISD::FCOS , MVT::f32, Expand);
66  setOperationAction(ISD::FREM , MVT::f32, Expand);
67
68  // If we're enabling GP optimizations, use hardware square root
69  if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
70    setOperationAction(ISD::FSQRT, MVT::f64, Expand);
71    setOperationAction(ISD::FSQRT, MVT::f32, Expand);
72  }
73
74  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
75  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
76
77  // PowerPC does not have BSWAP, CTPOP or CTTZ
78  setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
79  setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
80  setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
81
82  // PowerPC does not have ROTR
83  setOperationAction(ISD::ROTR, MVT::i32   , Expand);
84
85  // PowerPC does not have Select
86  setOperationAction(ISD::SELECT, MVT::i32, Expand);
87  setOperationAction(ISD::SELECT, MVT::f32, Expand);
88  setOperationAction(ISD::SELECT, MVT::f64, Expand);
89
90  // PowerPC wants to turn select_cc of FP into fsel when possible.
91  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
92  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
93
94  // PowerPC wants to optimize integer setcc a bit
95  setOperationAction(ISD::SETCC, MVT::i32, Custom);
96
97  // PowerPC does not have BRCOND which requires SetCC
98  setOperationAction(ISD::BRCOND, MVT::Other, Expand);
99
100  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
101  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
102
103  // PowerPC does not have [U|S]INT_TO_FP
104  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
105  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
106
107  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
108  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
109
110  // PowerPC does not have truncstore for i1.
111  setOperationAction(ISD::TRUNCSTORE, MVT::i1, Promote);
112
113  // Support label based line numbers.
114  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
115  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
116  // FIXME - use subtarget debug flags
117  if (!TM.getSubtarget<PPCSubtarget>().isDarwin())
118    setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);
119
120  // We want to legalize GlobalAddress and ConstantPool nodes into the
121  // appropriate instructions to materialize the address.
122  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
123  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
124
125  // RET must be custom lowered, to meet ABI requirements
126  setOperationAction(ISD::RET               , MVT::Other, Custom);
127
128  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
129  setOperationAction(ISD::VASTART           , MVT::Other, Custom);
130
131  // Use the default implementation.
132  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
133  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
134  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
135  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
136  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
137  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
138
139  if (TM.getSubtarget<PPCSubtarget>().is64Bit()) {
140    // They also have instructions for converting between i64 and fp.
141    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
142    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
143    // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
144    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
145  } else {
146    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
147    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
148  }
149
150  if (TM.getSubtarget<PPCSubtarget>().has64BitRegs()) {
151    // 64 bit PowerPC implementations can support i64 types directly
152    addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
153    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
154    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
155  } else {
156    // 32 bit PowerPC wants to expand i64 shifts itself.
157    setOperationAction(ISD::SHL, MVT::i64, Custom);
158    setOperationAction(ISD::SRL, MVT::i64, Custom);
159    setOperationAction(ISD::SRA, MVT::i64, Custom);
160  }
161
162  // First set operation action for all vector types to expand. Then we
163  // will selectively turn on ones that can be effectively codegen'd.
164  for (unsigned VT = (unsigned)MVT::Vector + 1;
165       VT != (unsigned)MVT::LAST_VALUETYPE; VT++) {
166    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
167    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
168    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
169    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
170    setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand);
171    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
172
173    // FIXME: We don't support any BUILD_VECTOR's yet.  We should custom expand
174    // the ones we do, like splat(0.0) and splat(-0.0).
175    setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Expand);
176  }
177
178  if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
179    addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass);
180    addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass);
181
182    setOperationAction(ISD::ADD        , MVT::v4f32, Legal);
183    setOperationAction(ISD::SUB        , MVT::v4f32, Legal);
184    setOperationAction(ISD::MUL        , MVT::v4f32, Legal);
185    setOperationAction(ISD::LOAD       , MVT::v4f32, Legal);
186    setOperationAction(ISD::ADD        , MVT::v4i32, Legal);
187    setOperationAction(ISD::LOAD       , MVT::v4i32, Legal);
188    setOperationAction(ISD::LOAD       , MVT::v16i8, Legal);
189
190    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i32, Custom);
191    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
192
193    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
194    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
195  }
196
197  setSetCCResultContents(ZeroOrOneSetCCResult);
198  setStackPointerRegisterToSaveRestore(PPC::R1);
199
200  // We have target-specific dag combine patterns for the following nodes:
201  setTargetDAGCombine(ISD::SINT_TO_FP);
202  setTargetDAGCombine(ISD::STORE);
203
204  computeRegisterProperties();
205}
206
207const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
208  switch (Opcode) {
209  default: return 0;
210  case PPCISD::FSEL:          return "PPCISD::FSEL";
211  case PPCISD::FCFID:         return "PPCISD::FCFID";
212  case PPCISD::FCTIDZ:        return "PPCISD::FCTIDZ";
213  case PPCISD::FCTIWZ:        return "PPCISD::FCTIWZ";
214  case PPCISD::STFIWX:        return "PPCISD::STFIWX";
215  case PPCISD::VMADDFP:       return "PPCISD::VMADDFP";
216  case PPCISD::VNMSUBFP:      return "PPCISD::VNMSUBFP";
217  case PPCISD::LVE_X:         return "PPCISD::LVE_X";
218  case PPCISD::VPERM:         return "PPCISD::VPERM";
219  case PPCISD::Hi:            return "PPCISD::Hi";
220  case PPCISD::Lo:            return "PPCISD::Lo";
221  case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
222  case PPCISD::SRL:           return "PPCISD::SRL";
223  case PPCISD::SRA:           return "PPCISD::SRA";
224  case PPCISD::SHL:           return "PPCISD::SHL";
225  case PPCISD::CALL:          return "PPCISD::CALL";
226  case PPCISD::RET_FLAG:      return "PPCISD::RET_FLAG";
227  }
228}
229
230/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
231static bool isFloatingPointZero(SDOperand Op) {
232  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
233    return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0);
234  else if (Op.getOpcode() == ISD::EXTLOAD || Op.getOpcode() == ISD::LOAD) {
235    // Maybe this has already been legalized into the constant pool?
236    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
237      if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->get()))
238        return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0);
239  }
240  return false;
241}
242
243
244/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
245/// specifies a splat of a single element that is suitable for input to
246/// VSPLTB/VSPLTH/VSPLTW.
247bool PPC::isSplatShuffleMask(SDNode *N) {
248  assert(N->getOpcode() == ISD::BUILD_VECTOR);
249
250  // We can only splat 8-bit, 16-bit, and 32-bit quantities.
251  if (N->getNumOperands() != 4 && N->getNumOperands() != 8 &&
252      N->getNumOperands() != 16)
253    return false;
254
255  // This is a splat operation if each element of the permute is the same, and
256  // if the value doesn't reference the second vector.
257  SDOperand Elt = N->getOperand(0);
258  assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
259  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) {
260    assert(isa<ConstantSDNode>(N->getOperand(i)) &&
261           "Invalid VECTOR_SHUFFLE mask!");
262    if (N->getOperand(i) != Elt) return false;
263  }
264
265  // Make sure it is a splat of the first vector operand.
266  return cast<ConstantSDNode>(Elt)->getValue() < N->getNumOperands();
267}
268
269/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
270/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
271unsigned PPC::getVSPLTImmediate(SDNode *N) {
272  assert(isSplatShuffleMask(N));
273  return cast<ConstantSDNode>(N->getOperand(0))->getValue();
274}
275
276
277/// LowerOperation - Provide custom lowering hooks for some operations.
278///
279SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
280  switch (Op.getOpcode()) {
281  default: assert(0 && "Wasn't expecting to be able to lower this!");
282  case ISD::FP_TO_SINT: {
283    assert(MVT::isFloatingPoint(Op.getOperand(0).getValueType()));
284    SDOperand Src = Op.getOperand(0);
285    if (Src.getValueType() == MVT::f32)
286      Src = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Src);
287
288    SDOperand Tmp;
289    switch (Op.getValueType()) {
290    default: assert(0 && "Unhandled FP_TO_SINT type in custom expander!");
291    case MVT::i32:
292      Tmp = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Src);
293      break;
294    case MVT::i64:
295      Tmp = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Src);
296      break;
297    }
298
299    // Convert the FP value to an int value through memory.
300    SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::i64, Tmp);
301    if (Op.getValueType() == MVT::i32)
302      Bits = DAG.getNode(ISD::TRUNCATE, MVT::i32, Bits);
303    return Bits;
304  }
305  case ISD::SINT_TO_FP: {
306    assert(MVT::i64 == Op.getOperand(0).getValueType() &&
307           "Unhandled SINT_TO_FP type in custom expander!");
308    SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0));
309    SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Bits);
310    if (MVT::f32 == Op.getValueType())
311      FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP);
312    return FP;
313  }
314  case ISD::SELECT_CC: {
315    // Turn FP only select_cc's into fsel instructions.
316    if (!MVT::isFloatingPoint(Op.getOperand(0).getValueType()) ||
317        !MVT::isFloatingPoint(Op.getOperand(2).getValueType()))
318      break;
319
320    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
321
322    // Cannot handle SETEQ/SETNE.
323    if (CC == ISD::SETEQ || CC == ISD::SETNE) break;
324
325    MVT::ValueType ResVT = Op.getValueType();
326    MVT::ValueType CmpVT = Op.getOperand(0).getValueType();
327    SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);
328    SDOperand TV  = Op.getOperand(2), FV  = Op.getOperand(3);
329
330    // If the RHS of the comparison is a 0.0, we don't need to do the
331    // subtraction at all.
332    if (isFloatingPointZero(RHS))
333      switch (CC) {
334      default: break;       // SETUO etc aren't handled by fsel.
335      case ISD::SETULT:
336      case ISD::SETLT:
337        std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
338      case ISD::SETUGE:
339      case ISD::SETGE:
340        if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
341          LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
342        return DAG.getNode(PPCISD::FSEL, ResVT, LHS, TV, FV);
343      case ISD::SETUGT:
344      case ISD::SETGT:
345        std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
346      case ISD::SETULE:
347      case ISD::SETLE:
348        if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
349          LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
350        return DAG.getNode(PPCISD::FSEL, ResVT,
351                           DAG.getNode(ISD::FNEG, MVT::f64, LHS), TV, FV);
352      }
353
354    SDOperand Cmp;
355    switch (CC) {
356    default: break;       // SETUO etc aren't handled by fsel.
357    case ISD::SETULT:
358    case ISD::SETLT:
359      Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
360      if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
361        Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
362      return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
363    case ISD::SETUGE:
364    case ISD::SETGE:
365      Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
366      if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
367        Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
368      return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
369    case ISD::SETUGT:
370    case ISD::SETGT:
371      Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
372      if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
373        Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
374      return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
375    case ISD::SETULE:
376    case ISD::SETLE:
377      Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
378      if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
379        Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
380      return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
381    }
382    break;
383  }
384  case ISD::SHL: {
385    assert(Op.getValueType() == MVT::i64 &&
386           Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SHL!");
387    // The generic code does a fine job expanding shift by a constant.
388    if (isa<ConstantSDNode>(Op.getOperand(1))) break;
389
390    // Otherwise, expand into a bunch of logical ops.  Note that these ops
391    // depend on the PPC behavior for oversized shift amounts.
392    SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
393                               DAG.getConstant(0, MVT::i32));
394    SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
395                               DAG.getConstant(1, MVT::i32));
396    SDOperand Amt = Op.getOperand(1);
397
398    SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
399                                 DAG.getConstant(32, MVT::i32), Amt);
400    SDOperand Tmp2 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Amt);
401    SDOperand Tmp3 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Tmp1);
402    SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
403    SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
404                                 DAG.getConstant(-32U, MVT::i32));
405    SDOperand Tmp6 = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Tmp5);
406    SDOperand OutHi = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6);
407    SDOperand OutLo = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Amt);
408    return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
409  }
410  case ISD::SRL: {
411    assert(Op.getValueType() == MVT::i64 &&
412           Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SHL!");
413    // The generic code does a fine job expanding shift by a constant.
414    if (isa<ConstantSDNode>(Op.getOperand(1))) break;
415
416    // Otherwise, expand into a bunch of logical ops.  Note that these ops
417    // depend on the PPC behavior for oversized shift amounts.
418    SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
419                               DAG.getConstant(0, MVT::i32));
420    SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
421                               DAG.getConstant(1, MVT::i32));
422    SDOperand Amt = Op.getOperand(1);
423
424    SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
425                                 DAG.getConstant(32, MVT::i32), Amt);
426    SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt);
427    SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1);
428    SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
429    SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
430                                 DAG.getConstant(-32U, MVT::i32));
431    SDOperand Tmp6 = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Tmp5);
432    SDOperand OutLo = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6);
433    SDOperand OutHi = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Amt);
434    return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
435  }
436  case ISD::SRA: {
437    assert(Op.getValueType() == MVT::i64 &&
438           Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SRA!");
439    // The generic code does a fine job expanding shift by a constant.
440    if (isa<ConstantSDNode>(Op.getOperand(1))) break;
441
442    // Otherwise, expand into a bunch of logical ops, followed by a select_cc.
443    SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
444                               DAG.getConstant(0, MVT::i32));
445    SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
446                               DAG.getConstant(1, MVT::i32));
447    SDOperand Amt = Op.getOperand(1);
448
449    SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
450                                 DAG.getConstant(32, MVT::i32), Amt);
451    SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt);
452    SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1);
453    SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
454    SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
455                                 DAG.getConstant(-32U, MVT::i32));
456    SDOperand Tmp6 = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Tmp5);
457    SDOperand OutHi = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Amt);
458    SDOperand OutLo = DAG.getSelectCC(Tmp5, DAG.getConstant(0, MVT::i32),
459                                      Tmp4, Tmp6, ISD::SETLE);
460    return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
461  }
462  case ISD::ConstantPool: {
463    ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
464    Constant *C = CP->get();
465    SDOperand CPI = DAG.getTargetConstantPool(C, MVT::i32, CP->getAlignment());
466    SDOperand Zero = DAG.getConstant(0, MVT::i32);
467
468    if (getTargetMachine().getRelocationModel() == Reloc::Static) {
469      // Generate non-pic code that has direct accesses to the constant pool.
470      // The address of the global is just (hi(&g)+lo(&g)).
471      SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, CPI, Zero);
472      SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, CPI, Zero);
473      return DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
474    }
475
476    // Only lower ConstantPool on Darwin.
477    if (!getTargetMachine().getSubtarget<PPCSubtarget>().isDarwin()) break;
478    SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, CPI, Zero);
479    if (getTargetMachine().getRelocationModel() == Reloc::PIC) {
480      // With PIC, the first instruction is actually "GR+hi(&G)".
481      Hi = DAG.getNode(ISD::ADD, MVT::i32,
482                       DAG.getNode(PPCISD::GlobalBaseReg, MVT::i32), Hi);
483    }
484
485    SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, CPI, Zero);
486    Lo = DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
487    return Lo;
488  }
489  case ISD::GlobalAddress: {
490    GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
491    GlobalValue *GV = GSDN->getGlobal();
492    SDOperand GA = DAG.getTargetGlobalAddress(GV, MVT::i32, GSDN->getOffset());
493    SDOperand Zero = DAG.getConstant(0, MVT::i32);
494
495    if (getTargetMachine().getRelocationModel() == Reloc::Static) {
496      // Generate non-pic code that has direct accesses to globals.
497      // The address of the global is just (hi(&g)+lo(&g)).
498      SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, GA, Zero);
499      SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, GA, Zero);
500      return DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
501    }
502
503    // Only lower GlobalAddress on Darwin.
504    if (!getTargetMachine().getSubtarget<PPCSubtarget>().isDarwin()) break;
505
506    SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, GA, Zero);
507    if (getTargetMachine().getRelocationModel() == Reloc::PIC) {
508      // With PIC, the first instruction is actually "GR+hi(&G)".
509      Hi = DAG.getNode(ISD::ADD, MVT::i32,
510                       DAG.getNode(PPCISD::GlobalBaseReg, MVT::i32), Hi);
511    }
512
513    SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, GA, Zero);
514    Lo = DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
515
516    if (!GV->hasWeakLinkage() && !GV->hasLinkOnceLinkage() &&
517        (!GV->isExternal() || GV->hasNotBeenReadFromBytecode()))
518      return Lo;
519
520    // If the global is weak or external, we have to go through the lazy
521    // resolution stub.
522    return DAG.getLoad(MVT::i32, DAG.getEntryNode(), Lo, DAG.getSrcValue(0));
523  }
524  case ISD::SETCC: {
525    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
526
527    // If we're comparing for equality to zero, expose the fact that this is
528    // implented as a ctlz/srl pair on ppc, so that the dag combiner can
529    // fold the new nodes.
530    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
531      if (C->isNullValue() && CC == ISD::SETEQ) {
532        MVT::ValueType VT = Op.getOperand(0).getValueType();
533        SDOperand Zext = Op.getOperand(0);
534        if (VT < MVT::i32) {
535          VT = MVT::i32;
536          Zext = DAG.getNode(ISD::ZERO_EXTEND, VT, Op.getOperand(0));
537        }
538        unsigned Log2b = Log2_32(MVT::getSizeInBits(VT));
539        SDOperand Clz = DAG.getNode(ISD::CTLZ, VT, Zext);
540        SDOperand Scc = DAG.getNode(ISD::SRL, VT, Clz,
541                                    DAG.getConstant(Log2b, getShiftAmountTy()));
542        return DAG.getNode(ISD::TRUNCATE, getSetCCResultTy(), Scc);
543      }
544      // Leave comparisons against 0 and -1 alone for now, since they're usually
545      // optimized.  FIXME: revisit this when we can custom lower all setcc
546      // optimizations.
547      if (C->isAllOnesValue() || C->isNullValue())
548        break;
549    }
550
551    // If we have an integer seteq/setne, turn it into a compare against zero
552    // by subtracting the rhs from the lhs, which is faster than setting a
553    // condition register, reading it back out, and masking the correct bit.
554    MVT::ValueType LHSVT = Op.getOperand(0).getValueType();
555    if (MVT::isInteger(LHSVT) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
556      MVT::ValueType VT = Op.getValueType();
557      SDOperand Sub = DAG.getNode(ISD::SUB, LHSVT, Op.getOperand(0),
558                                  Op.getOperand(1));
559      return DAG.getSetCC(VT, Sub, DAG.getConstant(0, LHSVT), CC);
560    }
561    break;
562  }
563  case ISD::VASTART: {
564    // vastart just stores the address of the VarArgsFrameIndex slot into the
565    // memory location argument.
566    // FIXME: Replace MVT::i32 with PointerTy
567    SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
568    return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR,
569                       Op.getOperand(1), Op.getOperand(2));
570  }
571  case ISD::RET: {
572    SDOperand Copy;
573
574    switch(Op.getNumOperands()) {
575    default:
576      assert(0 && "Do not know how to return this many arguments!");
577      abort();
578    case 1:
579      return SDOperand(); // ret void is legal
580    case 2: {
581      MVT::ValueType ArgVT = Op.getOperand(1).getValueType();
582      unsigned ArgReg = MVT::isInteger(ArgVT) ? PPC::R3 : PPC::F1;
583      Copy = DAG.getCopyToReg(Op.getOperand(0), ArgReg, Op.getOperand(1),
584                              SDOperand());
585      break;
586    }
587    case 3:
588      Copy = DAG.getCopyToReg(Op.getOperand(0), PPC::R3, Op.getOperand(2),
589                              SDOperand());
590      Copy = DAG.getCopyToReg(Copy, PPC::R4, Op.getOperand(1),Copy.getValue(1));
591      break;
592    }
593    return DAG.getNode(PPCISD::RET_FLAG, MVT::Other, Copy, Copy.getValue(1));
594  }
595  case ISD::SCALAR_TO_VECTOR: {
596    // Create a stack slot that is 16-byte aligned.
597    MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
598    int FrameIdx = FrameInfo->CreateStackObject(16, 16);
599    SDOperand FIdx = DAG.getFrameIndex(FrameIdx, MVT::i32);
600
601    // Store the input value into Value#0 of the stack slot.
602    SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, DAG.getEntryNode(),
603                                  Op.getOperand(0), FIdx,DAG.getSrcValue(NULL));
604    // LVE_X it out.
605    return DAG.getNode(PPCISD::LVE_X, Op.getValueType(), Store, FIdx,
606                       DAG.getSrcValue(NULL));
607  }
608  case ISD::VECTOR_SHUFFLE: {
609    SDOperand V1 = Op.getOperand(0);
610    SDOperand V2 = Op.getOperand(1);
611    SDOperand PermMask = Op.getOperand(2);
612
613    // Cases that are handled by instructions that take permute immediates
614    // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
615    // selected by the instruction selector.
616    if (PPC::isSplatShuffleMask(PermMask.Val) && V2.getOpcode() == ISD::UNDEF)
617      break;
618
619    // TODO: Handle more cases, and also handle cases that are cheaper to do as
620    // multiple such instructions than as a constant pool load/vperm pair.
621
622    // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
623    // vector that will get spilled to the constant pool.
624    if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
625
626    // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
627    // that it is in input element units, not in bytes.  Convert now.
628    MVT::ValueType EltVT = MVT::getVectorBaseType(V1.getValueType());
629    unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
630
631    std::vector<SDOperand> ResultMask;
632    for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
633      unsigned SrcElt =cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
634
635      for (unsigned j = 0; j != BytesPerElement; ++j)
636        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
637                                             MVT::i8));
638    }
639
640    SDOperand VPermMask =DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, ResultMask);
641    return DAG.getNode(PPCISD::VPERM, V1.getValueType(), V1, V2, VPermMask);
642  }
643  }
644  return SDOperand();
645}
646
647std::vector<SDOperand>
648PPCTargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
649  //
650  // add beautiful description of PPC stack frame format, or at least some docs
651  //
652  MachineFunction &MF = DAG.getMachineFunction();
653  MachineFrameInfo *MFI = MF.getFrameInfo();
654  MachineBasicBlock& BB = MF.front();
655  SSARegMap *RegMap = MF.getSSARegMap();
656  std::vector<SDOperand> ArgValues;
657
658  unsigned ArgOffset = 24;
659  unsigned GPR_remaining = 8;
660  unsigned FPR_remaining = 13;
661  unsigned GPR_idx = 0, FPR_idx = 0;
662  static const unsigned GPR[] = {
663    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
664    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
665  };
666  static const unsigned FPR[] = {
667    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
668    PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
669  };
670
671  // Add DAG nodes to load the arguments...  On entry to a function on PPC,
672  // the arguments start at offset 24, although they are likely to be passed
673  // in registers.
674  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
675    SDOperand newroot, argt;
676    unsigned ObjSize;
677    bool needsLoad = false;
678    bool ArgLive = !I->use_empty();
679    MVT::ValueType ObjectVT = getValueType(I->getType());
680
681    switch (ObjectVT) {
682    default: assert(0 && "Unhandled argument type!");
683    case MVT::i1:
684    case MVT::i8:
685    case MVT::i16:
686    case MVT::i32:
687      ObjSize = 4;
688      if (!ArgLive) break;
689      if (GPR_remaining > 0) {
690        unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
691        MF.addLiveIn(GPR[GPR_idx], VReg);
692        argt = newroot = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32);
693        if (ObjectVT != MVT::i32) {
694          unsigned AssertOp = I->getType()->isSigned() ? ISD::AssertSext
695                                                       : ISD::AssertZext;
696          argt = DAG.getNode(AssertOp, MVT::i32, argt,
697                             DAG.getValueType(ObjectVT));
698          argt = DAG.getNode(ISD::TRUNCATE, ObjectVT, argt);
699        }
700      } else {
701        needsLoad = true;
702      }
703      break;
704    case MVT::i64:
705      ObjSize = 8;
706      if (!ArgLive) break;
707      if (GPR_remaining > 0) {
708        SDOperand argHi, argLo;
709        unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
710        MF.addLiveIn(GPR[GPR_idx], VReg);
711        argHi = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32);
712        // If we have two or more remaining argument registers, then both halves
713        // of the i64 can be sourced from there.  Otherwise, the lower half will
714        // have to come off the stack.  This can happen when an i64 is preceded
715        // by 28 bytes of arguments.
716        if (GPR_remaining > 1) {
717          unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
718          MF.addLiveIn(GPR[GPR_idx+1], VReg);
719          argLo = DAG.getCopyFromReg(argHi, VReg, MVT::i32);
720        } else {
721          int FI = MFI->CreateFixedObject(4, ArgOffset+4);
722          SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
723          argLo = DAG.getLoad(MVT::i32, DAG.getEntryNode(), FIN,
724                              DAG.getSrcValue(NULL));
725        }
726        // Build the outgoing arg thingy
727        argt = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, argLo, argHi);
728        newroot = argLo;
729      } else {
730        needsLoad = true;
731      }
732      break;
733    case MVT::f32:
734    case MVT::f64:
735      ObjSize = (ObjectVT == MVT::f64) ? 8 : 4;
736      if (!ArgLive) {
737        if (FPR_remaining > 0) {
738          --FPR_remaining;
739          ++FPR_idx;
740        }
741        break;
742      }
743      if (FPR_remaining > 0) {
744        unsigned VReg;
745        if (ObjectVT == MVT::f32)
746          VReg = RegMap->createVirtualRegister(&PPC::F4RCRegClass);
747        else
748          VReg = RegMap->createVirtualRegister(&PPC::F8RCRegClass);
749        MF.addLiveIn(FPR[FPR_idx], VReg);
750        argt = newroot = DAG.getCopyFromReg(DAG.getRoot(), VReg, ObjectVT);
751        --FPR_remaining;
752        ++FPR_idx;
753      } else {
754        needsLoad = true;
755      }
756      break;
757    }
758
759    // We need to load the argument to a virtual register if we determined above
760    // that we ran out of physical registers of the appropriate type
761    if (needsLoad) {
762      unsigned SubregOffset = 0;
763      if (ObjectVT == MVT::i8 || ObjectVT == MVT::i1) SubregOffset = 3;
764      if (ObjectVT == MVT::i16) SubregOffset = 2;
765      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
766      SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
767      FIN = DAG.getNode(ISD::ADD, MVT::i32, FIN,
768                        DAG.getConstant(SubregOffset, MVT::i32));
769      argt = newroot = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN,
770                                   DAG.getSrcValue(NULL));
771    }
772
773    // Every 4 bytes of argument space consumes one of the GPRs available for
774    // argument passing.
775    if (GPR_remaining > 0) {
776      unsigned delta = (GPR_remaining > 1 && ObjSize == 8) ? 2 : 1;
777      GPR_remaining -= delta;
778      GPR_idx += delta;
779    }
780    ArgOffset += ObjSize;
781    if (newroot.Val)
782      DAG.setRoot(newroot.getValue(1));
783
784    ArgValues.push_back(argt);
785  }
786
787  // If the function takes variable number of arguments, make a frame index for
788  // the start of the first vararg value... for expansion of llvm.va_start.
789  if (F.isVarArg()) {
790    VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset);
791    SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
792    // If this function is vararg, store any remaining integer argument regs
793    // to their spots on the stack so that they may be loaded by deferencing the
794    // result of va_next.
795    std::vector<SDOperand> MemOps;
796    for (; GPR_remaining > 0; --GPR_remaining, ++GPR_idx) {
797      unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
798      MF.addLiveIn(GPR[GPR_idx], VReg);
799      SDOperand Val = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32);
800      SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Val.getValue(1),
801                                    Val, FIN, DAG.getSrcValue(NULL));
802      MemOps.push_back(Store);
803      // Increment the address by four for the next argument to store
804      SDOperand PtrOff = DAG.getConstant(4, getPointerTy());
805      FIN = DAG.getNode(ISD::ADD, MVT::i32, FIN, PtrOff);
806    }
807    if (!MemOps.empty()) {
808      MemOps.push_back(DAG.getRoot());
809      DAG.setRoot(DAG.getNode(ISD::TokenFactor, MVT::Other, MemOps));
810    }
811  }
812
813  // Finally, inform the code generator which regs we return values in.
814  switch (getValueType(F.getReturnType())) {
815    default: assert(0 && "Unknown type!");
816    case MVT::isVoid: break;
817    case MVT::i1:
818    case MVT::i8:
819    case MVT::i16:
820    case MVT::i32:
821      MF.addLiveOut(PPC::R3);
822      break;
823    case MVT::i64:
824      MF.addLiveOut(PPC::R3);
825      MF.addLiveOut(PPC::R4);
826      break;
827    case MVT::f32:
828    case MVT::f64:
829      MF.addLiveOut(PPC::F1);
830      break;
831  }
832
833  return ArgValues;
834}
835
/// LowerCallTo - Build the DAG for a call: a CALLSEQ_START node, the stores
/// and register operands that carry the outgoing arguments, the PPCISD::CALL
/// node itself, and a closing CALLSEQ_END node.
///
/// \param Chain     incoming chain; the call sequence is threaded onto it.
/// \param RetTy     IR return type of the callee.
/// \param isVarArg  when true, floating point arguments passed in FPRs are
///                  also stored to the stack and reloaded as integer operands.
/// \param CallingConv, isTailCall  not consulted by this implementation.
/// \param Callee    call target; a GlobalAddress is rewritten to a
///                  TargetGlobalAddress.
/// \param Args      (value, type) pairs; the values of i1/i8/i16 entries are
///                  replaced in place by their 32-bit extensions.
/// \returns the pair (result value, outgoing chain).  For a void callee the
///          first member is value #0 of the call node, which is its chain.
std::pair<SDOperand, SDOperand>
PPCTargetLowering::LowerCallTo(SDOperand Chain,
                               const Type *RetTy, bool isVarArg,
                               unsigned CallingConv, bool isTailCall,
                               SDOperand Callee, ArgListTy &Args,
                               SelectionDAG &DAG) {
  // args_to_use will accumulate outgoing args for the PPCISD::CALL case in
  // SelectExpr to use to put the arguments in the appropriate registers.
  std::vector<SDOperand> args_to_use;

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, and parameter passing area.
  unsigned NumBytes = 24;

  if (Args.empty()) {
    // No arguments: only the 24 bytes counted above are requested.
    Chain = DAG.getCALLSEQ_START(Chain,
                                 DAG.getConstant(NumBytes, getPointerTy()));
  } else {
    // First pass: add up the argument space, 4 or 8 bytes per argument.
    for (unsigned i = 0, e = Args.size(); i != e; ++i) {
      switch (getValueType(Args[i].second)) {
      default: assert(0 && "Unknown value type!");
      case MVT::i1:
      case MVT::i8:
      case MVT::i16:
      case MVT::i32:
      case MVT::f32:
        NumBytes += 4;
        break;
      case MVT::i64:
      case MVT::f64:
        NumBytes += 8;
        break;
      }
    }

    // Just to be safe, we'll always reserve the full 24 bytes of linkage area
    // plus 32 bytes of argument space in case any called code gets funky on us.
    // (Required by ABI to support var arg)
    if (NumBytes < 56) NumBytes = 56;

    // Adjust the stack pointer for the new arguments...
    // These operations are automatically eliminated by the prolog/epilog pass
    Chain = DAG.getCALLSEQ_START(Chain,
                                 DAG.getConstant(NumBytes, getPointerTy()));

    // Set up a copy of the stack pointer for use loading and storing any
    // arguments that may not fit in the registers available for argument
    // passing.
    SDOperand StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

    // Figure out which arguments are going to go in registers, and which in
    // memory.  Also, if this is a vararg function, floating point operations
    // must be stored to our stack, and loaded into integer regs as well, if
    // any integer regs are available for argument passing.
    unsigned ArgOffset = 24;
    unsigned GPR_remaining = 8;
    unsigned FPR_remaining = 13;

    // Chains of every store (and vararg reload) emitted below; they are joined
    // by a single TokenFactor after the loop.
    std::vector<SDOperand> MemOps;
    for (unsigned i = 0, e = Args.size(); i != e; ++i) {
      // PtrOff will be used to store the current argument to the stack if a
      // register cannot be found for it.
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
      MVT::ValueType ArgVT = getValueType(Args[i].second);

      switch (ArgVT) {
      default: assert(0 && "Unexpected ValueType for argument!");
      case MVT::i1:
      case MVT::i8:
      case MVT::i16:
        // Promote the integer to 32 bits.  If the input type is signed use a
        // sign extend, otherwise use a zero extend.
        if (Args[i].second->isSigned())
          Args[i].first =DAG.getNode(ISD::SIGN_EXTEND, MVT::i32, Args[i].first);
        else
          Args[i].first =DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Args[i].first);
        // FALL THROUGH
      case MVT::i32:
        // Pass in a GPR while one is free, otherwise store to the stack slot.
        if (GPR_remaining > 0) {
          args_to_use.push_back(Args[i].first);
          --GPR_remaining;
        } else {
          MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                       Args[i].first, PtrOff,
                                       DAG.getSrcValue(NULL)));
        }
        ArgOffset += 4;
        break;
      case MVT::i64:
        // If we have one free GPR left, we can place the upper half of the i64
        // in it, and store the other half to the stack.  If we have two or more
        // free GPRs, then we can pass both halves of the i64 in registers.
        if (GPR_remaining > 0) {
          SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
                                     Args[i].first, DAG.getConstant(1, MVT::i32));
          SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
                                     Args[i].first, DAG.getConstant(0, MVT::i32));
          args_to_use.push_back(Hi);
          --GPR_remaining;
          if (GPR_remaining > 0) {
            args_to_use.push_back(Lo);
            --GPR_remaining;
          } else {
            // The low half goes in the second word of the argument's slot.
            SDOperand ConstFour = DAG.getConstant(4, getPointerTy());
            PtrOff = DAG.getNode(ISD::ADD, MVT::i32, PtrOff, ConstFour);
            MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                         Lo, PtrOff, DAG.getSrcValue(NULL)));
          }
        } else {
          // No GPR left: the whole 64-bit value is stored to the stack.
          MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                       Args[i].first, PtrOff,
                                       DAG.getSrcValue(NULL)));
        }
        ArgOffset += 8;
        break;
      case MVT::f32:
      case MVT::f64:
        if (FPR_remaining > 0) {
          args_to_use.push_back(Args[i].first);
          --FPR_remaining;
          if (isVarArg) {
            // Store the FP value to its stack slot so that its bits can be
            // reloaded as integers below.
            SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                          Args[i].first, PtrOff,
                                          DAG.getSrcValue(NULL));
            MemOps.push_back(Store);
            // Float varargs are always shadowed in available integer registers
            if (GPR_remaining > 0) {
              SDOperand Load = DAG.getLoad(MVT::i32, Store, PtrOff,
                                           DAG.getSrcValue(NULL));
              MemOps.push_back(Load.getValue(1));
              args_to_use.push_back(Load);
              --GPR_remaining;
            }
            // An f64 shadows a second GPR with the word at offset 4.
            if (GPR_remaining > 0 && MVT::f64 == ArgVT) {
              SDOperand ConstFour = DAG.getConstant(4, getPointerTy());
              PtrOff = DAG.getNode(ISD::ADD, MVT::i32, PtrOff, ConstFour);
              SDOperand Load = DAG.getLoad(MVT::i32, Store, PtrOff,
                                           DAG.getSrcValue(NULL));
              MemOps.push_back(Load.getValue(1));
              args_to_use.push_back(Load);
              --GPR_remaining;
            }
          } else {
            // If we have any FPRs remaining, we may also have GPRs remaining.
            // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
            // GPRs.
            // UNDEF placeholders keep later integer operands in position.
            if (GPR_remaining > 0) {
              args_to_use.push_back(DAG.getNode(ISD::UNDEF, MVT::i32));
              --GPR_remaining;
            }
            if (GPR_remaining > 0 && MVT::f64 == ArgVT) {
              args_to_use.push_back(DAG.getNode(ISD::UNDEF, MVT::i32));
              --GPR_remaining;
            }
          }
        } else {
          // Out of FPRs: the value is passed on the stack only.
          MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                       Args[i].first, PtrOff,
                                       DAG.getSrcValue(NULL)));
        }
        ArgOffset += (ArgVT == MVT::f32) ? 4 : 8;
        break;
      }
    }
    // Make the call depend on every memory operation emitted above.
    if (!MemOps.empty())
      Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, MemOps);
  }

  // Work out the result types of the call node: the (possibly promoted or
  // split) return value followed by the chain.
  std::vector<MVT::ValueType> RetVals;
  MVT::ValueType RetTyVT = getValueType(RetTy);
  MVT::ValueType ActualRetTyVT = RetTyVT;
  if (RetTyVT >= MVT::i1 && RetTyVT <= MVT::i16)
    ActualRetTyVT = MVT::i32;   // Promote result to i32.

  if (RetTyVT == MVT::i64) {
    // An i64 result is produced as two i32 values.
    RetVals.push_back(MVT::i32);
    RetVals.push_back(MVT::i32);
  } else if (RetTyVT != MVT::isVoid) {
    RetVals.push_back(ActualRetTyVT);
  }
  RetVals.push_back(MVT::Other);

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32);

  // Operands of the call node: chain, callee, then the register arguments.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);
  Ops.insert(Ops.end(), args_to_use.begin(), args_to_use.end());
  SDOperand TheCall = DAG.getNode(PPCISD::CALL, RetVals, Ops);
  // The chain is always the last result of the call node.
  Chain = TheCall.getValue(TheCall.Val->getNumValues()-1);
  Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
                      DAG.getConstant(NumBytes, getPointerTy()));
  SDOperand RetVal = TheCall;

  // If the result is a small value, add a note so that we keep track of the
  // information about whether it is sign or zero extended.
  if (RetTyVT != ActualRetTyVT) {
    RetVal = DAG.getNode(RetTy->isSigned() ? ISD::AssertSext : ISD::AssertZext,
                         MVT::i32, RetVal, DAG.getValueType(RetTyVT));
    RetVal = DAG.getNode(ISD::TRUNCATE, RetTyVT, RetVal);
  } else if (RetTyVT == MVT::i64) {
    // Glue the two i32 results of the call back into a single i64.
    RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, RetVal, RetVal.getValue(1));
  }

  return std::make_pair(RetVal, Chain);
}
1046
1047MachineBasicBlock *
1048PPCTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
1049                                           MachineBasicBlock *BB) {
1050  assert((MI->getOpcode() == PPC::SELECT_CC_Int ||
1051          MI->getOpcode() == PPC::SELECT_CC_F4 ||
1052          MI->getOpcode() == PPC::SELECT_CC_F8) &&
1053         "Unexpected instr type to insert");
1054
1055  // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
1056  // control-flow pattern.  The incoming instruction knows the destination vreg
1057  // to set, the condition code register to branch on, the true/false values to
1058  // select between, and a branch opcode to use.
1059  const BasicBlock *LLVM_BB = BB->getBasicBlock();
1060  ilist<MachineBasicBlock>::iterator It = BB;
1061  ++It;
1062
1063  //  thisMBB:
1064  //  ...
1065  //   TrueVal = ...
1066  //   cmpTY ccX, r1, r2
1067  //   bCC copy1MBB
1068  //   fallthrough --> copy0MBB
1069  MachineBasicBlock *thisMBB = BB;
1070  MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
1071  MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
1072  BuildMI(BB, MI->getOperand(4).getImmedValue(), 2)
1073    .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
1074  MachineFunction *F = BB->getParent();
1075  F->getBasicBlockList().insert(It, copy0MBB);
1076  F->getBasicBlockList().insert(It, sinkMBB);
1077  // Update machine-CFG edges
1078  BB->addSuccessor(copy0MBB);
1079  BB->addSuccessor(sinkMBB);
1080
1081  //  copy0MBB:
1082  //   %FalseValue = ...
1083  //   # fallthrough to sinkMBB
1084  BB = copy0MBB;
1085
1086  // Update machine-CFG edges
1087  BB->addSuccessor(sinkMBB);
1088
1089  //  sinkMBB:
1090  //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
1091  //  ...
1092  BB = sinkMBB;
1093  BuildMI(BB, PPC::PHI, 4, MI->getOperand(0).getReg())
1094    .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
1095    .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
1096
1097  delete MI;   // The pseudo instruction is gone now.
1098  return BB;
1099}
1100
1101SDOperand PPCTargetLowering::PerformDAGCombine(SDNode *N,
1102                                               DAGCombinerInfo &DCI) const {
1103  TargetMachine &TM = getTargetMachine();
1104  SelectionDAG &DAG = DCI.DAG;
1105  switch (N->getOpcode()) {
1106  default: break;
1107  case ISD::SINT_TO_FP:
1108    if (TM.getSubtarget<PPCSubtarget>().is64Bit()) {
1109      // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
1110      // We allow the src/dst to be either f32/f64, but force the intermediate
1111      // type to be i64.
1112      if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT &&
1113          N->getOperand(0).getValueType() == MVT::i64) {
1114
1115        SDOperand Val = N->getOperand(0).getOperand(0);
1116        if (Val.getValueType() == MVT::f32) {
1117          Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
1118          DCI.AddToWorklist(Val.Val);
1119        }
1120
1121        Val = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Val);
1122        DCI.AddToWorklist(Val.Val);
1123        Val = DAG.getNode(PPCISD::FCFID, MVT::f64, Val);
1124        DCI.AddToWorklist(Val.Val);
1125        if (N->getValueType(0) == MVT::f32) {
1126          Val = DAG.getNode(ISD::FP_ROUND, MVT::f32, Val);
1127          DCI.AddToWorklist(Val.Val);
1128        }
1129        return Val;
1130      }
1131    }
1132    break;
1133  case ISD::STORE:
1134    // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
1135    if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
1136        N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
1137        N->getOperand(1).getValueType() == MVT::i32) {
1138      SDOperand Val = N->getOperand(1).getOperand(0);
1139      if (Val.getValueType() == MVT::f32) {
1140        Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
1141        DCI.AddToWorklist(Val.Val);
1142      }
1143      Val = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Val);
1144      DCI.AddToWorklist(Val.Val);
1145
1146      Val = DAG.getNode(PPCISD::STFIWX, MVT::Other, N->getOperand(0), Val,
1147                        N->getOperand(2), N->getOperand(3));
1148      DCI.AddToWorklist(Val.Val);
1149      return Val;
1150    }
1151    break;
1152  }
1153
1154  return SDOperand();
1155}
1156
1157/// getConstraintType - Given a constraint letter, return the type of
1158/// constraint it is for this target.
1159PPCTargetLowering::ConstraintType
1160PPCTargetLowering::getConstraintType(char ConstraintLetter) const {
1161  switch (ConstraintLetter) {
1162  default: break;
1163  case 'b':
1164  case 'r':
1165  case 'f':
1166  case 'v':
1167  case 'y':
1168    return C_RegisterClass;
1169  }
1170  return TargetLowering::getConstraintType(ConstraintLetter);
1171}
1172
1173
1174std::vector<unsigned> PPCTargetLowering::
1175getRegClassForInlineAsmConstraint(const std::string &Constraint,
1176                                  MVT::ValueType VT) const {
1177  if (Constraint.size() == 1) {
1178    switch (Constraint[0]) {      // GCC RS6000 Constraint Letters
1179    default: break;  // Unknown constriant letter
1180    case 'b':
1181      return make_vector<unsigned>(/*no R0*/ PPC::R1 , PPC::R2 , PPC::R3 ,
1182                                   PPC::R4 , PPC::R5 , PPC::R6 , PPC::R7 ,
1183                                   PPC::R8 , PPC::R9 , PPC::R10, PPC::R11,
1184                                   PPC::R12, PPC::R13, PPC::R14, PPC::R15,
1185                                   PPC::R16, PPC::R17, PPC::R18, PPC::R19,
1186                                   PPC::R20, PPC::R21, PPC::R22, PPC::R23,
1187                                   PPC::R24, PPC::R25, PPC::R26, PPC::R27,
1188                                   PPC::R28, PPC::R29, PPC::R30, PPC::R31,
1189                                   0);
1190    case 'r':
1191      return make_vector<unsigned>(PPC::R0 , PPC::R1 , PPC::R2 , PPC::R3 ,
1192                                   PPC::R4 , PPC::R5 , PPC::R6 , PPC::R7 ,
1193                                   PPC::R8 , PPC::R9 , PPC::R10, PPC::R11,
1194                                   PPC::R12, PPC::R13, PPC::R14, PPC::R15,
1195                                   PPC::R16, PPC::R17, PPC::R18, PPC::R19,
1196                                   PPC::R20, PPC::R21, PPC::R22, PPC::R23,
1197                                   PPC::R24, PPC::R25, PPC::R26, PPC::R27,
1198                                   PPC::R28, PPC::R29, PPC::R30, PPC::R31,
1199                                   0);
1200    case 'f':
1201      return make_vector<unsigned>(PPC::F0 , PPC::F1 , PPC::F2 , PPC::F3 ,
1202                                   PPC::F4 , PPC::F5 , PPC::F6 , PPC::F7 ,
1203                                   PPC::F8 , PPC::F9 , PPC::F10, PPC::F11,
1204                                   PPC::F12, PPC::F13, PPC::F14, PPC::F15,
1205                                   PPC::F16, PPC::F17, PPC::F18, PPC::F19,
1206                                   PPC::F20, PPC::F21, PPC::F22, PPC::F23,
1207                                   PPC::F24, PPC::F25, PPC::F26, PPC::F27,
1208                                   PPC::F28, PPC::F29, PPC::F30, PPC::F31,
1209                                   0);
1210    case 'v':
1211      return make_vector<unsigned>(PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 ,
1212                                   PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
1213                                   PPC::V8 , PPC::V9 , PPC::V10, PPC::V11,
1214                                   PPC::V12, PPC::V13, PPC::V14, PPC::V15,
1215                                   PPC::V16, PPC::V17, PPC::V18, PPC::V19,
1216                                   PPC::V20, PPC::V21, PPC::V22, PPC::V23,
1217                                   PPC::V24, PPC::V25, PPC::V26, PPC::V27,
1218                                   PPC::V28, PPC::V29, PPC::V30, PPC::V31,
1219                                   0);
1220    case 'y':
1221      return make_vector<unsigned>(PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3,
1222                                   PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7,
1223                                   0);
1224    }
1225  }
1226
1227  return std::vector<unsigned>();
1228}
1229
1230// isOperandValidForConstraint
1231bool PPCTargetLowering::
1232isOperandValidForConstraint(SDOperand Op, char Letter) {
1233  switch (Letter) {
1234  default: break;
1235  case 'I':
1236  case 'J':
1237  case 'K':
1238  case 'L':
1239  case 'M':
1240  case 'N':
1241  case 'O':
1242  case 'P': {
1243    if (!isa<ConstantSDNode>(Op)) return false;  // Must be an immediate.
1244    unsigned Value = cast<ConstantSDNode>(Op)->getValue();
1245    switch (Letter) {
1246    default: assert(0 && "Unknown constraint letter!");
1247    case 'I':  // "I" is a signed 16-bit constant.
1248      return (short)Value == (int)Value;
1249    case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
1250    case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
1251      return (short)Value == 0;
1252    case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
1253      return (Value >> 16) == 0;
1254    case 'M':  // "M" is a constant that is greater than 31.
1255      return Value > 31;
1256    case 'N':  // "N" is a positive constant that is an exact power of two.
1257      return (int)Value > 0 && isPowerOf2_32(Value);
1258    case 'O':  // "O" is the constant zero.
1259      return Value == 0;
1260    case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
1261      return (short)-Value == (int)-Value;
1262    }
1263    break;
1264  }
1265  }
1266
1267  // Handle standard constraint letters.
1268  return TargetLowering::isOperandValidForConstraint(Op, Letter);
1269}
1270
1271/// isLegalAddressImmediate - Return true if the integer value can be used
1272/// as the offset of the target addressing mode.
1273bool PPCTargetLowering::isLegalAddressImmediate(int64_t V) const {
1274  // PPC allows a sign-extended 16-bit immediate field.
1275  return (V > -(1 << 16) && V < (1 << 16)-1);
1276}
1277