PPCISelLowering.cpp revision f89437d049b4a3dff2b04e2635b45068db0a6b34
//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "PPCTargetMachine.h"
#include "PPCPerfectShuffle.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

PPCTargetLowering::PPCTargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {

  // Fold away setcc operations if possible.
  setSetCCIsExpensive();
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmpLongJmp(true);

  // Set up the register classes.
  addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
  addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
  addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);

  setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
  setOperationAction(ISD::ConstantFP, MVT::f32, Expand);

  // PowerPC has no intrinsics for these particular operations.
  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
  setOperationAction(ISD::MEMSET, MVT::Other, Expand);
  setOperationAction(ISD::MEMCPY, MVT::Other, Expand);

  // PowerPC has an i16 SEXTLOAD, but no i8 (or i1) SEXTLOAD.
  setOperationAction(ISD::SEXTLOAD, MVT::i1, Expand);
  setOperationAction(ISD::SEXTLOAD, MVT::i8, Expand);

  // PowerPC has no SREM/UREM instructions.
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod.
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // If the subtarget has no hardware square root, expand FSQRT.
  if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);
  }

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // PowerPC does not have BSWAP, CTPOP or CTTZ.
  setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
  setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
  setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
  setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
  setOperationAction(ISD::CTTZ , MVT::i64  , Expand);

  // PowerPC does not have ROTR.
  setOperationAction(ISD::ROTR, MVT::i32   , Expand);

  // PowerPC does not have a select instruction.
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit.
  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND, which requires a SetCC.
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
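  // For example, an f64 -> i32 FP_TO_SINT is selected to roughly this
  // sequence (a sketch, not the exact scheduled output):
  //   fctiwz f0, f1        ; convert in an FPR; result is an integer image
  //   stfd   f0, N(r1)     ; spill the 8-byte image to a stack slot
  //   lwz    r3, N+4(r1)   ; reload the low word as the i32 result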
110
111  // PowerPC does not have [U|S]INT_TO_FP
112  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
113  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
114
115  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
116  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
117  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand);
118  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand);
119
120  // PowerPC does not have truncstore for i1.
121  setOperationAction(ISD::TRUNCSTORE, MVT::i1, Promote);
122
123  // We cannot sextinreg(i1).  Expand to shifts.
124  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
125
126
127  // Support label based line numbers.
128  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
129  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
130  // FIXME - use subtarget debug flags
131  if (!TM.getSubtarget<PPCSubtarget>().isDarwin())
132    setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);
133
134  // We want to legalize GlobalAddress and ConstantPool nodes into the
135  // appropriate instructions to materialize the address.
136  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
137  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
138  setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
139  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
140  setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
141  setOperationAction(ISD::JumpTable,     MVT::i64, Custom);
142
143  // RET must be custom lowered, to meet ABI requirements
144  setOperationAction(ISD::RET               , MVT::Other, Custom);
145
146  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
147  setOperationAction(ISD::VASTART           , MVT::Other, Custom);
148
149  // Use the default implementation.
150  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
151  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
152  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
153  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
154  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
155  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
156
157  // We want to custom lower some of our intrinsics.
158  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
159
160  if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
161    // They also have instructions for converting between i64 and fp.
162    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
163    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
164
165    // FIXME: disable this lowered code.  This generates 64-bit register values,
166    // and we don't model the fact that the top part is clobbered by calls.  We
167    // need to flag these together so that the value isn't live across a call.
168    //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
169
170    // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
171    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
172  } else {
173    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
174    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
175  }
176
177  if (TM.getSubtarget<PPCSubtarget>().use64BitRegs()) {
178    // 64 bit PowerPC implementations can support i64 types directly
179    addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
180    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
181    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
182  } else {
183    // 32 bit PowerPC wants to expand i64 shifts itself.
184    setOperationAction(ISD::SHL, MVT::i64, Custom);
185    setOperationAction(ISD::SRL, MVT::i64, Custom);
186    setOperationAction(ISD::SRA, MVT::i64, Custom);
187  }
188
189  if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
190    // First set operation action for all vector types to expand. Then we
191    // will selectively turn on ones that can be effectively codegen'd.
192    for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
193         VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
194      // add/sub are legal for all supported vector VT's.
195      setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
196      setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
197
198      // We promote all shuffles to v16i8.
199      setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Promote);
200      AddPromotedToType (ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, MVT::v16i8);
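      // For example, a v4i32 shuffle is conceptually rewritten as the
      // equivalent v16i8 shuffle of the same bytes (each word index expands
      // to four consecutive byte indices), so only byte shuffles need to be
      // handled below.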

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND   , (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND   , (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::OR    , (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR    , (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::XOR   , (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR   , (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::LOAD  , (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD  , (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::STORE, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::STORE, (MVT::ValueType)VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Expand);

      setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Expand);
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM; others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    setOperationAction(ISD::AND   , MVT::v4i32, Legal);
    setOperationAction(ISD::OR    , MVT::v4i32, Legal);
    setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
    setOperationAction(ISD::STORE , MVT::v4i32, Legal);

    addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
  }

  setSetCCResultType(MVT::i32);
  setShiftAmountType(MVT::i32);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setStackPointerRegisterToSaveRestore(PPC::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);

  computeRegisterProperties();
}

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case PPCISD::FSEL:          return "PPCISD::FSEL";
  case PPCISD::FCFID:         return "PPCISD::FCFID";
  case PPCISD::FCTIDZ:        return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ:        return "PPCISD::FCTIWZ";
  case PPCISD::STFIWX:        return "PPCISD::STFIWX";
  case PPCISD::VMADDFP:       return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP:      return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM:         return "PPCISD::VPERM";
  case PPCISD::Hi:            return "PPCISD::Hi";
  case PPCISD::Lo:            return "PPCISD::Lo";
  case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL:           return "PPCISD::SRL";
  case PPCISD::SRA:           return "PPCISD::SRA";
  case PPCISD::SHL:           return "PPCISD::SHL";
  case PPCISD::EXTSW_32:      return "PPCISD::EXTSW_32";
  case PPCISD::STD_32:        return "PPCISD::STD_32";
  case PPCISD::CALL:          return "PPCISD::CALL";
  case PPCISD::MTCTR:         return "PPCISD::MTCTR";
  case PPCISD::BCTRL:         return "PPCISD::BCTRL";
  case PPCISD::RET_FLAG:      return "PPCISD::RET_FLAG";
  case PPCISD::MFCR:          return "PPCISD::MFCR";
  case PPCISD::VCMP:          return "PPCISD::VCMP";
  case PPCISD::VCMPo:         return "PPCISD::VCMPo";
  case PPCISD::COND_BRANCH:   return "PPCISD::COND_BRANCH";
  }
}

//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDOperand Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0);
  else if (Op.getOpcode() == ISD::EXTLOAD || Op.getOpcode() == ISD::LOAD) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->get()))
        return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0);
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(SDOperand Op, unsigned Val) {
  return Op.getOpcode() == ISD::UNDEF ||
         cast<ConstantSDNode>(Op)->getValue() == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
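/// For example, the two-input form packs the odd-numbered bytes of the
/// 32-byte <LHS,RHS> concatenation, i.e. the mask <1,3,5,...,29,31>.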
bool PPC::isVPKUHUMShuffleMask(SDNode *N, bool isUnary) {
  if (!isUnary) {
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getOperand(i),  i*2+1))
        return false;
  } else {
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getOperand(i),  i*2+1) ||
          !isConstantOrUndef(N->getOperand(i+8),  i*2+1))
        return false;
  }
  return true;
}

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
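/// For example, the two-input form keeps the low halfword of each word of
/// <LHS,RHS>, i.e. the byte mask <2,3, 6,7, 10,11, ..., 30,31>.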
bool PPC::isVPKUWUMShuffleMask(SDNode *N, bool isUnary) {
  if (!isUnary) {
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getOperand(i  ),  i*2+2) ||
          !isConstantOrUndef(N->getOperand(i+1),  i*2+3))
        return false;
  } else {
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getOperand(i  ),  i*2+2) ||
          !isConstantOrUndef(N->getOperand(i+1),  i*2+3) ||
          !isConstantOrUndef(N->getOperand(i+8),  i*2+2) ||
          !isConstantOrUndef(N->getOperand(i+9),  i*2+3))
        return false;
  }
  return true;
}

/// isVMerge - Common function, used to match vmrg* shuffles.
///
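/// For example, vmrglw corresponds to isVMerge(N, 4, 8, 24): alternating
/// 4-byte units starting at byte 8 of the LHS and byte 24 of the
/// concatenation, i.e. <8,9,10,11, 24,25,26,27, 12,13,14,15, 28,29,30,31>.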
static bool isVMerge(SDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getOperand(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getOperand(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
bool PPC::isVMRGLShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {
  if (!isUnary)
    return isVMerge(N, UnitSize, 8, 24);
  return isVMerge(N, UnitSize, 8, 8);
}

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
bool PPC::isVMRGHShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {
  if (!isUnary)
    return isVMerge(N, UnitSize, 0, 16);
  return isVMerge(N, UnitSize, 0, 0);
}

/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
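/// For example, the mask <3,4,5,...,18> is a vsldoi of the two inputs with a
/// shift amount of 3 bytes.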
int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");
  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && N->getOperand(i).getOpcode() == ISD::UNDEF; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = cast<ConstantSDNode>(N->getOperand(i))->getValue();
  if (ShiftAmt < i) return -1;
  ShiftAmt -= i;

  if (!isUnary) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(N->getOperand(i), ShiftAmt+i))
        return -1;
  } else {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(N->getOperand(i), (ShiftAmt+i) & 15))
        return -1;
  }

  return ShiftAmt;
}

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.
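/// For example, with EltSize == 4 the mask <8,9,10,11, 8,9,10,11, 8,9,10,11,
/// 8,9,10,11> splats word element 2, and matches vspltw with immediate 2.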
bool PPC::isSplatShuffleMask(SDNode *N, unsigned EltSize) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         N->getNumOperands() == 16 &&
         (EltSize == 1 || EltSize == 2 || EltSize == 4));

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = 0;
  SDOperand Elt = N->getOperand(0);
  if (ConstantSDNode *EltV = dyn_cast<ConstantSDNode>(Elt))
    ElementBase = EltV->getValue();
  else
    return false;   // FIXME: Handle UNDEF elements too!

  if (cast<ConstantSDNode>(Elt)->getValue() >= 16)
    return false;

  // Check that they are consecutive.
  for (unsigned i = 1; i != EltSize; ++i) {
    if (!isa<ConstantSDNode>(N->getOperand(i)) ||
        cast<ConstantSDNode>(N->getOperand(i))->getValue() != i+ElementBase)
      return false;
  }

  assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(N->getOperand(i)) &&
           "Invalid VECTOR_SHUFFLE mask!");
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getOperand(i+j) != N->getOperand(j))
        return false;
  }

  return true;
}

/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
  assert(isSplatShuffleMask(N, EltSize));
  return cast<ConstantSDNode>(N->getOperand(0))->getValue() / EltSize;
}

/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted.  The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
SDOperand PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  SDOperand OpVal(0, 0);

  // If ByteSize of the splat is bigger than the element size of the
  // build_vector, then we have a case where we are checking for a splat where
  // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
    SDOperand UniquedVals[4];
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    // See if all of the elements in the buildvector agree across.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
      // If the element isn't a constant, bail fully out.
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDOperand();

      if (UniquedVals[i&(Multiple-1)].Val == 0)
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
        return SDOperand();  // no match.
    }

    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    // either constant or undef values that are identical for each chunk.  See
    // if these chunks can form into a larger vspltis*.

    // Check to see if all of the leading entries are either 0 or -1.  If
    // neither, then this won't fit into the immediate field.
    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (UniquedVals[i].Val == 0) continue;  // Must have been undefs.

      LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
      LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
    }
    // Finally, check the least significant entry.
    if (LeadingZero) {
      if (UniquedVals[Multiple-1].Val == 0)
        return DAG.getTargetConstant(0, MVT::i32);  // 0,0,0,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getValue();
      if (Val < 16)
        return DAG.getTargetConstant(Val, MVT::i32);  // 0,0,0,4 -> vspltisw(4)
    }
    if (LeadingOnes) {
      if (UniquedVals[Multiple-1].Val == 0)
        return DAG.getTargetConstant(~0U, MVT::i32);  // -1,-1,-1,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSignExtended();
      if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
        return DAG.getTargetConstant(Val, MVT::i32);
    }

    return SDOperand();
  }

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.Val == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return SDOperand();
  }

  if (OpVal.Val == 0) return SDOperand();  // All UNDEF: use implicit def.

  unsigned ValSizeInBytes = 0;
  uint64_t Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
    Value = CN->getValue();
    ValSizeInBytes = MVT::getSizeInBits(CN->getValueType(0))/8;
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = FloatToBits(CN->getValue());
    ValSizeInBytes = 4;
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat.  The only case where we could fit the replicated bits into our
  // immediate field would be zero, and we prefer to use vxor for it.
  if (ValSizeInBytes < ByteSize) return SDOperand();

  // If the element value is larger than the splat value, cut it in half and
  // check to see if the two halves are equal.  Continue doing this until we
  // get to ByteSize.  This allows us to handle 0x01010101 as 0x01.
  while (ValSizeInBytes > ByteSize) {
    ValSizeInBytes >>= 1;

    // If the top half equals the bottom half, we're still ok.
    if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
         (Value                        & ((1 << (8*ValSizeInBytes))-1)))
      return SDOperand();
  }

  // Properly sign extend the value.
  int ShAmt = (4-ByteSize)*8;
  int MaskVal = ((int)Value << ShAmt) >> ShAmt;

  // If this is zero, don't match; zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDOperand();

  // Finally, if this value fits in a 5 bit sext field, return it.
  if (((MaskVal << (32-5)) >> (32-5)) == MaskVal)
    return DAG.getTargetConstant(MaskVal, MVT::i32);
  return SDOperand();
}

//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//

static SDOperand LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->get();
  SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDOperand Zero = DAG.getConstant(0, PtrVT);

  const TargetMachine &TM = DAG.getTarget();

  SDOperand Hi = DAG.getNode(PPCISD::Hi, PtrVT, CPI, Zero);
  SDOperand Lo = DAG.getNode(PPCISD::Lo, PtrVT, CPI, Zero);
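  // On Darwin the Hi/Lo pair is ultimately selected to something like this
  // (a sketch; LCPI0_0 is a hypothetical constant-pool label):
  //   lis  r2, ha16(LCPI0_0)
  //   la   r2, lo16(LCPI0_0)(r2)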

  // If this is a non-Darwin platform, we don't support non-static relocation
  // models yet.
  if (TM.getRelocationModel() == Reloc::Static ||
      !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    // Generate non-pic code that has direct accesses to the constant pool.
    // The address of the global is just (hi(&g)+lo(&g)).
    return DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
  }

  if (TM.getRelocationModel() == Reloc::PIC) {
    // With PIC, the first instruction is actually "GR+hi(&G)".
    Hi = DAG.getNode(ISD::ADD, PtrVT,
                     DAG.getNode(PPCISD::GlobalBaseReg, PtrVT), Hi);
  }

  Lo = DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
  return Lo;
}

static SDOperand LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDOperand Zero = DAG.getConstant(0, PtrVT);

  const TargetMachine &TM = DAG.getTarget();

  SDOperand Hi = DAG.getNode(PPCISD::Hi, PtrVT, JTI, Zero);
  SDOperand Lo = DAG.getNode(PPCISD::Lo, PtrVT, JTI, Zero);

  // If this is a non-Darwin platform, we don't support non-static relocation
  // models yet.
  if (TM.getRelocationModel() == Reloc::Static ||
      !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    // Generate non-pic code that has direct accesses to the jump table.
    // The address of the jump table is just (hi(&jt)+lo(&jt)).
    return DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
  }

  if (TM.getRelocationModel() == Reloc::PIC) {
    // With PIC, the first instruction is actually "GR+hi(&G)".
    Hi = DAG.getNode(ISD::ADD, PtrVT,
                     DAG.getNode(PPCISD::GlobalBaseReg, PtrVT), Hi);
  }

  Lo = DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
  return Lo;
}

static SDOperand LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  SDOperand Zero = DAG.getConstant(0, PtrVT);

  const TargetMachine &TM = DAG.getTarget();

  SDOperand Hi = DAG.getNode(PPCISD::Hi, PtrVT, GA, Zero);
  SDOperand Lo = DAG.getNode(PPCISD::Lo, PtrVT, GA, Zero);

  // If this is a non-Darwin platform, we don't support non-static relocation
  // models yet.
  if (TM.getRelocationModel() == Reloc::Static ||
      !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    // Generate non-pic code that has direct accesses to globals.
    // The address of the global is just (hi(&g)+lo(&g)).
    return DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
  }

  if (TM.getRelocationModel() == Reloc::PIC) {
    // With PIC, the first instruction is actually "GR+hi(&G)".
    Hi = DAG.getNode(ISD::ADD, PtrVT,
                     DAG.getNode(PPCISD::GlobalBaseReg, PtrVT), Hi);
  }

  Lo = DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);

  if (!GV->hasWeakLinkage() && !GV->hasLinkOnceLinkage() &&
      (!GV->isExternal() || GV->hasNotBeenReadFromBytecode()))
    return Lo;

  // If the global is weak or external, we have to go through the lazy
  // resolution stub.
  return DAG.getLoad(PtrVT, DAG.getEntryNode(), Lo, DAG.getSrcValue(0));
}

static SDOperand LowerSETCC(SDOperand Op, SelectionDAG &DAG) {
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();

  // If we're comparing for equality to zero, expose the fact that this is
  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
  // fold the new nodes.
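  // For an i32 input, for example, (seteq X, 0) becomes (srl (ctlz X), 5):
  // ctlz yields 32 only when X is zero, and 32 >> 5 == 1.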
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
    if (C->isNullValue() && CC == ISD::SETEQ) {
      MVT::ValueType VT = Op.getOperand(0).getValueType();
      SDOperand Zext = Op.getOperand(0);
      if (VT < MVT::i32) {
        VT = MVT::i32;
        Zext = DAG.getNode(ISD::ZERO_EXTEND, VT, Op.getOperand(0));
      }
      unsigned Log2b = Log2_32(MVT::getSizeInBits(VT));
      SDOperand Clz = DAG.getNode(ISD::CTLZ, VT, Zext);
      SDOperand Scc = DAG.getNode(ISD::SRL, VT, Clz,
                                  DAG.getConstant(Log2b, MVT::i32));
      return DAG.getNode(ISD::TRUNCATE, MVT::i32, Scc);
    }
    // Leave comparisons against 0 and -1 alone for now, since they're usually
    // optimized.  FIXME: revisit this when we can custom lower all setcc
    // optimizations.
    if (C->isAllOnesValue() || C->isNullValue())
      return SDOperand();
  }

  // If we have an integer seteq/setne, turn it into a compare against zero
  // by subtracting the rhs from the lhs, which is faster than setting a
  // condition register, reading it back out, and masking the correct bit.
  MVT::ValueType LHSVT = Op.getOperand(0).getValueType();
  if (MVT::isInteger(LHSVT) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    MVT::ValueType VT = Op.getValueType();
    SDOperand Sub = DAG.getNode(ISD::SUB, LHSVT, Op.getOperand(0),
                                Op.getOperand(1));
    return DAG.getSetCC(VT, Sub, DAG.getConstant(0, LHSVT), CC);
  }
  return SDOperand();
}

static SDOperand LowerVASTART(SDOperand Op, SelectionDAG &DAG,
                              unsigned VarArgsFrameIndex) {
  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
  return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR,
                     Op.getOperand(1), Op.getOperand(2));
}

static SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG,
                                       int &VarArgsFrameIndex) {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SSARegMap *RegMap = MF.getSSARegMap();
  std::vector<SDOperand> ArgValues;
  SDOperand Root = Op.getOperand(0);

  unsigned ArgOffset = 24;
  const unsigned Num_GPR_Regs = 8;
  const unsigned Num_FPR_Regs = 13;
  const unsigned Num_VR_Regs  = 12;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  static const unsigned GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const unsigned GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const unsigned FPR[] = {
    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
    PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
  };
  static const unsigned VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = PtrVT == MVT::i64;
  const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;

  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start at offset 24, although the
  // first ones are often in registers.
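  // (Offset 24 is the 32-bit linkage area the caller always allocates:
  // [SP][CR][LR][3 x unused], six pointer-sized slots.)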
  for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
    SDOperand ArgVal;
    bool needsLoad = false;
    MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;

    unsigned CurArgOffset = ArgOffset;
    switch (ObjectVT) {
    default: assert(0 && "Unhandled argument type!");
    case MVT::i32:
      // All int arguments reserve stack space.
      ArgOffset += isPPC64 ? 8 : 4;

      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
        MF.addLiveIn(GPR[GPR_idx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
        ++GPR_idx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i64:  // PPC64
      // All int arguments reserve stack space.
      ArgOffset += 8;

      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = RegMap->createVirtualRegister(&PPC::G8RCRegClass);
        MF.addLiveIn(GPR[GPR_idx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
        ++GPR_idx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      // All FP arguments reserve stack space.
      ArgOffset += ObjSize;

      // Every 4 bytes of argument space consumes one of the GPRs available for
      // argument passing.
      if (GPR_idx != Num_GPR_Regs) {
        ++GPR_idx;
        if (ObjSize == 8 && GPR_idx != Num_GPR_Regs)
          ++GPR_idx;
      }
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;
        if (ObjectVT == MVT::f32)
          VReg = RegMap->createVirtualRegister(&PPC::F4RCRegClass);
        else
          VReg = RegMap->createVirtualRegister(&PPC::F8RCRegClass);
        MF.addLiveIn(FPR[FPR_idx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
        ++FPR_idx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      // Note that vector arguments in registers don't reserve stack space.
      if (VR_idx != Num_VR_Regs) {
        unsigned VReg = RegMap->createVirtualRegister(&PPC::VRRCRegClass);
        MF.addLiveIn(VR[VR_idx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
        ++VR_idx;
      } else {
        // This should be simple, but requires getting 16-byte aligned stack
        // values.
        assert(0 && "Loading VR argument not implemented yet!");
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument from the stack if we determined above that
    // we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      // If the argument is actually used, emit a load from the right stack
      // slot.
      if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
        int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset);
        SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
        ArgVal = DAG.getLoad(ObjectVT, Root, FIN,
                             DAG.getSrcValue(NULL));
      } else {
        // Don't emit a dead load.
        ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
      }
    }

    ArgValues.push_back(ArgVal);
  }

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  if (isVarArg) {
    VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
                                               ArgOffset);
    SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_arg.
    std::vector<SDOperand> MemOps;
    for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
      MF.addLiveIn(GPR[GPR_idx], VReg);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
      SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Val.getValue(1),
                                    Val, FIN, DAG.getSrcValue(NULL));
      MemOps.push_back(Store);
      // Increment the address by the pointer size for the next argument to
      // store.
      SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
      FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other, MemOps);
  }

  ArgValues.push_back(Root);

  // Return the new list of results.
  std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
                                    Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVT, ArgValues);
}

/// isBLACompatibleAddress - Return the immediate to use if the specified
/// 32-bit value is representable in the immediate field of a BxA instruction.
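/// For example, an absolute callee address of 0x1000 passes the checks below
/// (it is 4-byte aligned and fits in the 26-bit sign-extended field) and is
/// encoded as the immediate 0x400, i.e. the address shifted right by two.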
static SDNode *isBLACompatibleAddress(SDOperand Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      (Addr << 6 >> 6) != Addr)
    return 0;  // Top 6 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
}


static SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Chain = Op.getOperand(0);
  unsigned CallingConv = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
  bool isVarArg       = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall     = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee    = Op.getOperand(4);
  unsigned NumOps     = (Op.getNumOperands() - 5) / 2;

  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = PtrVT == MVT::i64;
  unsigned PtrByteSize = isPPC64 ? 8 : 4;

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area and the parameter passing area.  We start with 24/48 bytes, which is
  // pre-reserved space for [SP][CR][LR][3 x unused].
  unsigned NumBytes = 6*PtrByteSize;

  // Add up all the space actually used.
  for (unsigned i = 0; i != NumOps; ++i)
    NumBytes += MVT::getSizeInBits(Op.getOperand(5+2*i).getValueType())/8;

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if it is
  // varargs.  Because we cannot tell if this is needed on the caller side, we
  // have to conservatively assume that it is needed.  As such, make sure we
  // have at least enough stack space for the caller to store the 8 GPRs.
  if (NumBytes < 6*PtrByteSize+8*PtrByteSize)
    NumBytes = 6*PtrByteSize+8*PtrByteSize;

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass.
  Chain = DAG.getCALLSEQ_START(Chain,
                               DAG.getConstant(NumBytes, PtrVT));

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDOperand StackPtr;
  if (isPPC64)
    StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
  else
    StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.  Also, if this is a vararg function, floating point arguments
  // must be stored to our stack, and loaded into integer regs as well, if
  // any integer regs are available for argument passing.
  unsigned ArgOffset = 6*PtrByteSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
  static const unsigned GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const unsigned GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const unsigned FPR[] = {
    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
    PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
  };
  static const unsigned VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };
  const unsigned NumGPRs = sizeof(GPR_32)/sizeof(GPR_32[0]);
  const unsigned NumFPRs = sizeof(FPR)/sizeof(FPR[0]);
  const unsigned NumVRs  = sizeof( VR)/sizeof( VR[0]);

  const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;

  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  std::vector<SDOperand> MemOpChains;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);

    // On PPC64, promote integers to 64-bit values.
    if (isPPC64 && Arg.getValueType() == MVT::i32) {
      unsigned ExtOp = ISD::ZERO_EXTEND;
      if (cast<ConstantSDNode>(Op.getOperand(5+2*i+1))->getValue())
        ExtOp = ISD::SIGN_EXTEND;
      Arg = DAG.getNode(ExtOp, MVT::i64, Arg);
    }

    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i32:
    case MVT::i64:
      if (GPR_idx != NumGPRs) {
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                          Arg, PtrOff, DAG.getSrcValue(NULL)));
      }
      ArgOffset += PtrByteSize;
      break;
    case MVT::f32:
    case MVT::f64:
      if (FPR_idx != NumFPRs) {
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

        if (isVarArg) {
          SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                        Arg, PtrOff,
                                        DAG.getSrcValue(NULL));
          MemOpChains.push_back(Store);

          // Float varargs are always shadowed in available integer registers.
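          // For example, a double vararg that lands in F1 on a 32-bit target
          // is also stored to its stack slot and reloaded into the next two
          // available GPRs (e.g. R3/R4), since the callee may read it through
          // va_arg as integers.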
          if (GPR_idx != NumGPRs) {
            SDOperand Load = DAG.getLoad(PtrVT, Store, PtrOff,
                                         DAG.getSrcValue(NULL));
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          }
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64) {
            SDOperand ConstFour = DAG.getConstant(4, PtrOff.getValueType());
            PtrOff = DAG.getNode(ISD::ADD, PtrVT, PtrOff, ConstFour);
            SDOperand Load = DAG.getLoad(PtrVT, Store, PtrOff,
                                         DAG.getSrcValue(NULL));
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          }
        } else {
          // If we have any FPRs remaining, we may also have GPRs remaining.
          // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
          // GPRs.
          if (GPR_idx != NumGPRs)
            ++GPR_idx;
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64)
            ++GPR_idx;
        }
      } else {
        MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                          Arg, PtrOff, DAG.getSrcValue(NULL)));
      }
      if (isPPC64)
        ArgOffset += 8;
      else
        ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      assert(!isVarArg && "Don't support passing vectors to varargs yet!");
      assert(VR_idx != NumVRs &&
             "Don't support passing more than 12 vector args yet!");
      RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
      break;
    }
  }
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, MemOpChains);

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.

  std::vector<SDOperand> Ops;
  unsigned CallOpc = PPCISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is), turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), Callee.getValueType());
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType());
  else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
    // If this is an absolute destination address, use the munged value.
    Callee = SDOperand(Dest, 0);
  else {
    // Otherwise, this is an indirect call.  We have to use a MTCTR/BCTRL pair
    // to do the call; we can't use PPCISD::CALL.
    Ops.push_back(Chain);
    Ops.push_back(Callee);

    if (InFlag.Val)
      Ops.push_back(InFlag);
    Chain = DAG.getNode(PPCISD::MTCTR, NodeTys, Ops);
    InFlag = Chain.getValue(1);

    // Copy the callee address into R12 on Darwin.
    Chain = DAG.getCopyToReg(Chain, PPC::R12, Callee, InFlag);
    InFlag = Chain.getValue(1);

    NodeTys.clear();
    NodeTys.push_back(MVT::Other);
    NodeTys.push_back(MVT::Flag);
    Ops.clear();
    Ops.push_back(Chain);
    CallOpc = PPCISD::BCTRL;
    Callee.Val = 0;
  }

  // If this is a direct call, pass the chain and the callee.
  if (Callee.Val) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  }

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);
  Chain = DAG.getNode(CallOpc, NodeTys, Ops);
  InFlag = Chain.getValue(1);

  std::vector<SDOperand> ResultVals;
  NodeTys.clear();

  // If the call has results, copy the values out of the ret val registers.
  switch (Op.Val->getValueType(0)) {
  default: assert(0 && "Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (Op.Val->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, PPC::R4, MVT::i32, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      Chain = DAG.getCopyFromReg(Chain, PPC::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      NodeTys.push_back(MVT::i32);
    } else {
      Chain = DAG.getCopyFromReg(Chain, PPC::R3, MVT::i32, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
    }
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, PPC::X3, MVT::i64, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i64);
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, PPC::F1, Op.Val->getValueType(0),
                               InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, PPC::V2, Op.Val->getValueType(0),
                               InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  }

  Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
                      DAG.getConstant(NumBytes, PtrVT));
  NodeTys.push_back(MVT::Other);

  // If the function returns void, just return the chain.
  if (ResultVals.empty())
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals.push_back(Chain);
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys, ResultVals);
  return Res.getValue(Op.ResNo);
}

static SDOperand LowerRET(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Copy;
  switch(Op.getNumOperands()) {
  default:
    assert(0 && "Do not know how to return this many arguments!");
    abort();
  case 1:
    return SDOperand(); // ret void is legal
  case 3: {
    MVT::ValueType ArgVT = Op.getOperand(1).getValueType();
    unsigned ArgReg;
    if (ArgVT == MVT::i32) {
      ArgReg = PPC::R3;
    } else if (ArgVT == MVT::i64) {
      ArgReg = PPC::X3;
    } else if (MVT::isFloatingPoint(ArgVT)) {
      ArgReg = PPC::F1;
    } else {
      assert(MVT::isVector(ArgVT));
      ArgReg = PPC::V2;
    }

    Copy = DAG.getCopyToReg(Op.getOperand(0), ArgReg, Op.getOperand(1),
                            SDOperand());

    // If we haven't noted that R3/F1 is live out, do so now.
    if (DAG.getMachineFunction().liveout_empty())
      DAG.getMachineFunction().addLiveOut(ArgReg);
    break;
  }
  case 5:
    Copy = DAG.getCopyToReg(Op.getOperand(0), PPC::R3, Op.getOperand(3),
                            SDOperand());
    Copy = DAG.getCopyToReg(Copy, PPC::R4, Op.getOperand(1),Copy.getValue(1));
    // If we haven't noted that R3 and R4 are live out, do so now.
    if (DAG.getMachineFunction().liveout_empty()) {
      DAG.getMachineFunction().addLiveOut(PPC::R3);
      DAG.getMachineFunction().addLiveOut(PPC::R4);
    }
    break;
  }
  return DAG.getNode(PPCISD::RET_FLAG, MVT::Other, Copy, Copy.getValue(1));
}

/// LowerSELECT_CC - Lower floating point select_cc's into the fsel instruction
/// when possible.
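/// The PPCISD::FSEL node selects its second operand when its first operand is
/// >= 0.0 and its third otherwise, so e.g. (select_cc setge, a, b, t, f)
/// becomes fsel(a-b, t, f) in the nonzero-RHS cases below.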
1268static SDOperand LowerSELECT_CC(SDOperand Op, SelectionDAG &DAG) {
1269  // Not FP? Not a fsel.
1270  if (!MVT::isFloatingPoint(Op.getOperand(0).getValueType()) ||
1271      !MVT::isFloatingPoint(Op.getOperand(2).getValueType()))
1272    return SDOperand();
1273
1274  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
1275
1276  // Cannot handle SETEQ/SETNE.
1277  if (CC == ISD::SETEQ || CC == ISD::SETNE) return SDOperand();
1278
1279  MVT::ValueType ResVT = Op.getValueType();
1280  MVT::ValueType CmpVT = Op.getOperand(0).getValueType();
1281  SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);
1282  SDOperand TV  = Op.getOperand(2), FV  = Op.getOperand(3);
1283
1284  // If the RHS of the comparison is a 0.0, we don't need to do the
1285  // subtraction at all.
1286  if (isFloatingPointZero(RHS))
1287    switch (CC) {
1288    default: break;       // SETUO etc aren't handled by fsel.
1289    case ISD::SETULT:
1290    case ISD::SETOLT:
1291    case ISD::SETLT:
1292      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
1293    case ISD::SETUGE:
1294    case ISD::SETOGE:
1295    case ISD::SETGE:
1296      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
1297        LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
1298      return DAG.getNode(PPCISD::FSEL, ResVT, LHS, TV, FV);
1299    case ISD::SETUGT:
1300    case ISD::SETOGT:
1301    case ISD::SETGT:
1302      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
1303    case ISD::SETULE:
1304    case ISD::SETOLE:
1305    case ISD::SETLE:
1306      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
1307        LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
1308      return DAG.getNode(PPCISD::FSEL, ResVT,
1309                         DAG.getNode(ISD::FNEG, MVT::f64, LHS), TV, FV);
1310    }
1311
1312  SDOperand Cmp;
1313  switch (CC) {
1314  default: break;       // SETUO etc aren't handled by fsel.
1315  case ISD::SETULT:
1316  case ISD::SETOLT:
1317  case ISD::SETLT:
1318    Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
1319    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
1320      Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
1321    return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
1322  case ISD::SETUGE:
1323  case ISD::SETOGE:
1324  case ISD::SETGE:
1325    Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
1326    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
1327      Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
1328    return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
1329  case ISD::SETUGT:
1330  case ISD::SETOGT:
1331  case ISD::SETGT:
1332    Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
1333    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
1334      Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
1335    return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
1336  case ISD::SETULE:
1337  case ISD::SETOLE:
1338  case ISD::SETLE:
1339    Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
1340    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
1341      Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
1342    return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
1343  }
1344  return SDOperand();
1345}
1346
1347static SDOperand LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
1348  assert(MVT::isFloatingPoint(Op.getOperand(0).getValueType()));
1349  SDOperand Src = Op.getOperand(0);
1350  if (Src.getValueType() == MVT::f32)
1351    Src = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Src);
1352
1353  SDOperand Tmp;
1354  switch (Op.getValueType()) {
1355  default: assert(0 && "Unhandled FP_TO_SINT type in custom expander!");
1356  case MVT::i32:
1357    Tmp = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Src);
1358    break;
1359  case MVT::i64:
1360    Tmp = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Src);
1361    break;
1362  }
1363
1364  // Convert the FP value to an int value through memory.
1365  SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::i64, Tmp);
1366  if (Op.getValueType() == MVT::i32)
1367    Bits = DAG.getNode(ISD::TRUNCATE, MVT::i32, Bits);
1368  return Bits;
1369}
1370
1371static SDOperand LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
1372  if (Op.getOperand(0).getValueType() == MVT::i64) {
1373    SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0));
1374    SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Bits);
1375    if (Op.getValueType() == MVT::f32)
1376      FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP);
1377    return FP;
1378  }
1379
1380  assert(Op.getOperand(0).getValueType() == MVT::i32 &&
1381         "Unhandled SINT_TO_FP type in custom expander!");
1382  // Since we only generate this in 64-bit mode, we can take advantage of
1383  // 64-bit registers.  In particular, sign extend the input value into the
1384  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
1385  // then lfd it and fcfid it.
1386  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
1387  int FrameIdx = FrameInfo->CreateStackObject(8, 8);
1388  SDOperand FIdx = DAG.getFrameIndex(FrameIdx, MVT::i32);
1389
1390  SDOperand Ext64 = DAG.getNode(PPCISD::EXTSW_32, MVT::i32,
1391                                Op.getOperand(0));
1392
1393  // STD the extended value into the stack slot.
1394  SDOperand Store = DAG.getNode(PPCISD::STD_32, MVT::Other,
1395                                DAG.getEntryNode(), Ext64, FIdx,
1396                                DAG.getSrcValue(NULL));
1397  // Load the value as a double.
1398  SDOperand Ld = DAG.getLoad(MVT::f64, Store, FIdx, DAG.getSrcValue(NULL));
1399
1400  // FCFID it and return it.
1401  SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Ld);
1402  if (Op.getValueType() == MVT::f32)
1403    FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP);
1404  return FP;
1405}
1406
1407static SDOperand LowerSHL(SDOperand Op, SelectionDAG &DAG,
1408                          MVT::ValueType PtrVT) {
1409  assert(Op.getValueType() == MVT::i64 &&
1410         Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SHL!");
1411  // The generic code does a fine job expanding shift by a constant.
1412  if (isa<ConstantSDNode>(Op.getOperand(1))) return SDOperand();
1413
1414  // Otherwise, expand into a bunch of logical ops.  Note that these ops
1415  // depend on the PPC behavior for oversized shift amounts.
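      // A sketch of the expansion below (PPCISD::SHL/SRL produce 0 when the
      // 6-bit shift amount is in [32,63], which is what makes the OR-merges
      // safe):
      //   OutHi = (Hi << Amt) | (Lo >> (32-Amt)) | (Lo << (Amt-32))
      //   OutLo = Lo << Amt
      // e.g. for Amt = 40, the first two OutHi terms are 0, leaving
      // OutHi = Lo << 8 and OutLo = 0.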
1416  SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
1417                             DAG.getConstant(0, PtrVT));
1418  SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
1419                             DAG.getConstant(1, PtrVT));
1420  SDOperand Amt = Op.getOperand(1);
1421
1422  SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
1423                               DAG.getConstant(32, MVT::i32), Amt);
1424  SDOperand Tmp2 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Amt);
1425  SDOperand Tmp3 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Tmp1);
1426  SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
1427  SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
1428                               DAG.getConstant(-32U, MVT::i32));
1429  SDOperand Tmp6 = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Tmp5);
1430  SDOperand OutHi = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6);
1431  SDOperand OutLo = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Amt);
1432  return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
1433}
1434
1435static SDOperand LowerSRL(SDOperand Op, SelectionDAG &DAG,
1436                          MVT::ValueType PtrVT) {
1437  assert(Op.getValueType() == MVT::i64 &&
1438         Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SHL!");
1439  // The generic code does a fine job expanding shift by a constant.
1440  if (isa<ConstantSDNode>(Op.getOperand(1))) return SDOperand();
1441
1442  // Otherwise, expand into a bunch of logical ops.  Note that these ops
1443  // depend on the PPC behavior for oversized shift amounts.
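      // This is the mirror image of the SHL expansion:
      //   OutLo = (Lo >> Amt) | (Hi << (32-Amt)) | (Hi >> (Amt-32))
      //   OutHi = Hi >> Amt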
1444  SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
1445                             DAG.getConstant(0, PtrVT));
1446  SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
1447                             DAG.getConstant(1, PtrVT));
1448  SDOperand Amt = Op.getOperand(1);
1449
1450  SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
1451                               DAG.getConstant(32, MVT::i32), Amt);
1452  SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt);
1453  SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1);
1454  SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
1455  SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
1456                               DAG.getConstant(-32U, MVT::i32));
1457  SDOperand Tmp6 = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Tmp5);
1458  SDOperand OutLo = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6);
1459  SDOperand OutHi = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Amt);
1460  return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
1461}
1462
1463static SDOperand LowerSRA(SDOperand Op, SelectionDAG &DAG,
1464                          MVT::ValueType PtrVT) {
1465  assert(Op.getValueType() == MVT::i64 &&
1466         Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SRA!");
1467  // The generic code does a fine job expanding shift by a constant.
1468  if (isa<ConstantSDNode>(Op.getOperand(1))) return SDOperand();
1469
1470  // Otherwise, expand into a bunch of logical ops, followed by a select_cc.
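      // Unlike SRL, an arithmetic shift fills with sign bits rather than
      // zeros for amounts >= 32, so PPCISD::SRA on the oversized amounts
      // does not produce the 0 that the OR-merge trick relies on.  Instead,
      // the low word is a select on (Amt-32 <= 0) between the two-word merge
      // and "Hi sra (Amt-32)".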
1471  SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
1472                             DAG.getConstant(0, PtrVT));
1473  SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
1474                             DAG.getConstant(1, PtrVT));
1475  SDOperand Amt = Op.getOperand(1);
1476
1477  SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
1478                               DAG.getConstant(32, MVT::i32), Amt);
1479  SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt);
1480  SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1);
1481  SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
1482  SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
1483                               DAG.getConstant(-32U, MVT::i32));
1484  SDOperand Tmp6 = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Tmp5);
1485  SDOperand OutHi = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Amt);
1486  SDOperand OutLo = DAG.getSelectCC(Tmp5, DAG.getConstant(0, MVT::i32),
1487                                    Tmp4, Tmp6, ISD::SETLE);
1488  return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
1489}
1490
1491//===----------------------------------------------------------------------===//
1492// Vector related lowering.
1493//
1494
1495// If this is a vector of constants or undefs, get the bits.  A bit in
1496// UndefBits is set if the corresponding element of the vector is an
1497// ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
1498// zero.   Return true if this is not an array of constants, false if it is.
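    // For example, a v8i16 BUILD_VECTOR of eight 0x1234 constants yields
    // VectorBits[0] = VectorBits[1] = 0x1234123412341234 with UndefBits zero;
    // making element 0 undef instead sets the top 16 bits of UndefBits[0]
    // and leaves those bits zero in VectorBits[0].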
1499//
1500static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1501                                       uint64_t UndefBits[2]) {
1502  // Start with zero'd results.
1503  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1504
1505  unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
1506  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1507    SDOperand OpVal = BV->getOperand(i);
1508
1509    unsigned PartNo = i >= e/2;     // In the upper 64 bits?
1510    unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.
1511
1512    uint64_t EltBits = 0;
1513    if (OpVal.getOpcode() == ISD::UNDEF) {
1514      uint64_t EltUndefBits = ~0U >> (32-EltBitSize);
1515      UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1516      continue;
1517    } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1518      EltBits = CN->getValue() & (~0U >> (32-EltBitSize));
1519    } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1520      assert(CN->getValueType(0) == MVT::f32 &&
1521             "Only one legal FP vector type!");
1522      EltBits = FloatToBits(CN->getValue());
1523    } else {
1524      // Nonconstant element.
1525      return true;
1526    }
1527
1528    VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1529  }
1530
1531  //printf("%llx %llx  %llx %llx\n",
1532  //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1533  return false;
1534}
1535
1536// If this is a splat (repetition) of a value across the whole vector, return
1537// the smallest size that splats it.  For example, "0x01010101010101..." is a
1538// splat of 0x01, 0x0101, and 0x01010101.  We return SplatBits = 0x01 and
1539// SplatSize = 1 byte.
1540static bool isConstantSplat(const uint64_t Bits128[2],
1541                            const uint64_t Undef128[2],
1542                            unsigned &SplatBits, unsigned &SplatUndef,
1543                            unsigned &SplatSize) {
1544
1545  // Don't let undefs prevent splats from matching.  See if the top 64-bits are
1546  // the same as the lower 64-bits, ignoring undefs.
1547  if ((Bits128[0] & ~Undef128[1]) != (Bits128[1] & ~Undef128[0]))
1548    return false;  // Can't be a splat if two pieces don't match.
1549
1550  uint64_t Bits64  = Bits128[0] | Bits128[1];
1551  uint64_t Undef64 = Undef128[0] & Undef128[1];
1552
1553  // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1554  // undefs.
1555  if ((Bits64 & (~Undef64 >> 32)) != ((Bits64 >> 32) & ~Undef64))
1556    return false;  // Can't be a splat if two pieces don't match.
1557
1558  uint32_t Bits32  = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1559  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1560
1561  // If the top 16-bits are different than the lower 16-bits, ignoring
1562  // undefs, we have an i32 splat.
1563  if ((Bits32 & (~Undef32 >> 16)) != ((Bits32 >> 16) & ~Undef32)) {
1564    SplatBits = Bits32;
1565    SplatUndef = Undef32;
1566    SplatSize = 4;
1567    return true;
1568  }
1569
1570  uint16_t Bits16  = uint16_t(Bits32)  | uint16_t(Bits32 >> 16);
1571  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1572
1573  // If the top 8-bits are different than the lower 8-bits, ignoring
1574  // undefs, we have an i16 splat.
1575  if ((Bits16 & (uint16_t(~Undef16) >> 8)) != ((Bits16 >> 8) & ~Undef16)) {
1576    SplatBits = Bits16;
1577    SplatUndef = Undef16;
1578    SplatSize = 2;
1579    return true;
1580  }
1581
1582  // Otherwise, we have an 8-bit splat.
1583  SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
1584  SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1585  SplatSize = 1;
1586  return true;
1587}
1588
1589/// BuildSplatI - Build a canonical splat (vsplti) of Val with an element
1590/// size of SplatSize.  Cast the result to VT.
1591static SDOperand BuildSplatI(int Val, unsigned SplatSize, MVT::ValueType VT,
1592                             SelectionDAG &DAG) {
1593  assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
1594
1595  // Force vspltis[hw] -1 to vspltisb -1.
1596  if (Val == -1) SplatSize = 1;
1597
1598  static const MVT::ValueType VTys[] = { // canonical VT to use for each size.
1599    MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
1600  };
1601  MVT::ValueType CanonicalVT = VTys[SplatSize-1];
1602
1603  // Build a canonical splat for this value.
1604  SDOperand Elt = DAG.getConstant(Val, MVT::getVectorBaseType(CanonicalVT));
1605  std::vector<SDOperand> Ops(MVT::getVectorNumElements(CanonicalVT), Elt);
1606  SDOperand Res = DAG.getNode(ISD::BUILD_VECTOR, CanonicalVT, Ops);
1607  return DAG.getNode(ISD::BIT_CONVERT, VT, Res);
1608}
1609
1610/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
1611/// specified intrinsic ID.
1612static SDOperand BuildIntrinsicOp(unsigned IID, SDOperand LHS, SDOperand RHS,
1613                                  SelectionDAG &DAG,
1614                                  MVT::ValueType DestVT = MVT::Other) {
1615  if (DestVT == MVT::Other) DestVT = LHS.getValueType();
1616  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT,
1617                     DAG.getConstant(IID, MVT::i32), LHS, RHS);
1618}
1619
1620/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
1621/// specified intrinsic ID.
1622static SDOperand BuildIntrinsicOp(unsigned IID, SDOperand Op0, SDOperand Op1,
1623                                  SDOperand Op2, SelectionDAG &DAG,
1624                                  MVT::ValueType DestVT = MVT::Other) {
1625  if (DestVT == MVT::Other) DestVT = Op0.getValueType();
1626  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT,
1627                     DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
1628}
1629
1630
1631/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
1632/// amount.  The result has the specified value type.
1633static SDOperand BuildVSLDOI(SDOperand LHS, SDOperand RHS, unsigned Amt,
1634                             MVT::ValueType VT, SelectionDAG &DAG) {
1635  // Force LHS/RHS to be the right type.
1636  LHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, LHS);
1637  RHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, RHS);
1638
1639  std::vector<SDOperand> Ops;
1640  for (unsigned i = 0; i != 16; ++i)
1641    Ops.push_back(DAG.getConstant(i+Amt, MVT::i32));
1642  SDOperand T = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v16i8, LHS, RHS,
1643                            DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops));
1644  return DAG.getNode(ISD::BIT_CONVERT, VT, T);
1645}
1646
1647// If this is a case we can't handle, return null and let the default
1648// expansion code take care of it.  If we CAN select this case, and if it
1649// selects to a single instruction, return Op.  Otherwise, if we can codegen
1650// this case more efficiently than a constant pool load, lower it to the
1651// sequence of ops that should be used.
1652static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1653  // If this is a vector of constants or undefs, get the bits.  A bit in
1654  // UndefBits is set if the corresponding element of the vector is an
1655  // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
1656  // zero.
1657  uint64_t VectorBits[2];
1658  uint64_t UndefBits[2];
1659  if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits))
1660    return SDOperand();   // Not a constant vector.
1661
1662  // If this is a splat (repetition) of a value across the whole vector, return
1663  // the smallest size that splats it.  For example, "0x01010101010101..." is a
1664  // splat of 0x01, 0x0101, and 0x01010101.  We return SplatBits = 0x01 and
1665  // SplatSize = 1 byte.
1666  unsigned SplatBits, SplatUndef, SplatSize;
1667  if (isConstantSplat(VectorBits, UndefBits, SplatBits, SplatUndef, SplatSize)){
1668    bool HasAnyUndefs = (UndefBits[0] | UndefBits[1]) != 0;
1669
1670    // First, handle single instruction cases.
1671
1672    // All zeros?
1673    if (SplatBits == 0) {
1674      // Canonicalize all zero vectors to be v4i32.
1675      if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
1676        SDOperand Z = DAG.getConstant(0, MVT::i32);
1677        Z = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Z, Z, Z, Z);
1678        Op = DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Z);
1679      }
1680      return Op;
1681    }
1682
1683    // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
1684    int32_t SextVal= int32_t(SplatBits << (32-8*SplatSize)) >> (32-8*SplatSize);
1685    if (SextVal >= -16 && SextVal <= 15)
1686      return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG);
1687
1688
1689    // Two instruction sequences.
1690
1691    // If this value is in the range [-32,30] and is even, use:
1692    //    tmp = VSPLTI[bhw], result = add tmp, tmp
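        // e.g. a v8i16 splat of 24 becomes "vspltish 12; vadduhm", avoiding
        // a constant-pool load.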
1693    if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
1694      Op = BuildSplatI(SextVal >> 1, SplatSize, Op.getValueType(), DAG);
1695      return DAG.getNode(ISD::ADD, Op.getValueType(), Op, Op);
1696    }
1697
1698    // If this is 0x7FFF_FFFF x 4, turn it into not(0x8000_0000), where the
1699    // 0x8000_0000 is built with vspltisw -1 + vslw.  (0x8000_0000 x 4 itself
1700    // is matched below as vsplti -1 + shl self.)  Important for fneg/fabs.
1701    if (SplatSize == 4 && SplatBits == (0x7FFFFFFF & ~SplatUndef)) {
1702      // Make a -1 vector with vspltisw -1:
1703      SDOperand OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG);
1704
1705      // Make the VSLW intrinsic, computing 0x8000_0000.
1706      SDOperand Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
1707                                       OnesV, DAG);
1708
1709      // xor by OnesV to invert it.
1710      Res = DAG.getNode(ISD::XOR, MVT::v4i32, Res, OnesV);
1711      return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
1712    }
1713
1714    // Check to see if this is a wide variety of vsplti*, binop self cases.
1715    unsigned SplatBitSize = SplatSize*8;
1716    static const char SplatCsts[] = {
1717      -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
1718      -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
1719    };
1720    for (unsigned idx = 0; idx < sizeof(SplatCsts)/sizeof(SplatCsts[0]); ++idx){
1721      // Indirect through the SplatCsts array so that we favor 'vsplti -1'
1722      // for cases which are ambiguous (e.g. formation of 0x8000_0000).
1723      int i = SplatCsts[idx];
1724
1725      // Figure out what shift amount altivec will actually use (it only
1726      // keeps the amount modulo the element size) when shifting by i.
1727      unsigned TypeShiftAmt = i & (SplatBitSize-1);
1728
1729      // vsplti + shl self.
1730      if (SextVal == (i << (int)TypeShiftAmt)) {
1731        Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG);
1732        static const unsigned IIDs[] = { // Intrinsic to use for each size.
1733          Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
1734          Intrinsic::ppc_altivec_vslw
1735        };
1736        return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);
1737      }
1738
1739      // vsplti + srl self.
1740      if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
1741        Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG);
1742        static const unsigned IIDs[] = { // Intrinsic to use for each size.
1743          Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
1744          Intrinsic::ppc_altivec_vsrw
1745        };
1746        return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);
1747      }
1748
1749      // vsplti + sra self.
1750      if (SextVal == (i >> (int)TypeShiftAmt)) {
1751        Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG);
1752        static const unsigned IIDs[] = { // Intrinsic to use for each size.
1753          Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
1754          Intrinsic::ppc_altivec_vsraw
1755        };
1756        return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);
1757      }
1758
1759      // vsplti + rol self.
1760      if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
1761                           ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
1762        Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG);
1763        static const unsigned IIDs[] = { // Intrinsic to use for each size.
1764          Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
1765          Intrinsic::ppc_altivec_vrlw
1766        };
1767        return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);
1768      }
1769
1770      // t = vsplti c, result = vsldoi t, t, 1
1771      if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
1772        SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);
1773        return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG);
1774      }
1775      // t = vsplti c, result = vsldoi t, t, 2
1776      if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
1777        SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);
1778        return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG);
1779      }
1780      // t = vsplti c, result = vsldoi t, t, 3
1781      if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
1782        SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);
1783        return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG);
1784      }
1785    }
1786
1787    // Three instruction sequences.
1788
1789    // Odd, in range [17,31]:  (vsplti C)-(vsplti -16).
1790    if (SextVal >= 0 && SextVal <= 31) {
1791      SDOperand LHS = BuildSplatI(SextVal-16, SplatSize, Op.getValueType(),DAG);
1792      SDOperand RHS = BuildSplatI(-16, SplatSize, Op.getValueType(), DAG);
1793      return DAG.getNode(ISD::SUB, Op.getValueType(), LHS, RHS);
1794    }
1795    // Odd, in range [-31,-17]:  (vsplti C)+(vsplti -16).
1796    if (SextVal >= -31 && SextVal <= 0) {
1797      SDOperand LHS = BuildSplatI(SextVal+16, SplatSize, Op.getValueType(),DAG);
1798      SDOperand RHS = BuildSplatI(-16, SplatSize, Op.getValueType(), DAG);
1799      return DAG.getNode(ISD::ADD, Op.getValueType(), LHS, RHS);
1800    }
1801  }
1802
1803  return SDOperand();
1804}
1805
1806/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
1807/// the specified operations to build the shuffle.
1808static SDOperand GeneratePerfectShuffle(unsigned PFEntry, SDOperand LHS,
1809                                        SDOperand RHS, SelectionDAG &DAG) {
1810  unsigned OpNum = (PFEntry >> 26) & 0x0F;
1811  unsigned LHSID  = (PFEntry >> 13) & ((1 << 13)-1);
1812  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
1813
1814  enum {
1815    OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
1816    OP_VMRGHW,
1817    OP_VMRGLW,
1818    OP_VSPLTISW0,
1819    OP_VSPLTISW1,
1820    OP_VSPLTISW2,
1821    OP_VSPLTISW3,
1822    OP_VSLDOI4,
1823    OP_VSLDOI8,
1824    OP_VSLDOI12
1825  };
1826
1827  if (OpNum == OP_COPY) {
1828    if (LHSID == (1*9+2)*9+3) return LHS;
1829    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
1830    return RHS;
1831  }
1832
1833  SDOperand OpLHS, OpRHS;
1834  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG);
1835  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG);
1836
1837  unsigned ShufIdxs[16];
1838  switch (OpNum) {
1839  default: assert(0 && "Unknown i32 permute!");
1840  case OP_VMRGHW:
1841    ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
1842    ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
1843    ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
1844    ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
1845    break;
1846  case OP_VMRGLW:
1847    ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
1848    ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
1849    ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
1850    ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
1851    break;
1852  case OP_VSPLTISW0:
1853    for (unsigned i = 0; i != 16; ++i)
1854      ShufIdxs[i] = (i&3)+0;
1855    break;
1856  case OP_VSPLTISW1:
1857    for (unsigned i = 0; i != 16; ++i)
1858      ShufIdxs[i] = (i&3)+4;
1859    break;
1860  case OP_VSPLTISW2:
1861    for (unsigned i = 0; i != 16; ++i)
1862      ShufIdxs[i] = (i&3)+8;
1863    break;
1864  case OP_VSPLTISW3:
1865    for (unsigned i = 0; i != 16; ++i)
1866      ShufIdxs[i] = (i&3)+12;
1867    break;
1868  case OP_VSLDOI4:
1869    return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG);
1870  case OP_VSLDOI8:
1871    return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG);
1872  case OP_VSLDOI12:
1873    return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG);
1874  }
1875  std::vector<SDOperand> Ops;
1876  for (unsigned i = 0; i != 16; ++i)
1877    Ops.push_back(DAG.getConstant(ShufIdxs[i], MVT::i32));
1878
1879  return DAG.getNode(ISD::VECTOR_SHUFFLE, OpLHS.getValueType(), OpLHS, OpRHS,
1880                     DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops));
1881}
1882
1883/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
1884/// is a shuffle we can handle in a single instruction, return it.  Otherwise,
1885/// return the code it can be lowered into.  Worst case, it can always be
1886/// lowered into a vperm.
1887static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
1888  SDOperand V1 = Op.getOperand(0);
1889  SDOperand V2 = Op.getOperand(1);
1890  SDOperand PermMask = Op.getOperand(2);
1891
1892  // Cases that are handled by instructions that take permute immediates
1893  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
1894  // selected by the instruction selector.
1895  if (V2.getOpcode() == ISD::UNDEF) {
1896    if (PPC::isSplatShuffleMask(PermMask.Val, 1) ||
1897        PPC::isSplatShuffleMask(PermMask.Val, 2) ||
1898        PPC::isSplatShuffleMask(PermMask.Val, 4) ||
1899        PPC::isVPKUWUMShuffleMask(PermMask.Val, true) ||
1900        PPC::isVPKUHUMShuffleMask(PermMask.Val, true) ||
1901        PPC::isVSLDOIShuffleMask(PermMask.Val, true) != -1 ||
1902        PPC::isVMRGLShuffleMask(PermMask.Val, 1, true) ||
1903        PPC::isVMRGLShuffleMask(PermMask.Val, 2, true) ||
1904        PPC::isVMRGLShuffleMask(PermMask.Val, 4, true) ||
1905        PPC::isVMRGHShuffleMask(PermMask.Val, 1, true) ||
1906        PPC::isVMRGHShuffleMask(PermMask.Val, 2, true) ||
1907        PPC::isVMRGHShuffleMask(PermMask.Val, 4, true)) {
1908      return Op;
1909    }
1910  }
1911
1912  // Altivec has a variety of "shuffle immediates" that take two vector inputs
1913  // and produce a fixed permutation.  If any of these match, do not lower to
1914  // VPERM.
1915  if (PPC::isVPKUWUMShuffleMask(PermMask.Val, false) ||
1916      PPC::isVPKUHUMShuffleMask(PermMask.Val, false) ||
1917      PPC::isVSLDOIShuffleMask(PermMask.Val, false) != -1 ||
1918      PPC::isVMRGLShuffleMask(PermMask.Val, 1, false) ||
1919      PPC::isVMRGLShuffleMask(PermMask.Val, 2, false) ||
1920      PPC::isVMRGLShuffleMask(PermMask.Val, 4, false) ||
1921      PPC::isVMRGHShuffleMask(PermMask.Val, 1, false) ||
1922      PPC::isVMRGHShuffleMask(PermMask.Val, 2, false) ||
1923      PPC::isVMRGHShuffleMask(PermMask.Val, 4, false))
1924    return Op;
1925
1926  // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
1927  // perfect shuffle table to emit an optimal matching sequence.
1928  unsigned PFIndexes[4];
1929  bool isFourElementShuffle = true;
1930  for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
1931    unsigned EltNo = 8;   // Start out undef.
1932    for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
1933      if (PermMask.getOperand(i*4+j).getOpcode() == ISD::UNDEF)
1934        continue;   // Undef, ignore it.
1935
1936      unsigned ByteSource =
1937        cast<ConstantSDNode>(PermMask.getOperand(i*4+j))->getValue();
1938      if ((ByteSource & 3) != j) {
1939        isFourElementShuffle = false;
1940        break;
1941      }
1942
1943      if (EltNo == 8) {
1944        EltNo = ByteSource/4;
1945      } else if (EltNo != ByteSource/4) {
1946        isFourElementShuffle = false;
1947        break;
1948      }
1949    }
1950    PFIndexes[i] = EltNo;
1951  }
1952
1953  // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
1954  // perfect shuffle vector to determine if it is cost effective to do this as
1955  // discrete instructions, or whether we should use a vperm.
1956  if (isFourElementShuffle) {
1957    // Compute the index in the perfect shuffle table.
1958    unsigned PFTableIndex =
1959      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
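        // Each PFIndexes[i] is one base-9 digit: values 0-3 select a word of
        // V1, 4-7 a word of V2, and 8 means the result word is undef.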
1960
1961    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
1962    unsigned Cost  = (PFEntry >> 30);
1963
1964    // Determining when to avoid vperm is tricky.  Many things affect the cost
1965    // of vperm, particularly how many times the perm mask needs to be computed.
1966    // For example, if the perm mask can be hoisted out of a loop or is already
1967    // used (perhaps because there are multiple permutes with the same shuffle
1968    // mask?) the vperm has a cost of 1.  OTOH, hoisting the permute mask out of
1969    // the loop requires an extra register.
1970    //
1971    // As a compromise, we only emit discrete instructions if the shuffle can be
1972    // generated in 3 or fewer operations.  When we have loop information
1973    // available, if this block is within a loop, we should avoid using vperm
1974    // for 3-operation perms and use a constant pool load instead.
1975    if (Cost < 3)
1976      return GeneratePerfectShuffle(PFEntry, V1, V2, DAG);
1977  }
1978
1979  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
1980  // vector that will get spilled to the constant pool.
1981  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1982
1983  // The VECTOR_SHUFFLE mask is almost exactly what we want for vperm, except
1984  // that it is in input element units, not in bytes.  Convert now.
1985  MVT::ValueType EltVT = MVT::getVectorBaseType(V1.getValueType());
1986  unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
1987
1988  std::vector<SDOperand> ResultMask;
1989  for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1990    unsigned SrcElt;
1991    if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1992      SrcElt = 0;
1993    else
1994      SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1995
1996    for (unsigned j = 0; j != BytesPerElement; ++j)
1997      ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1998                                           MVT::i8));
1999  }
2000
2001  SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, ResultMask);
2002  return DAG.getNode(PPCISD::VPERM, V1.getValueType(), V1, V2, VPermMask);
2003}
2004
2005/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
2006/// altivec comparison.  If it is, return true and fill in CompareOpc/isDot with
2007/// information about the intrinsic.
2008static bool getAltivecCompareInfo(SDOperand Intrin, int &CompareOpc,
2009                                  bool &isDot) {
2010  unsigned IntrinsicID = cast<ConstantSDNode>(Intrin.getOperand(0))->getValue();
2011  CompareOpc = -1;
2012  isDot = false;
2013  switch (IntrinsicID) {
2014  default: return false;
2015    // Comparison predicates.
2016  case Intrinsic::ppc_altivec_vcmpbfp_p:  CompareOpc = 966; isDot = 1; break;
2017  case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
2018  case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc =   6; isDot = 1; break;
2019  case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc =  70; isDot = 1; break;
2020  case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
2021  case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
2022  case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
2023  case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
2024  case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
2025  case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
2026  case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
2027  case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
2028  case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
2029
2030    // Normal Comparisons.
2031  case Intrinsic::ppc_altivec_vcmpbfp:    CompareOpc = 966; isDot = 0; break;
2032  case Intrinsic::ppc_altivec_vcmpeqfp:   CompareOpc = 198; isDot = 0; break;
2033  case Intrinsic::ppc_altivec_vcmpequb:   CompareOpc =   6; isDot = 0; break;
2034  case Intrinsic::ppc_altivec_vcmpequh:   CompareOpc =  70; isDot = 0; break;
2035  case Intrinsic::ppc_altivec_vcmpequw:   CompareOpc = 134; isDot = 0; break;
2036  case Intrinsic::ppc_altivec_vcmpgefp:   CompareOpc = 454; isDot = 0; break;
2037  case Intrinsic::ppc_altivec_vcmpgtfp:   CompareOpc = 710; isDot = 0; break;
2038  case Intrinsic::ppc_altivec_vcmpgtsb:   CompareOpc = 774; isDot = 0; break;
2039  case Intrinsic::ppc_altivec_vcmpgtsh:   CompareOpc = 838; isDot = 0; break;
2040  case Intrinsic::ppc_altivec_vcmpgtsw:   CompareOpc = 902; isDot = 0; break;
2041  case Intrinsic::ppc_altivec_vcmpgtub:   CompareOpc = 518; isDot = 0; break;
2042  case Intrinsic::ppc_altivec_vcmpgtuh:   CompareOpc = 582; isDot = 0; break;
2043  case Intrinsic::ppc_altivec_vcmpgtuw:   CompareOpc = 646; isDot = 0; break;
2044  }
2045  return true;
2046}
2047
2048/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
2049/// lower, do it, otherwise return null.
2050static SDOperand LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
2051  // If this is a lowered altivec predicate compare, CompareOpc is set to the
2052  // opcode number of the comparison.
2053  int CompareOpc;
2054  bool isDot;
2055  if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
2056    return SDOperand();    // Don't custom lower most intrinsics.
2057
2058  // If this is a non-dot comparison, make the VCMP node and we are done.
2059  if (!isDot) {
2060    SDOperand Tmp = DAG.getNode(PPCISD::VCMP, Op.getOperand(2).getValueType(),
2061                                Op.getOperand(1), Op.getOperand(2),
2062                                DAG.getConstant(CompareOpc, MVT::i32));
2063    return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Tmp);
2064  }
2065
2066  // Create the PPCISD altivec 'dot' comparison node.
2067  std::vector<SDOperand> Ops;
2068  std::vector<MVT::ValueType> VTs;
2069  Ops.push_back(Op.getOperand(2));  // LHS
2070  Ops.push_back(Op.getOperand(3));  // RHS
2071  Ops.push_back(DAG.getConstant(CompareOpc, MVT::i32));
2072  VTs.push_back(Op.getOperand(2).getValueType());
2073  VTs.push_back(MVT::Flag);
2074  SDOperand CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops);
2075
2076  // Now that we have the comparison, emit a copy from the CR to a GPR.
2077  // This is flagged to the above dot comparison.
2078  SDOperand Flags = DAG.getNode(PPCISD::MFCR, MVT::i32,
2079                                DAG.getRegister(PPC::CR6, MVT::i32),
2080                                CompNode.getValue(1));
2081
2082  // Unpack the result based on how the target uses it.
2083  unsigned BitNo;   // Bit # of CR6.
2084  bool InvertBit;   // Invert result?
2085  switch (cast<ConstantSDNode>(Op.getOperand(1))->getValue()) {
2086  default:  // Can't happen, don't crash on invalid number though.
2087  case 0:   // Return the value of the EQ bit of CR6.
2088    BitNo = 0; InvertBit = false;
2089    break;
2090  case 1:   // Return the inverted value of the EQ bit of CR6.
2091    BitNo = 0; InvertBit = true;
2092    break;
2093  case 2:   // Return the value of the LT bit of CR6.
2094    BitNo = 2; InvertBit = false;
2095    break;
2096  case 3:   // Return the inverted value of the LT bit of CR6.
2097    BitNo = 2; InvertBit = true;
2098    break;
2099  }
2100
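      // MFCR gives all 32 CR bits in a GPR; CR6 is CR bits 24-27 in IBM
      // (MSB-first) numbering, so the EQ bit of CR6 (CR bit 26) sits at bit
      // position 5 from the low end and LT (CR bit 24) at position 7, which
      // is what the 8-(3-BitNo) shift amount below extracts.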
2101  // Shift the bit into the low position.
2102  Flags = DAG.getNode(ISD::SRL, MVT::i32, Flags,
2103                      DAG.getConstant(8-(3-BitNo), MVT::i32));
2104  // Isolate the bit.
2105  Flags = DAG.getNode(ISD::AND, MVT::i32, Flags,
2106                      DAG.getConstant(1, MVT::i32));
2107
2108  // If we are supposed to, toggle the bit.
2109  if (InvertBit)
2110    Flags = DAG.getNode(ISD::XOR, MVT::i32, Flags,
2111                        DAG.getConstant(1, MVT::i32));
2112  return Flags;
2113}
2114
2115static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
2116  // Create a stack slot that is 16-byte aligned.
2117  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
2118  int FrameIdx = FrameInfo->CreateStackObject(16, 16);
2119  SDOperand FIdx = DAG.getFrameIndex(FrameIdx, MVT::i32);
2120
2121  // Store the input value into Value#0 of the stack slot.
2122  SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, DAG.getEntryNode(),
2123                                Op.getOperand(0), FIdx,DAG.getSrcValue(NULL));
2124  // Load it out.
2125  return DAG.getLoad(Op.getValueType(), Store, FIdx, DAG.getSrcValue(NULL));
2126}
2127
2128static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG) {
2129  if (Op.getValueType() == MVT::v4i32) {
2130    SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);
2131
2132    SDOperand Zero  = BuildSplatI(  0, 1, MVT::v4i32, DAG);
2133    SDOperand Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG); // +16 as shift amt.
2134
2135    SDOperand RHSSwap =   // = vrlw RHS, 16
2136      BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG);
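
        // Per-word sketch of the algorithm, in 16-bit halves (mod 2^32):
        //   L*R = lo(L)*lo(R) + ((lo(L)*hi(R) + hi(L)*lo(R)) << 16)
        // vmulouh forms the first term; vmsumuhm of LHS with the
        // half-swapped RHS forms the bracketed cross-term sum, which is then
        // shifted up 16 and added in below.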
2137
2138    // Shrinkify inputs to v8i16.
2139    LHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, LHS);
2140    RHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, RHS);
2141    RHSSwap = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, RHSSwap);
2142
2143    // Low parts multiplied together, generating 32-bit results (we ignore the
2144    // top parts).
2145    SDOperand LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
2146                                        LHS, RHS, DAG, MVT::v4i32);
2147
2148    SDOperand HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
2149                                        LHS, RHSSwap, Zero, DAG, MVT::v4i32);
2150    // Shift the high parts up 16 bits.
2151    HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, Neg16, DAG);
2152    return DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd);
2153  } else if (Op.getValueType() == MVT::v8i16) {
2154    SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);
2155
2156    SDOperand Zero = BuildSplatI(0, 1, MVT::v8i16, DAG);
2157
2158    return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
2159                            LHS, RHS, Zero, DAG);
2160  } else if (Op.getValueType() == MVT::v16i8) {
2161    SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);
2162
2163    // Multiply the even 8-bit parts, producing 16-bit sums.
2164    SDOperand EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
2165                                           LHS, RHS, DAG, MVT::v8i16);
2166    EvenParts = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, EvenParts);
2167
2168    // Multiply the odd 8-bit parts, producing 16-bit sums.
2169    SDOperand OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
2170                                          LHS, RHS, DAG, MVT::v8i16);
2171    OddParts = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, OddParts);
2172
2173    // Merge the results together.
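        // Each 16-bit product keeps its low byte at the (big-endian) odd
        // byte positions, so taking bytes 2*i+1 of EvenParts and 2*i+1+16 of
        // OddParts interleaves the even- and odd-lane low bytes back into a
        // v16i8 result.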
2174    std::vector<SDOperand> Ops;
2175    for (unsigned i = 0; i != 8; ++i) {
2176      Ops.push_back(DAG.getConstant(2*i+1, MVT::i8));
2177      Ops.push_back(DAG.getConstant(2*i+1+16, MVT::i8));
2178    }
2179
2180    return DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v16i8, EvenParts, OddParts,
2181                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops));
2182  } else {
2183    assert(0 && "Unknown mul to lower!");
2184    abort();
2185  }
2186}
2187
2188/// LowerOperation - Provide custom lowering hooks for some operations.
2189///
2190SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
2191  switch (Op.getOpcode()) {
2192  default: assert(0 && "Wasn't expecting to be able to lower this!");
2193  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
2194  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
2195  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
2196  case ISD::SETCC:              return LowerSETCC(Op, DAG);
2197  case ISD::VASTART:            return LowerVASTART(Op, DAG, VarArgsFrameIndex);
2198  case ISD::FORMAL_ARGUMENTS:
2199      return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2200  case ISD::CALL:               return LowerCALL(Op, DAG);
2201  case ISD::RET:                return LowerRET(Op, DAG);
2202
2203  case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
2204  case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG);
2205  case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
2206
2207  // Lower 64-bit shifts.
2208  case ISD::SHL:                return LowerSHL(Op, DAG, getPointerTy());
2209  case ISD::SRL:                return LowerSRL(Op, DAG, getPointerTy());
2210  case ISD::SRA:                return LowerSRA(Op, DAG, getPointerTy());
2211
2212  // Vector-related lowering.
2213  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
2214  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
2215  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
2216  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
2217  case ISD::MUL:                return LowerMUL(Op, DAG);
2218  }
2219  return SDOperand();
2220}
2221
2222//===----------------------------------------------------------------------===//
2223//  Other Lowering Code
2224//===----------------------------------------------------------------------===//
2225
2226MachineBasicBlock *
2227PPCTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
2228                                           MachineBasicBlock *BB) {
2229  assert((MI->getOpcode() == PPC::SELECT_CC_I4 ||
2230          MI->getOpcode() == PPC::SELECT_CC_I8 ||
2231          MI->getOpcode() == PPC::SELECT_CC_F4 ||
2232          MI->getOpcode() == PPC::SELECT_CC_F8 ||
2233          MI->getOpcode() == PPC::SELECT_CC_VRRC) &&
2234         "Unexpected instr type to insert");
2235
2236  // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
2237  // control-flow pattern.  The incoming instruction knows the destination vreg
2238  // to set, the condition code register to branch on, the true/false values to
2239  // select between, and a branch opcode to use.
2240  const BasicBlock *LLVM_BB = BB->getBasicBlock();
2241  ilist<MachineBasicBlock>::iterator It = BB;
2242  ++It;
2243
2244  //  thisMBB:
2245  //  ...
2246  //   TrueVal = ...
2247  //   cmpTY ccX, r1, r2
2248  //   bCC copy1MBB
2249  //   fallthrough --> copy0MBB
2250  MachineBasicBlock *thisMBB = BB;
2251  MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
2252  MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
2253  BuildMI(BB, MI->getOperand(4).getImmedValue(), 2)
2254    .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
2255  MachineFunction *F = BB->getParent();
2256  F->getBasicBlockList().insert(It, copy0MBB);
2257  F->getBasicBlockList().insert(It, sinkMBB);
2258  // Update machine-CFG edges by first adding all successors of the current
2259  // block to the new block, which will contain the PHI node for the select.
2260  for (MachineBasicBlock::succ_iterator i = BB->succ_begin(),
2261      e = BB->succ_end(); i != e; ++i)
2262    sinkMBB->addSuccessor(*i);
2263  // Next, remove all successors of the current block, and add the true
2264  // and fallthrough blocks as its successors.
2265  while (!BB->succ_empty())
2266    BB->removeSuccessor(BB->succ_begin());
2267  BB->addSuccessor(copy0MBB);
2268  BB->addSuccessor(sinkMBB);
2269
2270  //  copy0MBB:
2271  //   %FalseValue = ...
2272  //   # fallthrough to sinkMBB
2273  BB = copy0MBB;
2274
2275  // Update machine-CFG edges
2276  BB->addSuccessor(sinkMBB);
2277
2278  //  sinkMBB:
2279  //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
2280  //  ...
2281  BB = sinkMBB;
2282  BuildMI(BB, PPC::PHI, 4, MI->getOperand(0).getReg())
2283    .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
2284    .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
2285
2286  delete MI;   // The pseudo instruction is gone now.
2287  return BB;
2288}
2289
2290//===----------------------------------------------------------------------===//
2291// Target Optimization Hooks
2292//===----------------------------------------------------------------------===//
2293
2294SDOperand PPCTargetLowering::PerformDAGCombine(SDNode *N,
2295                                               DAGCombinerInfo &DCI) const {
2296  TargetMachine &TM = getTargetMachine();
2297  SelectionDAG &DAG = DCI.DAG;
2298  switch (N->getOpcode()) {
2299  default: break;
2300  case ISD::SINT_TO_FP:
2301    if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
2302      if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
2303        // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
2304        // We allow the src/dst to be either f32/f64, but the intermediate
2305        // type must be i64.
2306        if (N->getOperand(0).getValueType() == MVT::i64) {
2307          SDOperand Val = N->getOperand(0).getOperand(0);
2308          if (Val.getValueType() == MVT::f32) {
2309            Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
2310            DCI.AddToWorklist(Val.Val);
2311          }
2312
2313          Val = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Val);
2314          DCI.AddToWorklist(Val.Val);
2315          Val = DAG.getNode(PPCISD::FCFID, MVT::f64, Val);
2316          DCI.AddToWorklist(Val.Val);
2317          if (N->getValueType(0) == MVT::f32) {
2318            Val = DAG.getNode(ISD::FP_ROUND, MVT::f32, Val);
2319            DCI.AddToWorklist(Val.Val);
2320          }
2321          return Val;
2322        } else if (N->getOperand(0).getValueType() == MVT::i32) {
2323          // If the intermediate type is i32, we can avoid the load/store here
2324          // too.
2325        }
2326      }
2327    }
2328    break;
2329  case ISD::STORE:
2330    // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
2331    if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
2332        N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
2333        N->getOperand(1).getValueType() == MVT::i32) {
2334      SDOperand Val = N->getOperand(1).getOperand(0);
2335      if (Val.getValueType() == MVT::f32) {
2336        Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
2337        DCI.AddToWorklist(Val.Val);
2338      }
2339      Val = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Val);
2340      DCI.AddToWorklist(Val.Val);
2341
2342      Val = DAG.getNode(PPCISD::STFIWX, MVT::Other, N->getOperand(0), Val,
2343                        N->getOperand(2), N->getOperand(3));
2344      DCI.AddToWorklist(Val.Val);
2345      return Val;
2346    }
2347    break;
2348  case PPCISD::VCMP: {
2349    // If a VCMPo node already exists with exactly the same operands as this
2350    // node, use its result instead of this node (VCMPo computes both a CR6
2351    // result and a normal vector output).
2352    //
2353    if (!N->getOperand(0).hasOneUse() &&
2354        !N->getOperand(1).hasOneUse() &&
2355        !N->getOperand(2).hasOneUse()) {
2356
2357      // Scan all of the users of the LHS, looking for VCMPo's that match.
2358      SDNode *VCMPoNode = 0;
2359
2360      SDNode *LHSN = N->getOperand(0).Val;
2361      for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
2362           UI != E; ++UI)
2363        if ((*UI)->getOpcode() == PPCISD::VCMPo &&
2364            (*UI)->getOperand(1) == N->getOperand(1) &&
2365            (*UI)->getOperand(2) == N->getOperand(2) &&
2366            (*UI)->getOperand(0) == N->getOperand(0)) {
2367          VCMPoNode = *UI;
2368          break;
2369        }
2370
2371      // If there is no VCMPo node, or if its flag result is unused, don't
2372      // transform this.
2373      if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
2374        break;
2375
2376      // Look at the (necessarily single) use of the flag value.  If it has a
2377      // chain, this transformation is more complex.  Note that multiple things
2378      // could use the value result, which we should ignore.
2379      SDNode *FlagUser = 0;
2380      for (SDNode::use_iterator UI = VCMPoNode->use_begin();
2381           FlagUser == 0; ++UI) {
2382        assert(UI != VCMPoNode->use_end() && "Didn't find user!");
2383        SDNode *User = *UI;
2384        for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
2385          if (User->getOperand(i) == SDOperand(VCMPoNode, 1)) {
2386            FlagUser = User;
2387            break;
2388          }
2389        }
2390      }
2391
2392      // If the user is an MFCR instruction, we know this is safe.  Otherwise we
2393      // give up for right now.
2394      if (FlagUser->getOpcode() == PPCISD::MFCR)
2395        return SDOperand(VCMPoNode, 0);
2396    }
2397    break;
2398  }
2399  case ISD::BR_CC: {
2400    // If this is a branch on an altivec predicate comparison, lower this so
2401    // that we don't have to do a MFCR: instead, branch directly on CR6.  This
2402    // lowering is done pre-legalize, because the legalizer lowers the predicate
2403    // compare down to code that is difficult to reassemble.
2404    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
2405    SDOperand LHS = N->getOperand(2), RHS = N->getOperand(3);
2406    int CompareOpc;
2407    bool isDot;
2408
2409    if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
2410        isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
2411        getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
2412      assert(isDot && "Can't compare against a vector result!");
2413
2414      // If this is a comparison against something other than 0/1, then we know
2415      // that the condition is never/always true.
2416      unsigned Val = cast<ConstantSDNode>(RHS)->getValue();
2417      if (Val != 0 && Val != 1) {
2418        if (CC == ISD::SETEQ)      // Cond never true, remove branch.
2419          return N->getOperand(0);
2420        // Always !=, turn it into an unconditional branch.
2421        return DAG.getNode(ISD::BR, MVT::Other,
2422                           N->getOperand(0), N->getOperand(4));
2423      }
2424
2425      bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
2426
2427      // Create the PPCISD altivec 'dot' comparison node.
2428      std::vector<SDOperand> Ops;
2429      std::vector<MVT::ValueType> VTs;
2430      Ops.push_back(LHS.getOperand(2));  // LHS of compare
2431      Ops.push_back(LHS.getOperand(3));  // RHS of compare
2432      Ops.push_back(DAG.getConstant(CompareOpc, MVT::i32));
2433      VTs.push_back(LHS.getOperand(2).getValueType());
2434      VTs.push_back(MVT::Flag);
2435      SDOperand CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops);
2436
2437      // Unpack the result based on how the target uses it.
2438      unsigned CompOpc;
2439      switch (cast<ConstantSDNode>(LHS.getOperand(1))->getValue()) {
2440      default:  // Can't happen, don't crash on invalid number though.
2441      case 0:   // Branch on the value of the EQ bit of CR6.
2442        CompOpc = BranchOnWhenPredTrue ? PPC::BEQ : PPC::BNE;
2443        break;
2444      case 1:   // Branch on the inverted value of the EQ bit of CR6.
2445        CompOpc = BranchOnWhenPredTrue ? PPC::BNE : PPC::BEQ;
2446        break;
2447      case 2:   // Branch on the value of the LT bit of CR6.
2448        CompOpc = BranchOnWhenPredTrue ? PPC::BLT : PPC::BGE;
2449        break;
2450      case 3:   // Branch on the inverted value of the LT bit of CR6.
2451        CompOpc = BranchOnWhenPredTrue ? PPC::BGE : PPC::BLT;
2452        break;
2453      }
2454
2455      return DAG.getNode(PPCISD::COND_BRANCH, MVT::Other, N->getOperand(0),
2456                         DAG.getRegister(PPC::CR6, MVT::i32),
2457                         DAG.getConstant(CompOpc, MVT::i32),
2458                         N->getOperand(4), CompNode.getValue(1));
2459    }
2460    break;
2461  }
2462  }
2463
2464  return SDOperand();
2465}
2466
2467//===----------------------------------------------------------------------===//
2468// Inline Assembly Support
2469//===----------------------------------------------------------------------===//
2470
2471void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
2472                                                       uint64_t Mask,
2473                                                       uint64_t &KnownZero,
2474                                                       uint64_t &KnownOne,
2475                                                       unsigned Depth) const {
2476  KnownZero = 0;
2477  KnownOne = 0;
2478  switch (Op.getOpcode()) {
2479  default: break;
2480  case ISD::INTRINSIC_WO_CHAIN: {
2481    switch (cast<ConstantSDNode>(Op.getOperand(0))->getValue()) {
2482    default: break;
2483    case Intrinsic::ppc_altivec_vcmpbfp_p:
2484    case Intrinsic::ppc_altivec_vcmpeqfp_p:
2485    case Intrinsic::ppc_altivec_vcmpequb_p:
2486    case Intrinsic::ppc_altivec_vcmpequh_p:
2487    case Intrinsic::ppc_altivec_vcmpequw_p:
2488    case Intrinsic::ppc_altivec_vcmpgefp_p:
2489    case Intrinsic::ppc_altivec_vcmpgtfp_p:
2490    case Intrinsic::ppc_altivec_vcmpgtsb_p:
2491    case Intrinsic::ppc_altivec_vcmpgtsh_p:
2492    case Intrinsic::ppc_altivec_vcmpgtsw_p:
2493    case Intrinsic::ppc_altivec_vcmpgtub_p:
2494    case Intrinsic::ppc_altivec_vcmpgtuh_p:
2495    case Intrinsic::ppc_altivec_vcmpgtuw_p:
2496      KnownZero = ~1U;  // All bits but the low one are known to be zero.
2497      break;
2498    }
2499  }
2500  }
2501}
2502
2503
2504/// getConstraintType - Given a constraint letter, return the type of
2505/// constraint it is for this target.
2506PPCTargetLowering::ConstraintType
2507PPCTargetLowering::getConstraintType(char ConstraintLetter) const {
2508  switch (ConstraintLetter) {
2509  default: break;
2510  case 'b':
2511  case 'r':
2512  case 'f':
2513  case 'v':
2514  case 'y':
2515    return C_RegisterClass;
2516  }
2517  return TargetLowering::getConstraintType(ConstraintLetter);
2518}


std::vector<unsigned> PPCTargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                  MVT::ValueType VT) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {      // GCC RS6000 Constraint Letters
    default: break;  // Unknown constraint letter
    case 'b':
      return make_vector<unsigned>(/*no R0*/ PPC::R1 , PPC::R2 , PPC::R3 ,
                                   PPC::R4 , PPC::R5 , PPC::R6 , PPC::R7 ,
                                   PPC::R8 , PPC::R9 , PPC::R10, PPC::R11,
                                   PPC::R12, PPC::R13, PPC::R14, PPC::R15,
                                   PPC::R16, PPC::R17, PPC::R18, PPC::R19,
                                   PPC::R20, PPC::R21, PPC::R22, PPC::R23,
                                   PPC::R24, PPC::R25, PPC::R26, PPC::R27,
                                   PPC::R28, PPC::R29, PPC::R30, PPC::R31,
                                   0);
    case 'r':
      return make_vector<unsigned>(PPC::R0 , PPC::R1 , PPC::R2 , PPC::R3 ,
                                   PPC::R4 , PPC::R5 , PPC::R6 , PPC::R7 ,
                                   PPC::R8 , PPC::R9 , PPC::R10, PPC::R11,
                                   PPC::R12, PPC::R13, PPC::R14, PPC::R15,
                                   PPC::R16, PPC::R17, PPC::R18, PPC::R19,
                                   PPC::R20, PPC::R21, PPC::R22, PPC::R23,
                                   PPC::R24, PPC::R25, PPC::R26, PPC::R27,
                                   PPC::R28, PPC::R29, PPC::R30, PPC::R31,
                                   0);
    case 'f':
      return make_vector<unsigned>(PPC::F0 , PPC::F1 , PPC::F2 , PPC::F3 ,
                                   PPC::F4 , PPC::F5 , PPC::F6 , PPC::F7 ,
                                   PPC::F8 , PPC::F9 , PPC::F10, PPC::F11,
                                   PPC::F12, PPC::F13, PPC::F14, PPC::F15,
                                   PPC::F16, PPC::F17, PPC::F18, PPC::F19,
                                   PPC::F20, PPC::F21, PPC::F22, PPC::F23,
                                   PPC::F24, PPC::F25, PPC::F26, PPC::F27,
                                   PPC::F28, PPC::F29, PPC::F30, PPC::F31,
                                   0);
    case 'v':
      return make_vector<unsigned>(PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 ,
                                   PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
                                   PPC::V8 , PPC::V9 , PPC::V10, PPC::V11,
                                   PPC::V12, PPC::V13, PPC::V14, PPC::V15,
                                   PPC::V16, PPC::V17, PPC::V18, PPC::V19,
                                   PPC::V20, PPC::V21, PPC::V22, PPC::V23,
                                   PPC::V24, PPC::V25, PPC::V26, PPC::V27,
                                   PPC::V28, PPC::V29, PPC::V30, PPC::V31,
                                   0);
    case 'y':
      return make_vector<unsigned>(PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3,
                                   PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7,
                                   0);
    }
  }

  return std::vector<unsigned>();
}
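
// Illustrative note (not from the original file): "b" omits R0 because the
// PPC ISA reads a base register of r0 as the literal value zero in addressing
// modes, so r0 can never serve as a base pointer.  Assuming GCC extended asm
// syntax:
//
//   int v, *p = ...;
//   asm("lwz %0, 0(%1)" : "=r"(v) : "b"(p));   // %1 must not be r0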

/// isOperandValidForConstraint - Return true if the specified operand is
/// valid for the given constraint letter on this target.
bool PPCTargetLowering::
isOperandValidForConstraint(SDOperand Op, char Letter) {
  switch (Letter) {
  default: break;
  case 'I':
  case 'J':
  case 'K':
  case 'L':
  case 'M':
  case 'N':
  case 'O':
  case 'P': {
    if (!isa<ConstantSDNode>(Op)) return false;  // Must be an immediate.
    unsigned Value = cast<ConstantSDNode>(Op)->getValue();
    switch (Letter) {
    default: assert(0 && "Unknown constraint letter!");
    case 'I':  // "I" is a signed 16-bit constant.
      return (short)Value == (int)Value;
    case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
    case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
      return (short)Value == 0;
    case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
      return (Value >> 16) == 0;
    case 'M':  // "M" is a constant that is greater than 31.
      return Value > 31;
    case 'N':  // "N" is a positive constant that is an exact power of two.
      return (int)Value > 0 && isPowerOf2_32(Value);
    case 'O':  // "O" is the constant zero.
      return Value == 0;
    case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
      return (short)-Value == (int)-Value;
    }
    break;
  }
  }

  // Handle standard constraint letters.
  return TargetLowering::isOperandValidForConstraint(Op, Letter);
}
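
// Illustrative sketch (not part of the original file): each letter matches a
// PPC immediate encoding, e.g. "I" is the signed 16-bit field of addi.
// Assuming GCC extended asm syntax:
//
//   int r, *p = ...;
//   asm("addi %0, %1, %2" : "=r"(r) : "b"(p), "I"(-42));  // ok: -42 fits
//   // "I"(100000) would be rejected: it needs more than 16 signed bits.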

/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode.
bool PPCTargetLowering::isLegalAddressImmediate(int64_t V) const {
  // PPC allows a sign-extended 16-bit immediate field, i.e. [-32768, 32767].
  return (short)V == V;
}

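// Illustrative sketch (not part of the original file): this hook admits the
// displacement of D-form memory accesses such as lwz rD, d(rA), where d is a
// signed 16-bit field.  So an access like:
//
//   int f(int *p) { return p[1000]; }   // lwz r3, 4000(r3) -- 4000 fits
//
// can fold its offset, while p[10000] (offset 40000) cannot and must keep the
// offset in a register.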