PPCISelLowering.cpp revision df38043a46b873acb98e7ce0c700d82c1d888772
//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPredicates.h"
#include "PPCTargetMachine.h"
#include "PPCPerfectShuffle.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;

static cl::opt<bool> EnablePPCPreinc("enable-ppc-preinc",
  cl::desc("enable preincrement load/store generation on PPC (experimental)"),
  cl::Hidden);

PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
  : TargetLowering(TM), PPCSubTarget(*TM.getSubtargetImpl()) {

  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set up the register classes.
  addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
  addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
  addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-inc loads and stores.
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);

  // Shortening conversions involving ppcf128 get expanded (2 regs -> 1 reg)
  setConvertAction(MVT::ppcf128, MVT::f64, Expand);
  setConvertAction(MVT::ppcf128, MVT::f32, Expand);
  // This is used in the ppcf128->int sequence.  Note it has different semantics
  // from FP_ROUND:  that rounds to nearest, this rounds to zero.
  setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);

  // PowerPC has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod/pow
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FPOW , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);
  setOperationAction(ISD::FPOW , MVT::f32, Expand);

  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

  // If the subtarget has no hardware square root (fsqrt), expand FSQRT.
  if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);
  }

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // PowerPC does not have BSWAP, CTPOP or CTTZ
  setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
  setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
  setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
  setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
  setOperationAction(ISD::CTTZ , MVT::i64  , Expand);

  // PowerPC does not have ROTR
  setOperationAction(ISD::ROTR, MVT::i32   , Expand);
  setOperationAction(ISD::ROTR, MVT::i64   , Expand);

  // PowerPC does not have SELECT
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit
  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND which requires SetCC
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT,  MVT::Other, Expand);

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

  // PowerPC does not have [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand);

  // We cannot sextinreg(i1).  Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);

  setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
  setOperationAction(ISD::EHSELECTION,   MVT::i64, Expand);
  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
  setOperationAction(ISD::EHSELECTION,   MVT::i32, Expand);


  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i64, Custom);

  // RET must be custom lowered, to meet ABI requirements.
  setOperationAction(ISD::RET               , MVT::Other, Custom);

  // TRAP is legal.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.
  setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  // VAARG is custom lowered with ELF 32 ABI
  if (TM.getSubtarget<PPCSubtarget>().isELF32_ABI())
    setOperationAction(ISD::VAARG, MVT::Other, Custom);
  else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // Comparisons that require checking two conditions.
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f64, Expand);

  if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
    // 64-bit implementations also have instructions for converting between
    // i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);

    // FIXME: disable this lowered code.  This generates 64-bit register values,
    // and we don't model the fact that the top part is clobbered by calls.  We
    // need to flag these together so that the value isn't live across a call.
    //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);

    // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  if (TM.getSubtarget<PPCSubtarget>().use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly
    addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }

  if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
      MVT VT = (MVT::SimpleValueType)i;

      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD , VT, Legal);
      setOperationAction(ISD::SUB , VT, Legal);

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND   , VT, Promote);
      AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
      setOperationAction(ISD::OR    , VT, Promote);
      AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
      setOperationAction(ISD::XOR   , VT, Promote);
      AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
      setOperationAction(ISD::LOAD  , VT, Promote);
      AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType (ISD::STORE, VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL , VT, Expand);
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::FDIV, VT, Expand);
      setOperationAction(ISD::FNEG, VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UDIVREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::CTPOP, VT, Expand);
      setOperationAction(ISD::CTLZ, VT, Expand);
      setOperationAction(ISD::CTTZ, VT, Expand);
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM; others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    setOperationAction(ISD::AND   , MVT::v4i32, Legal);
    setOperationAction(ISD::OR    , MVT::v4i32, Legal);
    setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
    setOperationAction(ISD::STORE , MVT::v4i32, Legal);

    addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
  }

  setShiftAmountType(MVT::i32);
  setBooleanContents(ZeroOrOneBooleanContent);

  if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
    setStackPointerRegisterToSaveRestore(PPC::X1);
    setExceptionPointerRegister(PPC::X3);
    setExceptionSelectorRegister(PPC::X4);
  } else {
    setStackPointerRegisterToSaveRestore(PPC::R1);
    setExceptionPointerRegister(PPC::R3);
    setExceptionSelectorRegister(PPC::R4);
  }

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);
  setTargetDAGCombine(ISD::BSWAP);

  // Darwin long double math library functions have $LDBL128 appended.
  if (TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
    setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
    setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
    setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
    setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
    setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
  }

  computeRegisterProperties();
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(const Type *Ty) const {
  TargetMachine &TM = getTargetMachine();
  // Darwin passes everything on 4 byte boundary.
  if (TM.getSubtarget<PPCSubtarget>().isDarwin())
    return 4;
  // FIXME Elf TBD
  return 4;
}

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case PPCISD::FSEL:            return "PPCISD::FSEL";
  case PPCISD::FCFID:           return "PPCISD::FCFID";
  case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
  case PPCISD::STFIWX:          return "PPCISD::STFIWX";
  case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM:           return "PPCISD::VPERM";
  case PPCISD::Hi:              return "PPCISD::Hi";
  case PPCISD::Lo:              return "PPCISD::Lo";
  case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
  case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL:             return "PPCISD::SRL";
  case PPCISD::SRA:             return "PPCISD::SRA";
  case PPCISD::SHL:             return "PPCISD::SHL";
  case PPCISD::EXTSW_32:        return "PPCISD::EXTSW_32";
  case PPCISD::STD_32:          return "PPCISD::STD_32";
  case PPCISD::CALL_ELF:        return "PPCISD::CALL_ELF";
  case PPCISD::CALL_Macho:      return "PPCISD::CALL_Macho";
  case PPCISD::MTCTR:           return "PPCISD::MTCTR";
  case PPCISD::BCTRL_Macho:     return "PPCISD::BCTRL_Macho";
  case PPCISD::BCTRL_ELF:       return "PPCISD::BCTRL_ELF";
  case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
  case PPCISD::MFCR:            return "PPCISD::MFCR";
  case PPCISD::VCMP:            return "PPCISD::VCMP";
  case PPCISD::VCMPo:           return "PPCISD::VCMPo";
  case PPCISD::LBRX:            return "PPCISD::LBRX";
  case PPCISD::STBRX:           return "PPCISD::STBRX";
  case PPCISD::LARX:            return "PPCISD::LARX";
  case PPCISD::STCX:            return "PPCISD::STCX";
  case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
  case PPCISD::MFFS:            return "PPCISD::MFFS";
  case PPCISD::MTFSB0:          return "PPCISD::MTFSB0";
  case PPCISD::MTFSB1:          return "PPCISD::MTFSB1";
  case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
  case PPCISD::MTFSF:           return "PPCISD::MTFSF";
  case PPCISD::TAILCALL:        return "PPCISD::TAILCALL";
  case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
  }
}


MVT PPCTargetLowering::getSetCCResultType(MVT VT) const {
  return MVT::i32;
}


//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(SDValue Op, unsigned Val) {
  return Op.getOpcode() == ISD::UNDEF ||
         cast<ConstantSDNode>(Op)->getZExtValue() == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
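/// As the loops below check, the two-input form selects the odd bytes of the
/// concatenated inputs, i.e. the mask <1,3,5,7,9,11,13,15,17,19,21,23,25,27,
/// 29,31>; the unary form requires both halves of the mask to select the odd
/// bytes <1,3,...,15> of a single input.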
bool PPC::isVPKUHUMShuffleMask(SDNode *N, bool isUnary) {
  if (!isUnary) {
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getOperand(i),  i*2+1))
        return false;
  } else {
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getOperand(i),  i*2+1) ||
          !isConstantOrUndef(N->getOperand(i+8),  i*2+1))
        return false;
  }
  return true;
}

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
bool PPC::isVPKUWUMShuffleMask(SDNode *N, bool isUnary) {
  if (!isUnary) {
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getOperand(i  ),  i*2+2) ||
          !isConstantOrUndef(N->getOperand(i+1),  i*2+3))
        return false;
  } else {
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getOperand(i  ),  i*2+2) ||
          !isConstantOrUndef(N->getOperand(i+1),  i*2+3) ||
          !isConstantOrUndef(N->getOperand(i+8),  i*2+2) ||
          !isConstantOrUndef(N->getOperand(i+9),  i*2+3))
        return false;
  }
  return true;
}

/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(SDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getOperand(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getOperand(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).
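/// For example, with UnitSize == 1 (vmrglb) the two-input form expects the
/// byte mask <8,24,9,25,10,26,11,27,12,28,13,29,14,30,15,31>, interleaving
/// the low halves of the two inputs.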
bool PPC::isVMRGLShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {
  if (!isUnary)
    return isVMerge(N, UnitSize, 8, 24);
  return isVMerge(N, UnitSize, 8, 8);
}

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
bool PPC::isVMRGHShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {
  if (!isUnary)
    return isVMerge(N, UnitSize, 0, 16);
  return isVMerge(N, UnitSize, 0, 0);
}


/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
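/// For example, the two-input mask <4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19>
/// starts at element 4 and is consecutive from there, so this returns a shift
/// amount of 4.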
int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");
  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && N->getOperand(i).getOpcode() == ISD::UNDEF; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = cast<ConstantSDNode>(N->getOperand(i))->getZExtValue();
  if (ShiftAmt < i) return -1;
  ShiftAmt -= i;

  if (!isUnary) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(N->getOperand(i), ShiftAmt+i))
        return -1;
  } else {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(N->getOperand(i), (ShiftAmt+i) & 15))
        return -1;
  }

  return ShiftAmt;
}

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.
bool PPC::isSplatShuffleMask(SDNode *N, unsigned EltSize) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         N->getNumOperands() == 16 &&
         (EltSize == 1 || EltSize == 2 || EltSize == 4));

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = 0;
  SDValue Elt = N->getOperand(0);
  if (ConstantSDNode *EltV = dyn_cast<ConstantSDNode>(Elt))
    ElementBase = EltV->getZExtValue();
  else
    return false;   // FIXME: Handle UNDEF elements too!

  if (cast<ConstantSDNode>(Elt)->getZExtValue() >= 16)
    return false;

  // Check that they are consecutive.
  for (unsigned i = 1; i != EltSize; ++i) {
    if (!isa<ConstantSDNode>(N->getOperand(i)) ||
        cast<ConstantSDNode>(N->getOperand(i))->getZExtValue() != i+ElementBase)
      return false;
  }

  assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(N->getOperand(i)) &&
           "Invalid VECTOR_SHUFFLE mask!");
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getOperand(i+j) != N->getOperand(j))
        return false;
  }

  return true;
}

/// isAllNegativeZeroVector - Returns true if all elements of build_vector
/// are -0.0.
bool PPC::isAllNegativeZeroVector(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  if (PPC::isSplatShuffleMask(N, N->getNumOperands()))
    if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
      return CFP->getValueAPF().isNegZero();
  return false;
}

/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
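/// For example, with EltSize == 4 the mask <8,9,10,11,8,9,10,11,...> splats
/// word 2 of the source vector, so this returns 8/4 == 2 (i.e. vspltw 2).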
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
  assert(isSplatShuffleMask(N, EltSize));
  return cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() / EltSize;
}

/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted.  The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  SDValue OpVal(0, 0);

  // If ByteSize of the splat is bigger than the element size of the
  // build_vector, then we have a case where we are checking for a splat where
  // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize;  // Number of BV entries per splat value.
    SDValue UniquedVals[4];
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    // See if all of the elements in the buildvector agree across each chunk.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
      // If the element isn't a constant, bail fully out.
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();

      if (UniquedVals[i&(Multiple-1)].getNode() == 0)
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
        return SDValue();  // no match.
    }

    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    // either constant or undef values that are identical for each chunk.  See
    // if these chunks can form into a larger vspltis*.

    // Check to see if all of the leading entries are either 0 or -1.  If
    // neither, then this won't fit into the immediate field.
    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (UniquedVals[i].getNode() == 0) continue;  // Must have been undefs.

      LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
      LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
    }
    // Finally, check the least significant entry.
    if (LeadingZero) {
      if (UniquedVals[Multiple-1].getNode() == 0)
        return DAG.getTargetConstant(0, MVT::i32);  // 0,0,0,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
      if (Val < 16)
        return DAG.getTargetConstant(Val, MVT::i32);  // 0,0,0,4 -> vspltisw(4)
    }
    if (LeadingOnes) {
      if (UniquedVals[Multiple-1].getNode() == 0)
        return DAG.getTargetConstant(~0U, MVT::i32);  // -1,-1,-1,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
      if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
        return DAG.getTargetConstant(Val, MVT::i32);
    }

    return SDValue();
  }

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return SDValue();
  }

  if (OpVal.getNode() == 0) return SDValue();  // All UNDEF: use implicit def.

  unsigned ValSizeInBytes = 0;
  uint64_t Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
    Value = CN->getZExtValue();
    ValSizeInBytes = CN->getValueType(0).getSizeInBits()/8;
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = FloatToBits(CN->getValueAPF().convertToFloat());
    ValSizeInBytes = 4;
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat.  The only replicated value that would fit in the immediate
  // field is zero, and we prefer to use vxor for it.
  if (ValSizeInBytes < ByteSize) return SDValue();

  // If the element value is larger than the splat value, cut it in half and
  // check to see if the two halves are equal.  Continue doing this until we
  // get to ByteSize.  This allows us to handle 0x01010101 as 0x01.
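  // For example, with ByteSize == 1 and Value == 0x01010101 (ValSizeInBytes
  // == 4): the halves 0x0101/0x0101 match, then 0x01/0x01 match, so the value
  // reduces to the single byte 0x01 and can be emitted as vspltisb 1.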
  while (ValSizeInBytes > ByteSize) {
    ValSizeInBytes >>= 1;

    // If the top half equals the bottom half, we're still ok.
    if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
         (Value                        & ((1 << (8*ValSizeInBytes))-1)))
      return SDValue();
  }

  // Properly sign extend the value.
  int ShAmt = (4-ByteSize)*8;
  int MaskVal = ((int)Value << ShAmt) >> ShAmt;

  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDValue();

  // Finally, if this value fits in a 5 bit sext field, return it
  if (((MaskVal << (32-5)) >> (32-5)) == MaskVal)
    return DAG.getTargetConstant(MaskVal, MVT::i32);
  return SDValue();
}

//===----------------------------------------------------------------------===//
//  Addressing Mode Selection
//===----------------------------------------------------------------------===//

/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
/// or 64-bit immediate, and if the value can be accurately represented as a
/// sign extension from a 16-bit value.  If so, this returns true and the
/// immediate.
static bool isIntS16Immediate(SDNode *N, short &Imm) {
  if (N->getOpcode() != ISD::Constant)
    return false;

  Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
  if (N->getValueType(0) == MVT::i32)
    return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
  else
    return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
}
static bool isIntS16Immediate(SDValue Op, short &Imm) {
  return isIntS16Immediate(Op.getNode(), Imm);
}


/// SelectAddressRegReg - Given the specified address, check to see if it
/// can be represented as an indexed [r+r] operation.  Returns false if it
/// can be more efficiently represented with [r+imm].
bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
                                            SDValue &Index,
                                            SelectionDAG &DAG) const {
  short imm = 0;
  if (N.getOpcode() == ISD::ADD) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i
    if (N.getOperand(1).getOpcode() == PPCISD::Lo)
      return false;    // r+i

    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  } else if (N.getOpcode() == ISD::OR) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i -- can be folded as [r+imm].

    // If this is an or of disjoint bitfields, we can codegen this as an add
    // (for better address arithmetic) if the LHS and RHS of the OR are provably
    // disjoint.
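    // For example, (or (shl X, 16), (and Y, 0xFFFF)) can be selected as an
    // add: the low 16 bits are known zero on the LHS and the high bits are
    // known zero on the RHS, so no bit position can produce a carry.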
    APInt LHSKnownZero, LHSKnownOne;
    APInt RHSKnownZero, RHSKnownOne;
    DAG.ComputeMaskedBits(N.getOperand(0),
                          APInt::getAllOnesValue(N.getOperand(0)
                            .getValueSizeInBits()),
                          LHSKnownZero, LHSKnownOne);

    if (LHSKnownZero.getBoolValue()) {
      DAG.ComputeMaskedBits(N.getOperand(1),
                            APInt::getAllOnesValue(N.getOperand(1)
                              .getValueSizeInBits()),
                            RHSKnownZero, RHSKnownOne);
      // If all of the bits are known zero on the LHS or RHS, the add won't
      // carry.
      if (~(LHSKnownZero | RHSKnownZero) == 0) {
        Base = N.getOperand(0);
        Index = N.getOperand(1);
        return true;
      }
    }
  }

  return false;
}

/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
/// represented as reg+reg.
bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
                                            SDValue &Base,
                                            SelectionDAG &DAG) const {
  // FIXME dl should come from parent load or store, not from address
  DebugLoc dl = N.getDebugLoc();
  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Disp, Base, DAG))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm)) {
      Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      return true;  // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm)) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      APInt LHSKnownZero, LHSKnownOne;
      DAG.ComputeMaskedBits(N.getOperand(0),
                            APInt::getAllOnesValue(N.getOperand(0)
                                                   .getValueSizeInBits()),
                            LHSKnownZero, LHSKnownOne);

      if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        Base = N.getOperand(0);
        Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.

    // If this address fits entirely in a 16-bit sext immediate field, codegen
    // this as "d, 0"
    short Imm;
    if (isIntS16Immediate(CN, Imm)) {
      Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
      Base = DAG.getRegister(PPC::R0, CN->getValueType(0));
      return true;
    }

    // Handle 32-bit sext immediates with LIS + addr mode.
    if (CN->getValueType(0) == MVT::i32 ||
        (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) {
      int Addr = (int)CN->getZExtValue();

      // Otherwise, break this down into an LIS + disp.
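      // For example, 0x12348000 becomes an LIS of 0x1235 (giving 0x12350000)
      // plus the sign-extended 16-bit displacement -0x8000.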
      Disp = DAG.getTargetConstant((short)Addr, MVT::i32);

      Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32);
      unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
      Base = SDValue(DAG.getTargetNode(Opc, dl, CN->getValueType(0), Base), 0);
      return true;
    }
  }

  Disp = DAG.getTargetConstant(0, getPointerTy());
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
  else
    Base = N;
  return true;      // [r+0]
}

/// SelectAddressRegRegOnly - Given the specified address, force it to be
/// represented as an indexed [r+r] operation.
bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
                                                SDValue &Index,
                                                SelectionDAG &DAG) const {
  // Check to see if we can easily represent this as an [r+r] address.  This
  // will fail if it thinks that the address is more profitably represented as
  // reg+imm, e.g. where imm = 0.
  if (SelectAddressRegReg(N, Base, Index, DAG))
    return true;

  // If the operand is an addition, always emit this as [r+r], since this is
  // better (for code size, and execution, as the memop does the add for free)
  // than emitting an explicit add.
  if (N.getOpcode() == ISD::ADD) {
    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  }

  // Otherwise, do it the hard way, using R0 as the base register.
  Base = DAG.getRegister(PPC::R0, N.getValueType());
  Index = N;
  return true;
}

/// SelectAddressRegImmShift - Returns true if the address N can be
/// represented by a base register plus a signed 14-bit displacement
/// [r+imm*4].  Suitable for use by STD and friends.
bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp,
                                                 SDValue &Base,
                                                 SelectionDAG &DAG) const {
  // FIXME dl should come from the parent load or store, not the address
  DebugLoc dl = N.getDebugLoc();
  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Disp, Base, DAG))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
      Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      return true;  // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      APInt LHSKnownZero, LHSKnownOne;
      DAG.ComputeMaskedBits(N.getOperand(0),
                            APInt::getAllOnesValue(N.getOperand(0)
                                                   .getValueSizeInBits()),
                            LHSKnownZero, LHSKnownOne);
      if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        Base = N.getOperand(0);
        Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.  Verify low two bits are clear.
    if ((CN->getZExtValue() & 3) == 0) {
      // If this address fits entirely in a 14-bit sext immediate field, codegen
      // this as "d, 0"
      short Imm;
      if (isIntS16Immediate(CN, Imm)) {
        Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy());
        Base = DAG.getRegister(PPC::R0, CN->getValueType(0));
        return true;
      }

      // Fold the low-part of 32-bit absolute addresses into addr mode.
      if (CN->getValueType(0) == MVT::i32 ||
          (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) {
        int Addr = (int)CN->getZExtValue();

        // Otherwise, break this down into an LIS + disp.
        Disp = DAG.getTargetConstant((short)Addr >> 2, MVT::i32);
        Base = DAG.getTargetConstant((Addr-(signed short)Addr) >> 16, MVT::i32);
        unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
        Base = SDValue(DAG.getTargetNode(Opc, dl, CN->getValueType(0), Base),0);
        return true;
      }
    }
  }

  Disp = DAG.getTargetConstant(0, getPointerTy());
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
  else
    Base = N;
  return true;      // [r+0]
}

/// getPreIndexedAddressParts - Returns true (by value), and sets the base
/// pointer, offset pointer, and addressing mode (by reference), if the node's
/// address can be legally represented as a pre-indexed load/store address.
bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                                  SDValue &Offset,
                                                  ISD::MemIndexedMode &AM,
                                                  SelectionDAG &DAG) const {
  // Disabled by default for now.
  if (!EnablePPCPreinc) return false;

  SDValue Ptr;
  MVT VT;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    Ptr = LD->getBasePtr();
    VT = LD->getMemoryVT();

  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    Ptr = ST->getBasePtr();
    VT  = ST->getMemoryVT();
  } else
    return false;

  // PowerPC doesn't have preinc load/store instructions for vectors.
  if (VT.isVector())
    return false;

  // TODO: Check reg+reg first.

  // LDU/STU use reg+imm*4, others use reg+imm.
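  // A matched pre-increment load is emitted as e.g. "lwzu r4, 8(r3)", which
  // loads from r3+8 and also writes the updated address back into r3.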
  if (VT != MVT::i64) {
    // reg + imm
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG))
      return false;
  } else {
    // reg + imm * 4.
    if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG))
      return false;
  }

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
    // sext i32 to i64 when addr mode is r+i.
    if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
        LD->getExtensionType() == ISD::SEXTLOAD &&
        isa<ConstantSDNode>(Offset))
      return false;
  }

  AM = ISD::PRE_INC;
  return true;
}

//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//

SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
                                             SelectionDAG &DAG) {
  MVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  // FIXME there isn't really any debug info here
  DebugLoc dl = Op.getDebugLoc();

  const TargetMachine &TM = DAG.getTarget();

  SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, CPI, Zero);
  SDValue Lo = DAG.getNode(PPCISD::Lo, dl, PtrVT, CPI, Zero);

  // If this is a non-darwin platform, we don't support non-static relo models
  // yet.
  if (TM.getRelocationModel() == Reloc::Static ||
      !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    // Generate non-pic code that has direct accesses to the constant pool.
    // The address of the global is just (hi(&g)+lo(&g)).
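    // This typically materializes as a lis/addis of the high-adjusted part
    // followed by an la/addi of the low 16 bits.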
    return DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
  }

  if (TM.getRelocationModel() == Reloc::PIC_) {
    // With PIC, the first instruction is actually "GR+hi(&G)".
    Hi = DAG.getNode(ISD::ADD, dl, PtrVT,
                     DAG.getNode(PPCISD::GlobalBaseReg,
                                 DebugLoc::getUnknownLoc(), PtrVT), Hi);
  }

  Lo = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
  return Lo;
}

SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
  MVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  // FIXME there isn't really any debug loc here
  DebugLoc dl = Op.getDebugLoc();

  const TargetMachine &TM = DAG.getTarget();

  SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, JTI, Zero);
  SDValue Lo = DAG.getNode(PPCISD::Lo, dl, PtrVT, JTI, Zero);

  // If this is a non-darwin platform, we don't support non-static relo models
  // yet.
  if (TM.getRelocationModel() == Reloc::Static ||
      !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    // Generate non-pic code that has direct accesses to the constant pool.
    // The address of the global is just (hi(&g)+lo(&g)).
    return DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
  }

  if (TM.getRelocationModel() == Reloc::PIC_) {
    // With PIC, the first instruction is actually "GR+hi(&G)".
    Hi = DAG.getNode(ISD::ADD, dl, PtrVT,
                     DAG.getNode(PPCISD::GlobalBaseReg,
                                 DebugLoc::getUnknownLoc(), PtrVT), Hi);
  }

  Lo = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
  return Lo;
}

SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                                   SelectionDAG &DAG) {
  assert(0 && "TLS not implemented for PPC.");
  return SDValue(); // Not reached
}

SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) {
  MVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  // FIXME there isn't really any debug info here
  DebugLoc dl = GSDN->getDebugLoc();

  const TargetMachine &TM = DAG.getTarget();

  SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, GA, Zero);
  SDValue Lo = DAG.getNode(PPCISD::Lo, dl, PtrVT, GA, Zero);

  // If this is a non-darwin platform, we don't support non-static relo models
  // yet.
  if (TM.getRelocationModel() == Reloc::Static ||
      !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    // Generate non-pic code that has direct accesses to globals.
    // The address of the global is just (hi(&g)+lo(&g)).
    return DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
  }

  if (TM.getRelocationModel() == Reloc::PIC_) {
    // With PIC, the first instruction is actually "GR+hi(&G)".
    Hi = DAG.getNode(ISD::ADD, dl, PtrVT,
                     DAG.getNode(PPCISD::GlobalBaseReg,
                                 DebugLoc::getUnknownLoc(), PtrVT), Hi);
  }

  Lo = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);

  if (!TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV))
    return Lo;

  // If the global is weak or external, we have to go through the lazy
  // resolution stub.
  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Lo, NULL, 0);
}

SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  DebugLoc dl = Op.getDebugLoc();

  // If we're comparing for equality to zero, expose the fact that this is
  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
  // fold the new nodes.
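  // For i32 this yields (srl (ctlz X), 5): ctlz returns 32 only when X is
  // zero, and 32 >> 5 == 1, while any nonzero X gives ctlz(X) < 32 and
  // therefore 0.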
1193  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
1194    if (C->isNullValue() && CC == ISD::SETEQ) {
1195      MVT VT = Op.getOperand(0).getValueType();
1196      SDValue Zext = Op.getOperand(0);
1197      if (VT.bitsLT(MVT::i32)) {
1198        VT = MVT::i32;
1199        Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
1200      }
1201      unsigned Log2b = Log2_32(VT.getSizeInBits());
1202      SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
1203      SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
1204                                DAG.getConstant(Log2b, MVT::i32));
1205      return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
1206    }
1207    // Leave comparisons against 0 and -1 alone for now, since they're usually
1208    // optimized.  FIXME: revisit this when we can custom lower all setcc
1209    // optimizations.
1210    if (C->isAllOnesValue() || C->isNullValue())
1211      return SDValue();
1212  }
1213
1214  // If we have an integer seteq/setne, turn it into a compare against zero
1215  // by xor'ing the rhs with the lhs, which is faster than setting a
1216  // condition register, reading it back out, and masking the correct bit.  The
1217  // normal approach here uses sub to do this instead of xor.  Using xor exposes
1218  // the result to other bit-twiddling opportunities.
1219  MVT LHSVT = Op.getOperand(0).getValueType();
1220  if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
1221    MVT VT = Op.getValueType();
1222    SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
1223                                Op.getOperand(1));
1224    return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC);
1225  }
1226  return SDValue();
1227}
1228
1229SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
1230                              int VarArgsFrameIndex,
1231                              int VarArgsStackOffset,
1232                              unsigned VarArgsNumGPR,
1233                              unsigned VarArgsNumFPR,
1234                              const PPCSubtarget &Subtarget) {
1235
1236  assert(0 && "VAARG in ELF32 ABI not implemented yet!");
1237  return SDValue(); // Not reached
1238}
1239
1240SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) {
1241  SDValue Chain = Op.getOperand(0);
1242  SDValue Trmp = Op.getOperand(1); // trampoline
1243  SDValue FPtr = Op.getOperand(2); // nested function
1244  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
1245  DebugLoc dl = Op.getDebugLoc();
1246
1247  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1248  bool isPPC64 = (PtrVT == MVT::i64);
1249  const Type *IntPtrTy =
1250    DAG.getTargetLoweringInfo().getTargetData()->getIntPtrType();
1251
1252  TargetLowering::ArgListTy Args;
1253  TargetLowering::ArgListEntry Entry;
1254
1255  Entry.Ty = IntPtrTy;
1256  Entry.Node = Trmp; Args.push_back(Entry);
1257
1258  // TrampSize == (isPPC64 ? 48 : 40);
1259  Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40,
1260                               isPPC64 ? MVT::i64 : MVT::i32);
1261  Args.push_back(Entry);
1262
1263  Entry.Node = FPtr; Args.push_back(Entry);
1264  Entry.Node = Nest; Args.push_back(Entry);
1265
1266  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
1267  std::pair<SDValue, SDValue> CallResult =
1268    LowerCallTo(Chain, Op.getValueType().getTypeForMVT(), false, false,
1269                false, false, CallingConv::C, false,
1270                DAG.getExternalSymbol("__trampoline_setup", PtrVT),
1271                Args, DAG, dl);
1272
1273  SDValue Ops[] =
1274    { CallResult.first, CallResult.second };
1275
1276  return DAG.getMergeValues(Ops, 2, dl);
1277}
1278
1279SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
1280                                        int VarArgsFrameIndex,
1281                                        int VarArgsStackOffset,
1282                                        unsigned VarArgsNumGPR,
1283                                        unsigned VarArgsNumFPR,
1284                                        const PPCSubtarget &Subtarget) {
1285  DebugLoc dl = Op.getDebugLoc();
1286
1287  if (Subtarget.isMachoABI()) {
1288    // vastart just stores the address of the VarArgsFrameIndex slot into the
1289    // memory location argument.
1290    MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1291    SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1292    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1293    return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0);
1294  }
1295
1296  // For the ELF 32 ABI we follow the layout of the va_list struct.
1297  // We assume the given va_list has already been allocated.
1298  //
1299  // typedef struct {
1300  //  char gpr;     /* index into the array of 8 GPRs
1301  //                 * stored in the register save area
1302  //                 * gpr=0 corresponds to r3,
1303  //                 * gpr=1 to r4, etc.
1304  //                 */
1305  //  char fpr;     /* index into the array of 8 FPRs
1306  //                 * stored in the register save area
1307  //                 * fpr=0 corresponds to f1,
1308  //                 * fpr=1 to f2, etc.
1309  //                 */
1310  //  char *overflow_arg_area;
1311  //                /* location on stack that holds
1312  //                 * the next overflow argument
1313  //                 */
1314  //  char *reg_save_area;
1315  //               /* where r3:r10 and f1:f8 (if saved)
1316  //                * are stored
1317  //                */
1318  // } va_list[1];
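  //
  // Illustratively, with 4-byte pointers the stores emitted below land at
  // byte offsets 0 (gpr), 1 (fpr), 4 (overflow_arg_area) and 8
  // (reg_save_area); bytes 2-3 are padding.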
1319
1320
1321  SDValue ArgGPR = DAG.getConstant(VarArgsNumGPR, MVT::i8);
1322  SDValue ArgFPR = DAG.getConstant(VarArgsNumFPR, MVT::i8);
1323
1324
1325  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1326
1327  SDValue StackOffsetFI = DAG.getFrameIndex(VarArgsStackOffset, PtrVT);
1328  SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1329
1330  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
1331  SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT);
1332
1333  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
1334  SDValue ConstStackOffset = DAG.getConstant(StackOffset, PtrVT);
1335
1336  uint64_t FPROffset = 1;
1337  SDValue ConstFPROffset = DAG.getConstant(FPROffset, PtrVT);
1338
1339  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1340
1341  // Store first byte: number of int regs
1342  SDValue firstStore = DAG.getStore(Op.getOperand(0), dl, ArgGPR,
1343                                      Op.getOperand(1), SV, 0);
1344  uint64_t nextOffset = FPROffset;
1345  SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
1346                                  ConstFPROffset);
1347
1348  // Store second byte: number of float regs
1349  SDValue secondStore =
1350    DAG.getStore(firstStore, dl, ArgFPR, nextPtr, SV, nextOffset);
1351  nextOffset += StackOffset;
1352  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
1353
1354  // Store second word: arguments given on stack
1355  SDValue thirdStore =
1356    DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, SV, nextOffset);
1357  nextOffset += FrameOffset;
1358  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
1359
1360  // Store third word: arguments given in registers
1361  return DAG.getStore(thirdStore, dl, FR, nextPtr, SV, nextOffset);
1362
1363}
1364
1365#include "PPCGenCallingConv.inc"
1366
1367/// GetFPR - Get the set of FP registers that should be allocated for arguments,
1368/// depending on which subtarget is selected.
1369static const unsigned *GetFPR(const PPCSubtarget &Subtarget) {
1370  if (Subtarget.isMachoABI()) {
1371    static const unsigned FPR[] = {
1372      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
1373      PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
1374    };
1375    return FPR;
1376  }
1377
1378
1379  static const unsigned FPR[] = {
1380    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
1381    PPC::F8
1382  };
1383  return FPR;
1384}
1385
1386/// CalculateStackSlotSize - Calculates the size reserved for this argument on
1387/// the stack.
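/// For example (illustrative): a 6-byte byval argument with PtrByteSize == 4
/// is rounded up to an 8-byte slot.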
1388static unsigned CalculateStackSlotSize(SDValue Arg, ISD::ArgFlagsTy Flags,
1389                                       bool isVarArg, unsigned PtrByteSize) {
1390  MVT ArgVT = Arg.getValueType();
1391  unsigned ArgSize = ArgVT.getSizeInBits()/8;
1392  if (Flags.isByVal())
1393    ArgSize = Flags.getByValSize();
1394  ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
1395
1396  return ArgSize;
1397}
1398
1399SDValue
1400PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op,
1401                                         SelectionDAG &DAG,
1402                                         int &VarArgsFrameIndex,
1403                                         int &VarArgsStackOffset,
1404                                         unsigned &VarArgsNumGPR,
1405                                         unsigned &VarArgsNumFPR,
1406                                         const PPCSubtarget &Subtarget) {
1407  // TODO: add description of PPC stack frame format, or at least some docs.
1408  //
1409  MachineFunction &MF = DAG.getMachineFunction();
1410  MachineFrameInfo *MFI = MF.getFrameInfo();
1411  MachineRegisterInfo &RegInfo = MF.getRegInfo();
1412  SmallVector<SDValue, 8> ArgValues;
1413  SDValue Root = Op.getOperand(0);
1414  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
1415  DebugLoc dl = Op.getDebugLoc();
1416
1417  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1418  bool isPPC64 = PtrVT == MVT::i64;
1419  bool isMachoABI = Subtarget.isMachoABI();
1420  bool isELF32_ABI = Subtarget.isELF32_ABI();
1421  // Potential tail calls could cause overwriting of argument stack slots.
1422  unsigned CC = MF.getFunction()->getCallingConv();
1423  bool isImmutable = !(PerformTailCallOpt && (CC==CallingConv::Fast));
1424  unsigned PtrByteSize = isPPC64 ? 8 : 4;
1425
1426  unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
1427  // Area that is at least reserved in caller of this function.
1428  unsigned MinReservedArea = ArgOffset;
1429
1430  static const unsigned GPR_32[] = {           // 32-bit registers.
1431    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
1432    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
1433  };
1434  static const unsigned GPR_64[] = {           // 64-bit registers.
1435    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
1436    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
1437  };
1438
1439  static const unsigned *FPR = GetFPR(Subtarget);
1440
1441  static const unsigned VR[] = {
1442    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
1443    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
1444  };
1445
1446  const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
1447  const unsigned Num_FPR_Regs = isMachoABI ? 13 : 8;
1448  const unsigned Num_VR_Regs  = array_lengthof(VR);
1449
1450  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
1451
1452  const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
1453
1454  // In 32-bit non-varargs functions, the stack space for vectors is after the
1455  // stack space for non-vectors.  We do not use this space unless we have
1456  // too many vectors to fit in registers, something that only occurs in
1457  // constructed examples, but we still have to walk the argument list to
1458  // figure that out.  For the pathological case, compute VecArgOffset as the
1459  // start of the vector parameter area.  Computing VecArgOffset is the
1460  // entire point of the following loop.
1461  // Altivec is not mentioned in the ppc32 ELF Supplement, so I'm not trying
1462  // to handle ELF here.
1463  unsigned VecArgOffset = ArgOffset;
1464  if (!isVarArg && !isPPC64) {
1465    for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues()-1; ArgNo != e;
1466         ++ArgNo) {
1467      MVT ObjectVT = Op.getValue(ArgNo).getValueType();
1468      unsigned ObjSize = ObjectVT.getSizeInBits()/8;
1469      ISD::ArgFlagsTy Flags =
1470        cast<ARG_FLAGSSDNode>(Op.getOperand(ArgNo+3))->getArgFlags();
1471
1472      if (Flags.isByVal()) {
1473        // ObjSize is the true size; ArgSize is it rounded up to whole registers.
1474        ObjSize = Flags.getByValSize();
1475        unsigned ArgSize =
1476                ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
1477        VecArgOffset += ArgSize;
1478        continue;
1479      }
1480
1481      switch(ObjectVT.getSimpleVT()) {
1482      default: assert(0 && "Unhandled argument type!");
1483      case MVT::i32:
1484      case MVT::f32:
1485        VecArgOffset += isPPC64 ? 8 : 4;
1486        break;
1487      case MVT::i64:  // PPC64
1488      case MVT::f64:
1489        VecArgOffset += 8;
1490        break;
1491      case MVT::v4f32:
1492      case MVT::v4i32:
1493      case MVT::v8i16:
1494      case MVT::v16i8:
1495        // Nothing to do, we're only looking at non-vector args here.
1496        break;
1497      }
1498    }
1499  }
1500  // We've found where the vector parameter area in memory is.  Skip the
1501  // first 12 parameters; these don't use that memory.
1502  VecArgOffset = ((VecArgOffset+15)/16)*16;
1503  VecArgOffset += 12*16;
1504
1505  // Add DAG nodes to load the arguments or copy them out of registers.  On
1506  // entry to a function on PPC, the arguments start after the linkage area,
1507  // although the first ones are often in registers.
1508  //
1509  // In the ELF 32 ABI, GPRs and the stack are double-word aligned: an argument
1510  // represented with two words (long long or double) must be copied to an
1511  // even GPR_idx value or to an even ArgOffset value.
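  // For example (illustrative): with GPR_idx == 1 (r4 would be next), a long
  // long is bumped to GPR_idx == 2 so it occupies the aligned pair r5/r6.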
1512
1513  SmallVector<SDValue, 8> MemOps;
1514  unsigned nAltivecParamsAtEnd = 0;
1515  for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
1516       ArgNo != e; ++ArgNo) {
1517    SDValue ArgVal;
1518    bool needsLoad = false;
1519    MVT ObjectVT = Op.getValue(ArgNo).getValueType();
1520    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
1521    unsigned ArgSize = ObjSize;
1522    ISD::ArgFlagsTy Flags =
1523      cast<ARG_FLAGSSDNode>(Op.getOperand(ArgNo+3))->getArgFlags();
1524    // See if next argument requires stack alignment in ELF
1525    bool Align = Flags.isSplit();
1526
1527    unsigned CurArgOffset = ArgOffset;
1528
1529    // Varargs or 64-bit Altivec parameters are padded to a 16-byte boundary.
1530    if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
1531        ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
1532      if (isVarArg || isPPC64) {
1533        MinReservedArea = ((MinReservedArea+15)/16)*16;
1534        MinReservedArea += CalculateStackSlotSize(Op.getValue(ArgNo),
1535                                                  Flags,
1536                                                  isVarArg,
1537                                                  PtrByteSize);
1538      } else nAltivecParamsAtEnd++;
1539    } else
1540      // Calculate min reserved area.
1541      MinReservedArea += CalculateStackSlotSize(Op.getValue(ArgNo),
1542                                                Flags,
1543                                                isVarArg,
1544                                                PtrByteSize);
1545
1546    // FIXME alignment for ELF may not be right
1547    // FIXME the codegen can be much improved in some cases.
1548    // We do not have to keep everything in memory.
1549    if (Flags.isByVal()) {
1550      // ObjSize is the true size; ArgSize is it rounded up to whole registers.
1551      ObjSize = Flags.getByValSize();
1552      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
1553      // Double word align in ELF
1554      if (Align && isELF32_ABI) GPR_idx += (GPR_idx % 2);
1555      // Objects of size 1 and 2 are right justified, everything else is
1556      // left justified.  This means the memory address is adjusted forwards.
1557      if (ObjSize==1 || ObjSize==2) {
1558        CurArgOffset = CurArgOffset + (4 - ObjSize);
1559      }
1560      // The value of the object is its address.
1561      int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset);
1562      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1563      ArgValues.push_back(FIN);
1564      if (ObjSize==1 || ObjSize==2) {
1565        if (GPR_idx != Num_GPR_Regs) {
1566          unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
1567          RegInfo.addLiveIn(GPR[GPR_idx], VReg);
1568          SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT);
1569          SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
1570                               NULL, 0, ObjSize==1 ? MVT::i8 : MVT::i16 );
1571          MemOps.push_back(Store);
1572          ++GPR_idx;
1573          if (isMachoABI) ArgOffset += PtrByteSize;
1574        } else {
1575          ArgOffset += PtrByteSize;
1576        }
1577        continue;
1578      }
1579      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
1580        // Store whatever pieces of the object are in registers
1581        // to memory.  ArgVal will be address of the beginning of
1582        // the object.
1583        if (GPR_idx != Num_GPR_Regs) {
1584          unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
1585          RegInfo.addLiveIn(GPR[GPR_idx], VReg);
1586          int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset);
1587          SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1588          SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT);
1589          SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
1590          MemOps.push_back(Store);
1591          ++GPR_idx;
1592          if (isMachoABI) ArgOffset += PtrByteSize;
1593        } else {
1594          ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
1595          break;
1596        }
1597      }
1598      continue;
1599    }
1600
1601    switch (ObjectVT.getSimpleVT()) {
1602    default: assert(0 && "Unhandled argument type!");
1603    case MVT::i32:
1604      if (!isPPC64) {
1605        // Double word align in ELF
1606        if (Align && isELF32_ABI) GPR_idx += (GPR_idx % 2);
1607
1608        if (GPR_idx != Num_GPR_Regs) {
1609          unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
1610          RegInfo.addLiveIn(GPR[GPR_idx], VReg);
1611          ArgVal = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32);
1612          ++GPR_idx;
1613        } else {
1614          needsLoad = true;
1615          ArgSize = PtrByteSize;
1616        }
1617        // Stack align in ELF
1618        if (needsLoad && Align && isELF32_ABI)
1619          ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
1620        // All int arguments reserve stack space in Macho ABI.
1621        if (isMachoABI || needsLoad) ArgOffset += PtrByteSize;
1622        break;
1623      }
1624      // FALLTHROUGH
1625    case MVT::i64:  // PPC64
1626      if (GPR_idx != Num_GPR_Regs) {
1627        unsigned VReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
1628        RegInfo.addLiveIn(GPR[GPR_idx], VReg);
1629        ArgVal = DAG.getCopyFromReg(Root, dl, VReg, MVT::i64);
1630
1631        if (ObjectVT == MVT::i32) {
1632          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
1633          // value to MVT::i64 and then truncate to the correct register size.
1634          if (Flags.isSExt())
1635            ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
1636                                 DAG.getValueType(ObjectVT));
1637          else if (Flags.isZExt())
1638            ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
1639                                 DAG.getValueType(ObjectVT));
1640
1641          ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
1642        }
1643
1644        ++GPR_idx;
1645      } else {
1646        needsLoad = true;
1647        ArgSize = PtrByteSize;
1648      }
1649      // All int arguments reserve stack space in Macho ABI.
1650      if (isMachoABI || needsLoad) ArgOffset += 8;
1651      break;
1652
1653    case MVT::f32:
1654    case MVT::f64:
1655      // Every 4 bytes of argument space consumes one of the GPRs available for
1656      // argument passing.
1657      if (GPR_idx != Num_GPR_Regs && isMachoABI) {
1658        ++GPR_idx;
1659        if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
1660          ++GPR_idx;
1661      }
1662      if (FPR_idx != Num_FPR_Regs) {
1663        unsigned VReg;
1664        if (ObjectVT == MVT::f32)
1665          VReg = RegInfo.createVirtualRegister(&PPC::F4RCRegClass);
1666        else
1667          VReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
1668        RegInfo.addLiveIn(FPR[FPR_idx], VReg);
1669        ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT);
1670        ++FPR_idx;
1671      } else {
1672        needsLoad = true;
1673      }
1674
1675      // Stack align in ELF
1676      if (needsLoad && Align && isELF32_ABI)
1677        ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
1678      // All FP arguments reserve stack space in Macho ABI.
1679      if (isMachoABI || needsLoad) ArgOffset += isPPC64 ? 8 : ObjSize;
1680      break;
1681    case MVT::v4f32:
1682    case MVT::v4i32:
1683    case MVT::v8i16:
1684    case MVT::v16i8:
1685      // Note that vector arguments in registers don't reserve stack space,
1686      // except in varargs functions.
1687      if (VR_idx != Num_VR_Regs) {
1688        unsigned VReg = RegInfo.createVirtualRegister(&PPC::VRRCRegClass);
1689        RegInfo.addLiveIn(VR[VR_idx], VReg);
1690        ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT);
1691        if (isVarArg) {
1692          while ((ArgOffset % 16) != 0) {
1693            ArgOffset += PtrByteSize;
1694            if (GPR_idx != Num_GPR_Regs)
1695              GPR_idx++;
1696          }
1697          ArgOffset += 16;
1698          GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs);
1699        }
1700        ++VR_idx;
1701      } else {
1702        if (!isVarArg && !isPPC64) {
1703          // Vectors go after all the nonvectors.
1704          CurArgOffset = VecArgOffset;
1705          VecArgOffset += 16;
1706        } else {
1707          // Vectors are aligned.
1708          ArgOffset = ((ArgOffset+15)/16)*16;
1709          CurArgOffset = ArgOffset;
1710          ArgOffset += 16;
1711        }
1712        needsLoad = true;
1713      }
1714      break;
1715    }
1716
1717    // We need to load the argument to a virtual register if we determined above
1718    // that we ran out of physical registers of the appropriate type.
1719    if (needsLoad) {
1720      int FI = MFI->CreateFixedObject(ObjSize,
1721                                      CurArgOffset + (ArgSize - ObjSize),
1722                                      isImmutable);
1723      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1724      ArgVal = DAG.getLoad(ObjectVT, dl, Root, FIN, NULL, 0);
1725    }
1726
1727    ArgValues.push_back(ArgVal);
1728  }
1729
1730  // Set the size that is at least reserved in the caller of this function.  A
1731  // tail-call-optimized function's reserved stack space needs to be aligned so
1732  // that taking the difference between two stack areas will result in an
1733  // aligned stack.
1734  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1735  // Add the Altivec parameters at the end, if needed.
1736  if (nAltivecParamsAtEnd) {
1737    MinReservedArea = ((MinReservedArea+15)/16)*16;
1738    MinReservedArea += 16*nAltivecParamsAtEnd;
1739  }
1740  MinReservedArea =
1741    std::max(MinReservedArea,
1742             PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI));
1743  unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()->
1744    getStackAlignment();
1745  unsigned AlignMask = TargetAlign-1;
1746  MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
1747  FI->setMinReservedArea(MinReservedArea);
1748
1749  // If the function takes a variable number of arguments, make a frame index for
1750  // the start of the first vararg value... for expansion of llvm.va_start.
1751  if (isVarArg) {
1752
1753    int depth;
1754    if (isELF32_ABI) {
1755      VarArgsNumGPR = GPR_idx;
1756      VarArgsNumFPR = FPR_idx;
1757
1758      // Make room for Num_GPR_Regs, Num_FPR_Regs and for a possible frame
1759      // pointer.
1760      depth = -(Num_GPR_Regs * PtrVT.getSizeInBits()/8 +
1761                Num_FPR_Regs * MVT(MVT::f64).getSizeInBits()/8 +
1762                PtrVT.getSizeInBits()/8);
1763
1764      VarArgsStackOffset = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
1765                                                  ArgOffset);
1766
1767    }
1768    else
1769      depth = ArgOffset;
1770
1771    VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
1772                                               depth);
1773    SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1774
1775    // In ELF 32 ABI, the fixed integer arguments of a variadic function are
1776    // stored to the VarArgsFrameIndex on the stack.
1777    if (isELF32_ABI) {
1778      for (GPR_idx = 0; GPR_idx != VarArgsNumGPR; ++GPR_idx) {
1779        SDValue Val = DAG.getRegister(GPR[GPR_idx], PtrVT);
1780        SDValue Store = DAG.getStore(Root, dl, Val, FIN, NULL, 0);
1781        MemOps.push_back(Store);
1782        // Increment the address by four for the next argument to store
1783        SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
1784        FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
1785      }
1786    }
1787
1788    // If this function is vararg, store any remaining integer argument regs
1789    // to their spots on the stack so that they may be loaded by dereferencing the
1790    // result of va_next.
1791    for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
1792      unsigned VReg;
1793      if (isPPC64)
1794        VReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
1795      else
1796        VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
1797
1798      RegInfo.addLiveIn(GPR[GPR_idx], VReg);
1799      SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT);
1800      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
1801      MemOps.push_back(Store);
1802      // Increment the address by four for the next argument to store
1803      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
1804      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
1805    }
1806
1807    // In ELF 32 ABI, the double arguments are stored to the VarArgsFrameIndex
1808    // on the stack.
1809    if (isELF32_ABI) {
1810      for (FPR_idx = 0; FPR_idx != VarArgsNumFPR; ++FPR_idx) {
1811        SDValue Val = DAG.getRegister(FPR[FPR_idx], MVT::f64);
1812        SDValue Store = DAG.getStore(Root, dl, Val, FIN, NULL, 0);
1813        MemOps.push_back(Store);
1814        // Increment the address by eight for the next argument to store
1815        SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8,
1816                                           PtrVT);
1817        FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
1818      }
1819
1820      for (; FPR_idx != Num_FPR_Regs; ++FPR_idx) {
1821        unsigned VReg;
1822        VReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
1823
1824        RegInfo.addLiveIn(FPR[FPR_idx], VReg);
1825        SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::f64);
1826        SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
1827        MemOps.push_back(Store);
1828        // Increment the address by eight for the next argument to store
1829        SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8,
1830                                           PtrVT);
1831        FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
1832      }
1833    }
1834  }
1835
1836  if (!MemOps.empty())
1837    Root = DAG.getNode(ISD::TokenFactor, dl,
1838                       MVT::Other, &MemOps[0], MemOps.size());
1839
1840  ArgValues.push_back(Root);
1841
1842  // Return the new list of results.
1843  return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
1844                     &ArgValues[0], ArgValues.size());
1845}
1846
1847/// CalculateParameterAndLinkageAreaSize - Get the size of the parameter plus
1848/// linkage area.
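/// For example (illustrative): a 32-bit Macho call passing four i32 arguments
/// starts at the 24-byte linkage area plus 16 bytes of parameters, and is then
/// raised to the minimum call frame size since the callee may store all 8 GPR
/// argument registers.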
1849static unsigned
1850CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
1851                                     bool isPPC64,
1852                                     bool isMachoABI,
1853                                     bool isVarArg,
1854                                     unsigned CC,
1855                                     CallSDNode *TheCall,
1856                                     unsigned &nAltivecParamsAtEnd) {
1857  // Count how many bytes are to be pushed on the stack, including the linkage
1858  // area, and parameter passing area.  We start with 24/48 bytes, which is
1859  // prereserved space for [SP][CR][LR][3 x unused].
1860  unsigned NumBytes = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
1861  unsigned NumOps = TheCall->getNumArgs();
1862  unsigned PtrByteSize = isPPC64 ? 8 : 4;
1863
1864  // Add up all the space actually used.
1865  // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
1866  // they all go in registers, but we must reserve stack space for them for
1867  // possible use by the caller.  In varargs or 64-bit calls, parameters are
1868  // assigned stack space in order, with padding so Altivec parameters are
1869  // 16-byte aligned.
1870  nAltivecParamsAtEnd = 0;
1871  for (unsigned i = 0; i != NumOps; ++i) {
1872    SDValue Arg = TheCall->getArg(i);
1873    ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
1874    MVT ArgVT = Arg.getValueType();
1875    // Varargs Altivec parameters are padded to a 16 byte boundary.
1876    if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||
1877        ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) {
1878      if (!isVarArg && !isPPC64) {
1879        // Non-varargs Altivec parameters go after all the non-Altivec
1880        // parameters; handle those later so we know how much padding we need.
1881        nAltivecParamsAtEnd++;
1882        continue;
1883      }
1884      // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
1885      NumBytes = ((NumBytes+15)/16)*16;
1886    }
1887    NumBytes += CalculateStackSlotSize(Arg, Flags, isVarArg, PtrByteSize);
1888  }
1889
1890  // Allow for Altivec parameters at the end, if needed.
1891  if (nAltivecParamsAtEnd) {
1892    NumBytes = ((NumBytes+15)/16)*16;
1893    NumBytes += 16*nAltivecParamsAtEnd;
1894  }
1895
1896  // The prolog code of the callee may store up to 8 GPR argument registers to
1897  // the stack, allowing va_start to index over them in memory if it is varargs.
1898  // Because we cannot tell if this is needed on the caller side, we have to
1899  // conservatively assume that it is needed.  As such, make sure we have at
1900  // least enough stack space for the caller to store the 8 GPRs.
1901  NumBytes = std::max(NumBytes,
1902                      PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI));
1903
1904  // Tail call needs the stack to be aligned.
1905  if (CC==CallingConv::Fast && PerformTailCallOpt) {
1906    unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()->
1907      getStackAlignment();
1908    unsigned AlignMask = TargetAlign-1;
1909    NumBytes = (NumBytes + AlignMask) & ~AlignMask;
1910  }
1911
1912  return NumBytes;
1913}
1914
1915/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
1916/// adjusted to accommodate the arguments for the tail call.
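/// For example (illustrative): if the caller reserved 64 bytes and the tail
/// call needs an 80-byte parameter area, SPDiff is -16 and the stack must
/// grow by 16 bytes.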
1917static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool IsTailCall,
1918                                   unsigned ParamSize) {
1919
1920  if (!IsTailCall) return 0;
1921
1922  PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
1923  unsigned CallerMinReservedArea = FI->getMinReservedArea();
1924  int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
1925  // Remember only if the new adjustment is bigger.
1926  if (SPDiff < FI->getTailCallSPDelta())
1927    FI->setTailCallSPDelta(SPDiff);
1928
1929  return SPDiff;
1930}
1931
1932/// IsEligibleForTailCallOptimization - Check to see whether the next
1933/// instruction following the call is a return. A function is eligible if the
1934/// caller/callee calling conventions match, currently only fastcc supports
1935/// tail calls, and the function CALL is immediately followed by a RET.
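/// Illustratively, a fastcc caller whose RET directly returns the result of a
/// fastcc CALL with no byval arguments qualifies, subject to the PIC
/// visibility checks below.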
1936bool
1937PPCTargetLowering::IsEligibleForTailCallOptimization(CallSDNode *TheCall,
1938                                                     SDValue Ret,
1939                                                     SelectionDAG& DAG) const {
1940  // Variable argument functions are not supported.
1941  if (!PerformTailCallOpt || TheCall->isVarArg())
1942    return false;
1943
1944  if (CheckTailCallReturnConstraints(TheCall, Ret)) {
1945    MachineFunction &MF = DAG.getMachineFunction();
1946    unsigned CallerCC = MF.getFunction()->getCallingConv();
1947    unsigned CalleeCC = TheCall->getCallingConv();
1948    if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
1949      // Functions containing by val parameters are not supported.
1950      for (unsigned i = 0; i != TheCall->getNumArgs(); i++) {
1951         ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
1952         if (Flags.isByVal()) return false;
1953      }
1954
1955      SDValue Callee = TheCall->getCallee();
1956      // Non-PIC/GOT tail calls are supported.
1957      if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
1958        return true;
1959
1960      // At the moment we can only do local tail calls (in same module, hidden
1961      // or protected) if we are generating PIC.
1962      if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
1963        return G->getGlobal()->hasHiddenVisibility()
1964            || G->getGlobal()->hasProtectedVisibility();
1965    }
1966  }
1967
1968  return false;
1969}
1970
1971/// isBLACompatibleAddress - Return the immediate to use if the specified
1972/// 32-bit value is representable in the immediate field of a BxA instruction.
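/// For example (illustrative): 0x01FFFFFC is representable (4-byte aligned
/// and its top 6 bits are a sign extension) and encodes as 0x007FFFFF, while
/// 0x0FFFFFFC is rejected because its top 6 bits are not a sign extension.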
1973static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
1974  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1975  if (!C) return 0;
1976
1977  int Addr = C->getZExtValue();
1978  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
1979      (Addr << 6 >> 6) != Addr)
1980    return 0;  // Top 6 bits have to be sext of immediate.
1981
1982  return DAG.getConstant((int)C->getZExtValue() >> 2,
1983                         DAG.getTargetLoweringInfo().getPointerTy()).getNode();
1984}
1985
1986namespace {
1987
1988struct TailCallArgumentInfo {
1989  SDValue Arg;
1990  SDValue FrameIdxOp;
1991  int       FrameIdx;
1992
1993  TailCallArgumentInfo() : FrameIdx(0) {}
1994};
1995
1996}
1997
1998/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
1999static void
2000StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
2001                                  SDValue Chain,
2002                   const SmallVector<TailCallArgumentInfo, 8> &TailCallArgs,
2003                   SmallVector<SDValue, 8> &MemOpChains,
2004                   DebugLoc dl) {
2005  for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
2006    SDValue Arg = TailCallArgs[i].Arg;
2007    SDValue FIN = TailCallArgs[i].FrameIdxOp;
2008    int FI = TailCallArgs[i].FrameIdx;
2009    // Store relative to the frame pointer.
2010    MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN,
2011                                       PseudoSourceValue::getFixedStack(FI),
2012                                       0));
2013  }
2014}
2015
2016/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
2017/// the appropriate stack slots for the tail call optimized function call.
2018static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
2019                                               MachineFunction &MF,
2020                                               SDValue Chain,
2021                                               SDValue OldRetAddr,
2022                                               SDValue OldFP,
2023                                               int SPDiff,
2024                                               bool isPPC64,
2025                                               bool isMachoABI,
2026                                               DebugLoc dl) {
2027  if (SPDiff) {
2028    // Calculate the new stack slot for the return address.
2029    int SlotSize = isPPC64 ? 8 : 4;
2030    int NewRetAddrLoc = SPDiff + PPCFrameInfo::getReturnSaveOffset(isPPC64,
2031                                                                   isMachoABI);
2032    int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
2033                                                          NewRetAddrLoc);
2034    int NewFPLoc = SPDiff + PPCFrameInfo::getFramePointerSaveOffset(isPPC64,
2035                                                                    isMachoABI);
2036    int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc);
2037
2038    MVT VT = isPPC64 ? MVT::i64 : MVT::i32;
2039    SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
2040    Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
2041                         PseudoSourceValue::getFixedStack(NewRetAddr), 0);
2042    SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
2043    Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
2044                         PseudoSourceValue::getFixedStack(NewFPIdx), 0);
2045  }
2046  return Chain;
2047}
2048
2049/// CalculateTailCallArgDest - Remember the argument for later processing.
2050/// Calculate the position of the argument.
2051static void
2052CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
2053                         SDValue Arg, int SPDiff, unsigned ArgOffset,
2054                      SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) {
2055  int Offset = ArgOffset + SPDiff;
2056  uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
2057  int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
2058  MVT VT = isPPC64 ? MVT::i64 : MVT::i32;
2059  SDValue FIN = DAG.getFrameIndex(FI, VT);
2060  TailCallArgumentInfo Info;
2061  Info.Arg = Arg;
2062  Info.FrameIdxOp = FIN;
2063  Info.FrameIdx = FI;
2064  TailCallArguments.push_back(Info);
2065}
2066
2067/// EmitTailCallLoadFPAndRetAddr - Emit loads of the return address and frame
2068/// pointer from their stack slots. Returns the chain as result and the loaded
2069/// values in LROpOut/FPOpOut. Used when tail calling.
2070SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
2071                                                        int SPDiff,
2072                                                        SDValue Chain,
2073                                                        SDValue &LROpOut,
2074                                                        SDValue &FPOpOut,
2075                                                        DebugLoc dl) {
2076  if (SPDiff) {
2077    // Load the LR and FP stack slot for later adjusting.
2078    MVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
2079    LROpOut = getReturnAddrFrameIndex(DAG);
2080    LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, NULL, 0);
2081    Chain = SDValue(LROpOut.getNode(), 1);
2082    FPOpOut = getFramePointerFrameIndex(DAG);
2083    FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, NULL, 0);
2084    Chain = SDValue(FPOpOut.getNode(), 1);
2085  }
2086  return Chain;
2087}
2088
2089/// CreateCopyOfByValArgument - Make a copy of an aggregate at the address
2090/// specified by "Src" to address "Dst" of size "Size".  Alignment information
2091/// is specified by the parameter attribute. The copy will be passed as
2092/// a byval function parameter.
2093/// Sometimes what we are copying is the end of a larger object, the part that
2094/// does not fit in registers.
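/// Illustratively, a 12-byte byval struct becomes a single 12-byte memcpy
/// node here, at the alignment recorded in the byval flags.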
2095static SDValue
2096CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
2097                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
2098                          unsigned Size, DebugLoc dl) {
2099  SDValue SizeNode = DAG.getConstant(Size, MVT::i32);
2100  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
2101                       false, NULL, 0, NULL, 0);
2102}
2103
2104/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
2105/// tail calls.
2106static void
2107LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
2108                 SDValue Arg, SDValue PtrOff, int SPDiff,
2109                 unsigned ArgOffset, bool isPPC64, bool isTailCall,
2110                 bool isVector, SmallVector<SDValue, 8> &MemOpChains,
2111                 SmallVector<TailCallArgumentInfo, 8>& TailCallArguments,
2112                 DebugLoc dl) {
2113  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2114  if (!isTailCall) {
2115    if (isVector) {
2116      SDValue StackPtr;
2117      if (isPPC64)
2118        StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
2119      else
2120        StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
2121      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
2122                           DAG.getConstant(ArgOffset, PtrVT));
2123    }
2124    MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
2125  // Calculate and remember argument location.
2126  } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
2127                                  TailCallArguments);
2128}
2129
2130SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG,
2131                                       const PPCSubtarget &Subtarget,
2132                                       TargetMachine &TM) {
2133  CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
2134  SDValue Chain  = TheCall->getChain();
2135  bool isVarArg   = TheCall->isVarArg();
2136  unsigned CC     = TheCall->getCallingConv();
2137  bool isTailCall = TheCall->isTailCall()
2138                 && CC == CallingConv::Fast && PerformTailCallOpt;
2139  SDValue Callee = TheCall->getCallee();
2140  unsigned NumOps  = TheCall->getNumArgs();
2141  DebugLoc dl = TheCall->getDebugLoc();
2142
2143  bool isMachoABI = Subtarget.isMachoABI();
2144  bool isELF32_ABI  = Subtarget.isELF32_ABI();
2145
2146  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2147  bool isPPC64 = PtrVT == MVT::i64;
2148  unsigned PtrByteSize = isPPC64 ? 8 : 4;
2149
2150  MachineFunction &MF = DAG.getMachineFunction();
2151
2152  // args_to_use will accumulate outgoing args for the PPCISD::CALL case in
2153  // SelectExpr, which puts the arguments in the appropriate registers.
2154  std::vector<SDValue> args_to_use;
2155
2156  // Mark this function as potentially containing a tail call. As a consequence
2157  // the frame pointer will be used for dynamic alloca and for restoring the
2158  // caller's stack pointer in this function's epilog. This is done because the
2159  // tail-called function might overwrite the value in this function's (MF)
2160  // stack pointer stack slot 0(SP).
2161  if (PerformTailCallOpt && CC==CallingConv::Fast)
2162    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
2163
2164  unsigned nAltivecParamsAtEnd = 0;
2165
2166  // Count how many bytes are to be pushed on the stack, including the linkage
2167  // area, and parameter passing area.  We start with 24/48 bytes, which is
2168  // prereserved space for [SP][CR][LR][3 x unused].
2169  unsigned NumBytes =
2170    CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isMachoABI, isVarArg, CC,
2171                                         TheCall, nAltivecParamsAtEnd);
2172
2173  // Calculate by how many bytes the stack has to be adjusted in case of tail
2174  // call optimization.
2175  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
2176
2177  // Adjust the stack pointer for the new arguments...
2178  // These operations are automatically eliminated by the prolog/epilog pass
2179  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
2180  SDValue CallSeqStart = Chain;
2181
2182  // Load the return address and frame pointer so they can be moved somewhere
2183  // else later.
2184  SDValue LROp, FPOp;
2185  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
2186
2187  // Set up a copy of the stack pointer for use loading and storing any
2188  // arguments that may not fit in the registers available for argument
2189  // passing.
2190  SDValue StackPtr;
2191  if (isPPC64)
2192    StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
2193  else
2194    StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
2195
2196  // Figure out which arguments are going to go in registers, and which in
2197  // memory.  Also, if this is a vararg function, floating point operations
2198  // must be stored to our stack, and loaded into integer regs as well, if
2199  // any integer regs are available for argument passing.
2200  unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
2201  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
2202
2203  static const unsigned GPR_32[] = {           // 32-bit registers.
2204    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
2205    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
2206  };
2207  static const unsigned GPR_64[] = {           // 64-bit registers.
2208    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
2209    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
2210  };
2211  static const unsigned *FPR = GetFPR(Subtarget);
2212
2213  static const unsigned VR[] = {
2214    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
2215    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
2216  };
2217  const unsigned NumGPRs = array_lengthof(GPR_32);
2218  const unsigned NumFPRs = isMachoABI ? 13 : 8;
2219  const unsigned NumVRs  = array_lengthof(VR);
2220
2221  const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
2222
2223  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
2224  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
2225
2226  SmallVector<SDValue, 8> MemOpChains;
2227  for (unsigned i = 0; i != NumOps; ++i) {
2228    bool inMem = false;
2229    SDValue Arg = TheCall->getArg(i);
2230    ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
2231    // See if next argument requires stack alignment in ELF
2232    bool Align = Flags.isSplit();
2233
2234    // PtrOff will be used to store the current argument to the stack if a
2235    // register cannot be found for it.
2236    SDValue PtrOff;
2237
2238    // Stack align in ELF 32
2239    if (isELF32_ABI && Align)
2240      PtrOff = DAG.getConstant(ArgOffset + ((ArgOffset/4) % 2) * PtrByteSize,
2241                               StackPtr.getValueType());
2242    else
2243      PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
2244
2245    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
2246
2247    // On PPC64, promote integers to 64-bit values.
2248    if (isPPC64 && Arg.getValueType() == MVT::i32) {
2249      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
2250      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
2251      Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
2252    }
2253
2254    // FIXME Elf untested, what are alignment rules?
2255    // FIXME memcpy is used way more than necessary.  Correctness first.
2256    if (Flags.isByVal()) {
2257      unsigned Size = Flags.getByValSize();
2258      if (isELF32_ABI && Align) GPR_idx += (GPR_idx % 2);
2259      if (Size==1 || Size==2) {
2260        // Very small objects are passed right-justified.
2261        // Everything else is passed left-justified.
2262        MVT VT = (Size==1) ? MVT::i8 : MVT::i16;
2263        if (GPR_idx != NumGPRs) {
2264          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
2265                                          NULL, 0, VT);
2266          MemOpChains.push_back(Load.getValue(1));
2267          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
2268          if (isMachoABI)
2269            ArgOffset += PtrByteSize;
2270        } else {
2271          SDValue Const = DAG.getConstant(4 - Size, PtrOff.getValueType());
2272          SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
2273          SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr,
2274                                CallSeqStart.getNode()->getOperand(0),
2275                                Flags, DAG, Size, dl);
2276          // This must go outside the CALLSEQ_START..END.
2277          SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
2278                               CallSeqStart.getNode()->getOperand(1));
2279          DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
2280                                 NewCallSeqStart.getNode());
2281          Chain = CallSeqStart = NewCallSeqStart;
2282          ArgOffset += PtrByteSize;
2283        }
2284        continue;
2285      }
2286      // Copy entire object into memory.  There are cases where gcc-generated
2287      // code assumes it is there, even if it could be put entirely into
2288      // registers.  (This is not what the doc says.)
2289      SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
2290                            CallSeqStart.getNode()->getOperand(0),
2291                            Flags, DAG, Size, dl);
2292      // This must go outside the CALLSEQ_START..END.
2293      SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
2294                           CallSeqStart.getNode()->getOperand(1));
2295      DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), NewCallSeqStart.getNode());
2296      Chain = CallSeqStart = NewCallSeqStart;
2297      // And copy the pieces of it that fit into registers.
2298      for (unsigned j=0; j<Size; j+=PtrByteSize) {
2299        SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
2300        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
2301        if (GPR_idx != NumGPRs) {
2302          SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, NULL, 0);
2303          MemOpChains.push_back(Load.getValue(1));
2304          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
2305          if (isMachoABI)
2306            ArgOffset += PtrByteSize;
2307        } else {
2308          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
2309          break;
2310        }
2311      }
2312      continue;
2313    }
2314
2315    switch (Arg.getValueType().getSimpleVT()) {
2316    default: assert(0 && "Unexpected ValueType for argument!");
2317    case MVT::i32:
2318    case MVT::i64:
2319      // Double word align in ELF
2320      if (isELF32_ABI && Align) GPR_idx += (GPR_idx % 2);
2321      if (GPR_idx != NumGPRs) {
2322        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
2323      } else {
2324        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
2325                         isPPC64, isTailCall, false, MemOpChains,
2326                         TailCallArguments, dl);
2327        inMem = true;
2328      }
2329      if (inMem || isMachoABI) {
2330        // Stack align in ELF
2331        if (isELF32_ABI && Align)
2332          ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
2333
2334        ArgOffset += PtrByteSize;
2335      }
2336      break;
2337    case MVT::f32:
2338    case MVT::f64:
2339      if (FPR_idx != NumFPRs) {
2340        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
2341
2342        if (isVarArg) {
2343          SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0);
2344          MemOpChains.push_back(Store);
2345
2346          // Float varargs are always shadowed in available integer registers
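          // (illustratively, an f64 vararg on 32-bit is reloaded as two i32
          // GPR pieces: the load below and a second load at PtrOff + 4)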
2347          if (GPR_idx != NumGPRs) {
2348            SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0);
2349            MemOpChains.push_back(Load.getValue(1));
2350            if (isMachoABI) RegsToPass.push_back(std::make_pair(GPR[GPR_idx++],
2351                                                                Load));
2352          }
2353          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
2354            SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
2355            PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
2356            SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0);
2357            MemOpChains.push_back(Load.getValue(1));
2358            if (isMachoABI) RegsToPass.push_back(std::make_pair(GPR[GPR_idx++],
2359                                                                Load));
2360          }
2361        } else {
2362          // If we have any FPRs remaining, we may also have GPRs remaining.
2363          // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
2364          // GPRs.
2365          if (isMachoABI) {
2366            if (GPR_idx != NumGPRs)
2367              ++GPR_idx;
2368            if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
2369                !isPPC64)  // PPC64 has 64-bit GPR's obviously :)
2370              ++GPR_idx;
2371          }
2372        }
2373      } else {
2374        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
2375                         isPPC64, isTailCall, false, MemOpChains,
2376                         TailCallArguments, dl);
2377        inMem = true;
2378      }
2379      if (inMem || isMachoABI) {
2380        // Stack align in ELF
2381        if (isELF32_ABI && Align)
2382          ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
2383        if (isPPC64)
2384          ArgOffset += 8;
2385        else
2386          ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
2387      }
2388      break;
2389    case MVT::v4f32:
2390    case MVT::v4i32:
2391    case MVT::v8i16:
2392    case MVT::v16i8:
2393      if (isVarArg) {
2394        // These go aligned on the stack, or in the corresponding R registers
2395        // when within range.  The Darwin PPC ABI doc claims they also go in
2396        // V registers; in fact gcc does this only for arguments that are
2397        // prototyped, not for those that match the ...  We do it for all
2398        // arguments, seems to work.
2399        while (ArgOffset % 16 != 0) {
2400          ArgOffset += PtrByteSize;
2401          if (GPR_idx != NumGPRs)
2402            GPR_idx++;
2403        }
2404        // We could elide this store in the case where the object fits
2405        // entirely in R registers.  Maybe later.
2406        PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
2407                            DAG.getConstant(ArgOffset, PtrVT));
2408        SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0);
2409        MemOpChains.push_back(Store);
2410        if (VR_idx != NumVRs) {
2411          SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, NULL, 0);
2412          MemOpChains.push_back(Load.getValue(1));
2413          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
2414        }
2415        ArgOffset += 16;
2416        for (unsigned i=0; i<16; i+=PtrByteSize) {
2417          if (GPR_idx == NumGPRs)
2418            break;
2419          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
2420                                  DAG.getConstant(i, PtrVT));
2421          SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, NULL, 0);
2422          MemOpChains.push_back(Load.getValue(1));
2423          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
2424        }
2425        break;
2426      }
2427
2428      // Non-varargs Altivec params generally go in registers, but have
2429      // stack space allocated at the end.
2430      if (VR_idx != NumVRs) {
2431        // Doesn't have GPR space allocated.
2432        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
2433      } else if (nAltivecParamsAtEnd==0) {
2434        // We are emitting Altivec params in order.
2435        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
2436                         isPPC64, isTailCall, true, MemOpChains,
2437                         TailCallArguments, dl);
2438        ArgOffset += 16;
2439      }
2440      break;
2441    }
2442  }
2443  // If all Altivec parameters fit in registers, as they usually do,
2444  // they get stack space following the non-Altivec parameters.  We
2445  // don't track this here because nobody below needs it.
2446  // If there are more Altivec parameters than fit in registers, emit
2447  // the stores here.
2448  if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
2449    unsigned j = 0;
2450    // Offset is aligned; skip 1st 12 params which go in V registers.
2451    ArgOffset = ((ArgOffset+15)/16)*16;
2452    ArgOffset += 12*16;
2453    for (unsigned i = 0; i != NumOps; ++i) {
2454      SDValue Arg = TheCall->getArg(i);
2455      MVT ArgType = Arg.getValueType();
2456      if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
2457          ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
2458        if (++j > NumVRs) {
2459          SDValue PtrOff;
2460          // We are emitting Altivec params in order.
2461          LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
2462                           isPPC64, isTailCall, true, MemOpChains,
2463                           TailCallArguments, dl);
2464          ArgOffset += 16;
2465        }
2466      }
2467    }
2468  }
2469
2470  if (!MemOpChains.empty())
2471    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
2472                        &MemOpChains[0], MemOpChains.size());
2473
2474  // Build a sequence of copy-to-reg nodes chained together with token chain
2475  // and flag operands which copy the outgoing args into the appropriate regs.
2476  SDValue InFlag;
2477  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2478    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2479                             RegsToPass[i].second, InFlag);
2480    InFlag = Chain.getValue(1);
2481  }
2482
2483  // With the ELF 32 ABI, set CR6 to true if this is a vararg call.
2484  if (isVarArg && isELF32_ABI) {
2485    SDValue SetCR(DAG.getTargetNode(PPC::CRSET, dl, MVT::i32), 0);
2486    Chain = DAG.getCopyToReg(Chain, dl, PPC::CR1EQ, SetCR, InFlag);
2487    InFlag = Chain.getValue(1);
2488  }
2489
2490  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
2491  // might overwrite each other in case of tail call optimization.
2492  if (isTailCall) {
2493    SmallVector<SDValue, 8> MemOpChains2;
2494    // Do not flag preceding copytoreg stuff together with the following stuff.
2495    InFlag = SDValue();
2496    StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
2497                                      MemOpChains2, dl);
2498    if (!MemOpChains2.empty())
2499      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
2500                          &MemOpChains2[0], MemOpChains2.size());
2501
2502    // Store the return address to the appropriate stack slot.
2503    Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
2504                                          isPPC64, isMachoABI, dl);
2505  }
2506
2507  // Emit callseq_end just before tailcall node.
2508  if (isTailCall) {
2509    Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
2510                               DAG.getIntPtrConstant(0, true), InFlag);
2511    InFlag = Chain.getValue(1);
2512  }
2513
2514  std::vector<MVT> NodeTys;
2515  NodeTys.push_back(MVT::Other);   // Returns a chain
2516  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
2517
2518  SmallVector<SDValue, 8> Ops;
2519  unsigned CallOpc = isMachoABI ? PPCISD::CALL_Macho : PPCISD::CALL_ELF;
2520
2521  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
2522  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
2523  // node so that legalize doesn't hack it.
2524  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2525    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), Callee.getValueType());
2526  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
2527    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType());
2528  else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
2529    // If this is an absolute destination address, use the munged value.
2530    Callee = SDValue(Dest, 0);
2531  else {
2532    // Otherwise, this is an indirect call.  We have to use an MTCTR/BCTRL pair
2533    // to do the call; we can't use PPCISD::CALL.
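    // For reference (an illustrative sketch, not original source), the nodes
    // built below correspond roughly to the machine sequence:
    //   mtctr rN    ; move the callee address into the count register
    //   bctrl       ; branch to CTR and link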
2534    SDValue MTCTROps[] = {Chain, Callee, InFlag};
2535    Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps,
2536                        2 + (InFlag.getNode() != 0));
2537    InFlag = Chain.getValue(1);
2538
2539    // Copy the callee address into R12/X12 on Darwin.
2540    if (isMachoABI) {
2541      unsigned Reg = Callee.getValueType() == MVT::i32 ? PPC::R12 : PPC::X12;
2542      Chain = DAG.getCopyToReg(Chain, dl, Reg, Callee, InFlag);
2543      InFlag = Chain.getValue(1);
2544    }
2545
2546    NodeTys.clear();
2547    NodeTys.push_back(MVT::Other);
2548    NodeTys.push_back(MVT::Flag);
2549    Ops.push_back(Chain);
2550    CallOpc = isMachoABI ? PPCISD::BCTRL_Macho : PPCISD::BCTRL_ELF;
2551    Callee.setNode(0);
2552    // Add CTR register as callee so a bctr can be emitted later.
2553    if (isTailCall)
2554      Ops.push_back(DAG.getRegister(PPC::CTR, getPointerTy()));
2555  }
2556
2557  // If this is a direct call, pass the chain and the callee.
2558  if (Callee.getNode()) {
2559    Ops.push_back(Chain);
2560    Ops.push_back(Callee);
2561  }
2562  // If this is a tail call, add the stack pointer delta.
2563  if (isTailCall)
2564    Ops.push_back(DAG.getConstant(SPDiff, MVT::i32));
2565
2566  // Add argument registers to the end of the list so that they are known live
2567  // into the call.
2568  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2569    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2570                                  RegsToPass[i].second.getValueType()));
2571
2572  // When performing tail call optimization the callee pops its arguments off
2573  // the stack. Account for this here so these bytes can be pushed back on in
2574  // PPCRegisterInfo::eliminateCallFramePseudoInstr.
2575  int BytesCalleePops =
2576    (CC==CallingConv::Fast && PerformTailCallOpt) ? NumBytes : 0;
2577
2578  if (InFlag.getNode())
2579    Ops.push_back(InFlag);
2580
2581  // Emit tail call.
2582  if (isTailCall) {
2583    assert(InFlag.getNode() &&
2584           "Flag must be set; we depend on it being set in LowerRET");
2585    Chain = DAG.getNode(PPCISD::TAILCALL, dl,
2586                        TheCall->getVTList(), &Ops[0], Ops.size());
2587    return SDValue(Chain.getNode(), Op.getResNo());
2588  }
2589
2590  Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
2591  InFlag = Chain.getValue(1);
2592
2593  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
2594                             DAG.getIntPtrConstant(BytesCalleePops, true),
2595                             InFlag);
2596  if (TheCall->getValueType(0) != MVT::Other)
2597    InFlag = Chain.getValue(1);
2598
2599  SmallVector<SDValue, 16> ResultVals;
2600  SmallVector<CCValAssign, 16> RVLocs;
2601  unsigned CallerCC = DAG.getMachineFunction().getFunction()->getCallingConv();
2602  CCState CCInfo(CallerCC, isVarArg, TM, RVLocs);
2603  CCInfo.AnalyzeCallResult(TheCall, RetCC_PPC);
2604
2605  // Copy all of the result registers out of their specified physreg.
2606  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
2607    CCValAssign &VA = RVLocs[i];
2608    MVT VT = VA.getValVT();
2609    assert(VA.isRegLoc() && "Can only return in registers!");
2610    Chain = DAG.getCopyFromReg(Chain, dl,
2611                               VA.getLocReg(), VT, InFlag).getValue(1);
2612    ResultVals.push_back(Chain.getValue(0));
2613    InFlag = Chain.getValue(2);
2614  }
2615
2616  // If the function returns void, just return the chain.
2617  if (RVLocs.empty())
2618    return Chain;
2619
2620  // Otherwise, merge everything together with a MERGE_VALUES node.
2621  ResultVals.push_back(Chain);
2622  SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(),
2623                            &ResultVals[0], ResultVals.size());
2624  return Res.getValue(Op.getResNo());
2625}
2626
2627SDValue PPCTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG,
2628                                      TargetMachine &TM) {
2629  SmallVector<CCValAssign, 16> RVLocs;
2630  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
2631  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
2632  DebugLoc dl = Op.getDebugLoc();
2633  CCState CCInfo(CC, isVarArg, TM, RVLocs);
2634  CCInfo.AnalyzeReturn(Op.getNode(), RetCC_PPC);
2635
2636  // If this is the first return lowered for this function, add the regs to the
2637  // liveout set for the function.
2638  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
2639    for (unsigned i = 0; i != RVLocs.size(); ++i)
2640      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
2641  }
2642
2643  SDValue Chain = Op.getOperand(0);
2644
2645  Chain = GetPossiblePreceedingTailCall(Chain, PPCISD::TAILCALL);
2646  if (Chain.getOpcode() == PPCISD::TAILCALL) {
2647    SDValue TailCall = Chain;
2648    SDValue TargetAddress = TailCall.getOperand(1);
2649    SDValue StackAdjustment = TailCall.getOperand(2);
2650
2651    assert(((TargetAddress.getOpcode() == ISD::Register &&
2652             cast<RegisterSDNode>(TargetAddress)->getReg() == PPC::CTR) ||
2653            TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
2654            TargetAddress.getOpcode() == ISD::TargetGlobalAddress ||
2655            isa<ConstantSDNode>(TargetAddress)) &&
2656    "Expecting a global address, external symbol, absolute value or register");
2657
2658    assert(StackAdjustment.getOpcode() == ISD::Constant &&
2659           "Expecting a const value");
2660
2661    SmallVector<SDValue,8> Operands;
2662    Operands.push_back(Chain.getOperand(0));
2663    Operands.push_back(TargetAddress);
2664    Operands.push_back(StackAdjustment);
2665    // Copy registers used by the call. Last operand is a flag so it is not
2666    // copied.
2667    for (unsigned i = 3; i < TailCall.getNumOperands()-1; ++i) {
2668      Operands.push_back(Chain.getOperand(i));
2669    }
2670    return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Operands[0],
2671                       Operands.size());
2672  }
2673
2674  SDValue Flag;
2675
2676  // Copy the result values into the output registers.
2677  for (unsigned i = 0; i != RVLocs.size(); ++i) {
2678    CCValAssign &VA = RVLocs[i];
2679    assert(VA.isRegLoc() && "Can only return in registers!");
2680    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2681                             Op.getOperand(i*2+1), Flag);
2682    Flag = Chain.getValue(1);
2683  }
2684
2685  if (Flag.getNode())
2686    return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
2687  else
2688    return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain);
2689}
2690
2691SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
2692                                   const PPCSubtarget &Subtarget) {
2693  // When we pop the dynamic allocation we need to restore the SP link.
2694  DebugLoc dl = Op.getDebugLoc();
2695
2696  // Get the correct type for pointers.
2697  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2698
2699  // Construct the stack pointer operand.
2700  bool IsPPC64 = Subtarget.isPPC64();
2701  unsigned SP = IsPPC64 ? PPC::X1 : PPC::R1;
2702  SDValue StackPtr = DAG.getRegister(SP, PtrVT);
2703
2704  // Get the operands for the STACKRESTORE.
2705  SDValue Chain = Op.getOperand(0);
2706  SDValue SaveSP = Op.getOperand(1);
2707
2708  // Load the old link SP.
2709  SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr, NULL, 0);
2710
2711  // Restore the stack pointer.
2712  Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
2713
2714  // Store the old link SP.
2715  return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, NULL, 0);
2716}
2717
2718
2719
2720SDValue
2721PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
2722  MachineFunction &MF = DAG.getMachineFunction();
2723  bool IsPPC64 = PPCSubTarget.isPPC64();
2724  bool isMachoABI = PPCSubTarget.isMachoABI();
2725  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2726
2727  // Get the current return address save index.  The users of this index are
2728  // primarily the return-address lowering and tail-call sequences.
2729  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
2730  int RASI = FI->getReturnAddrSaveIndex();
2731
2732  // If the return address save index hasn't been defined yet, create it.
2733  if (!RASI) {
2734    // Find out the fixed offset of the return address save area.
2735    int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isMachoABI);
2736    // Allocate the frame index for the return address save area.
2737    RASI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, LROffset);
2738    // Save the result.
2739    FI->setReturnAddrSaveIndex(RASI);
2740  }
2741  return DAG.getFrameIndex(RASI, PtrVT);
2742}
2743
2744SDValue
2745PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
2746  MachineFunction &MF = DAG.getMachineFunction();
2747  bool IsPPC64 = PPCSubTarget.isPPC64();
2748  bool isMachoABI = PPCSubTarget.isMachoABI();
2749  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2750
2751  // Get current frame pointer save index.  The users of this index will be
2752  // primarily DYNALLOC instructions.
2753  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
2754  int FPSI = FI->getFramePointerSaveIndex();
2755
2756  // If the frame pointer save index hasn't been defined yet, create it.
2757  if (!FPSI) {
2758    // Find out the fixed offset of the frame pointer save area.
2759    int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, isMachoABI);
2760
2761    // Allocate the frame index for frame pointer save area.
2762    FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset);
2763    // Save the result.
2764    FI->setFramePointerSaveIndex(FPSI);
2765  }
2766  return DAG.getFrameIndex(FPSI, PtrVT);
2767}
2768
2769SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
2770                                         SelectionDAG &DAG,
2771                                         const PPCSubtarget &Subtarget) {
2772  // Get the inputs.
2773  SDValue Chain = Op.getOperand(0);
2774  SDValue Size  = Op.getOperand(1);
2775  DebugLoc dl = Op.getDebugLoc();
2776
2777  // Get the correct type for pointers.
2778  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2779  // Negate the size.
2780  SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
2781                                  DAG.getConstant(0, PtrVT), Size);
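  // Annotation (an assumption about the later expansion, not original
  // source): the stack grows downward, so the allocation amounts to
  // "SP += NegSize"; DYNALLOC is presumably expanded to a store-with-update
  // (stwux/stdux) that bumps r1 and re-links the back chain in one step.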
2782  // Construct a node for the frame pointer save index.
2783  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
2784  // Build a DYNALLOC node.
2785  SDValue Ops[3] = { Chain, NegSize, FPSIdx };
2786  SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
2787  return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3);
2788}
2789
2790/// LowerSELECT_CC - Lower floating-point select_cc's into the fsel instruction
2791/// when possible.
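/// For reference (paraphrase of the ISA, not original text): fsel computes
/// FRT = (FRA >= 0.0) ? FRC : FRB, so each condition below is rewritten as
/// a single >= 0.0 test on an operand or on the difference of the operands.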
2792SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
2793  // Not FP? Not an fsel.
2794  if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
2795      !Op.getOperand(2).getValueType().isFloatingPoint())
2796    return SDValue();
2797
2798  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
2799
2800  // Cannot handle SETEQ/SETNE.
2801  if (CC == ISD::SETEQ || CC == ISD::SETNE) return SDValue();
2802
2803  MVT ResVT = Op.getValueType();
2804  MVT CmpVT = Op.getOperand(0).getValueType();
2805  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
2806  SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
2807  DebugLoc dl = Op.getDebugLoc();
2808
2809  // If the RHS of the comparison is a 0.0, we don't need to do the
2810  // subtraction at all.
2811  if (isFloatingPointZero(RHS))
2812    switch (CC) {
2813    default: break;       // SETUO etc aren't handled by fsel.
2814    case ISD::SETULT:
2815    case ISD::SETLT:
2816      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
2817    case ISD::SETOGE:
2818    case ISD::SETGE:
2819      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
2820        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
2821      return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
2822    case ISD::SETUGT:
2823    case ISD::SETGT:
2824      std::swap(TV, FV);  // fsel is natively setge, swap operands for setgt
2825    case ISD::SETOLE:
2826    case ISD::SETLE:
2827      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
2828        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
2829      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
2830                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
2831    }
2832
2833  SDValue Cmp;
2834  switch (CC) {
2835  default: break;       // SETUO etc aren't handled by fsel.
2836  case ISD::SETULT:
2837  case ISD::SETLT:
2838    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
2839    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
2840      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
2841    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
2842  case ISD::SETOGE:
2843  case ISD::SETGE:
2844    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
2845    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
2846      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
2847    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
2848  case ISD::SETUGT:
2849  case ISD::SETGT:
2850    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
2851    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
2852      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
2853    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
2854  case ISD::SETOLE:
2855  case ISD::SETLE:
2856    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
2857    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
2858      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
2859    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
2860  }
2861  return SDValue();
2862}
2863
2864// FIXME: Split this code up when LegalizeDAGTypes lands.
2865SDValue PPCTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG,
2866                                           DebugLoc dl) {
2867  assert(Op.getOperand(0).getValueType().isFloatingPoint());
2868  SDValue Src = Op.getOperand(0);
2869  if (Src.getValueType() == MVT::f32)
2870    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
2871
2872  SDValue Tmp;
2873  switch (Op.getValueType().getSimpleVT()) {
2874  default: assert(0 && "Unhandled FP_TO_SINT type in custom expander!");
2875  case MVT::i32:
2876    Tmp = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Src);
2877    break;
2878  case MVT::i64:
2879    Tmp = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Src);
2880    break;
2881  }
2882
2883  // Convert the FP value to an int value through memory.
2884  SDValue FIPtr = DAG.CreateStackTemporary(MVT::f64);
2885
2886  // Emit a store to the stack slot.
2887  SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, NULL, 0);
2888
2889  // Result is a load from the stack slot.  If loading 4 bytes, add a bias of
2890  // 4: on big-endian PPC, fctiwz leaves the i32 result in the low word.
2891  if (Op.getValueType() == MVT::i32)
2892    FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
2893                        DAG.getConstant(4, FIPtr.getValueType()));
2894  return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, NULL, 0);
2895}
2896
2897SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
2898  DebugLoc dl = Op.getDebugLoc();
2899  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
2900  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
2901    return SDValue();
2902
2903  if (Op.getOperand(0).getValueType() == MVT::i64) {
2904    SDValue Bits = DAG.getNode(ISD::BIT_CONVERT, dl,
2905                               MVT::f64, Op.getOperand(0));
2906    SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Bits);
2907    if (Op.getValueType() == MVT::f32)
2908      FP = DAG.getNode(ISD::FP_ROUND, dl,
2909                       MVT::f32, FP, DAG.getIntPtrConstant(0));
2910    return FP;
2911  }
2912
2913  assert(Op.getOperand(0).getValueType() == MVT::i32 &&
2914         "Unhandled SINT_TO_FP type in custom expander!");
2915  // Since we only generate this in 64-bit mode, we can take advantage of
2916  // 64-bit registers.  In particular, sign-extend the input value into a
2917  // 64-bit register with extsw, store the WHOLE 64-bit value onto the stack,
2918  // then lfd it and fcfid it.
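  // Roughly, as an illustrative sketch of the resulting machine code:
  //   extsw rT, rSrc      ; sign-extend the i32 into a 64-bit GPR
  //   std   rT, slot      ; spill the whole 8 bytes
  //   lfd   fT, slot      ; reload as a double
  //   fcfid fD, fT        ; convert (then frsp if the result is f32)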
2919  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
2920  int FrameIdx = FrameInfo->CreateStackObject(8, 8);
2921  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2922  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
2923
2924  SDValue Ext64 = DAG.getNode(PPCISD::EXTSW_32, dl, MVT::i32,
2925                                Op.getOperand(0));
2926
2927  // STD the extended value into the stack slot.
2928  MachineMemOperand MO(PseudoSourceValue::getFixedStack(FrameIdx),
2929                       MachineMemOperand::MOStore, 0, 8, 8);
2930  SDValue Store = DAG.getNode(PPCISD::STD_32, dl, MVT::Other,
2931                                DAG.getEntryNode(), Ext64, FIdx,
2932                                DAG.getMemOperand(MO));
2933  // Load the value as a double.
2934  SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, NULL, 0);
2935
2936  // FCFID it and return it.
2937  SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld);
2938  if (Op.getValueType() == MVT::f32)
2939    FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0));
2940  return FP;
2941}
2942
2943SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) {
2944  DebugLoc dl = Op.getDebugLoc();
2945  /*
2946   The rounding mode is in bits 30:31 of FPSCR, and has the following
2947   settings:
2948     00 Round to nearest
2949     01 Round to 0
2950     10 Round to +inf
2951     11 Round to -inf
2952
2953  FLT_ROUNDS, on the other hand, expects the following:
2954    -1 Undefined
2955     0 Round to 0
2956     1 Round to nearest
2957     2 Round to +inf
2958     3 Round to -inf
2959
2960  To perform the conversion, we do:
2961    ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
2962  */
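  /*
  Illustrative check of the formula (added annotation, not original text):
    FPSCR=00: 0 ^ (3>>1) = 0^1 = 1  -> round to nearest
    FPSCR=01: 1 ^ (2>>1) = 1^1 = 0  -> round to 0
    FPSCR=10: 2 ^ (1>>1) = 2^0 = 2  -> round to +inf
    FPSCR=11: 3 ^ (0>>1) = 3^0 = 3  -> round to -inf
  */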
2963
2964  MachineFunction &MF = DAG.getMachineFunction();
2965  MVT VT = Op.getValueType();
2966  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2967  std::vector<MVT> NodeTys;
2968  SDValue MFFSreg, InFlag;
2969
2970  // Save FP Control Word to register
2971  NodeTys.push_back(MVT::f64);    // return register
2972  NodeTys.push_back(MVT::Flag);   // unused in this context
2973  SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
2974
2975  // Save FP register to stack slot
2976  int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
2977  SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
2978  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain,
2979                                 StackSlot, NULL, 0);
2980
2981  // Load FP Control Word from low 32 bits of stack slot.
2982  SDValue Four = DAG.getConstant(4, PtrVT);
2983  SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
2984  SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, NULL, 0);
2985
2986  // Transform as necessary
2987  SDValue CWD1 =
2988    DAG.getNode(ISD::AND, dl, MVT::i32,
2989                CWD, DAG.getConstant(3, MVT::i32));
2990  SDValue CWD2 =
2991    DAG.getNode(ISD::SRL, dl, MVT::i32,
2992                DAG.getNode(ISD::AND, dl, MVT::i32,
2993                            DAG.getNode(ISD::XOR, dl, MVT::i32,
2994                                        CWD, DAG.getConstant(3, MVT::i32)),
2995                            DAG.getConstant(3, MVT::i32)),
2996                DAG.getConstant(1, MVT::i32));
2997
2998  SDValue RetVal =
2999    DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
3000
3001  return DAG.getNode((VT.getSizeInBits() < 16 ?
3002                      ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
3003}
3004
3005SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) {
3006  MVT VT = Op.getValueType();
3007  unsigned BitWidth = VT.getSizeInBits();
3008  DebugLoc dl = Op.getDebugLoc();
3009  assert(Op.getNumOperands() == 3 &&
3010         VT == Op.getOperand(1).getValueType() &&
3011         "Unexpected SHL!");
3012
3013  // Expand into a bunch of logical ops.  Note that these ops
3014  // depend on the PPC behavior for oversized shift amounts.
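  // Illustrative trace (annotation): with BitWidth = 32 and Amt = 40, the
  // hardware slw/srw return 0 for the oversized amounts, so Tmp2, Tmp3 and
  // OutLo are all 0, while Tmp5 = 8 gives OutHi = Lo << 8 -- exactly
  // (Hi:Lo) << 40.  For Amt < 32, Tmp6 is the 0 term instead and OutHi is
  // the usual (Hi << Amt) | (Lo >> (BitWidth - Amt)).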
3015  SDValue Lo = Op.getOperand(0);
3016  SDValue Hi = Op.getOperand(1);
3017  SDValue Amt = Op.getOperand(2);
3018  MVT AmtVT = Amt.getValueType();
3019
3020  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
3021                             DAG.getConstant(BitWidth, AmtVT), Amt);
3022  SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
3023  SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
3024  SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
3025  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
3026                             DAG.getConstant(-BitWidth, AmtVT));
3027  SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
3028  SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
3029  SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
3030  SDValue OutOps[] = { OutLo, OutHi };
3031  return DAG.getMergeValues(OutOps, 2, dl);
3032}
3033
3034SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) {
3035  MVT VT = Op.getValueType();
3036  DebugLoc dl = Op.getDebugLoc();
3037  unsigned BitWidth = VT.getSizeInBits();
3038  assert(Op.getNumOperands() == 3 &&
3039         VT == Op.getOperand(1).getValueType() &&
3040         "Unexpected SRL!");
3041
3042  // Expand into a bunch of logical ops.  Note that these ops
3043  // depend on the PPC behavior for oversized shift amounts.
3044  SDValue Lo = Op.getOperand(0);
3045  SDValue Hi = Op.getOperand(1);
3046  SDValue Amt = Op.getOperand(2);
3047  MVT AmtVT = Amt.getValueType();
3048
3049  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
3050                             DAG.getConstant(BitWidth, AmtVT), Amt);
3051  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
3052  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
3053  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
3054  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
3055                             DAG.getConstant(-BitWidth, AmtVT));
3056  SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
3057  SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
3058  SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
3059  SDValue OutOps[] = { OutLo, OutHi };
3060  return DAG.getMergeValues(OutOps, 2, dl);
3061}
3062
3063SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) {
3064  DebugLoc dl = Op.getDebugLoc();
3065  MVT VT = Op.getValueType();
3066  unsigned BitWidth = VT.getSizeInBits();
3067  assert(Op.getNumOperands() == 3 &&
3068         VT == Op.getOperand(1).getValueType() &&
3069         "Unexpected SRA!");
3070
3071  // Expand into a bunch of logical ops, followed by a select_cc.
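  // Annotation: the final select_cc supplies the sign fill.  For
  // Amt <= BitWidth, Tmp5 = Amt - BitWidth is <= 0, so OutLo is the usual
  // (Lo >> Amt) | (Hi << (BitWidth - Amt)); for larger amounts OutLo is
  // Hi >>s (Amt - BitWidth), replicating Hi's sign bits into the low part.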
3072  SDValue Lo = Op.getOperand(0);
3073  SDValue Hi = Op.getOperand(1);
3074  SDValue Amt = Op.getOperand(2);
3075  MVT AmtVT = Amt.getValueType();
3076
3077  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
3078                             DAG.getConstant(BitWidth, AmtVT), Amt);
3079  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
3080  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
3081  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
3082  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
3083                             DAG.getConstant(-BitWidth, AmtVT));
3084  SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
3085  SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
3086  SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT),
3087                                  Tmp4, Tmp6, ISD::SETLE);
3088  SDValue OutOps[] = { OutLo, OutHi };
3089  return DAG.getMergeValues(OutOps, 2, dl);
3090}
3091
3092//===----------------------------------------------------------------------===//
3093// Vector related lowering.
3094//
3095
3096/// BuildSplatI - Build a canonical splati of Val with an element size of
3097/// SplatSize.  Cast the result to VT.
3098static SDValue BuildSplatI(int Val, unsigned SplatSize, MVT VT,
3099                             SelectionDAG &DAG, DebugLoc dl) {
3100  assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
3101
3102  static const MVT VTys[] = { // canonical VT to use for each size.
3103    MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
3104  };
3105
3106  MVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
3107
3108  // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
3109  if (Val == -1)
3110    SplatSize = 1;
3111
3112  MVT CanonicalVT = VTys[SplatSize-1];
3113
3114  // Build a canonical splat for this value.
3115  SDValue Elt = DAG.getConstant(Val, CanonicalVT.getVectorElementType());
3116  SmallVector<SDValue, 8> Ops;
3117  Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
3118  SDValue Res = DAG.getBUILD_VECTOR(CanonicalVT, dl, &Ops[0], Ops.size());
3119  return DAG.getNode(ISD::BIT_CONVERT, dl, ReqVT, Res);
3120}
3121
3122/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
3123/// specified intrinsic ID.
3124static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
3125                                SelectionDAG &DAG, DebugLoc dl,
3126                                MVT DestVT = MVT::Other) {
3127  if (DestVT == MVT::Other) DestVT = LHS.getValueType();
3128  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
3129                     DAG.getConstant(IID, MVT::i32), LHS, RHS);
3130}
3131
3132/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
3133/// specified intrinsic ID.
3134static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
3135                                SDValue Op2, SelectionDAG &DAG,
3136                                DebugLoc dl, MVT DestVT = MVT::Other) {
3137  if (DestVT == MVT::Other) DestVT = Op0.getValueType();
3138  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
3139                     DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
3140}
3141
3142
3143/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
3144/// amount.  The result has the specified value type.
3145static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
3146                             MVT VT, SelectionDAG &DAG, DebugLoc dl) {
3147  // Force LHS/RHS to be the right type.
3148  LHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, LHS);
3149  RHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, RHS);
3150
3151  SDValue Ops[16];
3152  for (unsigned i = 0; i != 16; ++i)
3153    Ops[i] = DAG.getConstant(i+Amt, MVT::i8);
3154  SDValue T = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v16i8, LHS, RHS,
3155                            DAG.getBUILD_VECTOR(MVT::v16i8, dl, Ops,16));
3156  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, T);
3157}
3158
3159// If this is a case we can't handle, return null and let the default
3160// expansion code take care of it.  If we CAN select this case, and if it
3161// selects to a single instruction, return Op.  Otherwise, if we can codegen
3162// this case more efficiently than a constant pool load, lower it to the
3163// sequence of ops that should be used.
3164SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
3165                                               SelectionDAG &DAG) {
3166  // If this is a vector of constants or undefs, get the bits.  A bit in
3167  // UndefBits is set if the corresponding element of the vector is an
3168  // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
3169  // zero.
3170  DebugLoc dl = Op.getDebugLoc();
3171  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
3172  assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
3173
3174  uint64_t SplatBits;
3175  uint64_t SplatUndef;
3176  unsigned SplatSize;
3177  bool HasAnyUndefs;
3178
3179  // If this is a splat (repetition) of a value across the whole vector, return
3180  // the smallest size that splats it.  For example, "0x01010101010101..." is a
3181  // splat of 0x01, 0x0101, and 0x01010101.  We return SplatBits = 0x01 and
3182  // SplatSize = 1 byte.
3183  if (BVN->isConstantSplat(HasAnyUndefs, SplatBits, SplatUndef, SplatSize)) {
3184    // First, handle single instruction cases.
3185
3186    // All zeros?
3187    if (SplatBits == 0) {
3188      // Canonicalize all zero vectors to be v4i32.
3189      if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
3190        SDValue Z = DAG.getConstant(0, MVT::i32);
3191        Z = DAG.getBUILD_VECTOR(MVT::v4i32, dl, Z, Z, Z, Z);
3192        Op = DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Z);
3193      }
3194      return Op;
3195    }
3196
3197    // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
3198    int32_t SextVal = int32_t(SplatBits << (32-8*SplatSize)) >> (32-8*SplatSize);
3199    if (SextVal >= -16 && SextVal <= 15)
3200      return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
3201
3202
3203    // Two instruction sequences.
3204
3205    // If this value is in the range [-32,30] and is even, use:
3206    //    tmp = VSPLTI[bhw], result = add tmp, tmp
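    //    (illustrative: to splat 24, emit vspltisw of 12 and add it to
    //    itself with vadduwm -- two cheap instructions and no load)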
3207    if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
3208      SDValue Res = BuildSplatI(SextVal >> 1, SplatSize, MVT::Other, DAG, dl);
3209      Res = DAG.getNode(ISD::ADD, dl, Res.getValueType(), Res, Res);
3210      return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
3211    }
3212
3213    // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
3214    // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
3215    // for fneg/fabs.
3216    if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
3217      // Make -1 and vspltisw -1:
3218      SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
3219
3220      // Make the VSLW intrinsic, computing 0x8000_0000.
3221      SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
3222                                       OnesV, DAG, dl);
3223
3224      // xor by OnesV to invert it.
3225      Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
3226      return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
3227    }
3228
3229    // Check to see if this is one of a wide variety of vsplti* + binop-self cases.
3230    unsigned SplatBitSize = SplatSize*8;
3231    static const signed char SplatCsts[] = {
3232      -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
3233      -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
3234    };
3235
3236    for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
3237      // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
3238      // cases which are ambiguous (e.g. formation of 0x8000_0000).
3239      int i = SplatCsts[idx];
3240
3241      // Figure out what shift amount will be used by altivec if shifted by i in
3242      // this splat size.
3243      unsigned TypeShiftAmt = i & (SplatBitSize-1);
3244
3245      // vsplti + shl self.
3246      if (SextVal == (i << (int)TypeShiftAmt)) {
3247        SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
3248        static const unsigned IIDs[] = { // Intrinsic to use for each size.
3249          Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
3250          Intrinsic::ppc_altivec_vslw
3251        };
3252        Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
3253        return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
3254      }
3255
3256      // vsplti + srl self.
3257      if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
3258        SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
3259        static const unsigned IIDs[] = { // Intrinsic to use for each size.
3260          Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
3261          Intrinsic::ppc_altivec_vsrw
3262        };
3263        Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
3264        return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
3265      }
3266
3267      // vsplti + sra self.
3268      if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
3269        SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
3270        static const unsigned IIDs[] = { // Intrinsic to use for each size.
3271          Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
3272          Intrinsic::ppc_altivec_vsraw
3273        };
3274        Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
3275        return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
3276      }
3277
3278      // vsplti + rol self.
3279      if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
3280                           ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
3281        SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
3282        static const unsigned IIDs[] = { // Intrinsic to use for each size.
3283          Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
3284          Intrinsic::ppc_altivec_vrlw
3285        };
3286        Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
3287        return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
3288      }
3289
3290      // t = vsplti c, result = vsldoi t, t, 1
3291      if (SextVal == ((i << 8) | (i >> (TypeShiftAmt-8)))) {
3292        SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
3293        return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl);
3294      }
3295      // t = vsplti c, result = vsldoi t, t, 2
3296      if (SextVal == ((i << 16) | (i >> (TypeShiftAmt-16)))) {
3297        SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
3298        return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl);
3299      }
3300      // t = vsplti c, result = vsldoi t, t, 3
3301      if (SextVal == ((i << 24) | (i >> (TypeShiftAmt-24)))) {
3302        SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
3303        return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl);
3304      }
3305    }
3306
3307    // Three instruction sequences.
3308
3309    // Odd, in range [17,31]:  (vsplti C)-(vsplti -16).
3310    if (SextVal >= 0 && SextVal <= 31) {
3311      SDValue LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG, dl);
3312      SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
3313      LHS = DAG.getNode(ISD::SUB, dl, LHS.getValueType(), LHS, RHS);
3314      return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), LHS);
3315    }
3316    // Odd, in range [-31,-17]:  (vsplti C)+(vsplti -16).
3317    if (SextVal >= -31 && SextVal <= 0) {
3318      SDValue LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG, dl);
3319      SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
3320      LHS = DAG.getNode(ISD::ADD, dl, LHS.getValueType(), LHS, RHS);
3321      return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), LHS);
3322    }
3323  }
3324
3325  return SDValue();
3326}
3327
3328/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
3329/// the specified operations to build the shuffle.
3330static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
3331                                      SDValue RHS, SelectionDAG &DAG,
3332                                      DebugLoc dl) {
3333  unsigned OpNum = (PFEntry >> 26) & 0x0F;
3334  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
3335  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
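  // Annotation, inferred from the uses here and in LowerVECTOR_SHUFFLE:
  // PFEntry packs [31:30] = cost, [29:26] = opcode (the enum below), and
  // [25:13]/[12:0] = LHS/RHS table indices, each index encoding four
  // element selectors (0-7, 8 = undef) in base 9.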
3336
3337  enum {
3338    OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
3339    OP_VMRGHW,
3340    OP_VMRGLW,
3341    OP_VSPLTISW0,
3342    OP_VSPLTISW1,
3343    OP_VSPLTISW2,
3344    OP_VSPLTISW3,
3345    OP_VSLDOI4,
3346    OP_VSLDOI8,
3347    OP_VSLDOI12
3348  };
3349
3350  if (OpNum == OP_COPY) {
3351    if (LHSID == (1*9+2)*9+3) return LHS;
3352    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
3353    return RHS;
3354  }
3355
3356  SDValue OpLHS, OpRHS;
3357  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
3358  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
3359
3360  unsigned ShufIdxs[16];
3361  switch (OpNum) {
3362  default: assert(0 && "Unknown i32 permute!");
3363  case OP_VMRGHW:
3364    ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
3365    ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
3366    ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
3367    ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
3368    break;
3369  case OP_VMRGLW:
3370    ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
3371    ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
3372    ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
3373    ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
3374    break;
3375  case OP_VSPLTISW0:
3376    for (unsigned i = 0; i != 16; ++i)
3377      ShufIdxs[i] = (i&3)+0;
3378    break;
3379  case OP_VSPLTISW1:
3380    for (unsigned i = 0; i != 16; ++i)
3381      ShufIdxs[i] = (i&3)+4;
3382    break;
3383  case OP_VSPLTISW2:
3384    for (unsigned i = 0; i != 16; ++i)
3385      ShufIdxs[i] = (i&3)+8;
3386    break;
3387  case OP_VSPLTISW3:
3388    for (unsigned i = 0; i != 16; ++i)
3389      ShufIdxs[i] = (i&3)+12;
3390    break;
3391  case OP_VSLDOI4:
3392    return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
3393  case OP_VSLDOI8:
3394    return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
3395  case OP_VSLDOI12:
3396    return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
3397  }
3398  SDValue Ops[16];
3399  for (unsigned i = 0; i != 16; ++i)
3400    Ops[i] = DAG.getConstant(ShufIdxs[i], MVT::i8);
3401
3402  return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, OpLHS.getValueType(),
3403                     OpLHS, OpRHS,
3404                     DAG.getBUILD_VECTOR(MVT::v16i8, dl, Ops, 16));
3405}
3406
3407/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
3408/// is a shuffle we can handle in a single instruction, return it.  Otherwise,
3409/// return the code it can be lowered into.  Worst case, it can always be
3410/// lowered into a vperm.
3411SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
3412                                                 SelectionDAG &DAG) {
3413  DebugLoc dl = Op.getDebugLoc();
3414  SDValue V1 = Op.getOperand(0);
3415  SDValue V2 = Op.getOperand(1);
3416  SDValue PermMask = Op.getOperand(2);
3417
3418  // Cases that are handled by instructions that take permute immediates
3419  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
3420  // selected by the instruction selector.
3421  if (V2.getOpcode() == ISD::UNDEF) {
3422    if (PPC::isSplatShuffleMask(PermMask.getNode(), 1) ||
3423        PPC::isSplatShuffleMask(PermMask.getNode(), 2) ||
3424        PPC::isSplatShuffleMask(PermMask.getNode(), 4) ||
3425        PPC::isVPKUWUMShuffleMask(PermMask.getNode(), true) ||
3426        PPC::isVPKUHUMShuffleMask(PermMask.getNode(), true) ||
3427        PPC::isVSLDOIShuffleMask(PermMask.getNode(), true) != -1 ||
3428        PPC::isVMRGLShuffleMask(PermMask.getNode(), 1, true) ||
3429        PPC::isVMRGLShuffleMask(PermMask.getNode(), 2, true) ||
3430        PPC::isVMRGLShuffleMask(PermMask.getNode(), 4, true) ||
3431        PPC::isVMRGHShuffleMask(PermMask.getNode(), 1, true) ||
3432        PPC::isVMRGHShuffleMask(PermMask.getNode(), 2, true) ||
3433        PPC::isVMRGHShuffleMask(PermMask.getNode(), 4, true)) {
3434      return Op;
3435    }
3436  }
3437
3438  // Altivec has a variety of "shuffle immediates" that take two vector inputs
3439  // and produce a fixed permutation.  If any of these match, do not lower to
3440  // VPERM.
3441  if (PPC::isVPKUWUMShuffleMask(PermMask.getNode(), false) ||
3442      PPC::isVPKUHUMShuffleMask(PermMask.getNode(), false) ||
3443      PPC::isVSLDOIShuffleMask(PermMask.getNode(), false) != -1 ||
3444      PPC::isVMRGLShuffleMask(PermMask.getNode(), 1, false) ||
3445      PPC::isVMRGLShuffleMask(PermMask.getNode(), 2, false) ||
3446      PPC::isVMRGLShuffleMask(PermMask.getNode(), 4, false) ||
3447      PPC::isVMRGHShuffleMask(PermMask.getNode(), 1, false) ||
3448      PPC::isVMRGHShuffleMask(PermMask.getNode(), 2, false) ||
3449      PPC::isVMRGHShuffleMask(PermMask.getNode(), 4, false))
3450    return Op;
3451
3452  // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
3453  // perfect shuffle table to emit an optimal matching sequence.
3454  unsigned PFIndexes[4];
3455  bool isFourElementShuffle = true;
3456  for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
3457    unsigned EltNo = 8;   // Start out undef.
3458    for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
3459      if (PermMask.getOperand(i*4+j).getOpcode() == ISD::UNDEF)
3460        continue;   // Undef, ignore it.
3461
3462      unsigned ByteSource =
3463        cast<ConstantSDNode>(PermMask.getOperand(i*4+j))->getZExtValue();
3464      if ((ByteSource & 3) != j) {
3465        isFourElementShuffle = false;
3466        break;
3467      }
3468
3469      if (EltNo == 8) {
3470        EltNo = ByteSource/4;
3471      } else if (EltNo != ByteSource/4) {
3472        isFourElementShuffle = false;
3473        break;
3474      }
3475    }
3476    PFIndexes[i] = EltNo;
3477  }
3478
3479  // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
3480  // perfect shuffle vector to determine if it is cost effective to do this as
3481  // discrete instructions, or whether we should use a vperm.
3482  if (isFourElementShuffle) {
3483    // Compute the index in the perfect shuffle table.
3484    unsigned PFTableIndex =
3485      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
3486
3487    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
3488    unsigned Cost  = (PFEntry >> 30);
3489
3490    // Determining when to avoid vperm is tricky.  Many things affect the cost
3491    // of vperm, particularly how many times the perm mask needs to be computed.
3492    // For example, if the perm mask can be hoisted out of a loop or is already
3493    // used (perhaps because there are multiple permutes with the same shuffle
3494    // mask?) the vperm has a cost of 1.  OTOH, hoisting the permute mask out of
3495    // the loop requires an extra register.
3496    //
3497    // As a compromise, we only emit discrete instructions if the shuffle can be
3498    // generated in 3 or fewer operations.  When we have loop information
3499    // available, if this block is within a loop, we should avoid using vperm
3500    // for 3-operation perms and use a constant pool load instead.
3501    if (Cost < 3)
3502      return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
3503  }
3504
3505  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
3506  // vector that will get spilled to the constant pool.
3507  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
3508
3509  // The VECTOR_SHUFFLE mask is almost exactly what we want for vperm, except
3510  // that it is in input element units, not in bytes.  Convert now.
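  // Illustrative example (annotation): for a v8i16 shuffle BytesPerElement
  // is 2, so selecting element 5 of the concatenated inputs becomes the
  // control bytes 10 and 11 in the vperm mask built below.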
3511  MVT EltVT = V1.getValueType().getVectorElementType();
3512  unsigned BytesPerElement = EltVT.getSizeInBits()/8;
3513
3514  SmallVector<SDValue, 16> ResultMask;
3515  for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
3516    unsigned SrcElt;
3517    if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
3518      SrcElt = 0;
3519    else
3520      SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
3521
3522    for (unsigned j = 0; j != BytesPerElement; ++j)
3523      ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
3524                                           MVT::i8));
3525  }
3526
3527  SDValue VPermMask = DAG.getBUILD_VECTOR(MVT::v16i8, dl,
3528                                          &ResultMask[0], ResultMask.size());
3529  return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2, VPermMask);
3530}
3531
3532/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
3533/// altivec comparison.  If it is, return true and fill in CompareOpc/isDot with
3534/// information about the intrinsic.
3535static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
3536                                  bool &isDot) {
3537  unsigned IntrinsicID =
3538    cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
3539  CompareOpc = -1;
3540  isDot = false;
3541  switch (IntrinsicID) {
3542  default: return false;
3543    // Comparison predicates.
3544  case Intrinsic::ppc_altivec_vcmpbfp_p:  CompareOpc = 966; isDot = 1; break;
3545  case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
3546  case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc =   6; isDot = 1; break;
3547  case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc =  70; isDot = 1; break;
3548  case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
3549  case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
3550  case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
3551  case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
3552  case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
3553  case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
3554  case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
3555  case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
3556  case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
3557
3558    // Normal Comparisons.
3559  case Intrinsic::ppc_altivec_vcmpbfp:    CompareOpc = 966; isDot = 0; break;
3560  case Intrinsic::ppc_altivec_vcmpeqfp:   CompareOpc = 198; isDot = 0; break;
3561  case Intrinsic::ppc_altivec_vcmpequb:   CompareOpc =   6; isDot = 0; break;
3562  case Intrinsic::ppc_altivec_vcmpequh:   CompareOpc =  70; isDot = 0; break;
3563  case Intrinsic::ppc_altivec_vcmpequw:   CompareOpc = 134; isDot = 0; break;
3564  case Intrinsic::ppc_altivec_vcmpgefp:   CompareOpc = 454; isDot = 0; break;
3565  case Intrinsic::ppc_altivec_vcmpgtfp:   CompareOpc = 710; isDot = 0; break;
3566  case Intrinsic::ppc_altivec_vcmpgtsb:   CompareOpc = 774; isDot = 0; break;
3567  case Intrinsic::ppc_altivec_vcmpgtsh:   CompareOpc = 838; isDot = 0; break;
3568  case Intrinsic::ppc_altivec_vcmpgtsw:   CompareOpc = 902; isDot = 0; break;
3569  case Intrinsic::ppc_altivec_vcmpgtub:   CompareOpc = 518; isDot = 0; break;
3570  case Intrinsic::ppc_altivec_vcmpgtuh:   CompareOpc = 582; isDot = 0; break;
3571  case Intrinsic::ppc_altivec_vcmpgtuw:   CompareOpc = 646; isDot = 0; break;
3572  }
3573  return true;
3574}
3575
3576/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
3577/// lower, do it, otherwise return null.
3578SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
3579                                                     SelectionDAG &DAG) {
3580  // If this is a lowered altivec predicate compare, CompareOpc is set to the
3581  // opcode number of the comparison.
3582  DebugLoc dl = Op.getDebugLoc();
3583  int CompareOpc;
3584  bool isDot;
3585  if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
3586    return SDValue();    // Don't custom lower most intrinsics.
3587
3588  // If this is a non-dot comparison, make the VCMP node and we are done.
3589  if (!isDot) {
3590    SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
3591                                Op.getOperand(1), Op.getOperand(2),
3592                                DAG.getConstant(CompareOpc, MVT::i32));
3593    return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Tmp);
3594  }
3595
3596  // Create the PPCISD altivec 'dot' comparison node.
3597  SDValue Ops[] = {
3598    Op.getOperand(2),  // LHS
3599    Op.getOperand(3),  // RHS
3600    DAG.getConstant(CompareOpc, MVT::i32)
3601  };
3602  std::vector<MVT> VTs;
3603  VTs.push_back(Op.getOperand(2).getValueType());
3604  VTs.push_back(MVT::Flag);
3605  SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
3606
3607  // Now that we have the comparison, emit a copy from the CR to a GPR.
3608  // This is flagged to the above dot comparison.
3609  SDValue Flags = DAG.getNode(PPCISD::MFCR, dl, MVT::i32,
3610                                DAG.getRegister(PPC::CR6, MVT::i32),
3611                                CompNode.getValue(1));
3612
3613  // Unpack the result based on how the target uses it.
3614  unsigned BitNo;   // Bit # of CR6.
3615  bool InvertBit;   // Invert result?
3616  switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
3617  default:  // Can't happen, don't crash on invalid number though.
3618  case 0:   // Return the value of the EQ bit of CR6.
3619    BitNo = 0; InvertBit = false;
3620    break;
3621  case 1:   // Return the inverted value of the EQ bit of CR6.
3622    BitNo = 0; InvertBit = true;
3623    break;
3624  case 2:   // Return the value of the LT bit of CR6.
3625    BitNo = 2; InvertBit = false;
3626    break;
3627  case 3:   // Return the inverted value of the LT bit of CR6.
3628    BitNo = 2; InvertBit = true;
3629    break;
3630  }
3631
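  // Annotation, assuming the usual mfcr layout (CR0 in the high nibble of
  // the GPR, CR7 in bits 3:0): the CR6 field lands in bits 7:4, with LT at
  // bit 7, GT at 6, EQ at 5 and SO at 4, which is exactly what the
  // "8-(3-BitNo)" shift below extracts.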
3632  // Shift the bit into the low position.
3633  Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
3634                      DAG.getConstant(8-(3-BitNo), MVT::i32));
3635  // Isolate the bit.
3636  Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
3637                      DAG.getConstant(1, MVT::i32));
3638
3639  // If we are supposed to, toggle the bit.
3640  if (InvertBit)
3641    Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
3642                        DAG.getConstant(1, MVT::i32));
3643  return Flags;
3644}
3645
3646SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
3647                                                   SelectionDAG &DAG) {
3648  DebugLoc dl = Op.getDebugLoc();
3649  // Create a stack slot that is 16-byte aligned.
3650  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
3651  int FrameIdx = FrameInfo->CreateStackObject(16, 16);
3652  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
3653  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
3654
3655  // Store the input value into Value#0 of the stack slot.
3656  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
3657                                 Op.getOperand(0), FIdx, NULL, 0);
3658  // Load it out.
3659  return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, NULL, 0);
3660}
3661
3662SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) {
3663  DebugLoc dl = Op.getDebugLoc();
3664  if (Op.getValueType() == MVT::v4i32) {
3665    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
3666
3667    SDValue Zero  = BuildSplatI(  0, 1, MVT::v4i32, DAG, dl);
3668    SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt.
3669
3670    SDValue RHSSwap =   // = vrlw RHS, 16
3671      BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
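    // Annotation: writing each 32-bit lane of LHS as a*2^16 + b and of RHS
    // as c*2^16 + d, the product mod 2^32 is b*d + ((a*d + b*c) << 16).
    // vmulouh computes the b*d terms; vmsumuhm over LHS and the rotated
    // RHS accumulates a*d + b*c, which the final vslw shifts up by 16.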
3672
3673    // Shrinkify inputs to v8i16.
3674    LHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, LHS);
3675    RHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, RHS);
3676    RHSSwap = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, RHSSwap);
3677
3678    // Low parts multiplied together, generating 32-bit results (we ignore the
3679    // top parts).
3680    SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
3681                                        LHS, RHS, DAG, dl, MVT::v4i32);
3682
3683    SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
3684                                      LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
3685    // Shift the high parts up 16 bits.
3686    HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
3687                              Neg16, DAG, dl);
3688    return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
3689  } else if (Op.getValueType() == MVT::v8i16) {
3690    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
3691
3692    SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);
3693
3694    return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
3695                            LHS, RHS, Zero, DAG, dl);
3696  } else if (Op.getValueType() == MVT::v16i8) {
3697    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
3698
3699    // Multiply the even 8-bit parts, producing 16-bit sums.
3700    SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
3701                                           LHS, RHS, DAG, dl, MVT::v8i16);
3702    EvenParts = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, EvenParts);
3703
3704    // Multiply the odd 8-bit parts, producing 16-bit sums.
3705    SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
3706                                          LHS, RHS, DAG, dl, MVT::v8i16);
3707    OddParts = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OddParts);
3708
3709    // Merge the results together.
3710    SDValue Ops[16];
3711    for (unsigned i = 0; i != 8; ++i) {
3712      Ops[i*2  ] = DAG.getConstant(2*i+1, MVT::i8);
3713      Ops[i*2+1] = DAG.getConstant(2*i+1+16, MVT::i8);
3714    }
3715    return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v16i8, EvenParts, OddParts,
3716                       DAG.getBUILD_VECTOR(MVT::v16i8, dl, Ops, 16));
3717  } else {
3718    assert(0 && "Unknown mul to lower!");
3719    abort();
3720  }
3721}
3722
3723/// LowerOperation - Provide custom lowering hooks for some operations.
3724///
3725SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
3726  switch (Op.getOpcode()) {
3727  default: assert(0 && "Wasn't expecting to be able to lower this!");
3728  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
3729  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
3730  case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
3731  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
3732  case ISD::SETCC:              return LowerSETCC(Op, DAG);
3733  case ISD::TRAMPOLINE:         return LowerTRAMPOLINE(Op, DAG);
3734  case ISD::VASTART:
3735    return LowerVASTART(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset,
3736                        VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget);
3737
3738  case ISD::VAARG:
3739    return LowerVAARG(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset,
3740                      VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget);
3741
3742  case ISD::FORMAL_ARGUMENTS:
3743    return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex,
3744                                 VarArgsStackOffset, VarArgsNumGPR,
3745                                 VarArgsNumFPR, PPCSubTarget);
3746
3747  case ISD::CALL:               return LowerCALL(Op, DAG, PPCSubTarget,
3748                                                 getTargetMachine());
3749  case ISD::RET:                return LowerRET(Op, DAG, getTargetMachine());
3750  case ISD::STACKRESTORE:       return LowerSTACKRESTORE(Op, DAG, PPCSubTarget);
3751  case ISD::DYNAMIC_STACKALLOC:
3752    return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
3753
3754  case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
3755  case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG,
3756                                                       Op.getDebugLoc());
3757  case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
3758  case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);
3759
3760  // Lower 64-bit shifts.
3761  case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
3762  case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
3763  case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);
3764
3765  // Vector-related lowering.
3766  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
3767  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
3768  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
3769  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
3770  case ISD::MUL:                return LowerMUL(Op, DAG);
3771
3772  // Frame & Return address.
3773  case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
3774  case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
3775  }
3776  return SDValue();
3777}
3778
3779void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
3780                                           SmallVectorImpl<SDValue>&Results,
3781                                           SelectionDAG &DAG) {
3782  DebugLoc dl = N->getDebugLoc();
3783  switch (N->getOpcode()) {
3784  default:
3785    assert(false && "Do not know how to custom type legalize this operation!");
3786    return;
3787  case ISD::FP_ROUND_INREG: {
3788    assert(N->getValueType(0) == MVT::ppcf128);
3789    assert(N->getOperand(0).getValueType() == MVT::ppcf128);
3790    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
3791                             MVT::f64, N->getOperand(0),
3792                             DAG.getIntPtrConstant(0));
3793    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
3794                             MVT::f64, N->getOperand(0),
3795                             DAG.getIntPtrConstant(1));
3796
3797    // This sequence changes FPSCR to do round-to-zero, adds the two halves
3798    // of the long double, and puts FPSCR back the way it was.  We do not
3799    // actually model FPSCR.
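    // Roughly, the sequence built below corresponds to (sketch only):
    //   mffs   f0            ; save FPSCR
    //   mtfsb1 31            ; set FPSCR[31]   \ RN := 0b01,
    //   mtfsb0 30            ; clear FPSCR[30] / i.e. round toward zero
    //   fadd   fD, fLo, fHi  ; add the halves with RTZ in effect
    //   mtfsf  1, f0         ; restore FPSCR field 7 (contains RN)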
3800    std::vector<MVT> NodeTys;
3801    SDValue Ops[4], Result, MFFSreg, InFlag, FPreg;
3802
3803    NodeTys.push_back(MVT::f64);   // Return register
3804    NodeTys.push_back(MVT::Flag);    // Returns a flag for later insns
3805    Result = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
3806    MFFSreg = Result.getValue(0);
3807    InFlag = Result.getValue(1);
3808
3809    NodeTys.clear();
3810    NodeTys.push_back(MVT::Flag);   // Returns a flag
3811    Ops[0] = DAG.getConstant(31, MVT::i32);
3812    Ops[1] = InFlag;
3813    Result = DAG.getNode(PPCISD::MTFSB1, dl, NodeTys, Ops, 2);
3814    InFlag = Result.getValue(0);
3815
3816    NodeTys.clear();
3817    NodeTys.push_back(MVT::Flag);   // Returns a flag
3818    Ops[0] = DAG.getConstant(30, MVT::i32);
3819    Ops[1] = InFlag;
3820    Result = DAG.getNode(PPCISD::MTFSB0, dl, NodeTys, Ops, 2);
3821    InFlag = Result.getValue(0);
3822
3823    NodeTys.clear();
3824    NodeTys.push_back(MVT::f64);    // result of add
3825    NodeTys.push_back(MVT::Flag);   // Returns a flag
3826    Ops[0] = Lo;
3827    Ops[1] = Hi;
3828    Ops[2] = InFlag;
3829    Result = DAG.getNode(PPCISD::FADDRTZ, dl, NodeTys, Ops, 3);
3830    FPreg = Result.getValue(0);
3831    InFlag = Result.getValue(1);
3832
3833    NodeTys.clear();
3834    NodeTys.push_back(MVT::f64);
3835    Ops[0] = DAG.getConstant(1, MVT::i32);
3836    Ops[1] = MFFSreg;
3837    Ops[2] = FPreg;
3838    Ops[3] = InFlag;
3839    Result = DAG.getNode(PPCISD::MTFSF, dl, NodeTys, Ops, 4);
3840    FPreg = Result.getValue(0);
3841
3842    // We know the low half is about to be thrown away, so just use something
3843    // convenient.
3844    Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
3845                                FPreg, FPreg));
3846    return;
3847  }
3848  case ISD::FP_TO_SINT:
3849    Results.push_back(LowerFP_TO_SINT(SDValue(N, 0), DAG, dl));
3850    return;
3851  }
3852}
3853
3854
3855//===----------------------------------------------------------------------===//
3856//  Other Lowering Code
3857//===----------------------------------------------------------------------===//
3858
3859MachineBasicBlock *
3860PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
3861                                    bool is64bit, unsigned BinOpcode) const {
3862  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
3863  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
3864
3865  const BasicBlock *LLVM_BB = BB->getBasicBlock();
3866  MachineFunction *F = BB->getParent();
3867  MachineFunction::iterator It = BB;
3868  ++It;
3869
3870  unsigned dest = MI->getOperand(0).getReg();
3871  unsigned ptrA = MI->getOperand(1).getReg();
3872  unsigned ptrB = MI->getOperand(2).getReg();
3873  unsigned incr = MI->getOperand(3).getReg();
3874  DebugLoc dl = MI->getDebugLoc();
3875
3876  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
3877  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
3878  F->insert(It, loopMBB);
3879  F->insert(It, exitMBB);
3880  exitMBB->transferSuccessors(BB);
3881
3882  MachineRegisterInfo &RegInfo = F->getRegInfo();
3883  unsigned TmpReg = (!BinOpcode) ? incr :
3884    RegInfo.createVirtualRegister(
3885       is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
3886                 (const TargetRegisterClass *) &PPC::GPRCRegClass);
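  // For ATOMIC_SWAP (BinOpcode == 0) there is no arithmetic to do: the
  // incoming value is stored as-is, so incr itself is the st[wd]cx. source.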
3887
3888  //  thisMBB:
3889  //   ...
3890  //   fallthrough --> loopMBB
3891  BB->addSuccessor(loopMBB);
3892
3893  //  loopMBB:
3894  //   l[wd]arx dest, ptr
3895  //   add r0, dest, incr
3896  //   st[wd]cx. r0, ptr
3897  //   bne- loopMBB
3898  //   fallthrough --> exitMBB
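  // The l[wd]arx takes a reservation on the word; if anything else writes
  // it before the st[wd]cx., the store fails, CR0[EQ] is cleared, and the
  // bne- retries the whole read-modify-write.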
3899  BB = loopMBB;
3900  BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
3901    .addReg(ptrA).addReg(ptrB);
3902  if (BinOpcode)
3903    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
3904  BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
3905    .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
3906  BuildMI(BB, dl, TII->get(PPC::BCC))
3907    .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
3908  BB->addSuccessor(loopMBB);
3909  BB->addSuccessor(exitMBB);
3910
3911  //  exitMBB:
3912  //   ...
3913  BB = exitMBB;
3914  return BB;
3915}
3916
3917MachineBasicBlock *
3918PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
3919                                            MachineBasicBlock *BB,
3920                                            bool is8bit,    // operation
3921                                            unsigned BinOpcode) const {
3922  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
3923  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
3924  // In 64-bit mode we have to use 64-bit registers for addresses, even
3925  // though the lwarx/stwcx. operate on 32-bit words.  The word-sized
3926  // atomics can use address registers without caring whether they're 32
3927  // or 64 bits, but here we do actual arithmetic on the addresses.
3928  bool is64bit = PPCSubTarget.isPPC64();
3929
3930  const BasicBlock *LLVM_BB = BB->getBasicBlock();
3931  MachineFunction *F = BB->getParent();
3932  MachineFunction::iterator It = BB;
3933  ++It;
3934
3935  unsigned dest = MI->getOperand(0).getReg();
3936  unsigned ptrA = MI->getOperand(1).getReg();
3937  unsigned ptrB = MI->getOperand(2).getReg();
3938  unsigned incr = MI->getOperand(3).getReg();
3939  DebugLoc dl = MI->getDebugLoc();
3940
3941  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
3942  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
3943  F->insert(It, loopMBB);
3944  F->insert(It, exitMBB);
3945  exitMBB->transferSuccessors(BB);
3946
3947  MachineRegisterInfo &RegInfo = F->getRegInfo();
3948  const TargetRegisterClass *RC =
3949    is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
3950              (const TargetRegisterClass *) &PPC::GPRCRegClass;
3951  unsigned PtrReg = RegInfo.createVirtualRegister(RC);
3952  unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
3953  unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
3954  unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
3955  unsigned MaskReg = RegInfo.createVirtualRegister(RC);
3956  unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
3957  unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
3958  unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
3959  unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
3960  unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
3961  unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
3962  unsigned Ptr1Reg;
3963  unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);
3964
3965  //  thisMBB:
3966  //   ...
3967  //   fallthrough --> loopMBB
3968  BB->addSuccessor(loopMBB);
3969
3970  // The 4-byte load must be aligned, while a char or short may be
3971  // anywhere in the word.  Hence all this nasty bookkeeping code.
3972  //   add ptr1, ptrA, ptrB [copy if ptrA==0]
3973  //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
3974  //   xori shift, shift1, 24 [16]
3975  //   rlwinm ptr, ptr1, 0, 0, 29
3976  //   slw incr2, incr, shift
3977  //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
3978  //   slw mask, mask2, shift
3979  //  loopMBB:
3980  //   lwarx tmpDest, ptr
3981  //   add tmp, tmpDest, incr2
3982  //   andc tmp2, tmpDest, mask
3983  //   and tmp3, tmp, mask
3984  //   or tmp4, tmp3, tmp2
3985  //   stwcx. tmp4, ptr
3986  //   bne- loopMBB
3987  //   fallthrough --> exitMBB
3988  //   srw dest, tmpDest, shift
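  // For example (32-bit, big-endian, byte case): if ptr1 & 3 == 1, then
  // shift1 = (ptr1 << 3) & 0x18 = 8 and shift = 8 ^ 24 = 16, so the
  // addressed byte occupies bits 16-23 of the aligned word, and incr and
  // the 0xff mask are shifted left by 16 to line up with it.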
3989
3990  if (ptrA != PPC::R0) {
3991    Ptr1Reg = RegInfo.createVirtualRegister(RC);
3992    BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
3993      .addReg(ptrA).addReg(ptrB);
3994  } else {
3995    Ptr1Reg = ptrB;
3996  }
3997  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
3998      .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
3999  BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
4000      .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
4001  if (is64bit)
4002    BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
4003      .addReg(Ptr1Reg).addImm(0).addImm(61);
4004  else
4005    BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
4006      .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
4007  BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
4008      .addReg(incr).addReg(ShiftReg);
4009  if (is8bit)
4010    BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
4011  else {
4012    BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
4013    BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg).addReg(Mask3Reg).addImm(65535);
4014  }
4015  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
4016      .addReg(Mask2Reg).addReg(ShiftReg);
4017
4018  BB = loopMBB;
4019  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
4020    .addReg(PPC::R0).addReg(PtrReg);
4021  if (BinOpcode)
4022    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
4023      .addReg(Incr2Reg).addReg(TmpDestReg);
4024  BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
4025    .addReg(TmpDestReg).addReg(MaskReg);
4026  BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
4027    .addReg(TmpReg).addReg(MaskReg);
4028  BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
4029    .addReg(Tmp3Reg).addReg(Tmp2Reg);
4030  BuildMI(BB, dl, TII->get(PPC::STWCX))
4031    .addReg(Tmp4Reg).addReg(PPC::R0).addReg(PtrReg);
4032  BuildMI(BB, dl, TII->get(PPC::BCC))
4033    .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
4034  BB->addSuccessor(loopMBB);
4035  BB->addSuccessor(exitMBB);
4036
4037  //  exitMBB:
4038  //   ...
4039  BB = exitMBB;
4040  BuildMI(BB, dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg).addReg(ShiftReg);
4041  return BB;
4042}
4043
4044MachineBasicBlock *
4045PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
4046                                               MachineBasicBlock *BB) const {
4047  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
4048
4049  // To "insert" these instructions we actually have to insert their
4050  // control-flow patterns.
4051  const BasicBlock *LLVM_BB = BB->getBasicBlock();
4052  MachineFunction::iterator It = BB;
4053  ++It;
4054
4055  MachineFunction *F = BB->getParent();
4056
4057  if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
4058      MI->getOpcode() == PPC::SELECT_CC_I8 ||
4059      MI->getOpcode() == PPC::SELECT_CC_F4 ||
4060      MI->getOpcode() == PPC::SELECT_CC_F8 ||
4061      MI->getOpcode() == PPC::SELECT_CC_VRRC) {
4062
4063    // The incoming instruction knows the destination vreg to set, the
4064    // condition code register to branch on, the true/false values to
4065    // select between, and a branch opcode to use.
4066
4067    //  thisMBB:
4068    //  ...
4069    //   TrueVal = ...
4070    //   cmpTY ccX, r1, r2
4071    //   bCC copy1MBB
4072    //   fallthrough --> copy0MBB
4073    MachineBasicBlock *thisMBB = BB;
4074    MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
4075    MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
4076    unsigned SelectPred = MI->getOperand(4).getImm();
4077    DebugLoc dl = MI->getDebugLoc();
4078    BuildMI(BB, dl, TII->get(PPC::BCC))
4079      .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
4080    F->insert(It, copy0MBB);
4081    F->insert(It, sinkMBB);
4082    // Update machine-CFG edges by transferring all successors of the current
4083    // block to the new block which will contain the Phi node for the select.
4084    sinkMBB->transferSuccessors(BB);
4085    // Next, add the true and fallthrough blocks as its successors.
4086    BB->addSuccessor(copy0MBB);
4087    BB->addSuccessor(sinkMBB);
4088
4089    //  copy0MBB:
4090    //   %FalseValue = ...
4091    //   # fallthrough to sinkMBB
4092    BB = copy0MBB;
4093
4094    // Update machine-CFG edges
4095    BB->addSuccessor(sinkMBB);
4096
4097    //  sinkMBB:
4098    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
4099    //  ...
4100    BB = sinkMBB;
4101    BuildMI(BB, dl, TII->get(PPC::PHI), MI->getOperand(0).getReg())
4102      .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
4103      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
4104  }
4105  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
4106    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
4107  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
4108    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
4109  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
4110    BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4);
4111  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
4112    BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8);
4113
4114  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
4115    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
4116  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
4117    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
4118  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
4119    BB = EmitAtomicBinary(MI, BB, false, PPC::AND);
4120  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
4121    BB = EmitAtomicBinary(MI, BB, true, PPC::AND8);
4122
4123  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
4124    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
4125  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
4126    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
4127  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
4128    BB = EmitAtomicBinary(MI, BB, false, PPC::OR);
4129  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
4130    BB = EmitAtomicBinary(MI, BB, true, PPC::OR8);
4131
4132  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
4133    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
4134  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
4135    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
4136  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
4137    BB = EmitAtomicBinary(MI, BB, false, PPC::XOR);
4138  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
4139    BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8);
4140
4141  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
4142    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ANDC);
4143  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
4144    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ANDC);
4145  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
4146    BB = EmitAtomicBinary(MI, BB, false, PPC::ANDC);
4147  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
4148    BB = EmitAtomicBinary(MI, BB, true, PPC::ANDC8);
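  // Note: using andc here computes "~old & incr" (andc's operands are
  // (incr, dest)), i.e. the historical __sync_fetch_and_nand semantics
  // used by GCC before 4.4, not a true ~(old & incr).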
4149
4150  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
4151    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
4152  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
4153    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
4154  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
4155    BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF);
4156  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
4157    BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8);
4158
4159  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8)
4160    BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
4161  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16)
4162    BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
4163  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32)
4164    BB = EmitAtomicBinary(MI, BB, false, 0);
4165  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64)
4166    BB = EmitAtomicBinary(MI, BB, true, 0);
4167
4168  else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
4169           MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) {
4170    bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
4171
4172    unsigned dest   = MI->getOperand(0).getReg();
4173    unsigned ptrA   = MI->getOperand(1).getReg();
4174    unsigned ptrB   = MI->getOperand(2).getReg();
4175    unsigned oldval = MI->getOperand(3).getReg();
4176    unsigned newval = MI->getOperand(4).getReg();
4177    DebugLoc dl     = MI->getDebugLoc();
4178
4179    MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
4180    MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
4181    MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
4182    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
4183    F->insert(It, loop1MBB);
4184    F->insert(It, loop2MBB);
4185    F->insert(It, midMBB);
4186    F->insert(It, exitMBB);
4187    exitMBB->transferSuccessors(BB);
4188
4189    //  thisMBB:
4190    //   ...
4191    //   fallthrough --> loopMBB
4192    BB->addSuccessor(loop1MBB);
4193
4194    // loop1MBB:
4195    //   l[wd]arx dest, ptr
4196    //   cmp[wd] dest, oldval
4197    //   bne- midMBB
4198    // loop2MBB:
4199    //   st[wd]cx. newval, ptr
4200    //   bne- loopMBB
4201    //   b exitBB
4202    // midMBB:
4203    //   st[wd]cx. dest, ptr
4204    // exitBB:
4205    BB = loop1MBB;
4206    BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
4207      .addReg(ptrA).addReg(ptrB);
4208    BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
4209      .addReg(oldval).addReg(dest);
4210    BuildMI(BB, dl, TII->get(PPC::BCC))
4211      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
4212    BB->addSuccessor(loop2MBB);
4213    BB->addSuccessor(midMBB);
4214
4215    BB = loop2MBB;
4216    BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
4217      .addReg(newval).addReg(ptrA).addReg(ptrB);
4218    BuildMI(BB, dl, TII->get(PPC::BCC))
4219      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
4220    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
4221    BB->addSuccessor(loop1MBB);
4222    BB->addSuccessor(exitMBB);
4223
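    // The st[wd]cx. in midMBB stores back the value just loaded; its only
    // purpose is to release the reservation taken by the l[wd]arx on the
    // failed-comparison path.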
4224    BB = midMBB;
4225    BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
4226      .addReg(dest).addReg(ptrA).addReg(ptrB);
4227    BB->addSuccessor(exitMBB);
4228
4229    //  exitMBB:
4230    //   ...
4231    BB = exitMBB;
4232  } else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
4233             MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
4234    // We must use 64-bit registers for addresses when targeting 64-bit,
4235    // since we're actually doing arithmetic on them.  Other registers
4236    // can be 32-bit.
4237    bool is64bit = PPCSubTarget.isPPC64();
4238    bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
4239
4240    unsigned dest   = MI->getOperand(0).getReg();
4241    unsigned ptrA   = MI->getOperand(1).getReg();
4242    unsigned ptrB   = MI->getOperand(2).getReg();
4243    unsigned oldval = MI->getOperand(3).getReg();
4244    unsigned newval = MI->getOperand(4).getReg();
4245    DebugLoc dl     = MI->getDebugLoc();
4246
4247    MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
4248    MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
4249    MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
4250    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
4251    F->insert(It, loop1MBB);
4252    F->insert(It, loop2MBB);
4253    F->insert(It, midMBB);
4254    F->insert(It, exitMBB);
4255    exitMBB->transferSuccessors(BB);
4256
4257    MachineRegisterInfo &RegInfo = F->getRegInfo();
4258    const TargetRegisterClass *RC =
4259      is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
4260                (const TargetRegisterClass *) &PPC::GPRCRegClass;
4261    unsigned PtrReg = RegInfo.createVirtualRegister(RC);
4262    unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
4263    unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
4264    unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
4265    unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
4266    unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
4267    unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
4268    unsigned MaskReg = RegInfo.createVirtualRegister(RC);
4269    unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
4270    unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
4271    unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
4272    unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
4273    unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
4274    unsigned Ptr1Reg;
4275    unsigned TmpReg = RegInfo.createVirtualRegister(RC);
4276    //  thisMBB:
4277    //   ...
4278    //   fallthrough --> loopMBB
4279    BB->addSuccessor(loop1MBB);
4280
4281    // The 4-byte load must be aligned, while a char or short may be
4282    // anywhere in the word.  Hence all this nasty bookkeeping code.
4283    //   add ptr1, ptrA, ptrB [copy if ptrA==0]
4284    //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
4285    //   xori shift, shift1, 24 [16]
4286    //   rlwinm ptr, ptr1, 0, 0, 29
4287    //   slw newval2, newval, shift
4288    //   slw oldval2, oldval, shift
4289    //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
4290    //   slw mask, mask2, shift
4291    //   and newval3, newval2, mask
4292    //   and oldval3, oldval2, mask
4293    // loop1MBB:
4294    //   lwarx tmpDest, ptr
4295    //   and tmp, tmpDest, mask
4296    //   cmpw tmp, oldval3
4297    //   bne- midMBB
4298    // loop2MBB:
4299    //   andc tmp2, tmpDest, mask
4300    //   or tmp4, tmp2, newval3
4301    //   stwcx. tmp4, ptr
4302    //   bne- loop1MBB
4303    //   b exitBB
4304    // midMBB:
4305    //   stwcx. tmpDest, ptr
4306    // exitBB:
4307    //   srw dest, tmp, shift
4308    if (ptrA != PPC::R0) {
4309      Ptr1Reg = RegInfo.createVirtualRegister(RC);
4310      BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
4311        .addReg(ptrA).addReg(ptrB);
4312    } else {
4313      Ptr1Reg = ptrB;
4314    }
4315    BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
4316        .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
4317    BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
4318        .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
4319    if (is64bit)
4320      BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
4321        .addReg(Ptr1Reg).addImm(0).addImm(61);
4322    else
4323      BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
4324        .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
4325    BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
4326        .addReg(newval).addReg(ShiftReg);
4327    BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
4328        .addReg(oldval).addReg(ShiftReg);
4329    if (is8bit)
4330      BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
4331    else {
4332      BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
4333      BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
4334        .addReg(Mask3Reg).addImm(65535);
4335    }
4336    BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
4337        .addReg(Mask2Reg).addReg(ShiftReg);
4338    BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
4339        .addReg(NewVal2Reg).addReg(MaskReg);
4340    BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
4341        .addReg(OldVal2Reg).addReg(MaskReg);
4342
4343    BB = loop1MBB;
4344    BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
4345        .addReg(PPC::R0).addReg(PtrReg);
4346    BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
4347        .addReg(TmpDestReg).addReg(MaskReg);
4348    BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
4349        .addReg(TmpReg).addReg(OldVal3Reg);
4350    BuildMI(BB, dl, TII->get(PPC::BCC))
4351        .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
4352    BB->addSuccessor(loop2MBB);
4353    BB->addSuccessor(midMBB);
4354
4355    BB = loop2MBB;
4356    BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
4357        .addReg(TmpDestReg).addReg(MaskReg);
4358    BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
4359        .addReg(Tmp2Reg).addReg(NewVal3Reg);
4360    BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
4361        .addReg(PPC::R0).addReg(PtrReg);
4362    BuildMI(BB, dl, TII->get(PPC::BCC))
4363      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
4364    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
4365    BB->addSuccessor(loop1MBB);
4366    BB->addSuccessor(exitMBB);
4367
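    // As in the word-sized case, the stwcx. in midMBB merely stores back
    // the loaded word to release the lwarx reservation when the compare
    // fails.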
4368    BB = midMBB;
4369    BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
4370      .addReg(PPC::R0).addReg(PtrReg);
4371    BB->addSuccessor(exitMBB);
4372
4373    //  exitMBB:
4374    //   ...
4375    BB = exitMBB;
4376    BuildMI(BB, dl, TII->get(PPC::SRW), dest).addReg(TmpReg).addReg(ShiftReg);
4377  } else {
4378    assert(0 && "Unexpected instr type to insert");
4379  }
4380
4381  F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
4382  return BB;
4383}
4384
4385//===----------------------------------------------------------------------===//
4386// Target Optimization Hooks
4387//===----------------------------------------------------------------------===//
4388
4389SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
4390                                             DAGCombinerInfo &DCI) const {
4391  TargetMachine &TM = getTargetMachine();
4392  SelectionDAG &DAG = DCI.DAG;
4393  DebugLoc dl = N->getDebugLoc();
4394  switch (N->getOpcode()) {
4395  default: break;
4396  case PPCISD::SHL:
4397    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
4398      if (C->getZExtValue() == 0)   // 0 << V -> 0.
4399        return N->getOperand(0);
4400    }
4401    break;
4402  case PPCISD::SRL:
4403    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
4404      if (C->getZExtValue() == 0)   // 0 >>u V -> 0.
4405        return N->getOperand(0);
4406    }
4407    break;
4408  case PPCISD::SRA:
4409    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
4410      if (C->getZExtValue() == 0 ||   //  0 >>s V -> 0.
4411          C->isAllOnesValue())    // -1 >>s V -> -1.
4412        return N->getOperand(0);
4413    }
4414    break;
4415
4416  case ISD::SINT_TO_FP:
4417    if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
4418      if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
4419        // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
4420        // We allow the src/dst to be either f32/f64, but the intermediate
4421        // type must be i64.
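        // fctidz leaves the i64 result in an FPR and fcfid consumes it
        // there, so the round trip never crosses into the GPRs; otherwise
        // the i64 would have to go through a store/reload to change
        // register files.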
4422        if (N->getOperand(0).getValueType() == MVT::i64 &&
4423            N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) {
4424          SDValue Val = N->getOperand(0).getOperand(0);
4425          if (Val.getValueType() == MVT::f32) {
4426            Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
4427            DCI.AddToWorklist(Val.getNode());
4428          }
4429
4430          Val = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Val);
4431          DCI.AddToWorklist(Val.getNode());
4432          Val = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Val);
4433          DCI.AddToWorklist(Val.getNode());
4434          if (N->getValueType(0) == MVT::f32) {
4435            Val = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Val,
4436                              DAG.getIntPtrConstant(0));
4437            DCI.AddToWorklist(Val.getNode());
4438          }
4439          return Val;
4440        } else if (N->getOperand(0).getValueType() == MVT::i32) {
4441          // If the intermediate type is i32, we can avoid the load/store here
4442          // too.
4443        }
4444      }
4445    }
4446    break;
4447  case ISD::STORE:
4448    // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
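    // stfiwx stores the low 32 bits of an FPR straight to memory, so the
    // converted integer never needs to visit a GPR.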
4449    if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
4450        !cast<StoreSDNode>(N)->isTruncatingStore() &&
4451        N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
4452        N->getOperand(1).getValueType() == MVT::i32 &&
4453        N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
4454      SDValue Val = N->getOperand(1).getOperand(0);
4455      if (Val.getValueType() == MVT::f32) {
4456        Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
4457        DCI.AddToWorklist(Val.getNode());
4458      }
4459      Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
4460      DCI.AddToWorklist(Val.getNode());
4461
4462      Val = DAG.getNode(PPCISD::STFIWX, dl, MVT::Other, N->getOperand(0), Val,
4463                        N->getOperand(2), N->getOperand(3));
4464      DCI.AddToWorklist(Val.getNode());
4465      return Val;
4466    }
4467
4468    // Turn STORE (BSWAP) -> sthbrx/stwbrx.
4469    if (N->getOperand(1).getOpcode() == ISD::BSWAP &&
4470        N->getOperand(1).getNode()->hasOneUse() &&
4471        (N->getOperand(1).getValueType() == MVT::i32 ||
4472         N->getOperand(1).getValueType() == MVT::i16)) {
4473      SDValue BSwapOp = N->getOperand(1).getOperand(0);
4474      // Do an any-extend to 32-bits if this is a half-word input.
4475      if (BSwapOp.getValueType() == MVT::i16)
4476        BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
4477
4478      return DAG.getNode(PPCISD::STBRX, dl, MVT::Other, N->getOperand(0),
4479                         BSwapOp, N->getOperand(2), N->getOperand(3),
4480                         DAG.getValueType(N->getOperand(1).getValueType()));
4481    }
4482    break;
4483  case ISD::BSWAP:
4484    // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
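    // lhbrx/lwbrx load and byte-reverse in a single instruction; lhbrx
    // zero-extends its halfword, which computeMaskedBitsForTargetNode
    // below relies on.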
4485    if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
4486        N->getOperand(0).hasOneUse() &&
4487        (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16)) {
4488      SDValue Load = N->getOperand(0);
4489      LoadSDNode *LD = cast<LoadSDNode>(Load);
4490      // Create the byte-swapping load.
4491      std::vector<MVT> VTs;
4492      VTs.push_back(MVT::i32);
4493      VTs.push_back(MVT::Other);
4494      SDValue MO = DAG.getMemOperand(LD->getMemOperand());
4495      SDValue Ops[] = {
4496        LD->getChain(),    // Chain
4497        LD->getBasePtr(),  // Ptr
4498        MO,                // MemOperand
4499        DAG.getValueType(N->getValueType(0)) // VT
4500      };
4501      SDValue BSLoad = DAG.getNode(PPCISD::LBRX, dl, VTs, Ops, 4);
4502
4503      // If this is an i16 load, insert the truncate.
4504      SDValue ResVal = BSLoad;
4505      if (N->getValueType(0) == MVT::i16)
4506        ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
4507
4508      // First, combine the bswap away.  This makes the value produced by the
4509      // load dead.
4510      DCI.CombineTo(N, ResVal);
4511
4512      // Next, combine the load away; we give it a bogus result value but a
4513      // real chain result.  The result value is dead because the bswap is dead.
4514      DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
4515
4516      // Return N so it doesn't get rechecked!
4517      return SDValue(N, 0);
4518    }
4519
4520    break;
4521  case PPCISD::VCMP: {
4522    // If a VCMPo node already exists with exactly the same operands as this
4523    // node, use its result instead of this node (VCMPo computes both a CR6 and
4524    // a normal output).
4525    //
4526    if (!N->getOperand(0).hasOneUse() &&
4527        !N->getOperand(1).hasOneUse() &&
4528        !N->getOperand(2).hasOneUse()) {
4529
4530      // Scan all of the users of the LHS, looking for VCMPo's that match.
4531      SDNode *VCMPoNode = 0;
4532
4533      SDNode *LHSN = N->getOperand(0).getNode();
4534      for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
4535           UI != E; ++UI)
4536        if (UI->getOpcode() == PPCISD::VCMPo &&
4537            UI->getOperand(1) == N->getOperand(1) &&
4538            UI->getOperand(2) == N->getOperand(2) &&
4539            UI->getOperand(0) == N->getOperand(0)) {
4540          VCMPoNode = *UI;
4541          break;
4542        }
4543
4544      // If there is no VCMPo node, or if its flag result (value #1) is unused,
4545      // don't transform this.
4546      if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
4547        break;
4548
4549      // Look at the (necessarily single) use of the flag value.  If it has a
4550      // chain, this transformation is more complex.  Note that multiple things
4551      // could use the value result, which we should ignore.
4552      SDNode *FlagUser = 0;
4553      for (SDNode::use_iterator UI = VCMPoNode->use_begin();
4554           FlagUser == 0; ++UI) {
4555        assert(UI != VCMPoNode->use_end() && "Didn't find user!");
4556        SDNode *User = *UI;
4557        for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
4558          if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
4559            FlagUser = User;
4560            break;
4561          }
4562        }
4563      }
4564
4565      // If the user is a MFCR instruction, we know this is safe.  Otherwise we
4566      // give up for right now.
4567      if (FlagUser->getOpcode() == PPCISD::MFCR)
4568        return SDValue(VCMPoNode, 0);
4569    }
4570    break;
4571  }
4572  case ISD::BR_CC: {
4573    // If this is a branch on an altivec predicate comparison, lower this so
4574    // that we don't have to do a MFCR: instead, branch directly on CR6.  This
4575    // lowering is done pre-legalize, because the legalizer lowers the predicate
4576    // compare down to code that is difficult to reassemble.
4577    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
4578    SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
4579    int CompareOpc;
4580    bool isDot;
4581
4582    if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
4583        isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
4584        getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
4585      assert(isDot && "Can't compare against a vector result!");
4586
4587      // If this is a comparison against something other than 0/1, then we know
4588      // that the condition is never/always true.
4589      unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
4590      if (Val != 0 && Val != 1) {
4591        if (CC == ISD::SETEQ)      // Cond never true, remove branch.
4592          return N->getOperand(0);
4593        // Always !=, turn it into an unconditional branch.
4594        return DAG.getNode(ISD::BR, dl, MVT::Other,
4595                           N->getOperand(0), N->getOperand(4));
4596      }
4597
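      // SETEQ against 1 and SETNE against 0 both mean "branch when the
      // predicate bit is set"; the xor collapses the four CC/Val
      // combinations into one flag.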
4598      bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
4599
4600      // Create the PPCISD altivec 'dot' comparison node.
4601      std::vector<MVT> VTs;
4602      SDValue Ops[] = {
4603        LHS.getOperand(2),  // LHS of compare
4604        LHS.getOperand(3),  // RHS of compare
4605        DAG.getConstant(CompareOpc, MVT::i32)
4606      };
4607      VTs.push_back(LHS.getOperand(2).getValueType());
4608      VTs.push_back(MVT::Flag);
4609      SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
4610
4611      // Unpack the result based on how the target uses it.
4612      PPC::Predicate CompOpc;
4613      switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
4614      default:  // Can't happen, don't crash on invalid number though.
4615      case 0:   // Branch on the value of the EQ bit of CR6.
4616        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
4617        break;
4618      case 1:   // Branch on the inverted value of the EQ bit of CR6.
4619        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
4620        break;
4621      case 2:   // Branch on the value of the LT bit of CR6.
4622        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
4623        break;
4624      case 3:   // Branch on the inverted value of the LT bit of CR6.
4625        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
4626        break;
4627      }
4628
4629      return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
4630                         DAG.getConstant(CompOpc, MVT::i32),
4631                         DAG.getRegister(PPC::CR6, MVT::i32),
4632                         N->getOperand(4), CompNode.getValue(1));
4633    }
4634    break;
4635  }
4636  }
4637
4638  return SDValue();
4639}
4640
4641//===----------------------------------------------------------------------===//
4642// Inline Assembly Support
4643//===----------------------------------------------------------------------===//
4644
4645void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
4646                                                       const APInt &Mask,
4647                                                       APInt &KnownZero,
4648                                                       APInt &KnownOne,
4649                                                       const SelectionDAG &DAG,
4650                                                       unsigned Depth) const {
4651  KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
4652  switch (Op.getOpcode()) {
4653  default: break;
4654  case PPCISD::LBRX: {
4655    // lhbrx zero-extends, so its top 16 bits are known to be clear.
4656    if (cast<VTSDNode>(Op.getOperand(3))->getVT() == MVT::i16)
4657      KnownZero = 0xFFFF0000;
4658    break;
4659  }
4660  case ISD::INTRINSIC_WO_CHAIN: {
4661    switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
4662    default: break;
4663    case Intrinsic::ppc_altivec_vcmpbfp_p:
4664    case Intrinsic::ppc_altivec_vcmpeqfp_p:
4665    case Intrinsic::ppc_altivec_vcmpequb_p:
4666    case Intrinsic::ppc_altivec_vcmpequh_p:
4667    case Intrinsic::ppc_altivec_vcmpequw_p:
4668    case Intrinsic::ppc_altivec_vcmpgefp_p:
4669    case Intrinsic::ppc_altivec_vcmpgtfp_p:
4670    case Intrinsic::ppc_altivec_vcmpgtsb_p:
4671    case Intrinsic::ppc_altivec_vcmpgtsh_p:
4672    case Intrinsic::ppc_altivec_vcmpgtsw_p:
4673    case Intrinsic::ppc_altivec_vcmpgtub_p:
4674    case Intrinsic::ppc_altivec_vcmpgtuh_p:
4675    case Intrinsic::ppc_altivec_vcmpgtuw_p:
4676      KnownZero = ~1U;  // All bits but the low one are known to be zero.
4677      break;
4678    }
4679  }
4680  }
4681}
4682
4683
4684/// getConstraintType - Given a constraint, return the type of
4685/// constraint it is for this target.
4686PPCTargetLowering::ConstraintType
4687PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
4688  if (Constraint.size() == 1) {
4689    switch (Constraint[0]) {
4690    default: break;
4691    case 'b':
4692    case 'r':
4693    case 'f':
4694    case 'v':
4695    case 'y':
4696      return C_RegisterClass;
4697    }
4698  }
4699  return TargetLowering::getConstraintType(Constraint);
4700}
4701
4702std::pair<unsigned, const TargetRegisterClass*>
4703PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
4704                                                MVT VT) const {
4705  if (Constraint.size() == 1) {
4706    // GCC RS6000 Constraint Letters
4707    switch (Constraint[0]) {
4708    case 'b':   // R1-R31
4709    case 'r':   // R0-R31
4710      if (VT == MVT::i64 && PPCSubTarget.isPPC64())
4711        return std::make_pair(0U, PPC::G8RCRegisterClass);
4712      return std::make_pair(0U, PPC::GPRCRegisterClass);
4713    case 'f':
4714      if (VT == MVT::f32)
4715        return std::make_pair(0U, PPC::F4RCRegisterClass);
4716      else if (VT == MVT::f64)
4717        return std::make_pair(0U, PPC::F8RCRegisterClass);
4718      break;
4719    case 'v':
4720      return std::make_pair(0U, PPC::VRRCRegisterClass);
4721    case 'y':   // crrc
4722      return std::make_pair(0U, PPC::CRRCRegisterClass);
4723    }
4724  }
4725
4726  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
4727}
4728
4729
4730/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
4731/// vector.  If it is invalid, don't add anything to Ops. If hasMemory is true
4732/// it means one of the asm constraint of the inline asm instruction being
4733/// processed is 'm'.
4734void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, char Letter,
4735                                                     bool hasMemory,
4736                                                     std::vector<SDValue>&Ops,
4737                                                     SelectionDAG &DAG) const {
4738  SDValue Result(0,0);
4739  switch (Letter) {
4740  default: break;
4741  case 'I':
4742  case 'J':
4743  case 'K':
4744  case 'L':
4745  case 'M':
4746  case 'N':
4747  case 'O':
4748  case 'P': {
4749    ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
4750    if (!CST) return; // Must be an immediate to match.
4751    unsigned Value = CST->getZExtValue();
4752    switch (Letter) {
4753    default: assert(0 && "Unknown constraint letter!");
4754    case 'I':  // "I" is a signed 16-bit constant.
4755      if ((short)Value == (int)Value)
4756        Result = DAG.getTargetConstant(Value, Op.getValueType());
4757      break;
4758    case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
4759    case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
4760      if ((short)Value == 0)
4761        Result = DAG.getTargetConstant(Value, Op.getValueType());
4762      break;
4763    case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
4764      if ((Value >> 16) == 0)
4765        Result = DAG.getTargetConstant(Value, Op.getValueType());
4766      break;
4767    case 'M':  // "M" is a constant that is greater than 31.
4768      if (Value > 31)
4769        Result = DAG.getTargetConstant(Value, Op.getValueType());
4770      break;
4771    case 'N':  // "N" is a positive constant that is an exact power of two.
4772      if ((int)Value > 0 && isPowerOf2_32(Value))
4773        Result = DAG.getTargetConstant(Value, Op.getValueType());
4774      break;
4775    case 'O':  // "O" is the constant zero.
4776      if (Value == 0)
4777        Result = DAG.getTargetConstant(Value, Op.getValueType());
4778      break;
4779    case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
4780      if ((short)-Value == (int)-Value)
4781        Result = DAG.getTargetConstant(Value, Op.getValueType());
4782      break;
4783    }
4784    break;
4785  }
4786  }
4787
4788  if (Result.getNode()) {
4789    Ops.push_back(Result);
4790    return;
4791  }
4792
4793  // Handle standard constraint letters.
4794  TargetLowering::LowerAsmOperandForConstraint(Op, Letter, hasMemory, Ops, DAG);
4795}
4796
4797// isLegalAddressingMode - Return true if the addressing mode represented
4798// by AM is legal for this target, for a load/store of the specified type.
4799bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
4800                                              const Type *Ty) const {
4801  // FIXME: PPC does not allow r+i addressing modes for vectors!
4802
4803  // PPC allows a sign-extended 16-bit immediate field.
4804  if (AM.BaseOffs < -(1LL << 15) || AM.BaseOffs > (1LL << 15)-1)
4805    return false;
4806
4807  // No global is ever allowed as a base.
4808  if (AM.BaseGV)
4809    return false;
4810
4811  // PPC supports only the r, r+i, and r+r forms handled below.
4812  switch (AM.Scale) {
4813  case 0:  // "r+i" or just "i", depending on HasBaseReg.
4814    break;
4815  case 1:
4816    if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
4817      return false;
4818    // Otherwise we have r+r or r+i.
4819    break;
4820  case 2:
4821    if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r  or  2*r+i is not allowed.
4822      return false;
4823    // Allow 2*r as r+r.
4824    break;
4825  default:
4826    // No other scales are supported.
4827    return false;
4828  }
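  // Net effect: r, r+i (16-bit signed), r+r, and 2*r (treated as r+r) are
  // accepted; any scaled index combined with a base register or offset is
  // rejected.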
4829
4830  return true;
4831}
4832
4833/// isLegalAddressImmediate - Return true if the integer value can be used
4834/// as the offset of the target addressing mode for load / store of the
4835/// given type.
4836bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,const Type *Ty) const{
4837  // PPC allows a sign-extended 16-bit immediate field.
4838  return (V >= -(1 << 15) && V <= (1 << 15)-1);
4839}
4840
4841bool PPCTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
4842  return false;
4843}
4844
4845SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) {
4846  DebugLoc dl = Op.getDebugLoc();
4847  // Depths > 0 not supported yet!
4848  if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0)
4849    return SDValue();
4850
4851  MachineFunction &MF = DAG.getMachineFunction();
4852  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4853
4854  // Just load the return address off the stack.
4855  SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
4856
4857  // Make sure the function really does not optimize away the store of the RA
4858  // to the stack.
4859  FuncInfo->setLRStoreRequired();
4860  return DAG.getLoad(getPointerTy(), dl,
4861                     DAG.getEntryNode(), RetAddrFI, NULL, 0);
4862}
4863
4864SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
4865  DebugLoc dl = Op.getDebugLoc();
4866  // Depths > 0 not supported yet!
4867  if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0)
4868    return SDValue();
4869
4870  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
4871  bool isPPC64 = PtrVT == MVT::i64;
4872
4873  MachineFunction &MF = DAG.getMachineFunction();
4874  MachineFrameInfo *MFI = MF.getFrameInfo();
4875  bool is31 = (NoFramePointerElim || MFI->hasVarSizedObjects())
4876                  && MFI->getStackSize();
4877
4878  if (isPPC64)
4879    return DAG.getCopyFromReg(DAG.getEntryNode(), dl, is31 ? PPC::X31 : PPC::X1,
4880      MVT::i64);
4881  else
4882    return DAG.getCopyFromReg(DAG.getEntryNode(), dl, is31 ? PPC::R31 : PPC::R1,
4883      MVT::i32);
4884}
4885
4886bool
4887PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
4888  // The PowerPC target isn't yet aware of offsets.
4889  return false;
4890}
4891