PPCISelLowering.cpp revision d2ea0e10cbd158c93fb870cdd03001b9cd1156b8
//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCTargetMachine.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                                     CCValAssign::LocInfo &LocInfo,
                                     ISD::ArgFlagsTy &ArgFlags,
                                     CCState &State);
static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
                                            MVT &LocVT,
                                            CCValAssign::LocInfo &LocInfo,
                                            ISD::ArgFlagsTy &ArgFlags,
                                            CCState &State);
static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
                                              MVT &LocVT,
                                              CCValAssign::LocInfo &LocInfo,
                                              ISD::ArgFlagsTy &ArgFlags,
                                              CCState &State);

static cl::opt<bool> EnablePPCPreinc("enable-ppc-preinc",
  cl::desc("enable preincrement load/store generation on PPC (experimental)"),
                                     cl::Hidden);

static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
  if (TM.getSubtargetImpl()->isDarwin())
    return new TargetLoweringObjectFileMachO();

  return new TargetLoweringObjectFileELF();
}

PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
  : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {

  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  setMinStackArgumentAlignment(TM.getSubtarget<PPCSubtarget>().isPPC64() ? 8:4);

  // Set up the register classes.
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
  addRegisterClass(MVT::f64, &PPC::F8RCRegClass);

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-inc loads and stores.
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);

  // This is used in the ppcf128->int sequence.  Note it has different semantics
  // from FP_ROUND:  that rounds to nearest, this rounds to zero.
  setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);

  // We do not currently implement these libm operations for PowerPC.
  setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
  setOperationAction(ISD::FCEIL,  MVT::ppcf128, Expand);
  setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
  setOperationAction(ISD::FRINT,  MVT::ppcf128, Expand);
  setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);

  // PowerPC has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod/pow
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FPOW , MVT::f64, Expand);
  setOperationAction(ISD::FMA  , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);
  setOperationAction(ISD::FPOW , MVT::f32, Expand);
  setOperationAction(ISD::FMA  , MVT::f32, Expand);

  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

  // Expand FSQRT unless the subtarget has hardware square-root support.
  if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);
  }

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // PowerPC does not have BSWAP, CTPOP or CTTZ
  setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
  setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
  setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
  setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);

  // PowerPC does not have ROTR
  setOperationAction(ISD::ROTR, MVT::i32   , Expand);
  setOperationAction(ISD::ROTR, MVT::i64   , Expand);

  // PowerPC does not have Select
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit
  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND which requires SetCC
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT,  MVT::Other, Expand);

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

  // PowerPC does not have [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

  setOperationAction(ISD::BITCAST, MVT::f32, Expand);
  setOperationAction(ISD::BITCAST, MVT::i32, Expand);
  setOperationAction(ISD::BITCAST, MVT::i64, Expand);
  setOperationAction(ISD::BITCAST, MVT::f64, Expand);

  // We cannot sextinreg(i1).  Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
  setOperationAction(ISD::EHSELECTION,   MVT::i64, Expand);
  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
  setOperationAction(ISD::EHSELECTION,   MVT::i32, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::BlockAddress,  MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i64, Custom);

  // TRAP is legal.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.
  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  if (TM.getSubtarget<PPCSubtarget>().isSVR4ABI()) {
    if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
      // VAARG always uses double-word chunks, so promote anything smaller.
      setOperationAction(ISD::VAARG, MVT::i1, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i8, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i16, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i32, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::Other, Expand);
    } else {
      // VAARG is custom lowered with the 32-bit SVR4 ABI.
      setOperationAction(ISD::VAARG, MVT::Other, Custom);
      setOperationAction(ISD::VAARG, MVT::i64, Custom);
    }
  } else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // Comparisons that require checking two conditions.
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f64, Expand);

  if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

    // FIXME: disable this lowered code.  This generates 64-bit register values,
    // and we don't model the fact that the top part is clobbered by calls.  We
    // need to flag these together so that the value isn't live across a call.
    //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  if (TM.getSubtarget<PPCSubtarget>().use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }

  if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
      MVT::SimpleValueType VT = (MVT::SimpleValueType)i;

      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD , VT, Legal);
      setOperationAction(ISD::SUB , VT, Legal);

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND   , VT, Promote);
      AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
      setOperationAction(ISD::OR    , VT, Promote);
      AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
      setOperationAction(ISD::XOR   , VT, Promote);
      AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
      setOperationAction(ISD::LOAD  , VT, Promote);
      AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType (ISD::STORE, VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL , VT, Expand);
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::FDIV, VT, Expand);
      setOperationAction(ISD::FNEG, VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UDIVREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::CTPOP, VT, Expand);
      setOperationAction(ISD::CTLZ, VT, Expand);
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::CTTZ, VT, Expand);
      setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    setOperationAction(ISD::AND   , MVT::v4i32, Legal);
    setOperationAction(ISD::OR    , MVT::v4i32, Legal);
    setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
    setOperationAction(ISD::STORE , MVT::v4i32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
  }

  if (TM.getSubtarget<PPCSubtarget>().has64BitSupport())
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);

  setOperationAction(ISD::ATOMIC_LOAD,  MVT::i32, Expand);
  setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?

  if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
    setStackPointerRegisterToSaveRestore(PPC::X1);
    setExceptionPointerRegister(PPC::X3);
    setExceptionSelectorRegister(PPC::X4);
  } else {
    setStackPointerRegisterToSaveRestore(PPC::R1);
    setExceptionPointerRegister(PPC::R3);
    setExceptionSelectorRegister(PPC::R4);
  }

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);
  setTargetDAGCombine(ISD::BSWAP);

  // Darwin long double math library functions have $LDBL128 appended.
  if (TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
    setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
    setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
    setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
    setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
    setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
  }

  setMinFunctionAlignment(2);
  if (PPCSubTarget.isDarwin())
    setPrefFunctionAlignment(4);

  setInsertFencesForAtomic(true);

  setSchedulingPreference(Sched::Hybrid);

  computeRegisterProperties();
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
  const TargetMachine &TM = getTargetMachine();
  // Darwin passes everything on a 4-byte boundary.
  if (TM.getSubtarget<PPCSubtarget>().isDarwin())
    return 4;

  // 16-byte and wider vectors are passed on a 16-byte boundary.
  if (VectorType *VTy = dyn_cast<VectorType>(Ty))
    if (VTy->getBitWidth() >= 128)
      return 16;

  // Everything else is aligned to an 8-byte boundary on PPC64 and a 4-byte
  // boundary on PPC32.
  if (PPCSubTarget.isPPC64())
    return 8;

  return 4;
}

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case PPCISD::FSEL:            return "PPCISD::FSEL";
  case PPCISD::FCFID:           return "PPCISD::FCFID";
  case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
  case PPCISD::STFIWX:          return "PPCISD::STFIWX";
  case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM:           return "PPCISD::VPERM";
  case PPCISD::Hi:              return "PPCISD::Hi";
  case PPCISD::Lo:              return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
  case PPCISD::TOC_RESTORE:     return "PPCISD::TOC_RESTORE";
  case PPCISD::LOAD:            return "PPCISD::LOAD";
  case PPCISD::LOAD_TOC:        return "PPCISD::LOAD_TOC";
  case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
  case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL:             return "PPCISD::SRL";
  case PPCISD::SRA:             return "PPCISD::SRA";
  case PPCISD::SHL:             return "PPCISD::SHL";
  case PPCISD::EXTSW_32:        return "PPCISD::EXTSW_32";
  case PPCISD::STD_32:          return "PPCISD::STD_32";
  case PPCISD::CALL_SVR4:       return "PPCISD::CALL_SVR4";
  case PPCISD::CALL_NOP_SVR4:   return "PPCISD::CALL_NOP_SVR4";
  case PPCISD::CALL_Darwin:     return "PPCISD::CALL_Darwin";
  case PPCISD::NOP:             return "PPCISD::NOP";
  case PPCISD::MTCTR:           return "PPCISD::MTCTR";
  case PPCISD::BCTRL_Darwin:    return "PPCISD::BCTRL_Darwin";
  case PPCISD::BCTRL_SVR4:      return "PPCISD::BCTRL_SVR4";
  case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
  case PPCISD::MFCR:            return "PPCISD::MFCR";
  case PPCISD::VCMP:            return "PPCISD::VCMP";
  case PPCISD::VCMPo:           return "PPCISD::VCMPo";
  case PPCISD::LBRX:            return "PPCISD::LBRX";
  case PPCISD::STBRX:           return "PPCISD::STBRX";
  case PPCISD::LARX:            return "PPCISD::LARX";
  case PPCISD::STCX:            return "PPCISD::STCX";
  case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
  case PPCISD::MFFS:            return "PPCISD::MFFS";
  case PPCISD::MTFSB0:          return "PPCISD::MTFSB0";
  case PPCISD::MTFSB1:          return "PPCISD::MTFSB1";
  case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
  case PPCISD::MTFSF:           return "PPCISD::MTFSF";
  case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
  }
}

EVT PPCTargetLowering::getSetCCResultType(EVT VT) const {
  return MVT::i32;
}

//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
  if (!isUnary) {
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i),  i*2+1))
        return false;
  } else {
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i),    i*2+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+1))
        return false;
  }
  return true;
}

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
  if (!isUnary) {
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+3))
        return false;
  } else {
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+3) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+3))
        return false;
  }
  return true;
}

/// isVMerge - Common function, used to match vmrg* shuffles.
///
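/// As a worked example of the mask this checks for: with UnitSize == 1,
/// LHSStart == 0 and RHSStart == 16 (the two-input vmrghb case), the expected
/// mask is <0,16,1,17,...,7,23>; each unit of the result alternates between
/// element LHSStart+i of the first input and RHSStart+i of the second.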
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  assert(N->getValueType(0) == MVT::v16i8 &&
         "PPC only supports shuffles by bytes!");
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1, 2 or 4 bytes).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             bool isUnary) {
  if (!isUnary)
    return isVMerge(N, UnitSize, 8, 24);
  return isVMerge(N, UnitSize, 8, 8);
}

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1, 2 or 4 bytes).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             bool isUnary) {
  if (!isUnary)
    return isVMerge(N, UnitSize, 0, 16);
  return isVMerge(N, UnitSize, 0, 0);
}

/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
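/// For example, the mask <3,4,5,...,18> selects 16 consecutive bytes starting
/// at byte 3 of the concatenated inputs, which corresponds to a vsldoi with a
/// shift amount of 3.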
int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
  assert(N->getValueType(0) == MVT::v16i8 &&
         "PPC only supports shuffles by bytes!");

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;
  ShiftAmt -= i;

  if (!isUnary) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
        return -1;
  } else {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
        return -1;
  }
  return ShiftAmt;
}

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.
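/// For example, with EltSize == 4 the v16i8 mask
/// <4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7> splats word element 1 of the first
/// input, i.e. "vspltw 1".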
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
  assert(N->getValueType(0) == MVT::v16i8 &&
         (EltSize == 1 || EltSize == 2 || EltSize == 4));

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = N->getMaskElt(0);

  // FIXME: Handle UNDEF elements too!
  if (ElementBase >= 16)
    return false;

  // Check that the indices are consecutive, in the case of a multi-byte element
  // splatted with a v16i8 mask.
  for (unsigned i = 1; i != EltSize; ++i)
    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
      return false;

  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getMaskElt(i) < 0) continue;
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getMaskElt(i+j) != N->getMaskElt(j))
        return false;
  }
  return true;
}

/// isAllNegativeZeroVector - Returns true if all elements of build_vector
/// are -0.0.
bool PPC::isAllNegativeZeroVector(SDNode *N) {
  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);

  APInt APVal, APUndef;
  unsigned BitSize;
  bool HasAnyUndefs;

  if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true))
    if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
      return CFP->getValueAPF().isNegZero();

  return false;
}

/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  assert(isSplatShuffleMask(SVOp, EltSize));
  return SVOp->getMaskElt(0) / EltSize;
}

/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted.  The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  SDValue OpVal(0, 0);

  // If ByteSize of the splat is bigger than the element size of the
  // build_vector, then we have a case where we are checking for a splat where
  // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
    SDValue UniquedVals[4];
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    // See if all of the elements in the buildvector agree across.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
      // If the element isn't a constant, bail fully out.
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();

      if (UniquedVals[i&(Multiple-1)].getNode() == 0)
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
        return SDValue();  // no match.
    }

    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    // either constant or undef values that are identical for each chunk.  See
    // if these chunks can form into a larger vspltis*.

    // Check to see if all of the leading entries are either 0 or -1.  If
    // neither, then this won't fit into the immediate field.
    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (UniquedVals[i].getNode() == 0) continue;  // Must have been undefs.

      LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
      LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
    }
    // Finally, check the least significant entry.
    if (LeadingZero) {
      if (UniquedVals[Multiple-1].getNode() == 0)
        return DAG.getTargetConstant(0, MVT::i32);  // 0,0,0,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
      if (Val < 16)
        return DAG.getTargetConstant(Val, MVT::i32);  // 0,0,0,4 -> vspltisw(4)
    }
    if (LeadingOnes) {
      if (UniquedVals[Multiple-1].getNode() == 0)
        return DAG.getTargetConstant(~0U, MVT::i32);  // -1,-1,-1,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
      if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
        return DAG.getTargetConstant(Val, MVT::i32);
    }

    return SDValue();
  }

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return SDValue();
  }

  if (OpVal.getNode() == 0) return SDValue();  // All UNDEF: use implicit def.

  unsigned ValSizeInBytes = EltSize;
  uint64_t Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
    Value = CN->getZExtValue();
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = FloatToBits(CN->getValueAPF().convertToFloat());
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat.  The only case that we could fit the replicated bits into our
  // immediate field for would be zero, and we prefer to use vxor for it.
  if (ValSizeInBytes < ByteSize) return SDValue();

  // If the element value is larger than the splat value, cut it in half and
  // check to see if the two halves are equal.  Continue doing this until we
  // get to ByteSize.  This allows us to handle 0x01010101 as 0x01.
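  // For example, a v4i32 splat of 0x01010101 tested with ByteSize == 1:
  // 0x01010101 halves to 0x0101 and then to 0x01, so the same bytes are
  // produced by "vspltisb 1".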
  while (ValSizeInBytes > ByteSize) {
    ValSizeInBytes >>= 1;

    // If the top half equals the bottom half, we're still ok.
    if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
         (Value                        & ((1 << (8*ValSizeInBytes))-1)))
      return SDValue();
  }

  // Properly sign extend the value.
  int ShAmt = (4-ByteSize)*8;
  int MaskVal = ((int)Value << ShAmt) >> ShAmt;

  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDValue();

  // Finally, if this value fits in a 5 bit sext field, return it
  if (((MaskVal << (32-5)) >> (32-5)) == MaskVal)
    return DAG.getTargetConstant(MaskVal, MVT::i32);
  return SDValue();
}

//===----------------------------------------------------------------------===//
//  Addressing Mode Selection
//===----------------------------------------------------------------------===//

/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
/// or 64-bit immediate, and if the value can be accurately represented as a
/// sign extension from a 16-bit value.  If so, this returns true and sets Imm
/// to the immediate.
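/// For example, -32768 (0xFFFFFFFFFFFF8000 as i64) is representable, while
/// 32768 (0x8000) is not: sign-extending its low 16 bits yields -32768, which
/// does not match the original value.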
static bool isIntS16Immediate(SDNode *N, short &Imm) {
  if (N->getOpcode() != ISD::Constant)
    return false;

  Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
  if (N->getValueType(0) == MVT::i32)
    return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
  else
    return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
}

static bool isIntS16Immediate(SDValue Op, short &Imm) {
  return isIntS16Immediate(Op.getNode(), Imm);
}

/// SelectAddressRegReg - Given the specified address, check to see if it
/// can be represented as an indexed [r+r] operation.  Returns false if it
/// can be more efficiently represented with [r+imm].
bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
                                            SDValue &Index,
                                            SelectionDAG &DAG) const {
  short imm = 0;
  if (N.getOpcode() == ISD::ADD) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i
    if (N.getOperand(1).getOpcode() == PPCISD::Lo)
      return false;    // r+i

    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  } else if (N.getOpcode() == ISD::OR) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i; prefer to fold the immediate when we can.

    // If this is an or of disjoint bitfields, we can codegen this as an add
    // (for better address arithmetic) if the LHS and RHS of the OR are provably
    // disjoint.
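    // For example, if the LHS is known to have its low four bits clear (say it
    // is (X & ~15)) and the RHS is 12, no bit position can be set in both
    // operands, so OR and ADD compute the same value and the [r+r] form is
    // safe.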
    APInt LHSKnownZero, LHSKnownOne;
    APInt RHSKnownZero, RHSKnownOne;
    DAG.ComputeMaskedBits(N.getOperand(0),
                          LHSKnownZero, LHSKnownOne);

    if (LHSKnownZero.getBoolValue()) {
      DAG.ComputeMaskedBits(N.getOperand(1),
                            RHSKnownZero, RHSKnownOne);
      // If all of the bits are known zero on the LHS or RHS, the add won't
      // carry.
      if (~(LHSKnownZero | RHSKnownZero) == 0) {
        Base = N.getOperand(0);
        Index = N.getOperand(1);
        return true;
      }
    }
  }

  return false;
}

/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
/// represented as reg+reg.
bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
                                            SDValue &Base,
                                            SelectionDAG &DAG) const {
  // FIXME dl should come from parent load or store, not from address
  DebugLoc dl = N.getDebugLoc();
  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Disp, Base, DAG))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm)) {
      Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      return true;  // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm)) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      APInt LHSKnownZero, LHSKnownOne;
      DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);

      if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        Base = N.getOperand(0);
        Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.

    // If this address fits entirely in a 16-bit sext immediate field, codegen
    // this as "d, 0"
    short Imm;
    if (isIntS16Immediate(CN, Imm)) {
      Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
      Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
                             CN->getValueType(0));
      return true;
    }

    // Handle 32-bit sext immediates with LIS + addr mode.
    if (CN->getValueType(0) == MVT::i32 ||
        (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) {
      int Addr = (int)CN->getZExtValue();

      // Otherwise, break this down into an LIS + disp.
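      // For example, Addr = 0x12348001: (short)Addr is -32767, and
      // (Addr - (-32767)) >> 16 is 0x1235, so "lis 0x1235" followed by a
      // displacement of -32767 reconstructs the original address.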
      Disp = DAG.getTargetConstant((short)Addr, MVT::i32);

      Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32);
      unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
      Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
      return true;
    }
  }

  Disp = DAG.getTargetConstant(0, getPointerTy());
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
  else
    Base = N;
  return true;      // [r+0]
}

/// SelectAddressRegRegOnly - Given the specified address, force it to be
/// represented as an indexed [r+r] operation.
bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
                                                SDValue &Index,
                                                SelectionDAG &DAG) const {
  // Check to see if we can easily represent this as an [r+r] address.  This
  // will fail if it thinks that the address is more profitably represented as
  // reg+imm, e.g. where imm = 0.
  if (SelectAddressRegReg(N, Base, Index, DAG))
    return true;

  // If the operand is an addition, always emit this as [r+r], since this is
  // better (for code size, and execution, as the memop does the add for free)
  // than emitting an explicit add.
  if (N.getOpcode() == ISD::ADD) {
    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  }

  // Otherwise, do it the hard way, using R0 as the base register.
  Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
                         N.getValueType());
  Index = N;
  return true;
}

/// SelectAddressRegImmShift - Returns true if the address N can be
/// represented by a base register plus a signed 14-bit displacement
/// [r+imm*4].  Suitable for use by STD and friends.
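/// (These DS-form instructions store the displacement as imm >> 2 in a 14-bit
/// field, so only 4-byte-aligned displacements in [-32768, 32764] fit.)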
bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp,
                                                 SDValue &Base,
                                                 SelectionDAG &DAG) const {
  // FIXME dl should come from the parent load or store, not the address
  DebugLoc dl = N.getDebugLoc();
  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Disp, Base, DAG))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
      Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      return true;  // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      APInt LHSKnownZero, LHSKnownOne;
      DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
      if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        Base = N.getOperand(0);
        Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.  Verify low two bits are clear.
    if ((CN->getZExtValue() & 3) == 0) {
      // If this address fits entirely in a 14-bit sext immediate field, codegen
      // this as "d, 0"
      short Imm;
      if (isIntS16Immediate(CN, Imm)) {
        Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy());
        Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
                               CN->getValueType(0));
        return true;
      }

      // Fold the low-part of 32-bit absolute addresses into addr mode.
      if (CN->getValueType(0) == MVT::i32 ||
          (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) {
        int Addr = (int)CN->getZExtValue();

        // Otherwise, break this down into an LIS + disp.
        Disp = DAG.getTargetConstant((short)Addr >> 2, MVT::i32);
        Base = DAG.getTargetConstant((Addr-(signed short)Addr) >> 16, MVT::i32);
        unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
        Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base),0);
        return true;
      }
    }
  }

  Disp = DAG.getTargetConstant(0, getPointerTy());
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
  else
    Base = N;
  return true;      // [r+0]
}

/// getPreIndexedAddressParts - Returns true by value, and sets the base
/// pointer, offset pointer, and addressing mode by reference, if this node's
/// address can be legally represented as a pre-indexed load/store address.
bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                                  SDValue &Offset,
                                                  ISD::MemIndexedMode &AM,
                                                  SelectionDAG &DAG) const {
  // Disabled by default for now.
  if (!EnablePPCPreinc) return false;

  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    Ptr = LD->getBasePtr();
    VT = LD->getMemoryVT();

  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    Ptr = ST->getBasePtr();
    VT  = ST->getMemoryVT();
  } else
    return false;

  // PowerPC doesn't have preinc load/store instructions for vectors.
  if (VT.isVector())
    return false;

  // TODO: Check reg+reg first.

  // LDU/STU use reg+imm*4, others use reg+imm.
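  // The update forms write the effective address back into the base register:
  // for example, "lwzu r4, 8(r5)" loads from r5+8 and leaves r5 == r5+8.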
  if (VT != MVT::i64) {
    // reg + imm
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG))
      return false;
  } else {
    // reg + imm * 4.
    if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG))
      return false;
  }

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
    // sext i32 to i64 when addr mode is r+i.
    if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
        LD->getExtensionType() == ISD::SEXTLOAD &&
        isa<ConstantSDNode>(Offset))
      return false;
  }

  AM = ISD::PRE_INC;
  return true;
}

//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//

/// GetLabelAccessInfo - Return true if we should reference labels using a
/// PICBase, and set HiOpFlags and LoOpFlags to the target MO flags.
static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
                               unsigned &LoOpFlags, const GlobalValue *GV = 0) {
  HiOpFlags = PPCII::MO_HA16;
  LoOpFlags = PPCII::MO_LO16;

  // Don't use the PIC base if we're not using the PIC relocation model, or if
  // we are on a non-Darwin platform.  We don't support PIC on other platforms
  // yet.
  bool isPIC = TM.getRelocationModel() == Reloc::PIC_ &&
               TM.getSubtarget<PPCSubtarget>().isDarwin();
  if (isPIC) {
    HiOpFlags |= PPCII::MO_PIC_FLAG;
    LoOpFlags |= PPCII::MO_PIC_FLAG;
  }

  // If this is a reference to a global value that requires a non-lazy-ptr, make
  // sure that instruction lowering adds it.
  if (GV && TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV, TM)) {
    HiOpFlags |= PPCII::MO_NLP_FLAG;
    LoOpFlags |= PPCII::MO_NLP_FLAG;

    if (GV->hasHiddenVisibility()) {
      HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
      LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
    }
  }

  return isPIC;
}

static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
                             SelectionDAG &DAG) {
  EVT PtrVT = HiPart.getValueType();
  SDValue Zero = DAG.getConstant(0, PtrVT);
  DebugLoc DL = HiPart.getDebugLoc();

  SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
  SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);

  // With PIC, the first instruction is actually "GR+hi(&G)".
  if (isPIC)
    Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
                     DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);

  // Generate non-pic code that has direct accesses to the constant pool.
  // The address of the global is just (hi(&g)+lo(&g)).
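  // On Darwin this typically materializes as something like:
  //   lis rT, ha16(&g)
  //   la  rD, lo16(&g)(rT)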
  return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
}

SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  const Constant *C = CP->getConstVal();

  unsigned MOHiFlag, MOLoFlag;
  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
  SDValue CPIHi =
    DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
  SDValue CPILo =
    DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
  return LowerLabelRef(CPIHi, CPILo, isPIC, DAG);
}

SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);

  unsigned MOHiFlag, MOLoFlag;
  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
  SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
  SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
  return LowerLabelRef(JTIHi, JTILo, isPIC, DAG);
}

SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();

  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();

  unsigned MOHiFlag, MOLoFlag;
  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
  SDValue TgtBAHi = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true, MOHiFlag);
  SDValue TgtBALo = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true, MOLoFlag);
  return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
}

SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  DebugLoc DL = GSDN->getDebugLoc();
  const GlobalValue *GV = GSDN->getGlobal();

  // 64-bit SVR4 ABI code is always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
    return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA,
                       DAG.getRegister(PPC::X2, MVT::i64));
  }

  unsigned MOHiFlag, MOLoFlag;
  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag, GV);

  SDValue GAHi =
    DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
  SDValue GALo =
    DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);

  SDValue Ptr = LowerLabelRef(GAHi, GALo, isPIC, DAG);

  // If the global reference is actually to a non-lazy-pointer, we have to do an
  // extra load to get the address of the global.
  if (MOHiFlag & PPCII::MO_NLP_FLAG)
    Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(),
                      false, false, false, 0);
  return Ptr;
}

SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  DebugLoc dl = Op.getDebugLoc();

  // If we're comparing for equality to zero, expose the fact that this is
  // implemented as a ctlz/srl pair on PPC, so that the dag combiner can
  // fold the new nodes.
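  // For example, for i32: (seteq X, 0) becomes (cntlzw X) >> 5, since cntlzw
  // returns 32 exactly when X == 0 and a value in [0,31] otherwise.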
1265  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
1266    if (C->isNullValue() && CC == ISD::SETEQ) {
1267      EVT VT = Op.getOperand(0).getValueType();
1268      SDValue Zext = Op.getOperand(0);
1269      if (VT.bitsLT(MVT::i32)) {
1270        VT = MVT::i32;
1271        Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
1272      }
1273      unsigned Log2b = Log2_32(VT.getSizeInBits());
1274      SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
1275      SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
1276                                DAG.getConstant(Log2b, MVT::i32));
1277      return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
1278    }
1279    // Leave comparisons against 0 and -1 alone for now, since they're usually
1280    // optimized.  FIXME: revisit this when we can custom lower all setcc
1281    // optimizations.
1282    if (C->isAllOnesValue() || C->isNullValue())
1283      return SDValue();
1284  }
1285
1286  // If we have an integer seteq/setne, turn it into a compare against zero
1287  // by xor'ing the rhs with the lhs, which is faster than setting a
1288  // condition register, reading it back out, and masking the correct bit.  The
1289  // normal approach here uses sub to do this instead of xor.  Using xor exposes
1290  // the result to other bit-twiddling opportunities.
1291  EVT LHSVT = Op.getOperand(0).getValueType();
1292  if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
1293    EVT VT = Op.getValueType();
1294    SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
1295                                Op.getOperand(1));
1296    return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC);
1297  }
1298  return SDValue();
1299}
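// Worked example for the (x == 0) fast path above, with an i32 operand:
//   x == 0:  ctlz(0) == 32, and 32 >> 5 == 1
//   x != 0:  ctlz(x) <  32, so ctlz(x) >> 5 == 0
// so the whole setcc reduces to a cntlzw/srwi pair with no
// condition-register round trip.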
1300
1301SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
1302                                      const PPCSubtarget &Subtarget) const {
1303  SDNode *Node = Op.getNode();
1304  EVT VT = Node->getValueType(0);
1305  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1306  SDValue InChain = Node->getOperand(0);
1307  SDValue VAListPtr = Node->getOperand(1);
1308  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
1309  DebugLoc dl = Node->getDebugLoc();
1310
1311  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
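  // Editor's sketch of the DAG built below, as pseudo-C (field offsets match
  // the va_list layout written by LowerVASTART):
  //   idx = isFloatTy ? ap->fpr : ap->gpr;
  //   if (Ty == i64) idx = (idx + 1) & ~1;       // i64 needs an even GPR pair
  //   regAddr = ap->reg_save_area + idx * (isFloatTy ? 8 : 4)
  //             + (isFloatTy ? 32 : 0);           // FPRs follow the 8 GPRs
  //   addr = (idx < 8) ? regAddr : ap->overflow_arg_area;
  //   write back idx + (Ty == i64 ? 2 : 1);
  //   if (idx >= 8) ap->overflow_arg_area += (isFloatTy ? 8 : 4);
  //   result = *(Ty *)addr;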
1312
1313  // gpr_index
1314  SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
1315                                    VAListPtr, MachinePointerInfo(SV), MVT::i8,
1316                                    false, false, 0);
1317  InChain = GprIndex.getValue(1);
1318
1319  if (VT == MVT::i64) {
1320    // Check if GprIndex is odd; i64 values must start at an even GPR index
1321    SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
1322                                 DAG.getConstant(1, MVT::i32));
1323    SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
1324                                DAG.getConstant(0, MVT::i32), ISD::SETNE);
1325    SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
1326                                          DAG.getConstant(1, MVT::i32));
1327    // Align GprIndex to be even if it isn't
1328    GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
1329                           GprIndex);
1330  }
1331
1332  // fpr index is 1 byte after gpr
1333  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
1334                               DAG.getConstant(1, MVT::i32));
1335
1336  // fpr
1337  SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
1338                                    FprPtr, MachinePointerInfo(SV), MVT::i8,
1339                                    false, false, 0);
1340  InChain = FprIndex.getValue(1);
1341
1342  SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
1343                                       DAG.getConstant(8, MVT::i32));
1344
1345  SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
1346                                        DAG.getConstant(4, MVT::i32));
1347
1348  // areas
1349  SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr,
1350                                     MachinePointerInfo(), false, false,
1351                                     false, 0);
1352  InChain = OverflowArea.getValue(1);
1353
1354  SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr,
1355                                    MachinePointerInfo(), false, false,
1356                                    false, 0);
1357  InChain = RegSaveArea.getValue(1);
1358
1359  // select overflow_area if index >= 8 (CC is true while index < 8)
1360  SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
1361                            DAG.getConstant(8, MVT::i32), ISD::SETLT);
1362
1363  // adjustment constant: gpr_index * 4 (or fpr_index * 8)
1364  SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
1365                                    VT.isInteger() ? GprIndex : FprIndex,
1366                                    DAG.getConstant(VT.isInteger() ? 4 : 8,
1367                                                    MVT::i32));
1368
1369  // OurReg = RegSaveArea + RegConstant
1370  SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
1371                               RegConstant);
1372
1373  // Floating types are 32 bytes into RegSaveArea
1374  if (VT.isFloatingPoint())
1375    OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
1376                         DAG.getConstant(32, MVT::i32));
1377
1378  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
1379  SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
1380                                   VT.isInteger() ? GprIndex : FprIndex,
1381                                   DAG.getConstant(VT == MVT::i64 ? 2 : 1,
1382                                                   MVT::i32));
1383
1384  InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
1385                              VT.isInteger() ? VAListPtr : FprPtr,
1386                              MachinePointerInfo(SV),
1387                              MVT::i8, false, false, 0);
1388
1389  // determine if we should load from reg_save_area or overflow_area
1390  SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
1391
1392  // increase overflow_area by 4/8 if gpr/fpr index >= 8
1393  SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
1394                                          DAG.getConstant(VT.isInteger() ? 4 : 8,
1395                                          MVT::i32));
1396
1397  OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
1398                             OverflowAreaPlusN);
1399
1400  InChain = DAG.getTruncStore(InChain, dl, OverflowArea,
1401                              OverflowAreaPtr,
1402                              MachinePointerInfo(),
1403                              MVT::i32, false, false, 0);
1404
1405  return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(),
1406                     false, false, false, 0);
1407}
1408
1409SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
1410                                                  SelectionDAG &DAG) const {
1411  return Op.getOperand(0);
1412}
1413
1414SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
1415                                                SelectionDAG &DAG) const {
1416  SDValue Chain = Op.getOperand(0);
1417  SDValue Trmp = Op.getOperand(1); // trampoline
1418  SDValue FPtr = Op.getOperand(2); // nested function
1419  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
1420  DebugLoc dl = Op.getDebugLoc();
1421
1422  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1423  bool isPPC64 = (PtrVT == MVT::i64);
1424  Type *IntPtrTy =
1425    DAG.getTargetLoweringInfo().getTargetData()->getIntPtrType(
1426                                                             *DAG.getContext());
1427
1428  TargetLowering::ArgListTy Args;
1429  TargetLowering::ArgListEntry Entry;
1430
1431  Entry.Ty = IntPtrTy;
1432  Entry.Node = Trmp; Args.push_back(Entry);
1433
1434  // TrampSize == (isPPC64 ? 48 : 40);
1435  Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40,
1436                               isPPC64 ? MVT::i64 : MVT::i32);
1437  Args.push_back(Entry);
1438
1439  Entry.Node = FPtr; Args.push_back(Entry);
1440  Entry.Node = Nest; Args.push_back(Entry);
1441
1442  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
1443  TargetLowering::CallLoweringInfo CLI(Chain,
1444                                       Type::getVoidTy(*DAG.getContext()),
1445                                       false, false, false, false, 0,
1446                                       CallingConv::C,
1447                /*isTailCall=*/false,
1448                                       /*doesNotRet=*/false,
1449                                       /*isReturnValueUsed=*/true,
1450                DAG.getExternalSymbol("__trampoline_setup", PtrVT),
1451                Args, DAG, dl);
1452  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
1453
1454  return CallResult.second;
1455}
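// Editor's note: rather than emitting trampoline code inline, this lowers to
// the runtime helper __trampoline_setup(Trmp, TrampSize, FPtr, Nest), which
// fills the 40/48-byte (ppc32/ppc64) buffer with a stub that loads the
// static chain ('nest') value and jumps to the nested function.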
1456
1457SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
1458                                        const PPCSubtarget &Subtarget) const {
1459  MachineFunction &MF = DAG.getMachineFunction();
1460  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
1461
1462  DebugLoc dl = Op.getDebugLoc();
1463
1464  if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
1465    // vastart just stores the address of the VarArgsFrameIndex slot into the
1466    // memory location argument.
1467    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1468    SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
1469    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1470    return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
1471                        MachinePointerInfo(SV),
1472                        false, false, 0);
1473  }
1474
1475  // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
1476  // We assume the given va_list has already been allocated.
1477  //
1478  // typedef struct {
1479  //  char gpr;     /* index into the array of 8 GPRs
1480  //                 * stored in the register save area
1481  //                 * gpr=0 corresponds to r3,
1482  //                 * gpr=1 to r4, etc.
1483  //                 */
1484  //  char fpr;     /* index into the array of 8 FPRs
1485  //                 * stored in the register save area
1486  //                 * fpr=0 corresponds to f1,
1487  //                 * fpr=1 to f2, etc.
1488  //                 */
1489  //  char *overflow_arg_area;
1490  //                /* location on stack that holds
1491  //                 * the next overflow argument
1492  //                 */
1493  //  char *reg_save_area;
1494  //               /* where r3:r10 and f1:f8 (if saved)
1495  //                * are stored
1496  //                */
1497  // } va_list[1];
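  // With 4-byte pointers this puts gpr at byte offset 0, fpr at 1,
  // overflow_arg_area at 4 (after two bytes of padding), and reg_save_area
  // at 8; the FPROffset/StackOffset/FrameOffset constants below walk
  // exactly those deltas.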
1498
1499
1500  SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), MVT::i32);
1501  SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), MVT::i32);
1502
1503
1504  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1505
1506  SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
1507                                            PtrVT);
1508  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
1509                                 PtrVT);
1510
1511  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
1512  SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT);
1513
1514  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
1515  SDValue ConstStackOffset = DAG.getConstant(StackOffset, PtrVT);
1516
1517  uint64_t FPROffset = 1;
1518  SDValue ConstFPROffset = DAG.getConstant(FPROffset, PtrVT);
1519
1520  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1521
1522  // Store first byte : number of int regs
1523  SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR,
1524                                         Op.getOperand(1),
1525                                         MachinePointerInfo(SV),
1526                                         MVT::i8, false, false, 0);
1527  uint64_t nextOffset = FPROffset;
1528  SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
1529                                  ConstFPROffset);
1530
1531  // Store second byte : number of float regs
1532  SDValue secondStore =
1533    DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
1534                      MachinePointerInfo(SV, nextOffset), MVT::i8,
1535                      false, false, 0);
1536  nextOffset += StackOffset;
1537  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
1538
1539  // Store second word : pointer to the overflow (on-stack) argument area
1540  SDValue thirdStore =
1541    DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
1542                 MachinePointerInfo(SV, nextOffset),
1543                 false, false, 0);
1544  nextOffset += FrameOffset;
1545  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
1546
1547  // Store third word : pointer to the register save area
1548  return DAG.getStore(thirdStore, dl, FR, nextPtr,
1549                      MachinePointerInfo(SV, nextOffset),
1550                      false, false, 0);
1551
1552}
1553
1554#include "PPCGenCallingConv.inc"
1555
1556static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
1557                                     CCValAssign::LocInfo &LocInfo,
1558                                     ISD::ArgFlagsTy &ArgFlags,
1559                                     CCState &State) {
1560  return true;
1561}
1562
1563static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
1564                                            MVT &LocVT,
1565                                            CCValAssign::LocInfo &LocInfo,
1566                                            ISD::ArgFlagsTy &ArgFlags,
1567                                            CCState &State) {
1568  static const uint16_t ArgRegs[] = {
1569    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
1570    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
1571  };
1572  const unsigned NumArgRegs = array_lengthof(ArgRegs);
1573
1574  unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
1575
1576  // Skip one register if the first unallocated register has an even register
1577  // number and there are still argument registers available which have not been
1578  // allocated yet. RegNum is actually an index into ArgRegs, which means we
1579  // need to skip a register if RegNum is odd.
1580  if (RegNum != NumArgRegs && RegNum % 2 == 1) {
1581    State.AllocateReg(ArgRegs[RegNum]);
1582  }
1583
1584  // Always return false here, as this function only makes sure that the first
1585  // unallocated register has an odd register number and does not actually
1586  // allocate a register for the current argument.
1587  return false;
1588}
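// Example (editor's sketch): for f(int a, long long b) under 32-bit SVR4,
// a is assigned R3; this hook then burns R4 so that b lands in the R5:R6
// pair, since i64 values must start at an even index into ArgRegs
// (i.e. at R3, R5, R7 or R9).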
1589
1590static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
1591                                              MVT &LocVT,
1592                                              CCValAssign::LocInfo &LocInfo,
1593                                              ISD::ArgFlagsTy &ArgFlags,
1594                                              CCState &State) {
1595  static const uint16_t ArgRegs[] = {
1596    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
1597    PPC::F8
1598  };
1599
1600  const unsigned NumArgRegs = array_lengthof(ArgRegs);
1601
1602  unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
1603
1604  // If there is only one Floating-point register left we need to put both f64
1605  // values of a split ppc_fp128 value on the stack.
1606  if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
1607    State.AllocateReg(ArgRegs[RegNum]);
1608  }
1609
1610  // Always return false here, as this function only makes sure that the two f64
1611  // values a ppc_fp128 value is split into are both passed in registers or both
1612  // passed on the stack and does not actually allocate a register for the
1613  // current argument.
1614  return false;
1615}
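// Example (editor's sketch): if F1..F7 are already taken when a ppc_fp128
// argument arrives, this hook burns F8 so both f64 halves of the value go
// to the stack together instead of splitting across F8 and memory.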
1616
1617  /// GetFPR - Get the set of FP registers that should be allocated for
1618  /// arguments on Darwin.
1619static const uint16_t *GetFPR() {
1620  static const uint16_t FPR[] = {
1621    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
1622    PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
1623  };
1624
1625  return FPR;
1626}
1627
1628/// CalculateStackSlotSize - Calculates the size reserved for this argument on
1629/// the stack.
1630static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
1631                                       unsigned PtrByteSize) {
1632  unsigned ArgSize = ArgVT.getSizeInBits()/8;
1633  if (Flags.isByVal())
1634    ArgSize = Flags.getByValSize();
1635  ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
1636
1637  return ArgSize;
1638}
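// E.g. a 10-byte byval argument with PtrByteSize == 4 reserves
// ((10 + 3) / 4) * 4 == 12 bytes, while a plain i32 reserves 4.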
1639
1640SDValue
1641PPCTargetLowering::LowerFormalArguments(SDValue Chain,
1642                                        CallingConv::ID CallConv, bool isVarArg,
1643                                        const SmallVectorImpl<ISD::InputArg>
1644                                          &Ins,
1645                                        DebugLoc dl, SelectionDAG &DAG,
1646                                        SmallVectorImpl<SDValue> &InVals)
1647                                          const {
1648  if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) {
1649    return LowerFormalArguments_SVR4(Chain, CallConv, isVarArg, Ins,
1650                                     dl, DAG, InVals);
1651  } else {
1652    return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
1653                                       dl, DAG, InVals);
1654  }
1655}
1656
1657SDValue
1658PPCTargetLowering::LowerFormalArguments_SVR4(
1659                                      SDValue Chain,
1660                                      CallingConv::ID CallConv, bool isVarArg,
1661                                      const SmallVectorImpl<ISD::InputArg>
1662                                        &Ins,
1663                                      DebugLoc dl, SelectionDAG &DAG,
1664                                      SmallVectorImpl<SDValue> &InVals) const {
1665
1666  // 32-bit SVR4 ABI Stack Frame Layout:
1667  //              +-----------------------------------+
1668  //        +-->  |            Back chain             |
1669  //        |     +-----------------------------------+
1670  //        |     | Floating-point register save area |
1671  //        |     +-----------------------------------+
1672  //        |     |    General register save area     |
1673  //        |     +-----------------------------------+
1674  //        |     |          CR save word             |
1675  //        |     +-----------------------------------+
1676  //        |     |         VRSAVE save word          |
1677  //        |     +-----------------------------------+
1678  //        |     |         Alignment padding         |
1679  //        |     +-----------------------------------+
1680  //        |     |     Vector register save area     |
1681  //        |     +-----------------------------------+
1682  //        |     |       Local variable space        |
1683  //        |     +-----------------------------------+
1684  //        |     |        Parameter list area        |
1685  //        |     +-----------------------------------+
1686  //        |     |           LR save word            |
1687  //        |     +-----------------------------------+
1688  // SP-->  +---  |            Back chain             |
1689  //              +-----------------------------------+
1690  //
1691  // Specifications:
1692  //   System V Application Binary Interface PowerPC Processor Supplement
1693  //   AltiVec Technology Programming Interface Manual
1694
1695  MachineFunction &MF = DAG.getMachineFunction();
1696  MachineFrameInfo *MFI = MF.getFrameInfo();
1697  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
1698
1699  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1700  // Potential tail calls could cause overwriting of argument stack slots.
1701  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
1702                       (CallConv == CallingConv::Fast));
1703  unsigned PtrByteSize = 4;
1704
1705  // Assign locations to all of the incoming arguments.
1706  SmallVector<CCValAssign, 16> ArgLocs;
1707  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1708                 getTargetMachine(), ArgLocs, *DAG.getContext());
1709
1710  // Reserve space for the linkage area on the stack.
1711  CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
1712
1713  CCInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4);
1714
1715  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1716    CCValAssign &VA = ArgLocs[i];
1717
1718    // Arguments stored in registers.
1719    if (VA.isRegLoc()) {
1720      const TargetRegisterClass *RC;
1721      EVT ValVT = VA.getValVT();
1722
1723      switch (ValVT.getSimpleVT().SimpleTy) {
1724        default:
1725          llvm_unreachable("ValVT not supported by formal arguments Lowering");
1726        case MVT::i32:
1727          RC = &PPC::GPRCRegClass;
1728          break;
1729        case MVT::f32:
1730          RC = &PPC::F4RCRegClass;
1731          break;
1732        case MVT::f64:
1733          RC = &PPC::F8RCRegClass;
1734          break;
1735        case MVT::v16i8:
1736        case MVT::v8i16:
1737        case MVT::v4i32:
1738        case MVT::v4f32:
1739          RC = &PPC::VRRCRegClass;
1740          break;
1741      }
1742
1743      // Transform the arguments stored in physical registers into virtual ones.
1744      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
1745      SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT);
1746
1747      InVals.push_back(ArgValue);
1748    } else {
1749      // Argument stored in memory.
1750      assert(VA.isMemLoc());
1751
1752      unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8;
1753      int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
1754                                      isImmutable);
1755
1756      // Create load nodes to retrieve arguments from the stack.
1757      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1758      InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
1759                                   MachinePointerInfo(),
1760                                   false, false, false, 0));
1761    }
1762  }
1763
1764  // Assign locations to all of the incoming aggregate by value arguments.
1765  // Aggregates passed by value are stored in the local variable space of the
1766  // caller's stack frame, right above the parameter list area.
1767  SmallVector<CCValAssign, 16> ByValArgLocs;
1768  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1769                      getTargetMachine(), ByValArgLocs, *DAG.getContext());
1770
1771  // Reserve stack space for the allocations in CCInfo.
1772  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
1773
1774  CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4_ByVal);
1775
1776  // Area that is at least reserved in the caller of this function.
1777  unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
1778
1779  // Set the size that is at least reserved in the caller of this function.  Tail
1780  // call optimized function's reserved stack space needs to be aligned so that
1781  // taking the difference between two stack areas will result in an aligned
1782  // stack.
1783  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1784
1785  MinReservedArea =
1786    std::max(MinReservedArea,
1787             PPCFrameLowering::getMinCallFrameSize(false, false));
1788
1789  unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
1790    getStackAlignment();
1791  unsigned AlignMask = TargetAlign-1;
1792  MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
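  // E.g. with a 16-byte TargetAlign, AlignMask == 15 and a 70-byte area
  // rounds up to (70 + 15) & ~15 == 80.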
1793
1794  FI->setMinReservedArea(MinReservedArea);
1795
1796  SmallVector<SDValue, 8> MemOps;
1797
1798  // If the function takes variable number of arguments, make a frame index for
1799  // the start of the first vararg value... for expansion of llvm.va_start.
1800  if (isVarArg) {
1801    static const uint16_t GPArgRegs[] = {
1802      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
1803      PPC::R7, PPC::R8, PPC::R9, PPC::R10,
1804    };
1805    const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
1806
1807    static const uint16_t FPArgRegs[] = {
1808      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
1809      PPC::F8
1810    };
1811    const unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
1812
1813    FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs,
1814                                                          NumGPArgRegs));
1815    FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs,
1816                                                          NumFPArgRegs));
1817
1818    // Make room for NumGPArgRegs and NumFPArgRegs.
1819    int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
1820                NumFPArgRegs * EVT(MVT::f64).getSizeInBits()/8;
1821
1822    FuncInfo->setVarArgsStackOffset(
1823      MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
1824                             CCInfo.getNextStackOffset(), true));
1825
1826    FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false));
1827    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
1828
1829    // The fixed integer arguments of a variadic function are stored to the
1830    // VarArgsFrameIndex on the stack so that they may be loaded by dereferencing
1831    // the result of va_next.
1832    for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
1833      // Get an existing live-in vreg, or add a new one.
1834      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
1835      if (!VReg)
1836        VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
1837
1838      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
1839      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
1840                                   MachinePointerInfo(), false, false, 0);
1841      MemOps.push_back(Store);
1842      // Increment the address by four for the next argument to store
1843      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
1844      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
1845    }
1846
1847    // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
1848    // is set.
1849    // The double arguments are stored to the VarArgsFrameIndex
1850    // on the stack.
1851    for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
1852      // Get an existing live-in vreg, or add a new one.
1853      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
1854      if (!VReg)
1855        VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
1856
1857      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
1858      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
1859                                   MachinePointerInfo(), false, false, 0);
1860      MemOps.push_back(Store);
1861      // Increment the address by eight for the next argument to store
1862      SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8,
1863                                         PtrVT);
1864      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
1865    }
1866  }
1867
1868  if (!MemOps.empty())
1869    Chain = DAG.getNode(ISD::TokenFactor, dl,
1870                        MVT::Other, &MemOps[0], MemOps.size());
1871
1872  return Chain;
1873}
1874
1875SDValue
1876PPCTargetLowering::LowerFormalArguments_Darwin(
1877                                      SDValue Chain,
1878                                      CallingConv::ID CallConv, bool isVarArg,
1879                                      const SmallVectorImpl<ISD::InputArg>
1880                                        &Ins,
1881                                      DebugLoc dl, SelectionDAG &DAG,
1882                                      SmallVectorImpl<SDValue> &InVals) const {
1883  // TODO: add description of PPC stack frame format, or at least some docs.
1884  //
1885  MachineFunction &MF = DAG.getMachineFunction();
1886  MachineFrameInfo *MFI = MF.getFrameInfo();
1887  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
1888
1889  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1890  bool isPPC64 = PtrVT == MVT::i64;
1891  // Potential tail calls could cause overwriting of argument stack slots.
1892  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
1893                       (CallConv == CallingConv::Fast));
1894  unsigned PtrByteSize = isPPC64 ? 8 : 4;
1895
1896  unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
1897  // Area that is at least reserved in the caller of this function.
1898  unsigned MinReservedArea = ArgOffset;
1899
1900  static const uint16_t GPR_32[] = {           // 32-bit registers.
1901    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
1902    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
1903  };
1904  static const uint16_t GPR_64[] = {           // 64-bit registers.
1905    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
1906    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
1907  };
1908
1909  static const uint16_t *FPR = GetFPR();
1910
1911  static const uint16_t VR[] = {
1912    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
1913    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
1914  };
1915
1916  const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
1917  const unsigned Num_FPR_Regs = 13;
1918  const unsigned Num_VR_Regs  = array_lengthof( VR);
1919
1920  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
1921
1922  const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32;
1923
1924  // In 32-bit non-varargs functions, the stack space for vectors is after the
1925  // stack space for non-vectors.  We do not use this space unless we have
1926  // too many vectors to fit in registers, something that only occurs in
1927  // constructed examples:), but we have to walk the arglist to figure
1928  // that out...for the pathological case, compute VecArgOffset as the
1929  // start of the vector parameter area.  Computing VecArgOffset is the
1930  // entire point of the following loop.
1931  unsigned VecArgOffset = ArgOffset;
1932  if (!isVarArg && !isPPC64) {
1933    for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
1934         ++ArgNo) {
1935      EVT ObjectVT = Ins[ArgNo].VT;
1936      ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
1937
1938      if (Flags.isByVal()) {
1939        // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of regs.
1940        unsigned ObjSize = Flags.getByValSize();
1941        unsigned ArgSize =
1942                ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
1943        VecArgOffset += ArgSize;
1944        continue;
1945      }
1946
1947      switch(ObjectVT.getSimpleVT().SimpleTy) {
1948      default: llvm_unreachable("Unhandled argument type!");
1949      case MVT::i32:
1950      case MVT::f32:
1951        VecArgOffset += isPPC64 ? 8 : 4;
1952        break;
1953      case MVT::i64:  // PPC64
1954      case MVT::f64:
1955        VecArgOffset += 8;
1956        break;
1957      case MVT::v4f32:
1958      case MVT::v4i32:
1959      case MVT::v8i16:
1960      case MVT::v16i8:
1961        // Nothing to do, we're only looking at Nonvector args here.
1962        break;
1963      }
1964    }
1965  }
1966  // We've found where the vector parameter area in memory is.  Skip the
1967  // first 12 vector parameters; those go in V2..V13 and don't use that memory.
1968  VecArgOffset = ((VecArgOffset+15)/16)*16;
1969  VecArgOffset += 12*16;
1970
1971  // Add DAG nodes to load the arguments or copy them out of registers.  On
1972  // entry to a function on PPC, the arguments start after the linkage area,
1973  // although the first ones are often in registers.
1974
1975  SmallVector<SDValue, 8> MemOps;
1976  unsigned nAltivecParamsAtEnd = 0;
1977  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
1978    SDValue ArgVal;
1979    bool needsLoad = false;
1980    EVT ObjectVT = Ins[ArgNo].VT;
1981    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
1982    unsigned ArgSize = ObjSize;
1983    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
1984
1985    unsigned CurArgOffset = ArgOffset;
1986
1987    // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
1988    if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
1989        ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
1990      if (isVarArg || isPPC64) {
1991        MinReservedArea = ((MinReservedArea+15)/16)*16;
1992        MinReservedArea += CalculateStackSlotSize(ObjectVT,
1993                                                  Flags,
1994                                                  PtrByteSize);
1995      } else  nAltivecParamsAtEnd++;
1996    } else
1997      // Calculate min reserved area.
1998      MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
1999                                                Flags,
2000                                                PtrByteSize);
2001
2002    // FIXME the codegen can be much improved in some cases.
2003    // We do not have to keep everything in memory.
2004    if (Flags.isByVal()) {
2005      // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of registers.
2006      ObjSize = Flags.getByValSize();
2007      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
2008      // Objects of size 1 and 2 are right justified, everything else is
2009      // left justified.  This means the memory address is adjusted forwards.
2010      if (ObjSize==1 || ObjSize==2) {
2011        CurArgOffset = CurArgOffset + (4 - ObjSize);
2012      }
2013      // The value of the object is its address.
2014      int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
2015      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2016      InVals.push_back(FIN);
2017      if (ObjSize==1 || ObjSize==2) {
2018        if (GPR_idx != Num_GPR_Regs) {
2019          unsigned VReg;
2020          if (isPPC64)
2021            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
2022          else
2023            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
2024          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
2025          SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
2026                                            MachinePointerInfo(),
2027                                            ObjSize==1 ? MVT::i8 : MVT::i16,
2028                                            false, false, 0);
2029          MemOps.push_back(Store);
2030          ++GPR_idx;
2031        }
2032
2033        ArgOffset += PtrByteSize;
2034
2035        continue;
2036      }
2037      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
2038        // Store whatever pieces of the object are in registers
2039        // to memory.  ArgVal will be address of the beginning of
2040        // the object.
2041        if (GPR_idx != Num_GPR_Regs) {
2042          unsigned VReg;
2043          if (isPPC64)
2044            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
2045          else
2046            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
2047          int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
2048          SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2049          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
2050          SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
2051                                       MachinePointerInfo(),
2052                                       false, false, 0);
2053          MemOps.push_back(Store);
2054          ++GPR_idx;
2055          ArgOffset += PtrByteSize;
2056        } else {
2057          ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
2058          break;
2059        }
2060      }
2061      continue;
2062    }
2063
2064    switch (ObjectVT.getSimpleVT().SimpleTy) {
2065    default: llvm_unreachable("Unhandled argument type!");
2066    case MVT::i32:
2067      if (!isPPC64) {
2068        if (GPR_idx != Num_GPR_Regs) {
2069          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
2070          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
2071          ++GPR_idx;
2072        } else {
2073          needsLoad = true;
2074          ArgSize = PtrByteSize;
2075        }
2076        // All int arguments reserve stack space in the Darwin ABI.
2077        ArgOffset += PtrByteSize;
2078        break;
2079      }
2080      // FALLTHROUGH
2081    case MVT::i64:  // PPC64
2082      if (GPR_idx != Num_GPR_Regs) {
2083        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
2084        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
2085
2086        if (ObjectVT == MVT::i32) {
2087          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
2088          // value to MVT::i64 and then truncate to the correct register size.
2089          if (Flags.isSExt())
2090            ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
2091                                 DAG.getValueType(ObjectVT));
2092          else if (Flags.isZExt())
2093            ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
2094                                 DAG.getValueType(ObjectVT));
2095
2096          ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
2097        }
2098
2099        ++GPR_idx;
2100      } else {
2101        needsLoad = true;
2102        ArgSize = PtrByteSize;
2103      }
2104      // All int arguments reserve stack space in the Darwin ABI.
2105      ArgOffset += 8;
2106      break;
2107
2108    case MVT::f32:
2109    case MVT::f64:
2110      // Every 4 bytes of argument space consumes one of the GPRs available for
2111      // argument passing.
2112      if (GPR_idx != Num_GPR_Regs) {
2113        ++GPR_idx;
2114        if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
2115          ++GPR_idx;
2116      }
2117      if (FPR_idx != Num_FPR_Regs) {
2118        unsigned VReg;
2119
2120        if (ObjectVT == MVT::f32)
2121          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
2122        else
2123          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
2124
2125        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
2126        ++FPR_idx;
2127      } else {
2128        needsLoad = true;
2129      }
2130
2131      // All FP arguments reserve stack space in the Darwin ABI.
2132      ArgOffset += isPPC64 ? 8 : ObjSize;
2133      break;
2134    case MVT::v4f32:
2135    case MVT::v4i32:
2136    case MVT::v8i16:
2137    case MVT::v16i8:
2138      // Note that vector arguments in registers don't reserve stack space,
2139      // except in varargs functions.
2140      if (VR_idx != Num_VR_Regs) {
2141        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
2142        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
2143        if (isVarArg) {
2144          while ((ArgOffset % 16) != 0) {
2145            ArgOffset += PtrByteSize;
2146            if (GPR_idx != Num_GPR_Regs)
2147              GPR_idx++;
2148          }
2149          ArgOffset += 16;
2150          GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
2151        }
2152        ++VR_idx;
2153      } else {
2154        if (!isVarArg && !isPPC64) {
2155          // Vectors go after all the nonvectors.
2156          CurArgOffset = VecArgOffset;
2157          VecArgOffset += 16;
2158        } else {
2159          // Vectors are aligned.
2160          ArgOffset = ((ArgOffset+15)/16)*16;
2161          CurArgOffset = ArgOffset;
2162          ArgOffset += 16;
2163        }
2164        needsLoad = true;
2165      }
2166      break;
2167    }
2168
2169    // We need to load the argument to a virtual register if we determined above
2170    // that we ran out of physical registers of the appropriate type.
2171    if (needsLoad) {
2172      int FI = MFI->CreateFixedObject(ObjSize,
2173                                      CurArgOffset + (ArgSize - ObjSize),
2174                                      isImmutable);
2175      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2176      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
2177                           false, false, false, 0);
2178    }
2179
2180    InVals.push_back(ArgVal);
2181  }
2182
2183  // Set the size that is at least reserved in the caller of this function.  Tail
2184  // call optimized function's reserved stack space needs to be aligned so that
2185  // taking the difference between two stack areas will result in an aligned
2186  // stack.
2187  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
2188  // Add the Altivec parameters at the end, if needed.
2189  if (nAltivecParamsAtEnd) {
2190    MinReservedArea = ((MinReservedArea+15)/16)*16;
2191    MinReservedArea += 16*nAltivecParamsAtEnd;
2192  }
2193  MinReservedArea =
2194    std::max(MinReservedArea,
2195             PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
2196  unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
2197    getStackAlignment();
2198  unsigned AlignMask = TargetAlign-1;
2199  MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
2200  FI->setMinReservedArea(MinReservedArea);
2201
2202  // If the function takes variable number of arguments, make a frame index for
2203  // the start of the first vararg value... for expansion of llvm.va_start.
2204  if (isVarArg) {
2205    int Depth = ArgOffset;
2206
2207    FuncInfo->setVarArgsFrameIndex(
2208      MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
2209                             Depth, true));
2210    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
2211
2212    // If this function is vararg, store any remaining integer argument regs
2213    // to their spots on the stack so that they may be loaded by dereferencing the
2214    // result of va_next.
2215    for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
2216      unsigned VReg;
2217
2218      if (isPPC64)
2219        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
2220      else
2221        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
2222
2223      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
2224      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
2225                                   MachinePointerInfo(), false, false, 0);
2226      MemOps.push_back(Store);
2227      // Increment the address by four for the next argument to store
2228      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
2229      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
2230    }
2231  }
2232
2233  if (!MemOps.empty())
2234    Chain = DAG.getNode(ISD::TokenFactor, dl,
2235                        MVT::Other, &MemOps[0], MemOps.size());
2236
2237  return Chain;
2238}
2239
2240  /// CalculateParameterAndLinkageAreaSize - Get the size of the parameter plus
2241/// linkage area for the Darwin ABI.
2242static unsigned
2243CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
2244                                     bool isPPC64,
2245                                     bool isVarArg,
2246                                     unsigned CC,
2247                                     const SmallVectorImpl<ISD::OutputArg>
2248                                       &Outs,
2249                                     const SmallVectorImpl<SDValue> &OutVals,
2250                                     unsigned &nAltivecParamsAtEnd) {
2251  // Count how many bytes are to be pushed on the stack, including the linkage
2252  // area, and parameter passing area.  We start with 24/48 bytes, which is
2253  // prereserved space for [SP][CR][LR][3 x unused].
2254  unsigned NumBytes = PPCFrameLowering::getLinkageSize(isPPC64, true);
2255  unsigned NumOps = Outs.size();
2256  unsigned PtrByteSize = isPPC64 ? 8 : 4;
2257
2258  // Add up all the space actually used.
2259  // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
2260  // they all go in registers, but we must reserve stack space for them for
2261  // possible use by the caller.  In varargs or 64-bit calls, parameters are
2262  // assigned stack space in order, with padding so Altivec parameters are
2263  // 16-byte aligned.
2264  nAltivecParamsAtEnd = 0;
2265  for (unsigned i = 0; i != NumOps; ++i) {
2266    ISD::ArgFlagsTy Flags = Outs[i].Flags;
2267    EVT ArgVT = Outs[i].VT;
2268    // Varargs Altivec parameters are padded to a 16 byte boundary.
2269    if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||
2270        ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) {
2271      if (!isVarArg && !isPPC64) {
2272        // Non-varargs Altivec parameters go after all the non-Altivec
2273        // parameters; handle those later so we know how much padding we need.
2274        nAltivecParamsAtEnd++;
2275        continue;
2276      }
2277      // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
2278      NumBytes = ((NumBytes+15)/16)*16;
2279    }
2280    NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
2281  }
2282
2283  // Allow for Altivec parameters at the end, if needed.
2284  if (nAltivecParamsAtEnd) {
2285    NumBytes = ((NumBytes+15)/16)*16;
2286    NumBytes += 16*nAltivecParamsAtEnd;
2287  }
2288
2289  // The prolog code of the callee may store up to 8 GPR argument registers to
2290  // the stack, allowing va_start to index over them in memory if it is varargs.
2291  // Because we cannot tell if this is needed on the caller side, we have to
2292  // conservatively assume that it is needed.  As such, make sure we have at
2293  // least enough stack space for the caller to store the 8 GPRs.
2294  NumBytes = std::max(NumBytes,
2295                      PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
2296
2297  // Tail call needs the stack to be aligned.
2298  if (CC == CallingConv::Fast && DAG.getTarget().Options.GuaranteedTailCallOpt){
2299    unsigned TargetAlign = DAG.getMachineFunction().getTarget().
2300      getFrameLowering()->getStackAlignment();
2301    unsigned AlignMask = TargetAlign-1;
2302    NumBytes = (NumBytes + AlignMask) & ~AlignMask;
2303  }
2304
2305  return NumBytes;
2306}
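// Worked example (editor's sketch): for a 32-bit Darwin call f(int, double),
// NumBytes starts at the 24-byte linkage area and grows by 4 + 8 bytes of
// parameters to 36; assuming getMinCallFrameSize covers the linkage area
// plus the eight GPR slots (24 + 32 == 56), the final answer is 56.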
2307
2308/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
2309/// adjusted to accommodate the arguments for the tailcall.
2310static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
2311                                   unsigned ParamSize) {
2312
2313  if (!isTailCall) return 0;
2314
2315  PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
2316  unsigned CallerMinReservedArea = FI->getMinReservedArea();
2317  int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
2318  // Remember only if the new adjustment is bigger.
2319  if (SPDiff < FI->getTailCallSPDelta())
2320    FI->setTailCallSPDelta(SPDiff);
2321
2322  return SPDiff;
2323}
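// E.g. a caller whose MinReservedArea is 64 tail-calling with an 80-byte
// ParamSize yields SPDiff == -16: the stack must grow by 16 bytes, and the
// most negative delta seen so far is kept via setTailCallSPDelta.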
2324
2325/// IsEligibleForTailCallOptimization - Check whether the call is eligible
2326/// for tail call optimization. Targets which want to do tail call
2327/// optimization should implement this function.
2328bool
2329PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
2330                                                     CallingConv::ID CalleeCC,
2331                                                     bool isVarArg,
2332                                      const SmallVectorImpl<ISD::InputArg> &Ins,
2333                                                     SelectionDAG& DAG) const {
2334  if (!getTargetMachine().Options.GuaranteedTailCallOpt)
2335    return false;
2336
2337  // Variable argument functions are not supported.
2338  if (isVarArg)
2339    return false;
2340
2341  MachineFunction &MF = DAG.getMachineFunction();
2342  CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
2343  if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
2344    // Functions containing by val parameters are not supported.
2345    for (unsigned i = 0; i != Ins.size(); i++) {
2346       ISD::ArgFlagsTy Flags = Ins[i].Flags;
2347       if (Flags.isByVal()) return false;
2348    }
2349
2350    // Non-PIC/GOT tail calls are supported.
2351    if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
2352      return true;
2353
2354    // At the moment we can only do local tail calls (in same module, hidden
2355    // or protected) if we are generating PIC.
2356    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2357      return G->getGlobal()->hasHiddenVisibility()
2358          || G->getGlobal()->hasProtectedVisibility();
2359  }
2360
2361  return false;
2362}
2363
2364  /// isBLACompatibleAddress - Return the immediate to use if the specified
2365/// 32-bit value is representable in the immediate field of a BxA instruction.
2366static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
2367  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
2368  if (!C) return 0;
2369
2370  int Addr = C->getZExtValue();
2371  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
2372      (Addr << 6 >> 6) != Addr)
2373    return 0;  // Top 6 bits have to be sext of immediate.
2374
2375  return DAG.getConstant((int)C->getZExtValue() >> 2,
2376                         DAG.getTargetLoweringInfo().getPointerTy()).getNode();
2377}
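// Example: a callee at absolute address 0x2000 passes both checks (4-byte
// aligned, sign-extends from bit 25) and yields the immediate 0x800;
// anything outside +/-32MB is rejected, matching the 26-bit absolute
// address field of a bla instruction.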
2378
2379namespace {
2380
2381struct TailCallArgumentInfo {
2382  SDValue Arg;
2383  SDValue FrameIdxOp;
2384  int       FrameIdx;
2385
2386  TailCallArgumentInfo() : FrameIdx(0) {}
2387};
2388
2389}
2390
2391/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
2392static void
2393StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
2394                                           SDValue Chain,
2395                   const SmallVector<TailCallArgumentInfo, 8> &TailCallArgs,
2396                   SmallVector<SDValue, 8> &MemOpChains,
2397                   DebugLoc dl) {
2398  for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
2399    SDValue Arg = TailCallArgs[i].Arg;
2400    SDValue FIN = TailCallArgs[i].FrameIdxOp;
2401    int FI = TailCallArgs[i].FrameIdx;
2402    // Store relative to framepointer.
2403    MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN,
2404                                       MachinePointerInfo::getFixedStack(FI),
2405                                       false, false, 0));
2406  }
2407}
2408
2409/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
2410/// the appropriate stack slot for the tail call optimized function call.
2411static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
2412                                               MachineFunction &MF,
2413                                               SDValue Chain,
2414                                               SDValue OldRetAddr,
2415                                               SDValue OldFP,
2416                                               int SPDiff,
2417                                               bool isPPC64,
2418                                               bool isDarwinABI,
2419                                               DebugLoc dl) {
2420  if (SPDiff) {
2421    // Calculate the new stack slot for the return address.
2422    int SlotSize = isPPC64 ? 8 : 4;
2423    int NewRetAddrLoc = SPDiff + PPCFrameLowering::getReturnSaveOffset(isPPC64,
2424                                                                   isDarwinABI);
2425    int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
2426                                                          NewRetAddrLoc, true);
2427    EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
2428    SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
2429    Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
2430                         MachinePointerInfo::getFixedStack(NewRetAddr),
2431                         false, false, 0);
2432
2433    // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
2434    // slot as the FP is never overwritten.
2435    if (isDarwinABI) {
2436      int NewFPLoc =
2437        SPDiff + PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
2438      int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc,
2439                                                          true);
2440      SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
2441      Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
2442                           MachinePointerInfo::getFixedStack(NewFPIdx),
2443                           false, false, 0);
2444    }
2445  }
2446  return Chain;
2447}
2448
2449  /// CalculateTailCallArgDest - Remember the argument for later processing and
2450  /// calculate the position of the argument.
2451static void
2452CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
2453                         SDValue Arg, int SPDiff, unsigned ArgOffset,
2454                      SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) {
2455  int Offset = ArgOffset + SPDiff;
2456  uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
2457  int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
2458  EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
2459  SDValue FIN = DAG.getFrameIndex(FI, VT);
2460  TailCallArgumentInfo Info;
2461  Info.Arg = Arg;
2462  Info.FrameIdxOp = FIN;
2463  Info.FrameIdx = FI;
2464  TailCallArguments.push_back(Info);
2465}
2466
2467  /// EmitTailCallLoadFPAndRetAddr - Emit loads of the return address and frame
2468  /// pointer from their stack slots. Returns the chain as result and the loaded
2469  /// values in LROpOut/FPOpOut. Used when tail calling.
2470SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
2471                                                        int SPDiff,
2472                                                        SDValue Chain,
2473                                                        SDValue &LROpOut,
2474                                                        SDValue &FPOpOut,
2475                                                        bool isDarwinABI,
2476                                                        DebugLoc dl) const {
2477  if (SPDiff) {
2478    // Load the LR and FP stack slot for later adjusting.
2479    EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
2480    LROpOut = getReturnAddrFrameIndex(DAG);
2481    LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(),
2482                          false, false, false, 0);
2483    Chain = SDValue(LROpOut.getNode(), 1);
2484
2485    // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
2486    // slot as the FP is never overwritten.
2487    if (isDarwinABI) {
2488      FPOpOut = getFramePointerFrameIndex(DAG);
2489      FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(),
2490                            false, false, false, 0);
2491      Chain = SDValue(FPOpOut.getNode(), 1);
2492    }
2493  }
2494  return Chain;
2495}
2496
2497  /// CreateCopyOfByValArgument - Make a copy of an aggregate at the address
2498  /// specified by "Src" to address "Dst".  The size and alignment are taken
2499  /// from the byval parameter attribute (Flags). The copy will be passed as
2500/// a byval function parameter.
2501/// Sometimes what we are copying is the end of a larger object, the part that
2502/// does not fit in registers.
2503static SDValue
2504CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
2505                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
2506                          DebugLoc dl) {
2507  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
2508  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
2509                       false, false, MachinePointerInfo(0),
2510                       MachinePointerInfo(0));
2511}
2512
2513/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
2514/// tail calls.
2515static void
2516LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
2517                 SDValue Arg, SDValue PtrOff, int SPDiff,
2518                 unsigned ArgOffset, bool isPPC64, bool isTailCall,
2519                 bool isVector, SmallVector<SDValue, 8> &MemOpChains,
2520                 SmallVector<TailCallArgumentInfo, 8> &TailCallArguments,
2521                 DebugLoc dl) {
2522  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2523  if (!isTailCall) {
2524    if (isVector) {
2525      SDValue StackPtr;
2526      if (isPPC64)
2527        StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
2528      else
2529        StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
2530      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
2531                           DAG.getConstant(ArgOffset, PtrVT));
2532    }
2533    MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
2534                                       MachinePointerInfo(), false, false, 0));
2535  // Calculate and remember argument location.
2536  } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
2537                                  TailCallArguments);
2538}
2539
2540static
2541void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
2542                     DebugLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
2543                     SDValue LROp, SDValue FPOp, bool isDarwinABI,
2544                     SmallVector<TailCallArgumentInfo, 8> &TailCallArguments) {
2545  MachineFunction &MF = DAG.getMachineFunction();
2546
2547  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
2548  // might overwrite each other in case of tail call optimization.
2549  SmallVector<SDValue, 8> MemOpChains2;
2550  // Do not glue the preceding copytoreg nodes together with the stores below.
2551  InFlag = SDValue();
2552  StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
2553                                    MemOpChains2, dl);
2554  if (!MemOpChains2.empty())
2555    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
2556                        &MemOpChains2[0], MemOpChains2.size());
2557
2558  // Store the return address to the appropriate stack slot.
2559  Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
2560                                        isPPC64, isDarwinABI, dl);
2561
2562  // Emit callseq_end just before tailcall node.
2563  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
2564                             DAG.getIntPtrConstant(0, true), InFlag);
2565  InFlag = Chain.getValue(1);
2566}
2567
2568static
2569unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
2570                     SDValue &Chain, DebugLoc dl, int SPDiff, bool isTailCall,
2571                     SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
2572                     SmallVector<SDValue, 8> &Ops, std::vector<EVT> &NodeTys,
2573                     const PPCSubtarget &PPCSubTarget) {
2574
2575  bool isPPC64 = PPCSubTarget.isPPC64();
2576  bool isSVR4ABI = PPCSubTarget.isSVR4ABI();
2577
2578  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2579  NodeTys.push_back(MVT::Other);   // Returns a chain
2580  NodeTys.push_back(MVT::Glue);    // Returns a flag for retval copy to use.
2581
2582  unsigned CallOpc = isSVR4ABI ? PPCISD::CALL_SVR4 : PPCISD::CALL_Darwin;
2583
2584  bool needIndirectCall = true;
2585  if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
2586    // If this is an absolute destination address, use the munged value.
2587    Callee = SDValue(Dest, 0);
2588    needIndirectCall = false;
2589  }
2590
2591  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2592    // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201
2593    // Use indirect calls for ALL function calls in JIT mode, since the
2594    // far-call stubs may be outside relocation limits for a BL instruction.
2595    if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) {
2596      unsigned OpFlags = 0;
2597      if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
2598          (PPCSubTarget.getTargetTriple().isMacOSX() &&
2599           PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
2600          (G->getGlobal()->isDeclaration() ||
2601           G->getGlobal()->isWeakForLinker())) {
2602        // PC-relative references to external symbols should go through $stub,
2603        // unless we're building with the leopard linker or later, which
2604        // automatically synthesizes these stubs.
2605        OpFlags = PPCII::MO_DARWIN_STUB;
2606      }
2607
2608      // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
2609      // every direct call is) turn it into a TargetGlobalAddress /
2610      // TargetExternalSymbol node so that legalize doesn't hack it.
2611      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
2612                                          Callee.getValueType(),
2613                                          0, OpFlags);
2614      needIndirectCall = false;
2615    }
2616  }
2617
2618  if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2619    unsigned char OpFlags = 0;
2620
2621    if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
2622        (PPCSubTarget.getTargetTriple().isMacOSX() &&
2623         PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5))) {
2624      // PC-relative references to external symbols should go through $stub,
2625      // unless we're building with the leopard linker or later, which
2626      // automatically synthesizes these stubs.
2627      OpFlags = PPCII::MO_DARWIN_STUB;
2628    }
2629
2630    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
2631                                         OpFlags);
2632    needIndirectCall = false;
2633  }
2634
2635  if (needIndirectCall) {
2636    // Otherwise, this is an indirect call.  We have to use a MTCTR/BCTRL pair
2637    // to do the call; we can't use PPCISD::CALL.
2638    SDValue MTCTROps[] = {Chain, Callee, InFlag};
2639
2640    if (isSVR4ABI && isPPC64) {
2641      // Function pointers in the 64-bit SVR4 ABI do not point to the function
2642      // entry point, but to the function descriptor (the function entry point
2643      // address is part of the function descriptor though).
2644      // The function descriptor is a three doubleword structure with the
2645      // following fields: function entry point, TOC base address and
2646      // environment pointer.
2647      // Thus for a call through a function pointer, the following actions need
2648      // to be performed:
2649      //   1. Save the TOC of the caller in the TOC save area of its stack
2650      //      frame (this is done in LowerCall_Darwin()).
2651      //   2. Load the address of the function entry point from the function
2652      //      descriptor.
2653      //   3. Load the TOC of the callee from the function descriptor into r2.
2654      //   4. Load the environment pointer from the function descriptor into
2655      //      r11.
2656      //   5. Branch to the function entry point address.
2657      //   6. On return of the callee, the TOC of the caller needs to be
2658      //      restored (this is done in FinishCall()).
2659      //
2660      // All those operations are flagged together to ensure that no other
2661      // operations can be scheduled in between. E.g. without flagging the
2662      // operations together, a TOC access in the caller could be scheduled
2663      // between the load of the callee TOC and the branch to the callee, which
2664      // results in the TOC access going through the TOC of the callee instead
2665      // of going through the TOC of the caller, which leads to incorrect code.
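      //
      // As an illustrative sketch only (the field names are hypothetical, but
      // the layout follows the three-doubleword description above), the
      // descriptor that a 64-bit SVR4 function pointer refers to looks like:
      //
      //   struct FunctionDescriptor {
      //     void *EntryPoint;     // offset 0:  loaded into CTR below
      //     void *TOCBase;        // offset 8:  loaded into r2 below
      //     void *EnvironmentPtr; // offset 16: loaded into r11 below
      //   };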
2666
2667      // Load the address of the function entry point from the function
2668      // descriptor.
2669      SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Glue);
2670      SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, MTCTROps,
2671                                        InFlag.getNode() ? 3 : 2);
2672      Chain = LoadFuncPtr.getValue(1);
2673      InFlag = LoadFuncPtr.getValue(2);
2674
2675      // Load environment pointer into r11.
2676      // Offset of the environment pointer within the function descriptor.
2677      SDValue PtrOff = DAG.getIntPtrConstant(16);
2678
2679      SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
2680      SDValue LoadEnvPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, Chain, AddPtr,
2681                                       InFlag);
2682      Chain = LoadEnvPtr.getValue(1);
2683      InFlag = LoadEnvPtr.getValue(2);
2684
2685      SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
2686                                        InFlag);
2687      Chain = EnvVal.getValue(0);
2688      InFlag = EnvVal.getValue(1);
2689
2690      // Load TOC of the callee into r2. We are using a target-specific load
2691      // with r2 hard coded, because the result of a target-independent load
2692      // would never go directly into r2, since r2 is a reserved register (which
2693      // prevents the register allocator from allocating it), resulting in an
2694      // additional register being allocated and an unnecessary move instruction
2695      // being generated.
2696      VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2697      SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain,
2698                                       Callee, InFlag);
2699      Chain = LoadTOCPtr.getValue(0);
2700      InFlag = LoadTOCPtr.getValue(1);
2701
2702      MTCTROps[0] = Chain;
2703      MTCTROps[1] = LoadFuncPtr;
2704      MTCTROps[2] = InFlag;
2705    }
2706
2707    Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps,
2708                        2 + (InFlag.getNode() != 0));
2709    InFlag = Chain.getValue(1);
2710
2711    NodeTys.clear();
2712    NodeTys.push_back(MVT::Other);
2713    NodeTys.push_back(MVT::Glue);
2714    Ops.push_back(Chain);
2715    CallOpc = isSVR4ABI ? PPCISD::BCTRL_SVR4 : PPCISD::BCTRL_Darwin;
2716    Callee.setNode(0);
2717    // Add CTR register as callee so a bctr can be emitted later.
2718    if (isTailCall)
2719      Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
2720  }
2721
2722  // If this is a direct call, pass the chain and the callee.
2723  if (Callee.getNode()) {
2724    Ops.push_back(Chain);
2725    Ops.push_back(Callee);
2726  }
2727  // If this is a tail call add stack pointer delta.
2728  if (isTailCall)
2729    Ops.push_back(DAG.getConstant(SPDiff, MVT::i32));
2730
2731  // Add argument registers to the end of the list so that they are known live
2732  // into the call.
2733  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2734    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2735                                  RegsToPass[i].second.getValueType()));
2736
2737  return CallOpc;
2738}
2739
2740SDValue
2741PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
2742                                   CallingConv::ID CallConv, bool isVarArg,
2743                                   const SmallVectorImpl<ISD::InputArg> &Ins,
2744                                   DebugLoc dl, SelectionDAG &DAG,
2745                                   SmallVectorImpl<SDValue> &InVals) const {
2746
2747  SmallVector<CCValAssign, 16> RVLocs;
2748  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
2749                    getTargetMachine(), RVLocs, *DAG.getContext());
2750  CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);
2751
2752  // Copy all of the result registers out of their specified physreg.
2753  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
2754    CCValAssign &VA = RVLocs[i];
2755    EVT VT = VA.getValVT();
2756    assert(VA.isRegLoc() && "Can only return in registers!");
2757    Chain = DAG.getCopyFromReg(Chain, dl,
2758                               VA.getLocReg(), VT, InFlag).getValue(1);
2759    InVals.push_back(Chain.getValue(0));
2760    InFlag = Chain.getValue(2);
2761  }
2762
2763  return Chain;
2764}
2765
2766SDValue
2767PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
2768                              bool isTailCall, bool isVarArg,
2769                              SelectionDAG &DAG,
2770                              SmallVector<std::pair<unsigned, SDValue>, 8>
2771                                &RegsToPass,
2772                              SDValue InFlag, SDValue Chain,
2773                              SDValue &Callee,
2774                              int SPDiff, unsigned NumBytes,
2775                              const SmallVectorImpl<ISD::InputArg> &Ins,
2776                              SmallVectorImpl<SDValue> &InVals) const {
2777  std::vector<EVT> NodeTys;
2778  SmallVector<SDValue, 8> Ops;
2779  unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
2780                                 isTailCall, RegsToPass, Ops, NodeTys,
2781                                 PPCSubTarget);
2782
2783  // When performing tail call optimization the callee pops its arguments off
2784  // the stack. Account for this here so these bytes can be pushed back on in
2785  // PPCRegisterInfo::eliminateCallFramePseudoInstr.
2786  int BytesCalleePops =
2787    (CallConv == CallingConv::Fast &&
2788     getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
2789
2790  // Add a register mask operand representing the call-preserved registers.
2791  const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
2792  const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
2793  assert(Mask && "Missing call preserved mask for calling convention");
2794  Ops.push_back(DAG.getRegisterMask(Mask));
2795
2796  if (InFlag.getNode())
2797    Ops.push_back(InFlag);
2798
2799  // Emit tail call.
2800  if (isTailCall) {
2801    // If this is the first return lowered for this function, add the regs
2802    // to the liveout set for the function.
2803    if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
2804      SmallVector<CCValAssign, 16> RVLocs;
2805      CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
2806                     getTargetMachine(), RVLocs, *DAG.getContext());
2807      CCInfo.AnalyzeCallResult(Ins, RetCC_PPC);
2808      for (unsigned i = 0; i != RVLocs.size(); ++i)
2809        DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
2810    }
2811
2812    assert(((Callee.getOpcode() == ISD::Register &&
2813             cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
2814            Callee.getOpcode() == ISD::TargetExternalSymbol ||
2815            Callee.getOpcode() == ISD::TargetGlobalAddress ||
2816            isa<ConstantSDNode>(Callee)) &&
2817    "Expecting an global address, external symbol, absolute value or register");
2818
2819    return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size());
2820  }
2821
2822  // Add a NOP immediately after the branch instruction when using the 64-bit
2823  // SVR4 ABI. At link time, if caller and callee are in a different module and
2824  // thus have a different TOC, the call will be replaced with a call to a stub
2825  // function which saves the current TOC, loads the TOC of the callee and
2826  // branches to the callee. The NOP will be replaced with a load instruction
2827  // which restores the TOC of the caller from the TOC save slot of the current
2828  // stack frame. If caller and callee belong to the same module (and have the
2829  // same TOC), the NOP will remain unchanged.
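  //
  // Illustrative sketch of the two linker outcomes described above (the exact
  // stub code is linker-dependent; the 40(r1) TOC save slot matches the store
  // emitted in LowerCall_Darwin()):
  //
  //   bl callee           // same module: the NOP stays a NOP
  //   nop
  //
  //   bl callee.stub      // cross-module: call redirected to a TOC-saving stub
  //   ld r2, 40(r1)       // NOP rewritten to reload the caller's TOC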
2830
2831  bool needsTOCRestore = false;
2832  if (!isTailCall && PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
2833    if (CallOpc == PPCISD::BCTRL_SVR4) {
2834      // This is a call through a function pointer.
2835      // Restore the caller TOC from the save area into R2.
2836      // See PrepareCall() for more information about calls through function
2837      // pointers in the 64-bit SVR4 ABI.
2838      // We are using a target-specific load with r2 hard coded, because the
2839      // result of a target-independent load would never go directly into r2,
2840      // since r2 is a reserved register (which prevents the register allocator
2841      // from allocating it), resulting in an additional register being
2842      // allocated and an unnecessary move instruction being generated.
2843      needsTOCRestore = true;
2844    } else if (CallOpc == PPCISD::CALL_SVR4) {
2845      // Otherwise insert NOP.
2846      CallOpc = PPCISD::CALL_NOP_SVR4;
2847    }
2848  }
2849
2850  Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
2851  InFlag = Chain.getValue(1);
2852
2853  if (needsTOCRestore) {
2854    SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2855    Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag);
2856    InFlag = Chain.getValue(1);
2857  }
2858
2859  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
2860                             DAG.getIntPtrConstant(BytesCalleePops, true),
2861                             InFlag);
2862  if (!Ins.empty())
2863    InFlag = Chain.getValue(1);
2864
2865  return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
2866                         Ins, dl, DAG, InVals);
2867}
2868
2869SDValue
2870PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2871                             SmallVectorImpl<SDValue> &InVals) const {
2872  SelectionDAG &DAG                     = CLI.DAG;
2873  DebugLoc &dl                          = CLI.DL;
2874  SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
2875  SmallVector<SDValue, 32> &OutVals     = CLI.OutVals;
2876  SmallVector<ISD::InputArg, 32> &Ins   = CLI.Ins;
2877  SDValue Chain                         = CLI.Chain;
2878  SDValue Callee                        = CLI.Callee;
2879  bool &isTailCall                      = CLI.IsTailCall;
2880  CallingConv::ID CallConv              = CLI.CallConv;
2881  bool isVarArg                         = CLI.IsVarArg;
2882
2883  if (isTailCall)
2884    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
2885                                                   Ins, DAG);
2886
2887  if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64())
2888    return LowerCall_SVR4(Chain, Callee, CallConv, isVarArg,
2889                          isTailCall, Outs, OutVals, Ins,
2890                          dl, DAG, InVals);
2891
2892  return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
2893                          isTailCall, Outs, OutVals, Ins,
2894                          dl, DAG, InVals);
2895}
2896
2897SDValue
2898PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
2899                                  CallingConv::ID CallConv, bool isVarArg,
2900                                  bool isTailCall,
2901                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
2902                                  const SmallVectorImpl<SDValue> &OutVals,
2903                                  const SmallVectorImpl<ISD::InputArg> &Ins,
2904                                  DebugLoc dl, SelectionDAG &DAG,
2905                                  SmallVectorImpl<SDValue> &InVals) const {
2906  // See PPCTargetLowering::LowerFormalArguments_SVR4() for a description
2907  // of the 32-bit SVR4 ABI stack frame layout.
2908
2909  assert((CallConv == CallingConv::C ||
2910          CallConv == CallingConv::Fast) && "Unknown calling convention!");
2911
2912  unsigned PtrByteSize = 4;
2913
2914  MachineFunction &MF = DAG.getMachineFunction();
2915
2916  // Mark this function as potentially making a tail call. As a consequence,
2917  // the frame pointer will be used for dynamic stack allocation and for
2918  // restoring the caller's stack pointer in this function's epilogue. This is
2919  // done because the tail-called function might overwrite the value in this
2920  // function's (MF) stack pointer save slot at 0(SP).
2921  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
2922      CallConv == CallingConv::Fast)
2923    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
2924
2925  // Count how many bytes are to be pushed on the stack, including the linkage
2926  // area, parameter list area and the part of the local variable space which
2927  // contains copies of aggregates which are passed by value.
2928
2929  // Assign locations to all of the outgoing arguments.
2930  SmallVector<CCValAssign, 16> ArgLocs;
2931  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
2932                 getTargetMachine(), ArgLocs, *DAG.getContext());
2933
2934  // Reserve space for the linkage area on the stack.
2935  CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
2936
2937  if (isVarArg) {
2938    // Handle fixed and variable vector arguments differently.
2939    // Fixed vector arguments go into registers as long as registers are
2940    // available. Variable vector arguments always go into memory.
2941    unsigned NumArgs = Outs.size();
2942
2943    for (unsigned i = 0; i != NumArgs; ++i) {
2944      MVT ArgVT = Outs[i].VT;
2945      ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
2946      bool Result;
2947
2948      if (Outs[i].IsFixed) {
2949        Result = CC_PPC_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
2950                             CCInfo);
2951      } else {
2952        Result = CC_PPC_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
2953                                    ArgFlags, CCInfo);
2954      }
2955
2956      if (Result) {
2957#ifndef NDEBUG
2958        errs() << "Call operand #" << i << " has unhandled type "
2959               << EVT(ArgVT).getEVTString() << "\n";
2960#endif
2961        llvm_unreachable(0);
2962      }
2963    }
2964  } else {
2965    // All arguments are treated the same.
2966    CCInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4);
2967  }
2968
2969  // Assign locations to all of the outgoing aggregate by value arguments.
2970  SmallVector<CCValAssign, 16> ByValArgLocs;
2971  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
2972                      getTargetMachine(), ByValArgLocs, *DAG.getContext());
2973
2974  // Reserve stack space for the allocations in CCInfo.
2975  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
2976
2977  CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4_ByVal);
2978
2979  // Size of the linkage area, parameter list area and the part of the local
2980  // variable space where copies of aggregates which are passed by value are
2981  // stored.
2982  unsigned NumBytes = CCByValInfo.getNextStackOffset();
2983
2984  // Calculate by how many bytes the stack has to be adjusted in case of tail
2985  // call optimization.
2986  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
2987
2988  // Adjust the stack pointer for the new arguments...
2989  // These operations are automatically eliminated by the prolog/epilog pass
2990  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
2991  SDValue CallSeqStart = Chain;
2992
2993  // Load the return address and frame pointer so they can be moved somewhere
2994  // else later.
2995  SDValue LROp, FPOp;
2996  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false,
2997                                       dl);
2998
2999  // Set up a copy of the stack pointer for use in loading and storing any
3000  // arguments that may not fit in the registers available for argument
3001  // passing.
3002  SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
3003
3004  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3005  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
3006  SmallVector<SDValue, 8> MemOpChains;
3007
3008  bool seenFloatArg = false;
3009  // Walk the register/memloc assignments, inserting copies/loads.
3010  for (unsigned i = 0, j = 0, e = ArgLocs.size();
3011       i != e;
3012       ++i) {
3013    CCValAssign &VA = ArgLocs[i];
3014    SDValue Arg = OutVals[i];
3015    ISD::ArgFlagsTy Flags = Outs[i].Flags;
3016
3017    if (Flags.isByVal()) {
3018      // Argument is an aggregate which is passed by value, thus we need to
3019      // create a copy of it in the local variable space of the current stack
3020      // frame (which is the stack frame of the caller) and pass the address of
3021      // this copy to the callee.
3022      assert((j < ByValArgLocs.size()) && "Index out of bounds!");
3023      CCValAssign &ByValVA = ByValArgLocs[j++];
3024      assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
3025
3026      // Memory reserved in the local variable space of the caller's stack frame.
3027      unsigned LocMemOffset = ByValVA.getLocMemOffset();
3028
3029      SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
3030      PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
3031
3032      // Create a copy of the argument in the local area of the current
3033      // stack frame.
3034      SDValue MemcpyCall =
3035        CreateCopyOfByValArgument(Arg, PtrOff,
3036                                  CallSeqStart.getNode()->getOperand(0),
3037                                  Flags, DAG, dl);
3038
3039      // This must go outside the CALLSEQ_START..END.
3040      SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
3041                           CallSeqStart.getNode()->getOperand(1));
3042      DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
3043                             NewCallSeqStart.getNode());
3044      Chain = CallSeqStart = NewCallSeqStart;
3045
3046      // Pass the address of the aggregate copy on the stack either in a
3047      // physical register or in the parameter list area of the current stack
3048      // frame to the callee.
3049      Arg = PtrOff;
3050    }
3051
3052    if (VA.isRegLoc()) {
3053      seenFloatArg |= VA.getLocVT().isFloatingPoint();
3054      // Put argument in a physical register.
3055      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
3056    } else {
3057      // Put argument in the parameter list area of the current stack frame.
3058      assert(VA.isMemLoc());
3059      unsigned LocMemOffset = VA.getLocMemOffset();
3060
3061      if (!isTailCall) {
3062        SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
3063        PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
3064
3065        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
3066                                           MachinePointerInfo(),
3067                                           false, false, 0));
3068      } else {
3069        // Calculate and remember argument location.
3070        CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
3071                                 TailCallArguments);
3072      }
3073    }
3074  }
3075
3076  if (!MemOpChains.empty())
3077    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3078                        &MemOpChains[0], MemOpChains.size());
3079
3080  // Set CR6 to true if this is a vararg call with floating args passed in
3081  // registers.
3082  if (isVarArg) {
3083    SDValue SetCR(DAG.getMachineNode(seenFloatArg ? PPC::CRSET : PPC::CRUNSET,
3084                                     dl, MVT::i32), 0);
3085    RegsToPass.push_back(std::make_pair(unsigned(PPC::CR1EQ), SetCR));
3086  }
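  // (In the 32-bit SVR4 ABI, CR bit 6 tells a varargs callee whether any
  // floating-point arguments were passed in FPRs, so its prologue can skip
  // spilling the FP argument registers when the bit is clear.)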
3087
3088  // Build a sequence of copy-to-reg nodes chained together with token chain
3089  // and flag operands which copy the outgoing args into the appropriate regs.
3090  SDValue InFlag;
3091  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
3092    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
3093                             RegsToPass[i].second, InFlag);
3094    InFlag = Chain.getValue(1);
3095  }
3096
3097  if (isTailCall)
3098    PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
3099                    false, TailCallArguments);
3100
3101  return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
3102                    RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
3103                    Ins, InVals);
3104}
3105
3106SDValue
3107PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
3108                                    CallingConv::ID CallConv, bool isVarArg,
3109                                    bool isTailCall,
3110                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
3111                                    const SmallVectorImpl<SDValue> &OutVals,
3112                                    const SmallVectorImpl<ISD::InputArg> &Ins,
3113                                    DebugLoc dl, SelectionDAG &DAG,
3114                                    SmallVectorImpl<SDValue> &InVals) const {
3115
3116  unsigned NumOps  = Outs.size();
3117
3118  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
3119  bool isPPC64 = PtrVT == MVT::i64;
3120  unsigned PtrByteSize = isPPC64 ? 8 : 4;
3121
3122  MachineFunction &MF = DAG.getMachineFunction();
3123
3124  // Mark this function as potentially making a tail call. As a consequence,
3125  // the frame pointer will be used for dynamic stack allocation and for
3126  // restoring the caller's stack pointer in this function's epilogue. This is
3127  // done because the tail-called function might overwrite the value in this
3128  // function's (MF) stack pointer save slot at 0(SP).
3129  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
3130      CallConv == CallingConv::Fast)
3131    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
3132
3133  unsigned nAltivecParamsAtEnd = 0;
3134
3135  // Count how many bytes are to be pushed on the stack, including the linkage
3136  // area, and parameter passing area.  We start with 24/48 bytes, which is
3137  // prereserved space for [SP][CR][LR][3 x unused].
3138  unsigned NumBytes =
3139    CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CallConv,
3140                                         Outs, OutVals,
3141                                         nAltivecParamsAtEnd);
3142
3143  // Calculate by how many bytes the stack has to be adjusted in case of tail
3144  // call optimization.
3145  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
3146
3147  // To protect arguments on the stack from being clobbered in a tail call,
3148  // force all the loads to happen before doing any other lowering.
3149  if (isTailCall)
3150    Chain = DAG.getStackArgumentTokenFactor(Chain);
3151
3152  // Adjust the stack pointer for the new arguments...
3153  // These operations are automatically eliminated by the prolog/epilog pass
3154  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
3155  SDValue CallSeqStart = Chain;
3156
3157  // Load the return address and frame pointer so they can be moved somewhere
3158  // else later.
3159  SDValue LROp, FPOp;
3160  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
3161                                       dl);
3162
3163  // Set up a copy of the stack pointer for use in loading and storing any
3164  // arguments that may not fit in the registers available for argument
3165  // passing.
3166  SDValue StackPtr;
3167  if (isPPC64)
3168    StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
3169  else
3170    StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
3171
3172  // Figure out which arguments are going to go in registers, and which in
3173  // memory.  Also, if this is a vararg call, floating point arguments
3174  // must be stored to our stack, and loaded into integer regs as well, if
3175  // any integer regs are available for argument passing.
3176  unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
3177  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
3178
3179  static const uint16_t GPR_32[] = {           // 32-bit registers.
3180    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3181    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3182  };
3183  static const uint16_t GPR_64[] = {           // 64-bit registers.
3184    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3185    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3186  };
3187  static const uint16_t *FPR = GetFPR();
3188
3189  static const uint16_t VR[] = {
3190    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
3191    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3192  };
3193  const unsigned NumGPRs = array_lengthof(GPR_32);
3194  const unsigned NumFPRs = 13;
3195  const unsigned NumVRs  = array_lengthof(VR);
3196
3197  const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32;
3198
3199  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3200  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
3201
3202  SmallVector<SDValue, 8> MemOpChains;
3203  for (unsigned i = 0; i != NumOps; ++i) {
3204    SDValue Arg = OutVals[i];
3205    ISD::ArgFlagsTy Flags = Outs[i].Flags;
3206
3207    // PtrOff will be used to store the current argument to the stack if a
3208    // register cannot be found for it.
3209    SDValue PtrOff;
3210
3211    PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
3212
3213    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
3214
3215    // On PPC64, promote integers to 64-bit values.
3216    if (isPPC64 && Arg.getValueType() == MVT::i32) {
3217      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
3218      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
3219      Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
3220    }
3221
3222    // FIXME memcpy is used way more than necessary.  Correctness first.
3223    if (Flags.isByVal()) {
3224      unsigned Size = Flags.getByValSize();
3225      if (Size==1 || Size==2) {
3226        // Very small objects are passed right-justified.
3227        // Everything else is passed left-justified.
3228        EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
3229        if (GPR_idx != NumGPRs) {
3230          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
3231                                        MachinePointerInfo(), VT,
3232                                        false, false, 0);
3233          MemOpChains.push_back(Load.getValue(1));
3234          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
3235
3236          ArgOffset += PtrByteSize;
3237        } else {
3238          SDValue Const = DAG.getConstant(4 - Size, PtrOff.getValueType());
3239          SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
3240          SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr,
3241                                CallSeqStart.getNode()->getOperand(0),
3242                                Flags, DAG, dl);
3243          // This must go outside the CALLSEQ_START..END.
3244          SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
3245                               CallSeqStart.getNode()->getOperand(1));
3246          DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
3247                                 NewCallSeqStart.getNode());
3248          Chain = CallSeqStart = NewCallSeqStart;
3249          ArgOffset += PtrByteSize;
3250        }
3251        continue;
3252      }
3253      // Copy entire object into memory.  There are cases where gcc-generated
3254      // code assumes it is there, even if it could be put entirely into
3255      // registers.  (This is not what the doc says.)
3256      SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
3257                            CallSeqStart.getNode()->getOperand(0),
3258                            Flags, DAG, dl);
3259      // This must go outside the CALLSEQ_START..END.
3260      SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
3261                           CallSeqStart.getNode()->getOperand(1));
3262      DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), NewCallSeqStart.getNode());
3263      Chain = CallSeqStart = NewCallSeqStart;
3264      // And copy the pieces of it that fit into registers.
3265      for (unsigned j=0; j<Size; j+=PtrByteSize) {
3266        SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
3267        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
3268        if (GPR_idx != NumGPRs) {
3269          SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
3270                                     MachinePointerInfo(),
3271                                     false, false, false, 0);
3272          MemOpChains.push_back(Load.getValue(1));
3273          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
3274          ArgOffset += PtrByteSize;
3275        } else {
3276          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
3277          break;
3278        }
3279      }
3280      continue;
3281    }
3282
3283    switch (Arg.getValueType().getSimpleVT().SimpleTy) {
3284    default: llvm_unreachable("Unexpected ValueType for argument!");
3285    case MVT::i32:
3286    case MVT::i64:
3287      if (GPR_idx != NumGPRs) {
3288        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
3289      } else {
3290        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
3291                         isPPC64, isTailCall, false, MemOpChains,
3292                         TailCallArguments, dl);
3293      }
3294      ArgOffset += PtrByteSize;
3295      break;
3296    case MVT::f32:
3297    case MVT::f64:
3298      if (FPR_idx != NumFPRs) {
3299        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
3300
3301        if (isVarArg) {
3302          SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
3303                                       MachinePointerInfo(), false, false, 0);
3304          MemOpChains.push_back(Store);
3305
3306          // Float varargs are always shadowed in available integer registers
3307          if (GPR_idx != NumGPRs) {
3308            SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
3309                                       MachinePointerInfo(), false, false,
3310                                       false, 0);
3311            MemOpChains.push_back(Load.getValue(1));
3312            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
3313          }
3314          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
3315            SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
3316            PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
3317            SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
3318                                       MachinePointerInfo(),
3319                                       false, false, false, 0);
3320            MemOpChains.push_back(Load.getValue(1));
3321            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
3322          }
3323        } else {
3324          // If we have any FPRs remaining, we may also have GPRs remaining.
3325          // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
3326          // GPRs.
3327          if (GPR_idx != NumGPRs)
3328            ++GPR_idx;
3329          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
3330              !isPPC64)  // PPC64 has 64-bit GPRs; an f64 fits in one.
3331            ++GPR_idx;
3332        }
3333      } else {
3334        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
3335                         isPPC64, isTailCall, false, MemOpChains,
3336                         TailCallArguments, dl);
3337      }
3338      if (isPPC64)
3339        ArgOffset += 8;
3340      else
3341        ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
3342      break;
3343    case MVT::v4f32:
3344    case MVT::v4i32:
3345    case MVT::v8i16:
3346    case MVT::v16i8:
3347      if (isVarArg) {
3348        // These go aligned on the stack, or in the corresponding R registers
3349        // when within range.  The Darwin PPC ABI doc claims they also go in
3350        // V registers; in fact gcc does this only for arguments that are
3351        // prototyped, not for those that match the "...".  We do it for all
3352        // arguments; it seems to work.
3353        while (ArgOffset % 16 != 0) {
3354          ArgOffset += PtrByteSize;
3355          if (GPR_idx != NumGPRs)
3356            GPR_idx++;
3357        }
3358        // We could elide this store in the case where the object fits
3359        // entirely in R registers.  Maybe later.
3360        PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
3361                            DAG.getConstant(ArgOffset, PtrVT));
3362        SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
3363                                     MachinePointerInfo(), false, false, 0);
3364        MemOpChains.push_back(Store);
3365        if (VR_idx != NumVRs) {
3366          SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
3367                                     MachinePointerInfo(),
3368                                     false, false, false, 0);
3369          MemOpChains.push_back(Load.getValue(1));
3370          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
3371        }
3372        ArgOffset += 16;
3373        for (unsigned i=0; i<16; i+=PtrByteSize) {
3374          if (GPR_idx == NumGPRs)
3375            break;
3376          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
3377                                  DAG.getConstant(i, PtrVT));
3378          SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
3379                                     false, false, false, 0);
3380          MemOpChains.push_back(Load.getValue(1));
3381          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
3382        }
3383        break;
3384      }
3385
3386      // Non-varargs Altivec params generally go in registers, but have
3387      // stack space allocated at the end.
3388      if (VR_idx != NumVRs) {
3389        // Doesn't have GPR space allocated.
3390        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
3391      } else if (nAltivecParamsAtEnd==0) {
3392        // We are emitting Altivec params in order.
3393        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
3394                         isPPC64, isTailCall, true, MemOpChains,
3395                         TailCallArguments, dl);
3396        ArgOffset += 16;
3397      }
3398      break;
3399    }
3400  }
3401  // If all Altivec parameters fit in registers, as they usually do,
3402  // they get stack space following the non-Altivec parameters.  We
3403  // don't track this here because nobody below needs it.
3404  // If there are more Altivec parameters than fit in registers, emit
3405  // the stores here.
3406  if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
3407    unsigned j = 0;
3408    // Offset is aligned; skip the first 12 params, which go in V registers.
3409    ArgOffset = ((ArgOffset+15)/16)*16;
3410    ArgOffset += 12*16;
3411    for (unsigned i = 0; i != NumOps; ++i) {
3412      SDValue Arg = OutVals[i];
3413      EVT ArgType = Outs[i].VT;
3414      if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
3415          ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
3416        if (++j > NumVRs) {
3417          SDValue PtrOff;
3418          // We are emitting Altivec params in order.
3419          LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
3420                           isPPC64, isTailCall, true, MemOpChains,
3421                           TailCallArguments, dl);
3422          ArgOffset += 16;
3423        }
3424      }
3425    }
3426  }
3427
3428  if (!MemOpChains.empty())
3429    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3430                        &MemOpChains[0], MemOpChains.size());
3431
3432  // Check if this is an indirect call (MTCTR/BCTRL).
3433  // See PrepareCall() for more information about calls through function
3434  // pointers in the 64-bit SVR4 ABI.
3435  if (!isTailCall && isPPC64 && PPCSubTarget.isSVR4ABI() &&
3436      !dyn_cast<GlobalAddressSDNode>(Callee) &&
3437      !dyn_cast<ExternalSymbolSDNode>(Callee) &&
3438      !isBLACompatibleAddress(Callee, DAG)) {
3439    // Load r2 into a virtual register and store it to the TOC save area.
3440    SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
3441    // TOC save area offset.
3442    SDValue PtrOff = DAG.getIntPtrConstant(40);
3443    SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
3444    Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
3445                         false, false, 0);
3446  }
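
  // For reference, the offset 40 used above is the TOC save doubleword of the
  // 64-bit SVR4 linkage area, which (as generally documented for this ABI) is
  // laid out as: 0 back chain, 8 CR save, 16 LR save, 24 reserved for
  // compilers, 32 reserved for linkers, 40 TOC save.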
3447
3448  // On Darwin, R12 must contain the address of an indirect callee.  This does
3449  // not mean the MTCTR instruction must use R12; it's easier to model this as
3450  // an extra parameter, so do that.
3451  if (!isTailCall &&
3452      !dyn_cast<GlobalAddressSDNode>(Callee) &&
3453      !dyn_cast<ExternalSymbolSDNode>(Callee) &&
3454      !isBLACompatibleAddress(Callee, DAG))
3455    RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
3456                                                   PPC::R12), Callee));
3457
3458  // Build a sequence of copy-to-reg nodes chained together with token chain
3459  // and flag operands which copy the outgoing args into the appropriate regs.
3460  SDValue InFlag;
3461  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
3462    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
3463                             RegsToPass[i].second, InFlag);
3464    InFlag = Chain.getValue(1);
3465  }
3466
3467  if (isTailCall)
3468    PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp,
3469                    FPOp, true, TailCallArguments);
3470
3471  return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
3472                    RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
3473                    Ins, InVals);
3474}
3475
3476bool
3477PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
3478                                  MachineFunction &MF, bool isVarArg,
3479                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
3480                                  LLVMContext &Context) const {
3481  SmallVector<CCValAssign, 16> RVLocs;
3482  CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
3483                 RVLocs, Context);
3484  return CCInfo.CheckReturn(Outs, RetCC_PPC);
3485}
3486
3487SDValue
3488PPCTargetLowering::LowerReturn(SDValue Chain,
3489                               CallingConv::ID CallConv, bool isVarArg,
3490                               const SmallVectorImpl<ISD::OutputArg> &Outs,
3491                               const SmallVectorImpl<SDValue> &OutVals,
3492                               DebugLoc dl, SelectionDAG &DAG) const {
3493
3494  SmallVector<CCValAssign, 16> RVLocs;
3495  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
3496                 getTargetMachine(), RVLocs, *DAG.getContext());
3497  CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
3498
3499  // If this is the first return lowered for this function, add the regs to the
3500  // liveout set for the function.
3501  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
3502    for (unsigned i = 0; i != RVLocs.size(); ++i)
3503      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
3504  }
3505
3506  SDValue Flag;
3507
3508  // Copy the result values into the output registers.
3509  for (unsigned i = 0; i != RVLocs.size(); ++i) {
3510    CCValAssign &VA = RVLocs[i];
3511    assert(VA.isRegLoc() && "Can only return in registers!");
3512    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3513                             OutVals[i], Flag);
3514    Flag = Chain.getValue(1);
3515  }
3516
3517  if (Flag.getNode())
3518    return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
3519  else
3520    return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain);
3521}
3522
3523SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
3524                                   const PPCSubtarget &Subtarget) const {
3525  // When we pop the dynamic allocation we need to restore the SP link.
3526  DebugLoc dl = Op.getDebugLoc();
3527
3528  // Get the correct type for pointers.
3529  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
3530
3531  // Construct the stack pointer operand.
3532  bool isPPC64 = Subtarget.isPPC64();
3533  unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
3534  SDValue StackPtr = DAG.getRegister(SP, PtrVT);
3535
3536  // Get the operands for the STACKRESTORE.
3537  SDValue Chain = Op.getOperand(0);
3538  SDValue SaveSP = Op.getOperand(1);
3539
3540  // Load the old link SP.
3541  SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr,
3542                                   MachinePointerInfo(),
3543                                   false, false, false, 0);
3544
3545  // Restore the stack pointer.
3546  Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
3547
3548  // Store the old link SP.
3549  return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo(),
3550                      false, false, 0);
3551}
3552
3553
3554
3555SDValue
3556PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
3557  MachineFunction &MF = DAG.getMachineFunction();
3558  bool isPPC64 = PPCSubTarget.isPPC64();
3559  bool isDarwinABI = PPCSubTarget.isDarwinABI();
3560  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
3561
3562  // Get the current return address save index; it is created on first use,
3563  // primarily by tail call lowering.
3564  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
3565  int RASI = FI->getReturnAddrSaveIndex();
3566
3567  // If the return address save index hasn't been defined yet.
3568  if (!RASI) {
3569    // Find out the fixed offset of the return address save area.
3570    int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
3571    // Allocate the frame index for the return address save area.
3572    RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, true);
3573    // Save the result.
3574    FI->setReturnAddrSaveIndex(RASI);
3575  }
3576  return DAG.getFrameIndex(RASI, PtrVT);
3577}
3578
3579SDValue
3580PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
3581  MachineFunction &MF = DAG.getMachineFunction();
3582  bool isPPC64 = PPCSubTarget.isPPC64();
3583  bool isDarwinABI = PPCSubTarget.isDarwinABI();
3584  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
3585
3586  // Get current frame pointer save index.  The users of this index will be
3587  // primarily DYNALLOC instructions.
3588  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
3589  int FPSI = FI->getFramePointerSaveIndex();
3590
3591  // If the frame pointer save index hasn't been defined yet.
3592  if (!FPSI) {
3593    // Find out the fixed offset of the frame pointer save area.
3594    int FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64,
3595                                                           isDarwinABI);
3596
3597    // Allocate the frame index for the frame pointer save area.
3598    FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
3599    // Save the result.
3600    FI->setFramePointerSaveIndex(FPSI);
3601  }
3602  return DAG.getFrameIndex(FPSI, PtrVT);
3603}
3604
3605SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
3606                                         SelectionDAG &DAG,
3607                                         const PPCSubtarget &Subtarget) const {
3608  // Get the inputs.
3609  SDValue Chain = Op.getOperand(0);
3610  SDValue Size  = Op.getOperand(1);
3611  DebugLoc dl = Op.getDebugLoc();
3612
3613  // Get the correct type for pointers.
3614  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
3615  // Negate the size.
3616  SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
3617                                  DAG.getConstant(0, PtrVT), Size);
3618  // Construct a node for the frame pointer save index.
3619  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
3620  // Build a DYNALLOC node.
3621  SDValue Ops[3] = { Chain, NegSize, FPSIdx };
3622  SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
3623  return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3);
3624}
3625
3626/// LowerSELECT_CC - Lower floating-point select_cc's into the fsel instruction
3627/// when possible.
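/// The fsel instruction implements, in effect,
///   fsel FRT, FRA, FRC, FRB :  FRT = (FRA >= 0.0) ? FRC : FRB
/// i.e. a native "setge against zero" select, so the lowering below rewrites
/// each comparison as a subtraction (plus operand swaps) whose sign drives it.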
3628SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
3629  // Not FP? Not a fsel.
3630  if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
3631      !Op.getOperand(2).getValueType().isFloatingPoint())
3632    return Op;
3633
3634  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
3635
3636  // Cannot handle SETEQ/SETNE.
3637  if (CC == ISD::SETEQ || CC == ISD::SETNE) return Op;
3638
3639  EVT ResVT = Op.getValueType();
3640  EVT CmpVT = Op.getOperand(0).getValueType();
3641  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
3642  SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
3643  DebugLoc dl = Op.getDebugLoc();
3644
3645  // If the RHS of the comparison is a 0.0, we don't need to do the
3646  // subtraction at all.
3647  if (isFloatingPointZero(RHS))
3648    switch (CC) {
3649    default: break;       // SETUO etc aren't handled by fsel.
3650    case ISD::SETULT:
3651    case ISD::SETLT:
3652      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
3653    case ISD::SETOGE:
3654    case ISD::SETGE:
3655      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
3656        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
3657      return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
3658    case ISD::SETUGT:
3659    case ISD::SETGT:
3660      std::swap(TV, FV);  // fsel is natively setge, swap operands for setgt
3661    case ISD::SETOLE:
3662    case ISD::SETLE:
3663      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
3664        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
3665      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
3666                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
3667    }
3668
3669  SDValue Cmp;
3670  switch (CC) {
3671  default: break;       // SETUO etc aren't handled by fsel.
3672  case ISD::SETULT:
3673  case ISD::SETLT:
3674    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
3675    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
3676      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
3677    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
3678  case ISD::SETOGE:
3679  case ISD::SETGE:
3680    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
3681    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
3682      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
3683    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
3684  case ISD::SETUGT:
3685  case ISD::SETGT:
3686    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
3687    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
3688      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
3689    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
3690  case ISD::SETOLE:
3691  case ISD::SETLE:
3692    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
3693    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
3694      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
3695    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
3696  }
3697  return Op;
3698}
3699
3700// FIXME: Split this code up when LegalizeDAGTypes lands.
3701SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
3702                                           DebugLoc dl) const {
3703  assert(Op.getOperand(0).getValueType().isFloatingPoint());
3704  SDValue Src = Op.getOperand(0);
3705  if (Src.getValueType() == MVT::f32)
3706    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
3707
3708  SDValue Tmp;
3709  switch (Op.getValueType().getSimpleVT().SimpleTy) {
3710  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
3711  case MVT::i32:
3712    Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
3713                                                         PPCISD::FCTIDZ,
3714                      dl, MVT::f64, Src);
3715    break;
3716  case MVT::i64:
3717    Tmp = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Src);
3718    break;
3719  }
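
  // Illustrative machine-level shape of the conversion through memory done
  // below (register numbers are arbitrary; big-endian layout assumed):
  //   fctiwz f0, f1         ; i32 case: integer result in low 32 bits of f0
  //   stfd   f0, off(r1)    ; store the whole 8-byte image to the stack slot
  //   lwz    r3, off+4(r1)  ; reload just the high-address word (the bias)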
3720
3721  // Convert the FP value to an int value through memory.
3722  SDValue FIPtr = DAG.CreateStackTemporary(MVT::f64);
3723
3724  // Emit a store to the stack slot.
3725  SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
3726                               MachinePointerInfo(), false, false, 0);
3727
3728  // Result is a load from the stack slot.  If loading 4 bytes, add a bias of
3729  // 4: big-endian, so the i32 result is in the high-address word of the slot.
3730  if (Op.getValueType() == MVT::i32)
3731    FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
3732                        DAG.getConstant(4, FIPtr.getValueType()));
3733  return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MachinePointerInfo(),
3734                     false, false, false, 0);
3735}
3736
3737SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
3738                                           SelectionDAG &DAG) const {
3739  DebugLoc dl = Op.getDebugLoc();
3740  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
3741  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
3742    return SDValue();
3743
3744  if (Op.getOperand(0).getValueType() == MVT::i64) {
3745    SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op.getOperand(0));
3746    SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Bits);
3747    if (Op.getValueType() == MVT::f32)
3748      FP = DAG.getNode(ISD::FP_ROUND, dl,
3749                       MVT::f32, FP, DAG.getIntPtrConstant(0));
3750    return FP;
3751  }
3752
3753  assert(Op.getOperand(0).getValueType() == MVT::i32 &&
3754         "Unhandled SINT_TO_FP type in custom expander!");
3755  // Since we only generate this in 64-bit mode, we can take advantage of
3756  // 64-bit registers.  In particular, sign-extend the input value into a
3757  // 64-bit register with extsw, store the whole 64-bit value to a stack
3758  // slot, then lfd and fcfid it.
3759  MachineFunction &MF = DAG.getMachineFunction();
3760  MachineFrameInfo *FrameInfo = MF.getFrameInfo();
3761  int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
3762  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
3763  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
3764
3765  SDValue Ext64 = DAG.getNode(PPCISD::EXTSW_32, dl, MVT::i32,
3766                                Op.getOperand(0));
3767
3768  // STD the extended value into the stack slot.
3769  MachineMemOperand *MMO =
3770    MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
3771                            MachineMemOperand::MOStore, 8, 8);
3772  SDValue Ops[] = { DAG.getEntryNode(), Ext64, FIdx };
3773  SDValue Store =
3774    DAG.getMemIntrinsicNode(PPCISD::STD_32, dl, DAG.getVTList(MVT::Other),
3775                            Ops, 4, MVT::i64, MMO);
3776  // Load the value as a double.
3777  SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, MachinePointerInfo(),
3778                           false, false, false, 0);
3779
3780  // FCFID it and return it.
3781  SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld);
3782  if (Op.getValueType() == MVT::f32)
3783    FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0));
3784  return FP;
3785}
3786
3787SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
3788                                            SelectionDAG &DAG) const {
3789  DebugLoc dl = Op.getDebugLoc();
3790  /*
3791   The rounding mode is in bits 30:31 of FPSCR, and has the following
3792   settings:
3793     00 Round to nearest
3794     01 Round to 0
3795     10 Round to +inf
3796     11 Round to -inf
3797
3798  FLT_ROUNDS, on the other hand, expects the following:
3799    -1 Undefined
3800     0 Round to 0
3801     1 Round to nearest
3802     2 Round to +inf
3803     3 Round to -inf
3804
3805  To perform the conversion, we do:
3806    ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
3807  */
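  // Worked through: FPSCR 00 -> 0^(3>>1) = 1 (nearest), 01 -> 1^(2>>1) = 0
  // (to zero), 10 -> 2^(1>>1) = 2 (+inf), 11 -> 3^(0>>1) = 3 (-inf).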
3808
3809  MachineFunction &MF = DAG.getMachineFunction();
3810  EVT VT = Op.getValueType();
3811  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
3812  std::vector<EVT> NodeTys;
3813  SDValue MFFSreg, InFlag;
3814
3815  // Save FP Control Word to register
3816  NodeTys.push_back(MVT::f64);    // return register
3817  NodeTys.push_back(MVT::Glue);   // unused in this context
3818  SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
3819
3820  // Save FP register to stack slot
3821  int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
3822  SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
3823  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain,
3824                               StackSlot, MachinePointerInfo(), false, false,0);
3825
3826  // Load FP Control Word from low 32 bits of stack slot.
3827  SDValue Four = DAG.getConstant(4, PtrVT);
3828  SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
3829  SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(),
3830                            false, false, false, 0);
3831
3832  // Transform as necessary
3833  SDValue CWD1 =
3834    DAG.getNode(ISD::AND, dl, MVT::i32,
3835                CWD, DAG.getConstant(3, MVT::i32));
3836  SDValue CWD2 =
3837    DAG.getNode(ISD::SRL, dl, MVT::i32,
3838                DAG.getNode(ISD::AND, dl, MVT::i32,
3839                            DAG.getNode(ISD::XOR, dl, MVT::i32,
3840                                        CWD, DAG.getConstant(3, MVT::i32)),
3841                            DAG.getConstant(3, MVT::i32)),
3842                DAG.getConstant(1, MVT::i32));
3843
3844  SDValue RetVal =
3845    DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
3846
3847  return DAG.getNode((VT.getSizeInBits() < 16 ?
3848                      ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
3849}
3850
3851SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
3852  EVT VT = Op.getValueType();
3853  unsigned BitWidth = VT.getSizeInBits();
3854  DebugLoc dl = Op.getDebugLoc();
3855  assert(Op.getNumOperands() == 3 &&
3856         VT == Op.getOperand(1).getValueType() &&
3857         "Unexpected SHL!");
3858
3859  // Expand into a bunch of logical ops.  Note that these ops
3860  // depend on the PPC behavior for oversized shift amounts.
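  // The expansion computes:
  //   OutLo = Lo << Amt
  //   OutHi = (Hi << Amt) | (Lo >> (BitWidth-Amt)) | (Lo << (Amt-BitWidth))
  // where the PPC shift nodes yield 0 for amounts in [BitWidth, 2*BitWidth),
  // which makes the Amt == 0 and Amt >= BitWidth edge cases fall out of the
  // OR terms naturally.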
3861  SDValue Lo = Op.getOperand(0);
3862  SDValue Hi = Op.getOperand(1);
3863  SDValue Amt = Op.getOperand(2);
3864  EVT AmtVT = Amt.getValueType();
3865
3866  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
3867                             DAG.getConstant(BitWidth, AmtVT), Amt);
3868  SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
3869  SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
3870  SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
3871  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
3872                             DAG.getConstant(-BitWidth, AmtVT));
3873  SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
3874  SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
3875  SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
3876  SDValue OutOps[] = { OutLo, OutHi };
3877  return DAG.getMergeValues(OutOps, 2, dl);
3878}
3879
3880SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
3881  EVT VT = Op.getValueType();
3882  DebugLoc dl = Op.getDebugLoc();
3883  unsigned BitWidth = VT.getSizeInBits();
3884  assert(Op.getNumOperands() == 3 &&
3885         VT == Op.getOperand(1).getValueType() &&
3886         "Unexpected SRL!");
3887
3888  // Expand into a bunch of logical ops.  Note that these ops
3889  // depend on the PPC behavior for oversized shift amounts.
3890  SDValue Lo = Op.getOperand(0);
3891  SDValue Hi = Op.getOperand(1);
3892  SDValue Amt = Op.getOperand(2);
3893  EVT AmtVT = Amt.getValueType();
3894
3895  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
3896                             DAG.getConstant(BitWidth, AmtVT), Amt);
3897  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
3898  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
3899  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
3900  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
3901                             DAG.getConstant(-BitWidth, AmtVT));
3902  SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
3903  SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
3904  SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
3905  SDValue OutOps[] = { OutLo, OutHi };
3906  return DAG.getMergeValues(OutOps, 2, dl);
3907}
3908
3909SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
3910  DebugLoc dl = Op.getDebugLoc();
3911  EVT VT = Op.getValueType();
3912  unsigned BitWidth = VT.getSizeInBits();
3913  assert(Op.getNumOperands() == 3 &&
3914         VT == Op.getOperand(1).getValueType() &&
3915         "Unexpected SRA!");
3916
3917  // Expand into a bunch of logical ops, followed by a select_cc.
3918  SDValue Lo = Op.getOperand(0);
3919  SDValue Hi = Op.getOperand(1);
3920  SDValue Amt = Op.getOperand(2);
3921  EVT AmtVT = Amt.getValueType();
3922
3923  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
3924                             DAG.getConstant(BitWidth, AmtVT), Amt);
3925  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
3926  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
3927  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
3928  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
3929                             DAG.getConstant(-BitWidth, AmtVT));
3930  SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
3931  SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
3932  SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT),
3933                                  Tmp4, Tmp6, ISD::SETLE);
3934  SDValue OutOps[] = { OutLo, OutHi };
3935  return DAG.getMergeValues(OutOps, 2, dl);
3936}
3937
3938//===----------------------------------------------------------------------===//
3939// Vector related lowering.
3940//
3941
3942/// BuildSplatI - Build a canonical splati of Val with an element size of
3943/// SplatSize.  Cast the result to VT.
3944static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
3945                             SelectionDAG &DAG, DebugLoc dl) {
3946  assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
3947
3948  static const EVT VTys[] = { // canonical VT to use for each size.
3949    MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
3950  };
3951
3952  EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
3953
3954  // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
3955  if (Val == -1)
3956    SplatSize = 1;
3957
3958  EVT CanonicalVT = VTys[SplatSize-1];
3959
3960  // Build a canonical splat for this value.
3961  SDValue Elt = DAG.getConstant(Val, MVT::i32);
3962  SmallVector<SDValue, 8> Ops;
3963  Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
3964  SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT,
3965                              &Ops[0], Ops.size());
3966  return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res);
3967}
3968
3969/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
3970/// specified intrinsic ID.
3971static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
3972                                SelectionDAG &DAG, DebugLoc dl,
3973                                EVT DestVT = MVT::Other) {
3974  if (DestVT == MVT::Other) DestVT = LHS.getValueType();
3975  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
3976                     DAG.getConstant(IID, MVT::i32), LHS, RHS);
3977}
3978
3979/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
3980/// specified intrinsic ID.
3981static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
3982                                SDValue Op2, SelectionDAG &DAG,
3983                                DebugLoc dl, EVT DestVT = MVT::Other) {
3984  if (DestVT == MVT::Other) DestVT = Op0.getValueType();
3985  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
3986                     DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
3987}
3988
3989
3990/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
3991/// amount.  The result has the specified value type.
3992static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
3993                             EVT VT, SelectionDAG &DAG, DebugLoc dl) {
3994  // Force LHS/RHS to be the right type.
3995  LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
3996  RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
3997
3998  int Ops[16];
3999  for (unsigned i = 0; i != 16; ++i)
4000    Ops[i] = i + Amt;
4001  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
4002  return DAG.getNode(ISD::BITCAST, dl, VT, T);
4003}
4004
4005// If this is a case we can't handle, return null and let the default
4006// expansion code take care of it.  If we CAN select this case, and if it
4007// selects to a single instruction, return Op.  Otherwise, if we can codegen
4008// this case more efficiently than a constant pool load, lower it to the
4009// sequence of ops that should be used.
4010SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
4011                                             SelectionDAG &DAG) const {
4012  DebugLoc dl = Op.getDebugLoc();
4013  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
4014  assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
4015
4016  // Check if this is a splat of a constant value.
4017  APInt APSplatBits, APSplatUndef;
4018  unsigned SplatBitSize;
4019  bool HasAnyUndefs;
4020  if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
4021                             HasAnyUndefs, 0, true) || SplatBitSize > 32)
4022    return SDValue();
4023
4024  unsigned SplatBits = APSplatBits.getZExtValue();
4025  unsigned SplatUndef = APSplatUndef.getZExtValue();
4026  unsigned SplatSize = SplatBitSize / 8;
4027
4028  // First, handle single instruction cases.
4029
4030  // All zeros?
4031  if (SplatBits == 0) {
4032    // Canonicalize all zero vectors to be v4i32.
4033    if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
4034      SDValue Z = DAG.getConstant(0, MVT::i32);
4035      Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z);
4036      Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
4037    }
4038    return Op;
4039  }
4040
4041  // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
4042  int32_t SextVal = (int32_t(SplatBits << (32-SplatBitSize)) >>
4043                     (32-SplatBitSize));
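  // e.g. an 8-bit splat of 0xF0 sign-extends to SextVal == -16.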
4044  if (SextVal >= -16 && SextVal <= 15)
4045    return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
4046
4047
4048  // Two instruction sequences.
4049
4050  // If this value is in the range [-32,30] and is even, use:
4051  //    tmp = VSPLTI[bhw], result = add tmp, tmp
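  // e.g. a word splat of 20 becomes vspltisw 10 followed by a vector add of
  // the result with itself.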
4052  if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
4053    SDValue Res = BuildSplatI(SextVal >> 1, SplatSize, MVT::Other, DAG, dl);
4054    Res = DAG.getNode(ISD::ADD, dl, Res.getValueType(), Res, Res);
4055    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
4056  }
4057
4058  // A splat of 0x8000_0000 is formed by the vsplti + shl case below.  If the
4059  // value is 0x7FFF_FFFF x 4, turn it into not(0x8000_0000) here.  This is
4060  // important for fneg/fabs.
4061  if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
4062    // Make an all-ones vector with vspltisw -1:
4063    SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
4064
4065    // Make the VSLW intrinsic, computing 0x8000_0000.
4066    SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
4067                                   OnesV, DAG, dl);
4068
4069    // xor by OnesV to invert it.
4070    Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
4071    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
4072  }
4073
4074  // Check to see if this is a wide variety of vsplti*, binop self cases.
4075  static const signed char SplatCsts[] = {
4076    -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
4077    -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
4078  };
4079
4080  for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
4081    // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
4082    // cases which are ambiguous (e.g. formation of 0x8000_0000): -1 is tried first.
4083    int i = SplatCsts[idx];
4084
4085    // Figure out what shift amount will be used by altivec if shifted by i in
4086    // this splat size.
4087    unsigned TypeShiftAmt = i & (SplatBitSize-1);
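    // These tricks use the splat itself as the shift amount: the element-size
    // shift instructions only look at the low log2(SplatBitSize) bits, i.e.
    // i mod SplatBitSize.  e.g. a byte splat of 0x40 is vspltisb 4 followed by
    // vslb of the splat by itself (4 << 4 == 0x40).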
4088
4089    // vsplti + shl self.
4090    if (SextVal == (i << (int)TypeShiftAmt)) {
4091      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
4092      static const unsigned IIDs[] = { // Intrinsic to use for each size.
4093        Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
4094        Intrinsic::ppc_altivec_vslw
4095      };
4096      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
4097      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
4098    }
4099
4100    // vsplti + srl self.
4101    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
4102      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
4103      static const unsigned IIDs[] = { // Intrinsic to use for each size.
4104        Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
4105        Intrinsic::ppc_altivec_vsrw
4106      };
4107      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
4108      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
4109    }
4110
4111    // vsplti + sra self.
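    // Note: this condition is identical to the srl case above, which returns
    // first, so this block is unreachable as written.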
4112    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
4113      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
4114      static const unsigned IIDs[] = { // Intrinsic to use for each size.
4115        Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
4116        Intrinsic::ppc_altivec_vsraw
4117      };
4118      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
4119      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
4120    }
4121
4122    // vsplti + rol self.
4123    if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
4124                         ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
4125      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
4126      static const unsigned IIDs[] = { // Intrinsic to use for each size.
4127        Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
4128        Intrinsic::ppc_altivec_vrlw
4129      };
4130      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
4131      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
4132    }
4133
4134    // t = vsplti c, result = vsldoi t, t, 1
4135    if (SextVal == ((i << 8) | (i < 0 ? 0xFF : 0))) {
4136      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
4137      return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl);
4138    }
4139    // t = vsplti c, result = vsldoi t, t, 2
4140    if (SextVal == ((i << 16) | (i < 0 ? 0xFFFF : 0))) {
4141      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
4142      return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl);
4143    }
4144    // t = vsplti c, result = vsldoi t, t, 3
4145    if (SextVal == ((i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
4146      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
4147      return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl);
4148    }
4149  }
4150
4151  // Three instruction sequences.
4152
4153  // Odd, in range [17,31] (lower values were handled above):  (vsplti C)-(vsplti -16).
4154  if (SextVal >= 0 && SextVal <= 31) {
4155    SDValue LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG, dl);
4156    SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
4157    LHS = DAG.getNode(ISD::SUB, dl, LHS.getValueType(), LHS, RHS);
4158    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS);
4159  }
4160  // Odd, in range [-31,-17] (higher values were handled above):  (vsplti C)+(vsplti -16).
4161  if (SextVal >= -31 && SextVal <= 0) {
4162    SDValue LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG, dl);
4163    SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
4164    LHS = DAG.getNode(ISD::ADD, dl, LHS.getValueType(), LHS, RHS);
4165    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS);
4166  }
4167
4168  return SDValue();
4169}
4170
4171/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
4172/// the specified operations to build the shuffle.
4173static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
4174                                      SDValue RHS, SelectionDAG &DAG,
4175                                      DebugLoc dl) {
4176  unsigned OpNum = (PFEntry >> 26) & 0x0F;
4177  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
4178  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
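  // PFEntry layout: bits 31-30 hold the cost, 29-26 the opcode, 25-13 LHSID,
  // and 12-0 RHSID.  Each ID encodes four source elements in base 9, where
  // digits 0-7 name a source element and 8 means undef.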
4179
4180  enum {
4181    OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
4182    OP_VMRGHW,
4183    OP_VMRGLW,
4184    OP_VSPLTISW0,
4185    OP_VSPLTISW1,
4186    OP_VSPLTISW2,
4187    OP_VSPLTISW3,
4188    OP_VSLDOI4,
4189    OP_VSLDOI8,
4190    OP_VSLDOI12
4191  };
4192
4193  if (OpNum == OP_COPY) {
4194    if (LHSID == (1*9+2)*9+3) return LHS;
4195    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
4196    return RHS;
4197  }
4198
4199  SDValue OpLHS, OpRHS;
4200  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
4201  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
4202
4203  int ShufIdxs[16];
4204  switch (OpNum) {
4205  default: llvm_unreachable("Unknown i32 permute!");
4206  case OP_VMRGHW:
4207    ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
4208    ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
4209    ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
4210    ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
4211    break;
4212  case OP_VMRGLW:
4213    ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
4214    ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
4215    ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
4216    ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
4217    break;
4218  case OP_VSPLTISW0:
4219    for (unsigned i = 0; i != 16; ++i)
4220      ShufIdxs[i] = (i&3)+0;
4221    break;
4222  case OP_VSPLTISW1:
4223    for (unsigned i = 0; i != 16; ++i)
4224      ShufIdxs[i] = (i&3)+4;
4225    break;
4226  case OP_VSPLTISW2:
4227    for (unsigned i = 0; i != 16; ++i)
4228      ShufIdxs[i] = (i&3)+8;
4229    break;
4230  case OP_VSPLTISW3:
4231    for (unsigned i = 0; i != 16; ++i)
4232      ShufIdxs[i] = (i&3)+12;
4233    break;
4234  case OP_VSLDOI4:
4235    return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
4236  case OP_VSLDOI8:
4237    return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
4238  case OP_VSLDOI12:
4239    return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
4240  }
4241  EVT VT = OpLHS.getValueType();
4242  OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
4243  OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
4244  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
4245  return DAG.getNode(ISD::BITCAST, dl, VT, T);
4246}
4247
4248/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
4249/// is a shuffle we can handle in a single instruction, return it.  Otherwise,
4250/// return the code it can be lowered into.  Worst case, it can always be
4251/// lowered into a vperm.
4252SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
4253                                               SelectionDAG &DAG) const {
4254  DebugLoc dl = Op.getDebugLoc();
4255  SDValue V1 = Op.getOperand(0);
4256  SDValue V2 = Op.getOperand(1);
4257  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
4258  EVT VT = Op.getValueType();
4259
4260  // Cases that are handled by instructions that take permute immediates
4261  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
4262  // selected by the instruction selector.
4263  if (V2.getOpcode() == ISD::UNDEF) {
4264    if (PPC::isSplatShuffleMask(SVOp, 1) ||
4265        PPC::isSplatShuffleMask(SVOp, 2) ||
4266        PPC::isSplatShuffleMask(SVOp, 4) ||
4267        PPC::isVPKUWUMShuffleMask(SVOp, true) ||
4268        PPC::isVPKUHUMShuffleMask(SVOp, true) ||
4269        PPC::isVSLDOIShuffleMask(SVOp, true) != -1 ||
4270        PPC::isVMRGLShuffleMask(SVOp, 1, true) ||
4271        PPC::isVMRGLShuffleMask(SVOp, 2, true) ||
4272        PPC::isVMRGLShuffleMask(SVOp, 4, true) ||
4273        PPC::isVMRGHShuffleMask(SVOp, 1, true) ||
4274        PPC::isVMRGHShuffleMask(SVOp, 2, true) ||
4275        PPC::isVMRGHShuffleMask(SVOp, 4, true)) {
4276      return Op;
4277    }
4278  }
4279
4280  // Altivec has a variety of "shuffle immediates" that take two vector inputs
4281  // and produce a fixed permutation.  If any of these match, do not lower to
4282  // VPERM.
4283  if (PPC::isVPKUWUMShuffleMask(SVOp, false) ||
4284      PPC::isVPKUHUMShuffleMask(SVOp, false) ||
4285      PPC::isVSLDOIShuffleMask(SVOp, false) != -1 ||
4286      PPC::isVMRGLShuffleMask(SVOp, 1, false) ||
4287      PPC::isVMRGLShuffleMask(SVOp, 2, false) ||
4288      PPC::isVMRGLShuffleMask(SVOp, 4, false) ||
4289      PPC::isVMRGHShuffleMask(SVOp, 1, false) ||
4290      PPC::isVMRGHShuffleMask(SVOp, 2, false) ||
4291      PPC::isVMRGHShuffleMask(SVOp, 4, false))
4292    return Op;
4293
4294  // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
4295  // perfect shuffle table to emit an optimal matching sequence.
4296  ArrayRef<int> PermMask = SVOp->getMask();
4297
4298  unsigned PFIndexes[4];
4299  bool isFourElementShuffle = true;
4300  for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
4301    unsigned EltNo = 8;   // Start out undef.
4302    for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
4303      if (PermMask[i*4+j] < 0)
4304        continue;   // Undef, ignore it.
4305
4306      unsigned ByteSource = PermMask[i*4+j];
4307      if ((ByteSource & 3) != j) {
4308        isFourElementShuffle = false;
4309        break;
4310      }
4311
4312      if (EltNo == 8) {
4313        EltNo = ByteSource/4;
4314      } else if (EltNo != ByteSource/4) {
4315        isFourElementShuffle = false;
4316        break;
4317      }
4318    }
4319    PFIndexes[i] = EltNo;
4320  }
4321
4322  // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
4323  // perfect shuffle vector to determine if it is cost effective to do this as
4324  // discrete instructions, or whether we should use a vperm.
4325  if (isFourElementShuffle) {
4326    // Compute the index in the perfect shuffle table.
4327    unsigned PFTableIndex =
4328      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
4329
4330    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
4331    unsigned Cost  = (PFEntry >> 30);
4332
4333    // Determining when to avoid vperm is tricky.  Many things affect the cost
4334    // of vperm, particularly how many times the perm mask needs to be computed.
4335    // For example, if the perm mask can be hoisted out of a loop or is already
4336    // used (perhaps because there are multiple permutes with the same shuffle
4337    // mask?) the vperm has a cost of 1.  OTOH, hoisting the permute mask out of
4338    // the loop requires an extra register.
4339    //
4340    // As a compromise, we only emit discrete instructions if the shuffle can be
4341    // generated in 3 or fewer operations.  When we have loop information
4342    // available, if this block is within a loop, we should avoid using vperm
4343    // for 3-operation perms and use a constant pool load instead.
4344    if (Cost < 3)
4345      return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
4346  }
4347
4348  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
4349  // vector that will get spilled to the constant pool.
4350  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
4351
4352  // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
4353  // that it is in input element units, not in bytes.  Convert now.
4354  EVT EltVT = V1.getValueType().getVectorElementType();
4355  unsigned BytesPerElement = EltVT.getSizeInBits()/8;
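  // e.g. for v4i32 (BytesPerElement == 4), mask element 5 expands to the four
  // control bytes 20, 21, 22, 23.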
4356
4357  SmallVector<SDValue, 16> ResultMask;
4358  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
4359    unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
4360
4361    for (unsigned j = 0; j != BytesPerElement; ++j)
4362      ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
4363                                           MVT::i32));
4364  }
4365
4366  SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
4367                                    &ResultMask[0], ResultMask.size());
4368  return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2, VPermMask);
4369}
4370
4371/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
4372/// altivec comparison.  If it is, return true and fill in Opc/isDot with
4373/// information about the intrinsic.
4374static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
4375                                  bool &isDot) {
4376  unsigned IntrinsicID =
4377    cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
4378  CompareOpc = -1;
4379  isDot = false;
4380  switch (IntrinsicID) {
4381  default: return false;
4382    // Comparison predicates.
4383  case Intrinsic::ppc_altivec_vcmpbfp_p:  CompareOpc = 966; isDot = 1; break;
4384  case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
4385  case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc =   6; isDot = 1; break;
4386  case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc =  70; isDot = 1; break;
4387  case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
4388  case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
4389  case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
4390  case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
4391  case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
4392  case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
4393  case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
4394  case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
4395  case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
4396
4397    // Normal Comparisons.
4398  case Intrinsic::ppc_altivec_vcmpbfp:    CompareOpc = 966; isDot = 0; break;
4399  case Intrinsic::ppc_altivec_vcmpeqfp:   CompareOpc = 198; isDot = 0; break;
4400  case Intrinsic::ppc_altivec_vcmpequb:   CompareOpc =   6; isDot = 0; break;
4401  case Intrinsic::ppc_altivec_vcmpequh:   CompareOpc =  70; isDot = 0; break;
4402  case Intrinsic::ppc_altivec_vcmpequw:   CompareOpc = 134; isDot = 0; break;
4403  case Intrinsic::ppc_altivec_vcmpgefp:   CompareOpc = 454; isDot = 0; break;
4404  case Intrinsic::ppc_altivec_vcmpgtfp:   CompareOpc = 710; isDot = 0; break;
4405  case Intrinsic::ppc_altivec_vcmpgtsb:   CompareOpc = 774; isDot = 0; break;
4406  case Intrinsic::ppc_altivec_vcmpgtsh:   CompareOpc = 838; isDot = 0; break;
4407  case Intrinsic::ppc_altivec_vcmpgtsw:   CompareOpc = 902; isDot = 0; break;
4408  case Intrinsic::ppc_altivec_vcmpgtub:   CompareOpc = 518; isDot = 0; break;
4409  case Intrinsic::ppc_altivec_vcmpgtuh:   CompareOpc = 582; isDot = 0; break;
4410  case Intrinsic::ppc_altivec_vcmpgtuw:   CompareOpc = 646; isDot = 0; break;
4411  }
4412  return true;
4413}
4414
4415/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
4416/// lower, do it, otherwise return null.
4417SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
4418                                                   SelectionDAG &DAG) const {
4419  // If this is a lowered altivec predicate compare, CompareOpc is set to the
4420  // opcode number of the comparison.
4421  DebugLoc dl = Op.getDebugLoc();
4422  int CompareOpc;
4423  bool isDot;
4424  if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
4425    return SDValue();    // Don't custom lower most intrinsics.
4426
4427  // If this is a non-dot comparison, make the VCMP node and we are done.
4428  if (!isDot) {
4429    SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
4430                              Op.getOperand(1), Op.getOperand(2),
4431                              DAG.getConstant(CompareOpc, MVT::i32));
4432    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
4433  }
4434
4435  // Create the PPCISD altivec 'dot' comparison node.
4436  SDValue Ops[] = {
4437    Op.getOperand(2),  // LHS
4438    Op.getOperand(3),  // RHS
4439    DAG.getConstant(CompareOpc, MVT::i32)
4440  };
4441  std::vector<EVT> VTs;
4442  VTs.push_back(Op.getOperand(2).getValueType());
4443  VTs.push_back(MVT::Glue);
4444  SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
4445
4446  // Now that we have the comparison, emit a copy from the CR to a GPR.
4447  // This is flagged to the above dot comparison.
4448  SDValue Flags = DAG.getNode(PPCISD::MFCR, dl, MVT::i32,
4449                                DAG.getRegister(PPC::CR6, MVT::i32),
4450                                CompNode.getValue(1));
4451
4452  // Unpack the result based on how the target uses it.
4453  unsigned BitNo;   // Bit # of CR6.
4454  bool InvertBit;   // Invert result?
4455  switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
4456  default:  // Can't happen, don't crash on invalid number though.
4457  case 0:   // Return the value of the EQ bit of CR6.
4458    BitNo = 0; InvertBit = false;
4459    break;
4460  case 1:   // Return the inverted value of the EQ bit of CR6.
4461    BitNo = 0; InvertBit = true;
4462    break;
4463  case 2:   // Return the value of the LT bit of CR6.
4464    BitNo = 2; InvertBit = false;
4465    break;
4466  case 3:   // Return the inverted value of the LT bit of CR6.
4467    BitNo = 2; InvertBit = true;
4468    break;
4469  }
4470
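  // The shift amount 8-(3-BitNo) is 5+BitNo: CR6 occupies bits 7:4 of the
  // 32-bit MFCR result, so EQ (BitNo == 0) comes from bit 5 and LT
  // (BitNo == 2) from bit 7.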
4471  // Shift the bit into the low position.
4472  Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
4473                      DAG.getConstant(8-(3-BitNo), MVT::i32));
4474  // Isolate the bit.
4475  Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
4476                      DAG.getConstant(1, MVT::i32));
4477
4478  // If we are supposed to, toggle the bit.
4479  if (InvertBit)
4480    Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
4481                        DAG.getConstant(1, MVT::i32));
4482  return Flags;
4483}
4484
4485SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
4486                                                   SelectionDAG &DAG) const {
4487  DebugLoc dl = Op.getDebugLoc();
4488  // Create a stack slot that is 16-byte aligned.
4489  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
4490  int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
4491  EVT PtrVT = getPointerTy();
4492  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
4493
4494  // Store the input value into Value#0 of the stack slot.
4495  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
4496                               Op.getOperand(0), FIdx, MachinePointerInfo(),
4497                               false, false, 0);
4498  // Load it out.
4499  return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(),
4500                     false, false, false, 0);
4501}
4502
4503SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
4504  DebugLoc dl = Op.getDebugLoc();
4505  if (Op.getValueType() == MVT::v4i32) {
4506    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
4507
4508    SDValue Zero  = BuildSplatI(  0, 1, MVT::v4i32, DAG, dl);
4509    SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt.
4510
4511    SDValue RHSSwap =   // = vrlw RHS, 16
4512      BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
4513
4514    // Shrinkify inputs to v8i16.
4515    LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
4516    RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
4517    RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
4518
4519    // Low parts multiplied together, generating 32-bit results (we ignore the
4520    // top parts).
4521    SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
4522                                        LHS, RHS, DAG, dl, MVT::v4i32);
4523
4524    SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
4525                                      LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
4526    // Shift the high parts up 16 bits.
4527    HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
4528                              Neg16, DAG, dl);
4529    return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
4530  } else if (Op.getValueType() == MVT::v8i16) {
4531    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
4532
4533    SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);
4534
4535    return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
4536                            LHS, RHS, Zero, DAG, dl);
4537  } else if (Op.getValueType() == MVT::v16i8) {
4538    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
4539
4540    // Multiply the even 8-bit parts, producing 16-bit sums.
4541    SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
4542                                           LHS, RHS, DAG, dl, MVT::v8i16);
4543    EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
4544
4545    // Multiply the odd 8-bit parts, producing 16-bit sums.
4546    SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
4547                                          LHS, RHS, DAG, dl, MVT::v8i16);
4548    OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
4549
4550    // Merge the results together.
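    // On big-endian AltiVec the low-order byte of halfword i is byte 2*i+1, so
    // picking bytes 2*i+1 (and 2*i+1+16 from the odd products) interleaves the
    // truncated 8-bit products.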
4551    int Ops[16];
4552    for (unsigned i = 0; i != 8; ++i) {
4553      Ops[i*2  ] = 2*i+1;
4554      Ops[i*2+1] = 2*i+1+16;
4555    }
4556    return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
4557  } else {
4558    llvm_unreachable("Unknown mul to lower!");
4559  }
4560}
4561
4562/// LowerOperation - Provide custom lowering hooks for some operations.
4563///
4564SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
4565  switch (Op.getOpcode()) {
4566  default: llvm_unreachable("Wasn't expecting to be able to lower this!");
4567  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
4568  case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
4569  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
4570  case ISD::GlobalTLSAddress:   llvm_unreachable("TLS not implemented for PPC");
4571  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
4572  case ISD::SETCC:              return LowerSETCC(Op, DAG);
4573  case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
4574  case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);
4575  case ISD::VASTART:
4576    return LowerVASTART(Op, DAG, PPCSubTarget);
4577
4578  case ISD::VAARG:
4579    return LowerVAARG(Op, DAG, PPCSubTarget);
4580
4581  case ISD::STACKRESTORE:       return LowerSTACKRESTORE(Op, DAG, PPCSubTarget);
4582  case ISD::DYNAMIC_STACKALLOC:
4583    return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
4584
4585  case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
4586  case ISD::FP_TO_UINT:
4587  case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG,
4588                                                       Op.getDebugLoc());
4589  case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
4590  case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);
4591
4592  // Lower 64-bit shifts.
4593  case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
4594  case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
4595  case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);
4596
4597  // Vector-related lowering.
4598  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
4599  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
4600  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
4601  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
4602  case ISD::MUL:                return LowerMUL(Op, DAG);
4603
4604  // Frame & Return address.
4605  case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
4606  case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
4607  }
4608}
4609
4610void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
4611                                           SmallVectorImpl<SDValue>&Results,
4612                                           SelectionDAG &DAG) const {
4613  const TargetMachine &TM = getTargetMachine();
4614  DebugLoc dl = N->getDebugLoc();
4615  switch (N->getOpcode()) {
4616  default:
4617    llvm_unreachable("Do not know how to custom type legalize this operation!");
4618  case ISD::VAARG: {
4619    if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
4620        || TM.getSubtarget<PPCSubtarget>().isPPC64())
4621      return;
4622
4623    EVT VT = N->getValueType(0);
4624
4625    if (VT == MVT::i64) {
4626      SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, PPCSubTarget);
4627
4628      Results.push_back(NewNode);
4629      Results.push_back(NewNode.getValue(1));
4630    }
4631    return;
4632  }
4633  case ISD::FP_ROUND_INREG: {
4634    assert(N->getValueType(0) == MVT::ppcf128);
4635    assert(N->getOperand(0).getValueType() == MVT::ppcf128);
4636    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
4637                             MVT::f64, N->getOperand(0),
4638                             DAG.getIntPtrConstant(0));
4639    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
4640                             MVT::f64, N->getOperand(0),
4641                             DAG.getIntPtrConstant(1));
4642
4643    // This sequence changes FPSCR to do round-to-zero, adds the two halves
4644    // of the long double, and puts FPSCR back the way it was.  We do not
4645    // actually model FPSCR.
4646    std::vector<EVT> NodeTys;
4647    SDValue Ops[4], Result, MFFSreg, InFlag, FPreg;
4648
4649    NodeTys.push_back(MVT::f64);   // Return register
4650    NodeTys.push_back(MVT::Glue);    // Returns a flag for later insns
4651    Result = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
4652    MFFSreg = Result.getValue(0);
4653    InFlag = Result.getValue(1);
4654
4655    NodeTys.clear();
4656    NodeTys.push_back(MVT::Glue);   // Returns a flag
4657    Ops[0] = DAG.getConstant(31, MVT::i32);
4658    Ops[1] = InFlag;
4659    Result = DAG.getNode(PPCISD::MTFSB1, dl, NodeTys, Ops, 2);
4660    InFlag = Result.getValue(0);
4661
4662    NodeTys.clear();
4663    NodeTys.push_back(MVT::Glue);   // Returns a flag
4664    Ops[0] = DAG.getConstant(30, MVT::i32);
4665    Ops[1] = InFlag;
4666    Result = DAG.getNode(PPCISD::MTFSB0, dl, NodeTys, Ops, 2);
4667    InFlag = Result.getValue(0);
4668
4669    NodeTys.clear();
4670    NodeTys.push_back(MVT::f64);    // result of add
4671    NodeTys.push_back(MVT::Glue);   // Returns a flag
4672    Ops[0] = Lo;
4673    Ops[1] = Hi;
4674    Ops[2] = InFlag;
4675    Result = DAG.getNode(PPCISD::FADDRTZ, dl, NodeTys, Ops, 3);
4676    FPreg = Result.getValue(0);
4677    InFlag = Result.getValue(1);
4678
4679    NodeTys.clear();
4680    NodeTys.push_back(MVT::f64);
4681    Ops[0] = DAG.getConstant(1, MVT::i32);
4682    Ops[1] = MFFSreg;
4683    Ops[2] = FPreg;
4684    Ops[3] = InFlag;
4685    Result = DAG.getNode(PPCISD::MTFSF, dl, NodeTys, Ops, 4);
4686    FPreg = Result.getValue(0);
4687
4688    // We know the low half is about to be thrown away, so just use something
4689    // convenient.
4690    Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
4691                                FPreg, FPreg));
4692    return;
4693  }
4694  case ISD::FP_TO_SINT:
4695    Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
4696    return;
4697  }
4698}
4699
4700
4701//===----------------------------------------------------------------------===//
4702//  Other Lowering Code
4703//===----------------------------------------------------------------------===//
4704
4705MachineBasicBlock *
4706PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
4707                                    bool is64bit, unsigned BinOpcode) const {
4708  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
4709  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
4710
4711  const BasicBlock *LLVM_BB = BB->getBasicBlock();
4712  MachineFunction *F = BB->getParent();
4713  MachineFunction::iterator It = BB;
4714  ++It;
4715
4716  unsigned dest = MI->getOperand(0).getReg();
4717  unsigned ptrA = MI->getOperand(1).getReg();
4718  unsigned ptrB = MI->getOperand(2).getReg();
4719  unsigned incr = MI->getOperand(3).getReg();
4720  DebugLoc dl = MI->getDebugLoc();
4721
4722  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
4723  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
4724  F->insert(It, loopMBB);
4725  F->insert(It, exitMBB);
4726  exitMBB->splice(exitMBB->begin(), BB,
4727                  llvm::next(MachineBasicBlock::iterator(MI)),
4728                  BB->end());
4729  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
4730
4731  MachineRegisterInfo &RegInfo = F->getRegInfo();
4732  unsigned TmpReg = (!BinOpcode) ? incr :
4733    RegInfo.createVirtualRegister(
4734       is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
4735                 (const TargetRegisterClass *) &PPC::GPRCRegClass);
4736
4737  //  thisMBB:
4738  //   ...
4739  //   fallthrough --> loopMBB
4740  BB->addSuccessor(loopMBB);
4741
4742  //  loopMBB:
4743  //   l[wd]arx dest, ptr
4744  //   add r0, dest, incr
4745  //   st[wd]cx. r0, ptr
4746  //   bne- loopMBB
4747  //   fallthrough --> exitMBB
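  // st[wd]cx. sets CR0's EQ bit only if the reservation taken by l[wd]arx is
  // still held, so the bne- retries the read-modify-write whenever another
  // store to the line intervened.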
4748  BB = loopMBB;
4749  BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
4750    .addReg(ptrA).addReg(ptrB);
4751  if (BinOpcode)
4752    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
4753  BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
4754    .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
4755  BuildMI(BB, dl, TII->get(PPC::BCC))
4756    .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
4757  BB->addSuccessor(loopMBB);
4758  BB->addSuccessor(exitMBB);
4759
4760  //  exitMBB:
4761  //   ...
4762  BB = exitMBB;
4763  return BB;
4764}
4765
4766MachineBasicBlock *
4767PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
4768                                            MachineBasicBlock *BB,
4769                                            bool is8bit,    // true = i8, false = i16
4770                                            unsigned BinOpcode) const {
4771  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
4772  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
4773  // In 64-bit mode we have to use 64-bit registers for addresses, even though
4774  // lwarx/stwcx. operate on 32-bit words.  With the 32-bit atomics we can use
4775  // the address registers without caring whether they hold 32 or 64 bits, but
4776  // here we do actual arithmetic on the addresses.
4777  bool is64bit = PPCSubTarget.isPPC64();
4778  unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0;
4779
4780  const BasicBlock *LLVM_BB = BB->getBasicBlock();
4781  MachineFunction *F = BB->getParent();
4782  MachineFunction::iterator It = BB;
4783  ++It;
4784
4785  unsigned dest = MI->getOperand(0).getReg();
4786  unsigned ptrA = MI->getOperand(1).getReg();
4787  unsigned ptrB = MI->getOperand(2).getReg();
4788  unsigned incr = MI->getOperand(3).getReg();
4789  DebugLoc dl = MI->getDebugLoc();
4790
4791  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
4792  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
4793  F->insert(It, loopMBB);
4794  F->insert(It, exitMBB);
4795  exitMBB->splice(exitMBB->begin(), BB,
4796                  llvm::next(MachineBasicBlock::iterator(MI)),
4797                  BB->end());
4798  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
4799
4800  MachineRegisterInfo &RegInfo = F->getRegInfo();
4801  const TargetRegisterClass *RC =
4802    is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
4803              (const TargetRegisterClass *) &PPC::GPRCRegClass;
4804  unsigned PtrReg = RegInfo.createVirtualRegister(RC);
4805  unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
4806  unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
4807  unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
4808  unsigned MaskReg = RegInfo.createVirtualRegister(RC);
4809  unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
4810  unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
4811  unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
4812  unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
4813  unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
4814  unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
4815  unsigned Ptr1Reg;
4816  unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);
4817
4818  //  thisMBB:
4819  //   ...
4820  //   fallthrough --> loopMBB
4821  BB->addSuccessor(loopMBB);
4822
4823  // The 4-byte load must be aligned, while a char or short may be
4824  // anywhere in the word.  Hence all this nasty bookkeeping code.
4825  //   add ptr1, ptrA, ptrB [copy if ptrA==0]
4826  //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
4827  //   xori shift, shift1, 24 [16]
4828  //   rlwinm ptr, ptr1, 0, 0, 29
4829  //   slw incr2, incr, shift
4830  //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
4831  //   slw mask, mask2, shift
4832  //  loopMBB:
4833  //   lwarx tmpDest, ptr
4834  //   add tmp, tmpDest, incr2
4835  //   andc tmp2, tmpDest, mask
4836  //   and tmp3, tmp, mask
4837  //   or tmp4, tmp3, tmp2
4838  //   stwcx. tmp4, ptr
4839  //   bne- loopMBB
4840  //   fallthrough --> exitMBB
4841  //   srw dest, tmpDest, shift
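  // e.g. for a byte at byte offset 1 of its word (big-endian): shift1 == 8,
  // shift == 8 ^ 24 == 16, so the byte lives in bits 23:16 and incr and mask
  // are slw'd into that lane.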
4842  if (ptrA != ZeroReg) {
4843    Ptr1Reg = RegInfo.createVirtualRegister(RC);
4844    BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
4845      .addReg(ptrA).addReg(ptrB);
4846  } else {
4847    Ptr1Reg = ptrB;
4848  }
4849  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
4850      .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
4851  BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
4852      .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
4853  if (is64bit)
4854    BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
4855      .addReg(Ptr1Reg).addImm(0).addImm(61);
4856  else
4857    BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
4858      .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
4859  BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
4860      .addReg(incr).addReg(ShiftReg);
4861  if (is8bit)
4862    BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
4863  else {
4864    BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
4865    BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535);
4866  }
4867  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
4868      .addReg(Mask2Reg).addReg(ShiftReg);
4869
4870  BB = loopMBB;
4871  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
4872    .addReg(ZeroReg).addReg(PtrReg);
4873  if (BinOpcode)
4874    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
4875      .addReg(Incr2Reg).addReg(TmpDestReg);
4876  BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
4877    .addReg(TmpDestReg).addReg(MaskReg);
4878  BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
4879    .addReg(TmpReg).addReg(MaskReg);
4880  BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
4881    .addReg(Tmp3Reg).addReg(Tmp2Reg);
4882  BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
4883    .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
4884  BuildMI(BB, dl, TII->get(PPC::BCC))
4885    .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
4886  BB->addSuccessor(loopMBB);
4887  BB->addSuccessor(exitMBB);
4888
4889  //  exitMBB:
4890  //   ...
4891  BB = exitMBB;
4892  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg)
4893    .addReg(ShiftReg);
4894  return BB;
4895}
4896
4897MachineBasicBlock *
4898PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
4899                                               MachineBasicBlock *BB) const {
4900  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
4901
4902  // To "insert" these instructions we actually have to insert their
4903  // control-flow patterns.
4904  const BasicBlock *LLVM_BB = BB->getBasicBlock();
4905  MachineFunction::iterator It = BB;
4906  ++It;
4907
4908  MachineFunction *F = BB->getParent();
4909
4910  if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
4911      MI->getOpcode() == PPC::SELECT_CC_I8 ||
4912      MI->getOpcode() == PPC::SELECT_CC_F4 ||
4913      MI->getOpcode() == PPC::SELECT_CC_F8 ||
4914      MI->getOpcode() == PPC::SELECT_CC_VRRC) {
4915
4916    // The incoming instruction knows the destination vreg to set, the
4917    // condition code register to branch on, the true/false values to
4918    // select between, and a branch opcode to use.
4919
4920    //  thisMBB:
4921    //  ...
4922    //   TrueVal = ...
4923    //   cmpTY ccX, r1, r2
4924    //   bCC copy1MBB
4925    //   fallthrough --> copy0MBB
4926    MachineBasicBlock *thisMBB = BB;
4927    MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
4928    MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
4929    unsigned SelectPred = MI->getOperand(4).getImm();
4930    DebugLoc dl = MI->getDebugLoc();
4931    F->insert(It, copy0MBB);
4932    F->insert(It, sinkMBB);
4933
4934    // Transfer the remainder of BB and its successor edges to sinkMBB.
4935    sinkMBB->splice(sinkMBB->begin(), BB,
4936                    llvm::next(MachineBasicBlock::iterator(MI)),
4937                    BB->end());
4938    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
4939
4940    // Next, add the true and fallthrough blocks as its successors.
4941    BB->addSuccessor(copy0MBB);
4942    BB->addSuccessor(sinkMBB);
4943
4944    BuildMI(BB, dl, TII->get(PPC::BCC))
4945      .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
4946
4947    //  copy0MBB:
4948    //   %FalseValue = ...
4949    //   # fallthrough to sinkMBB
4950    BB = copy0MBB;
4951
4952    // Update machine-CFG edges
4953    BB->addSuccessor(sinkMBB);
4954
4955    //  sinkMBB:
4956    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
4957    //  ...
4958    BB = sinkMBB;
4959    BuildMI(*BB, BB->begin(), dl,
4960            TII->get(PPC::PHI), MI->getOperand(0).getReg())
4961      .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
4962      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
4963  }
4964  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
4965    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
4966  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
4967    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
4968  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
4969    BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4);
4970  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
4971    BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8);
4972
4973  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
4974    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
4975  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
4976    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
4977  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
4978    BB = EmitAtomicBinary(MI, BB, false, PPC::AND);
4979  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
4980    BB = EmitAtomicBinary(MI, BB, true, PPC::AND8);
4981
4982  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
4983    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
4984  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
4985    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
4986  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
4987    BB = EmitAtomicBinary(MI, BB, false, PPC::OR);
4988  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
4989    BB = EmitAtomicBinary(MI, BB, true, PPC::OR8);
4990
4991  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
4992    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
4993  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
4994    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
4995  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
4996    BB = EmitAtomicBinary(MI, BB, false, PPC::XOR);
4997  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
4998    BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8);
4999
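  // Note that NAND is expanded here with andc (and-with-complement:
  // "andc ra, rs, rb" computes rs & ~rb) rather than an actual nand of the
  // loaded value and the operand.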
5000  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
5001    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ANDC);
5002  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
5003    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ANDC);
5004  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
5005    BB = EmitAtomicBinary(MI, BB, false, PPC::ANDC);
5006  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
5007    BB = EmitAtomicBinary(MI, BB, true, PPC::ANDC8);
5008
5009  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
5010    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
5011  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
5012    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
5013  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
5014    BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF);
5015  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
5016    BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8);
5017
5018  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8)
5019    BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
5020  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16)
5021    BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
5022  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32)
5023    BB = EmitAtomicBinary(MI, BB, false, 0);
5024  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64)
5025    BB = EmitAtomicBinary(MI, BB, true, 0);
5026
5027  else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
5028           MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) {
5029    bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
5030
5031    unsigned dest   = MI->getOperand(0).getReg();
5032    unsigned ptrA   = MI->getOperand(1).getReg();
5033    unsigned ptrB   = MI->getOperand(2).getReg();
5034    unsigned oldval = MI->getOperand(3).getReg();
5035    unsigned newval = MI->getOperand(4).getReg();
5036    DebugLoc dl     = MI->getDebugLoc();
5037
5038    MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
5039    MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
5040    MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
5041    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
5042    F->insert(It, loop1MBB);
5043    F->insert(It, loop2MBB);
5044    F->insert(It, midMBB);
5045    F->insert(It, exitMBB);
5046    exitMBB->splice(exitMBB->begin(), BB,
5047                    llvm::next(MachineBasicBlock::iterator(MI)),
5048                    BB->end());
5049    exitMBB->transferSuccessorsAndUpdatePHIs(BB);
5050
5051    //  thisMBB:
5052    //   ...
    //   fallthrough --> loop1MBB
5054    BB->addSuccessor(loop1MBB);
5055
5056    // loop1MBB:
5057    //   l[wd]arx dest, ptr
5058    //   cmp[wd] dest, oldval
5059    //   bne- midMBB
5060    // loop2MBB:
5061    //   st[wd]cx. newval, ptr
    //   bne- loop1MBB
    //   b exitMBB
    // midMBB:
    //   st[wd]cx. dest, ptr
    // exitMBB:
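    //
    // Note that on the compare-failure path, midMBB still issues a st[wd]cx.
    // of the value just loaded; the point of this conditional store is to
    // clear the reservation left by the l[wd]arx, not to modify memory.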
5067    BB = loop1MBB;
5068    BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
5069      .addReg(ptrA).addReg(ptrB);
5070    BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
5071      .addReg(oldval).addReg(dest);
5072    BuildMI(BB, dl, TII->get(PPC::BCC))
5073      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
5074    BB->addSuccessor(loop2MBB);
5075    BB->addSuccessor(midMBB);
5076
5077    BB = loop2MBB;
5078    BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
5079      .addReg(newval).addReg(ptrA).addReg(ptrB);
5080    BuildMI(BB, dl, TII->get(PPC::BCC))
5081      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
5082    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
5083    BB->addSuccessor(loop1MBB);
5084    BB->addSuccessor(exitMBB);
5085
5086    BB = midMBB;
5087    BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
5088      .addReg(dest).addReg(ptrA).addReg(ptrB);
5089    BB->addSuccessor(exitMBB);
5090
5091    //  exitMBB:
5092    //   ...
5093    BB = exitMBB;
5094  } else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
5095             MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
5096    // We must use 64-bit registers for addresses when targeting 64-bit,
5097    // since we're actually doing arithmetic on them.  Other registers
5098    // can be 32-bit.
5099    bool is64bit = PPCSubTarget.isPPC64();
5100    bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
5101
5102    unsigned dest   = MI->getOperand(0).getReg();
5103    unsigned ptrA   = MI->getOperand(1).getReg();
5104    unsigned ptrB   = MI->getOperand(2).getReg();
5105    unsigned oldval = MI->getOperand(3).getReg();
5106    unsigned newval = MI->getOperand(4).getReg();
5107    DebugLoc dl     = MI->getDebugLoc();
5108
5109    MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
5110    MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
5111    MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
5112    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
5113    F->insert(It, loop1MBB);
5114    F->insert(It, loop2MBB);
5115    F->insert(It, midMBB);
5116    F->insert(It, exitMBB);
5117    exitMBB->splice(exitMBB->begin(), BB,
5118                    llvm::next(MachineBasicBlock::iterator(MI)),
5119                    BB->end());
5120    exitMBB->transferSuccessorsAndUpdatePHIs(BB);
5121
5122    MachineRegisterInfo &RegInfo = F->getRegInfo();
5123    const TargetRegisterClass *RC =
5124      is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
5125                (const TargetRegisterClass *) &PPC::GPRCRegClass;
5126    unsigned PtrReg = RegInfo.createVirtualRegister(RC);
5127    unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
5128    unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
5129    unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
5130    unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
5131    unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
5132    unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
5133    unsigned MaskReg = RegInfo.createVirtualRegister(RC);
5134    unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
5135    unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
5136    unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
5137    unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
5138    unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
5139    unsigned Ptr1Reg;
5140    unsigned TmpReg = RegInfo.createVirtualRegister(RC);
5141    unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0;
5142    //  thisMBB:
5143    //   ...
    //   fallthrough --> loop1MBB
5145    BB->addSuccessor(loop1MBB);
5146
5147    // The 4-byte load must be aligned, while a char or short may be
5148    // anywhere in the word.  Hence all this nasty bookkeeping code.
5149    //   add ptr1, ptrA, ptrB [copy if ptrA==0]
5150    //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
5151    //   xori shift, shift1, 24 [16]
5152    //   rlwinm ptr, ptr1, 0, 0, 29
5153    //   slw newval2, newval, shift
    //   slw oldval2, oldval, shift
5155    //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
5156    //   slw mask, mask2, shift
5157    //   and newval3, newval2, mask
5158    //   and oldval3, oldval2, mask
5159    // loop1MBB:
5160    //   lwarx tmpDest, ptr
5161    //   and tmp, tmpDest, mask
5162    //   cmpw tmp, oldval3
5163    //   bne- midMBB
5164    // loop2MBB:
5165    //   andc tmp2, tmpDest, mask
5166    //   or tmp4, tmp2, newval3
5167    //   stwcx. tmp4, ptr
5168    //   bne- loop1MBB
    //   b exitMBB
    // midMBB:
    //   stwcx. tmpDest, ptr
    // exitMBB:
    //   srw dest, tmp, shift
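    //
    // For example, with a big-endian word and an 8-bit operand whose address
    // has (ptr1 & 3) == 1: shift1 = (ptr1 << 3) & 0x18 = 8, and
    // shift = shift1 ^ 24 = 16, so the byte occupies bits 23:16 of the
    // aligned word, and slw/srw by 16 move values into and out of position.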
5174    if (ptrA != ZeroReg) {
5175      Ptr1Reg = RegInfo.createVirtualRegister(RC);
5176      BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
5177        .addReg(ptrA).addReg(ptrB);
5178    } else {
5179      Ptr1Reg = ptrB;
5180    }
5181    BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
5182        .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
5183    BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
5184        .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
5185    if (is64bit)
5186      BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
5187        .addReg(Ptr1Reg).addImm(0).addImm(61);
5188    else
5189      BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
5190        .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
5191    BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
5192        .addReg(newval).addReg(ShiftReg);
5193    BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
5194        .addReg(oldval).addReg(ShiftReg);
5195    if (is8bit)
5196      BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
5197    else {
5198      BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
5199      BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
5200        .addReg(Mask3Reg).addImm(65535);
5201    }
5202    BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
5203        .addReg(Mask2Reg).addReg(ShiftReg);
5204    BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
5205        .addReg(NewVal2Reg).addReg(MaskReg);
5206    BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
5207        .addReg(OldVal2Reg).addReg(MaskReg);
5208
5209    BB = loop1MBB;
5210    BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
5211        .addReg(ZeroReg).addReg(PtrReg);
    BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
5213        .addReg(TmpDestReg).addReg(MaskReg);
5214    BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
5215        .addReg(TmpReg).addReg(OldVal3Reg);
5216    BuildMI(BB, dl, TII->get(PPC::BCC))
5217        .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
5218    BB->addSuccessor(loop2MBB);
5219    BB->addSuccessor(midMBB);
5220
5221    BB = loop2MBB;
    BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
        .addReg(TmpDestReg).addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
5225        .addReg(Tmp2Reg).addReg(NewVal3Reg);
5226    BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
5227        .addReg(ZeroReg).addReg(PtrReg);
5228    BuildMI(BB, dl, TII->get(PPC::BCC))
5229      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
5230    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
5231    BB->addSuccessor(loop1MBB);
5232    BB->addSuccessor(exitMBB);
5233
5234    BB = midMBB;
5235    BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
5236      .addReg(ZeroReg).addReg(PtrReg);
5237    BB->addSuccessor(exitMBB);
5238
5239    //  exitMBB:
5240    //   ...
5241    BB = exitMBB;
    BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
      .addReg(TmpReg).addReg(ShiftReg);
5244  } else {
5245    llvm_unreachable("Unexpected instr type to insert");
5246  }
5247
5248  MI->eraseFromParent();   // The pseudo instruction is gone now.
5249  return BB;
5250}
5251
5252//===----------------------------------------------------------------------===//
5253// Target Optimization Hooks
5254//===----------------------------------------------------------------------===//
5255
5256SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
5257                                             DAGCombinerInfo &DCI) const {
5258  const TargetMachine &TM = getTargetMachine();
5259  SelectionDAG &DAG = DCI.DAG;
5260  DebugLoc dl = N->getDebugLoc();
5261  switch (N->getOpcode()) {
5262  default: break;
5263  case PPCISD::SHL:
5264    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
5265      if (C->isNullValue())   // 0 << V -> 0.
5266        return N->getOperand(0);
5267    }
5268    break;
5269  case PPCISD::SRL:
5270    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
5271      if (C->isNullValue())   // 0 >>u V -> 0.
5272        return N->getOperand(0);
5273    }
5274    break;
5275  case PPCISD::SRA:
5276    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
5277      if (C->isNullValue() ||   //  0 >>s V -> 0.
5278          C->isAllOnesValue())    // -1 >>s V -> -1.
5279        return N->getOperand(0);
5280    }
5281    break;
5282
5283  case ISD::SINT_TO_FP:
5284    if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
5285      if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
5286        // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
5287        // We allow the src/dst to be either f32/f64, but the intermediate
5288        // type must be i64.
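        // fctidz (float convert to integer doubleword, round toward zero) and
        // fcfid (float convert from integer doubleword) both operate entirely
        // in FPRs, so the i64 intermediate never touches memory or the GPRs.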
5289        if (N->getOperand(0).getValueType() == MVT::i64 &&
5290            N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) {
5291          SDValue Val = N->getOperand(0).getOperand(0);
5292          if (Val.getValueType() == MVT::f32) {
5293            Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
5294            DCI.AddToWorklist(Val.getNode());
5295          }
5296
5297          Val = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Val);
5298          DCI.AddToWorklist(Val.getNode());
5299          Val = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Val);
5300          DCI.AddToWorklist(Val.getNode());
5301          if (N->getValueType(0) == MVT::f32) {
5302            Val = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Val,
5303                              DAG.getIntPtrConstant(0));
5304            DCI.AddToWorklist(Val.getNode());
5305          }
5306          return Val;
        } else if (N->getOperand(0).getValueType() == MVT::i32) {
          // FIXME: If the intermediate type is i32, we could avoid the
          // load/store here too, but that case is not handled yet.
        }
5311      }
5312    }
5313    break;
5314  case ISD::STORE:
5315    // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
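    // fctiwz leaves the converted 32-bit integer in the low word of an FPR,
    // and stfiwx stores that word directly, avoiding a GPR round trip.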
5316    if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
5317        !cast<StoreSDNode>(N)->isTruncatingStore() &&
5318        N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
5319        N->getOperand(1).getValueType() == MVT::i32 &&
5320        N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
5321      SDValue Val = N->getOperand(1).getOperand(0);
5322      if (Val.getValueType() == MVT::f32) {
5323        Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
5324        DCI.AddToWorklist(Val.getNode());
5325      }
5326      Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
5327      DCI.AddToWorklist(Val.getNode());
5328
5329      Val = DAG.getNode(PPCISD::STFIWX, dl, MVT::Other, N->getOperand(0), Val,
5330                        N->getOperand(2), N->getOperand(3));
5331      DCI.AddToWorklist(Val.getNode());
5332      return Val;
5333    }
5334
5335    // Turn STORE (BSWAP) -> sthbrx/stwbrx.
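    // sthbrx/stwbrx store a halfword/word with its bytes reversed, so the
    // swap happens for free as part of the store.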
5336    if (cast<StoreSDNode>(N)->isUnindexed() &&
5337        N->getOperand(1).getOpcode() == ISD::BSWAP &&
5338        N->getOperand(1).getNode()->hasOneUse() &&
5339        (N->getOperand(1).getValueType() == MVT::i32 ||
5340         N->getOperand(1).getValueType() == MVT::i16)) {
5341      SDValue BSwapOp = N->getOperand(1).getOperand(0);
      // Do an any-extend to 32 bits if this is a half-word input.
5343      if (BSwapOp.getValueType() == MVT::i16)
5344        BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
5345
5346      SDValue Ops[] = {
5347        N->getOperand(0), BSwapOp, N->getOperand(2),
5348        DAG.getValueType(N->getOperand(1).getValueType())
5349      };
5350      return
5351        DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
5352                                Ops, array_lengthof(Ops),
5353                                cast<StoreSDNode>(N)->getMemoryVT(),
5354                                cast<StoreSDNode>(N)->getMemOperand());
5355    }
5356    break;
5357  case ISD::BSWAP:
5358    // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
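    // lhbrx/lwbrx are byte-reversing loads, so the bswap folds into the
    // load itself.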
5359    if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
5360        N->getOperand(0).hasOneUse() &&
5361        (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16)) {
5362      SDValue Load = N->getOperand(0);
5363      LoadSDNode *LD = cast<LoadSDNode>(Load);
5364      // Create the byte-swapping load.
5365      SDValue Ops[] = {
5366        LD->getChain(),    // Chain
5367        LD->getBasePtr(),  // Ptr
5368        DAG.getValueType(N->getValueType(0)) // VT
5369      };
5370      SDValue BSLoad =
5371        DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
5372                                DAG.getVTList(MVT::i32, MVT::Other), Ops, 3,
5373                                LD->getMemoryVT(), LD->getMemOperand());
5374
5375      // If this is an i16 load, insert the truncate.
5376      SDValue ResVal = BSLoad;
5377      if (N->getValueType(0) == MVT::i16)
5378        ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
5379
5380      // First, combine the bswap away.  This makes the value produced by the
5381      // load dead.
5382      DCI.CombineTo(N, ResVal);
      // Next, combine the load away: we give it a bogus result value but a
      // real chain result.  The result value is dead because the bswap is
      // dead.
5385      // chain result.  The result value is dead because the bswap is dead.
5386      DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
5387
5388      // Return N so it doesn't get rechecked!
5389      return SDValue(N, 0);
5390    }
5391
5392    break;
5393  case PPCISD::VCMP: {
    // If a VCMPo node already exists with exactly the same operands as this
    // node, use its result instead of this node (VCMPo computes both a CR6
    // result and a normal vector output).
    //
5398    if (!N->getOperand(0).hasOneUse() &&
5399        !N->getOperand(1).hasOneUse() &&
5400        !N->getOperand(2).hasOneUse()) {
5401
5402      // Scan all of the users of the LHS, looking for VCMPo's that match.
5403      SDNode *VCMPoNode = 0;
5404
5405      SDNode *LHSN = N->getOperand(0).getNode();
5406      for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
5407           UI != E; ++UI)
5408        if (UI->getOpcode() == PPCISD::VCMPo &&
5409            UI->getOperand(1) == N->getOperand(1) &&
5410            UI->getOperand(2) == N->getOperand(2) &&
5411            UI->getOperand(0) == N->getOperand(0)) {
5412          VCMPoNode = *UI;
5413          break;
5414        }
5415
      // If there is no VCMPo node, or if its flag result is unused, don't
      // transform this.
5418      if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
5419        break;
5420
5421      // Look at the (necessarily single) use of the flag value.  If it has a
5422      // chain, this transformation is more complex.  Note that multiple things
5423      // could use the value result, which we should ignore.
5424      SDNode *FlagUser = 0;
5425      for (SDNode::use_iterator UI = VCMPoNode->use_begin();
5426           FlagUser == 0; ++UI) {
5427        assert(UI != VCMPoNode->use_end() && "Didn't find user!");
5428        SDNode *User = *UI;
5429        for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
5430          if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
5431            FlagUser = User;
5432            break;
5433          }
5434        }
5435      }
5436
      // If the user is an MFCR instruction, we know this is safe.  Otherwise
      // we give up for now.
5439      if (FlagUser->getOpcode() == PPCISD::MFCR)
5440        return SDValue(VCMPoNode, 0);
5441    }
5442    break;
5443  }
5444  case ISD::BR_CC: {
5445    // If this is a branch on an altivec predicate comparison, lower this so
    // that we don't have to do an MFCR: instead, branch directly on CR6. This
5447    // lowering is done pre-legalize, because the legalizer lowers the predicate
5448    // compare down to code that is difficult to reassemble.
5449    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
5450    SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
5451    int CompareOpc;
5452    bool isDot;
5453
5454    if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
5455        isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
5456        getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
5457      assert(isDot && "Can't compare against a vector result!");
5458
5459      // If this is a comparison against something other than 0/1, then we know
5460      // that the condition is never/always true.
5461      unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
5462      if (Val != 0 && Val != 1) {
5463        if (CC == ISD::SETEQ)      // Cond never true, remove branch.
5464          return N->getOperand(0);
5465        // Always !=, turn it into an unconditional branch.
5466        return DAG.getNode(ISD::BR, dl, MVT::Other,
5467                           N->getOperand(0), N->getOperand(4));
5468      }
5469
5470      bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
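      // The predicate intrinsic returns 0 or 1, so each of (pred == 1),
      // (pred != 0), (pred == 0) and (pred != 1) reduces to branching on
      // either the predicate bit or its inverse; the xor above picks which.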
5471
5472      // Create the PPCISD altivec 'dot' comparison node.
5473      std::vector<EVT> VTs;
5474      SDValue Ops[] = {
5475        LHS.getOperand(2),  // LHS of compare
5476        LHS.getOperand(3),  // RHS of compare
5477        DAG.getConstant(CompareOpc, MVT::i32)
5478      };
5479      VTs.push_back(LHS.getOperand(2).getValueType());
5480      VTs.push_back(MVT::Glue);
5481      SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
5482
5483      // Unpack the result based on how the target uses it.
5484      PPC::Predicate CompOpc;
5485      switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
5486      default:  // Can't happen, don't crash on invalid number though.
5487      case 0:   // Branch on the value of the EQ bit of CR6.
5488        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
5489        break;
5490      case 1:   // Branch on the inverted value of the EQ bit of CR6.
5491        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
5492        break;
5493      case 2:   // Branch on the value of the LT bit of CR6.
5494        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
5495        break;
5496      case 3:   // Branch on the inverted value of the LT bit of CR6.
5497        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
5498        break;
5499      }
5500
5501      return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
5502                         DAG.getConstant(CompOpc, MVT::i32),
5503                         DAG.getRegister(PPC::CR6, MVT::i32),
5504                         N->getOperand(4), CompNode.getValue(1));
5505    }
5506    break;
5507  }
5508  }
5509
5510  return SDValue();
5511}
5512
5513//===----------------------------------------------------------------------===//
5514// Inline Assembly Support
5515//===----------------------------------------------------------------------===//
5516
5517void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
5518                                                       APInt &KnownZero,
5519                                                       APInt &KnownOne,
5520                                                       const SelectionDAG &DAG,
5521                                                       unsigned Depth) const {
5522  KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
5523  switch (Op.getOpcode()) {
5524  default: break;
5525  case PPCISD::LBRX: {
    // The i16 form (lhbrx) zero-extends its result, so the top 16 bits are
    // known to be cleared.
5527    if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
5528      KnownZero = 0xFFFF0000;
5529    break;
5530  }
5531  case ISD::INTRINSIC_WO_CHAIN: {
5532    switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
5533    default: break;
5534    case Intrinsic::ppc_altivec_vcmpbfp_p:
5535    case Intrinsic::ppc_altivec_vcmpeqfp_p:
5536    case Intrinsic::ppc_altivec_vcmpequb_p:
5537    case Intrinsic::ppc_altivec_vcmpequh_p:
5538    case Intrinsic::ppc_altivec_vcmpequw_p:
5539    case Intrinsic::ppc_altivec_vcmpgefp_p:
5540    case Intrinsic::ppc_altivec_vcmpgtfp_p:
5541    case Intrinsic::ppc_altivec_vcmpgtsb_p:
5542    case Intrinsic::ppc_altivec_vcmpgtsh_p:
5543    case Intrinsic::ppc_altivec_vcmpgtsw_p:
5544    case Intrinsic::ppc_altivec_vcmpgtub_p:
5545    case Intrinsic::ppc_altivec_vcmpgtuh_p:
5546    case Intrinsic::ppc_altivec_vcmpgtuw_p:
5547      KnownZero = ~1U;  // All bits but the low one are known to be zero.
5548      break;
5549    }
5550  }
5551  }
5552}
5553
5554
5555/// getConstraintType - Given a constraint, return the type of
5556/// constraint it is for this target.
5557PPCTargetLowering::ConstraintType
5558PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
5559  if (Constraint.size() == 1) {
5560    switch (Constraint[0]) {
5561    default: break;
    case 'b':   // Base register (R1-R31)
    case 'r':   // General-purpose register
    case 'f':   // Floating-point register
    case 'v':   // Altivec vector register
    case 'y':   // Condition register
5567      return C_RegisterClass;
5568    }
5569  }
5570  return TargetLowering::getConstraintType(Constraint);
5571}
5572
5573/// Examine constraint type and operand type and determine a weight value.
5574/// This object must already have been set up with the operand type
5575/// and the current alternative constraint selected.
5576TargetLowering::ConstraintWeight
5577PPCTargetLowering::getSingleConstraintMatchWeight(
5578    AsmOperandInfo &info, const char *constraint) const {
5579  ConstraintWeight weight = CW_Invalid;
5580  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
5583  if (CallOperandVal == NULL)
5584    return CW_Default;
5585  Type *type = CallOperandVal->getType();
5586  // Look at the constraint type.
5587  switch (*constraint) {
5588  default:
5589    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
5590    break;
5591  case 'b':
5592    if (type->isIntegerTy())
5593      weight = CW_Register;
5594    break;
5595  case 'f':
5596    if (type->isFloatTy())
5597      weight = CW_Register;
5598    break;
5599  case 'd':
5600    if (type->isDoubleTy())
5601      weight = CW_Register;
5602    break;
5603  case 'v':
5604    if (type->isVectorTy())
5605      weight = CW_Register;
5606    break;
5607  case 'y':
5608    weight = CW_Register;
5609    break;
5610  }
5611  return weight;
5612}
5613
5614std::pair<unsigned, const TargetRegisterClass*>
5615PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
5616                                                EVT VT) const {
5617  if (Constraint.size() == 1) {
5618    // GCC RS6000 Constraint Letters
5619    switch (Constraint[0]) {
5620    case 'b':   // R1-R31
5621    case 'r':   // R0-R31
5622      if (VT == MVT::i64 && PPCSubTarget.isPPC64())
5623        return std::make_pair(0U, &PPC::G8RCRegClass);
5624      return std::make_pair(0U, &PPC::GPRCRegClass);
5625    case 'f':
5626      if (VT == MVT::f32)
5627        return std::make_pair(0U, &PPC::F4RCRegClass);
5628      if (VT == MVT::f64)
5629        return std::make_pair(0U, &PPC::F8RCRegClass);
5630      break;
5631    case 'v':
5632      return std::make_pair(0U, &PPC::VRRCRegClass);
5633    case 'y':   // crrc
5634      return std::make_pair(0U, &PPC::CRRCRegClass);
5635    }
5636  }
5637
5638  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
5639}
5640
5641
5642/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
5643/// vector.  If it is invalid, don't add anything to Ops.
5644void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
5645                                                     std::string &Constraint,
                                                     std::vector<SDValue> &Ops,
5647                                                     SelectionDAG &DAG) const {
5648  SDValue Result(0,0);
5649
5650  // Only support length 1 constraints.
5651  if (Constraint.length() > 1) return;
5652
5653  char Letter = Constraint[0];
5654  switch (Letter) {
5655  default: break;
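  // For example, asm("addi %0,%1,%2" : "=r"(d) : "r"(a), "I"(17)) reaches
  // the 'I' case below, which accepts 17 because it fits in a signed 16-bit
  // immediate.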
5656  case 'I':
5657  case 'J':
5658  case 'K':
5659  case 'L':
5660  case 'M':
5661  case 'N':
5662  case 'O':
5663  case 'P': {
5664    ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
5665    if (!CST) return; // Must be an immediate to match.
5666    unsigned Value = CST->getZExtValue();
5667    switch (Letter) {
5668    default: llvm_unreachable("Unknown constraint letter!");
5669    case 'I':  // "I" is a signed 16-bit constant.
5670      if ((short)Value == (int)Value)
5671        Result = DAG.getTargetConstant(Value, Op.getValueType());
5672      break;
5673    case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
5674    case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
5675      if ((short)Value == 0)
5676        Result = DAG.getTargetConstant(Value, Op.getValueType());
5677      break;
5678    case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
5679      if ((Value >> 16) == 0)
5680        Result = DAG.getTargetConstant(Value, Op.getValueType());
5681      break;
5682    case 'M':  // "M" is a constant that is greater than 31.
5683      if (Value > 31)
5684        Result = DAG.getTargetConstant(Value, Op.getValueType());
5685      break;
5686    case 'N':  // "N" is a positive constant that is an exact power of two.
5687      if ((int)Value > 0 && isPowerOf2_32(Value))
5688        Result = DAG.getTargetConstant(Value, Op.getValueType());
5689      break;
5690    case 'O':  // "O" is the constant zero.
5691      if (Value == 0)
5692        Result = DAG.getTargetConstant(Value, Op.getValueType());
5693      break;
5694    case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
5695      if ((short)-Value == (int)-Value)
5696        Result = DAG.getTargetConstant(Value, Op.getValueType());
5697      break;
5698    }
5699    break;
5700  }
5701  }
5702
5703  if (Result.getNode()) {
5704    Ops.push_back(Result);
5705    return;
5706  }
5707
5708  // Handle standard constraint letters.
5709  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
5710}
5711
5712// isLegalAddressingMode - Return true if the addressing mode represented
5713// by AM is legal for this target, for a load/store of the specified type.
5714bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
5715                                              Type *Ty) const {
5716  // FIXME: PPC does not allow r+i addressing modes for vectors!
5717
5718  // PPC allows a sign-extended 16-bit immediate field.
5719  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
5720    return false;
5721
5722  // No global is ever allowed as a base.
5723  if (AM.BaseGV)
5724    return false;
5725
  // PPC only supports r+r,
5727  switch (AM.Scale) {
5728  case 0:  // "r+i" or just "i", depending on HasBaseReg.
5729    break;
5730  case 1:
5731    if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
5732      return false;
5733    // Otherwise we have r+r or r+i.
5734    break;
5735  case 2:
5736    if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r  or  2*r+i is not allowed.
5737      return false;
5738    // Allow 2*r as r+r.
5739    break;
5740  default:
5741    // No other scales are supported.
5742    return false;
5743  }
5744
5745  return true;
5746}
5747
5748/// isLegalAddressImmediate - Return true if the integer value can be used
5749/// as the offset of the target addressing mode for load / store of the
5750/// given type.
bool PPCTargetLowering::isLegalAddressImmediate(int64_t V, Type *Ty) const {
5752  // PPC allows a sign-extended 16-bit immediate field.
5753  return (V > -(1 << 16) && V < (1 << 16)-1);
5754}
5755
bool PPCTargetLowering::isLegalAddressImmediate(GlobalValue *GV) const {
5757  return false;
5758}
5759
5760SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
5761                                           SelectionDAG &DAG) const {
5762  MachineFunction &MF = DAG.getMachineFunction();
5763  MachineFrameInfo *MFI = MF.getFrameInfo();
5764  MFI->setReturnAddressIsTaken(true);
5765
5766  DebugLoc dl = Op.getDebugLoc();
5767  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
5768
5769  // Make sure the function does not optimize away the store of the RA to
5770  // the stack.
5771  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
5772  FuncInfo->setLRStoreRequired();
5773  bool isPPC64 = PPCSubTarget.isPPC64();
5774  bool isDarwinABI = PPCSubTarget.isDarwinABI();
5775
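  // For a nonzero depth, chase the frame chain to the requested ancestor
  // frame and load the saved LR from that frame's ABI return-save slot.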
5776  if (Depth > 0) {
5777    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
    SDValue Offset =
      DAG.getConstant(PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI),
                      isPPC64 ? MVT::i64 : MVT::i32);
5782    return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
5783                       DAG.getNode(ISD::ADD, dl, getPointerTy(),
5784                                   FrameAddr, Offset),
5785                       MachinePointerInfo(), false, false, false, 0);
5786  }
5787
5788  // Just load the return address off the stack.
5789  SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
5790  return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
5791                     RetAddrFI, MachinePointerInfo(), false, false, false, 0);
5792}
5793
5794SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
5795                                          SelectionDAG &DAG) const {
5796  DebugLoc dl = Op.getDebugLoc();
5797  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
5798
5799  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
5800  bool isPPC64 = PtrVT == MVT::i64;
5801
5802  MachineFunction &MF = DAG.getMachineFunction();
5803  MachineFrameInfo *MFI = MF.getFrameInfo();
5804  MFI->setFrameAddressIsTaken(true);
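  // Use R31/X31 as the frame address only when a frame pointer is actually
  // in use: frame-pointer elimination disabled or variable-sized objects
  // present, a nonzero frame size, and not a naked function.  Otherwise the
  // stack pointer (R1/X1) is the frame address.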
5805  bool is31 = (getTargetMachine().Options.DisableFramePointerElim(MF) ||
5806               MFI->hasVarSizedObjects()) &&
5807                  MFI->getStackSize() &&
5808                  !MF.getFunction()->hasFnAttr(Attribute::Naked);
5809  unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) :
5810                                (is31 ? PPC::R31 : PPC::R1);
5811  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
5812                                         PtrVT);
5813  while (Depth--)
5814    FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
5815                            FrameAddr, MachinePointerInfo(), false, false,
5816                            false, 0);
5817  return FrameAddr;
5818}
5819
5820bool
5821PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
5822  // The PowerPC target isn't yet aware of offsets.
5823  return false;
5824}
5825
5826/// getOptimalMemOpType - Returns the target specific optimal type for load
5827/// and store operations as a result of memset, memcpy, and memmove
/// lowering. If DstAlign is zero, that means any destination alignment can
/// satisfy the constraint. Similarly, if SrcAlign is zero, there is no need
/// to check it against an alignment requirement, probably because the source
/// does not need to be loaded. If 'IsZeroVal' is true, that means it's safe
/// to return a
5833/// non-scalar-integer type, e.g. empty string source, constant, or loaded
5834/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
5835/// constant so it does not need to be loaded.
5836/// It returns EVT::Other if the type should be determined using generic
5837/// target-independent logic.
5838EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
5839                                           unsigned DstAlign, unsigned SrcAlign,
5840                                           bool IsZeroVal,
5841                                           bool MemcpyStrSrc,
5842                                           MachineFunction &MF) const {
  return PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
5848}
5849
5850Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
5851  unsigned Directive = PPCSubTarget.getDarwinDirective();
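  // The 440 and A2 are in-order cores for which ILP-oriented scheduling
  // tends to help, so prefer it there.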
5852  if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2)
5853    return Sched::ILP;
5854
5855  return TargetLowering::getSchedulingPreference(N);
5856}
5857
5858