//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                                       CCValAssign::LocInfo &LocInfo,
                                       ISD::ArgFlagsTy &ArgFlags,
                                       CCState &State);
static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
                                              MVT &LocVT,
                                              CCValAssign::LocInfo &LocInfo,
                                              ISD::ArgFlagsTy &ArgFlags,
                                              CCState &State);
static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
                                                MVT &LocVT,
                                                CCValAssign::LocInfo &LocInfo,
                                                ISD::ArgFlagsTy &ArgFlags,
                                                CCState &State);

static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);

static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);

static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
  if (TM.getSubtargetImpl()->isDarwin())
    return new TargetLoweringObjectFileMachO();

  return new TargetLoweringObjectFileELF();
}

PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
  : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
  const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
  PPCRegInfo = TM.getRegisterInfo();

  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  bool isPPC64 = Subtarget->isPPC64();
  setMinStackArgumentAlignment(isPPC64 ? 8 : 4);

  // Set up the register classes.
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
  addRegisterClass(MVT::f64, &PPC::F8RCRegClass);

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-inc loads and stores.
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);

  // This is used in the ppcf128->int sequence.  Note it has different semantics
  // from FP_ROUND:  that rounds to nearest, this rounds to zero.
  setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);

  // We do not currently implement these libm ops for PowerPC.
  setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
  setOperationAction(ISD::FCEIL,  MVT::ppcf128, Expand);
  setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
  setOperationAction(ISD::FRINT,  MVT::ppcf128, Expand);
  setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);

  // PowerPC has no SREM/UREM instructions.
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod/pow.
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FPOW , MVT::f64, Expand);
  setOperationAction(ISD::FMA  , MVT::f64, Legal);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);
  setOperationAction(ISD::FPOW , MVT::f32, Expand);
  setOperationAction(ISD::FMA  , MVT::f32, Legal);

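  // FLT_ROUNDS_ is custom lowered: the current rounding mode lives in the
  // FPSCR and has to be read out and translated at run time.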
  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

  // If the target doesn't support a hardware square root, expand FSQRT.
  if (!Subtarget->hasFSQRT()) {
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);
  }

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // PowerPC does not have BSWAP, CTPOP or CTTZ.
  setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
  setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);

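  // Cores with POPCNTD have a native 64-bit population count; promote the
  // 32-bit CTPOP to that instruction, and expand CTPOP everywhere else.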
  if (Subtarget->hasPOPCNTD()) {
    setOperationAction(ISD::CTPOP, MVT::i32  , Promote);
    setOperationAction(ISD::CTPOP, MVT::i64  , Legal);
  } else {
    setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
    setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
  }

  // PowerPC does not have ROTR.
  setOperationAction(ISD::ROTR, MVT::i32   , Expand);
  setOperationAction(ISD::ROTR, MVT::i64   , Expand);

  // PowerPC does not have SELECT.
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit.
  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND, which requires SetCC.
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT,  MVT::Other, Expand);

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

  // PowerPC does not have [U|S]INT_TO_FP.
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

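  // Expanding these bitcasts moves the value through memory (a stack slot);
  // these subtargets have no direct GPR<->FPR move instruction.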
  setOperationAction(ISD::BITCAST, MVT::f32, Expand);
  setOperationAction(ISD::BITCAST, MVT::i32, Expand);
  setOperationAction(ISD::BITCAST, MVT::i64, Expand);
  setOperationAction(ISD::BITCAST, MVT::f64, Expand);

  // We cannot sextinreg(i1).  Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
  setOperationAction(ISD::EHSELECTION,   MVT::i64, Expand);
  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
  setOperationAction(ISD::EHSELECTION,   MVT::i32, Expand);

  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
  // SjLj exception handling but a light-weight setjmp/longjmp replacement to
  // support continuations, user-level threading, etc. As a result, no other
  // SjLj exception interfaces are implemented; please don't build your own
  // exception handling on top of them.
  // LLVM/Clang supports zero-cost DWARF exception handling.
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::BlockAddress,  MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i64, Custom);

  // TRAP is legal.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.
  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  if (Subtarget->isSVR4ABI()) {
    if (isPPC64) {
      // VAARG always uses double-word chunks, so promote anything smaller.
      setOperationAction(ISD::VAARG, MVT::i1, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i8, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i16, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i32, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::Other, Expand);
    } else {
      // VAARG is custom lowered with the 32-bit SVR4 ABI.
      setOperationAction(ISD::VAARG, MVT::Other, Custom);
      setOperationAction(ISD::VAARG, MVT::i64, Custom);
    }
  } else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // Comparisons that require checking two conditions.
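  // (For example, SETULT means "unordered or less than", which requires
  // testing two CR bits on PowerPC.)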
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f64, Expand);

  if (Subtarget->has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

    // FIXME: disable this lowered code.  This generates 64-bit register values,
    // and we don't model the fact that the top part is clobbered by calls.  We
    // need to flag these together so that the value isn't live across a call.
    //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  if (Subtarget->use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly.
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or.
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }

  if (Subtarget->hasAltivec()) {
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
      MVT::SimpleValueType VT = (MVT::SimpleValueType)i;

      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD , VT, Legal);
      setOperationAction(ISD::SUB , VT, Legal);

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND   , VT, Promote);
      AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
      setOperationAction(ISD::OR    , VT, Promote);
      AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
      setOperationAction(ISD::XOR   , VT, Promote);
      AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
      setOperationAction(ISD::LOAD  , VT, Promote);
      AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType (ISD::STORE, VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL , VT, Expand);
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::FDIV, VT, Expand);
      setOperationAction(ISD::FNEG, VT, Expand);
      setOperationAction(ISD::FSQRT, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FABS, VT, Expand);
      setOperationAction(ISD::FPOWI, VT, Expand);
      setOperationAction(ISD::FFLOOR, VT, Expand);
      setOperationAction(ISD::FCEIL,  VT, Expand);
      setOperationAction(ISD::FTRUNC, VT, Expand);
      setOperationAction(ISD::FRINT,  VT, Expand);
      setOperationAction(ISD::FNEARBYINT, VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UDIVREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::CTPOP, VT, Expand);
      setOperationAction(ISD::CTLZ, VT, Expand);
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::CTTZ, VT, Expand);
      setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::VSELECT, VT, Expand);
      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);

      for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
           j <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++j) {
        MVT::SimpleValueType InnerVT = (MVT::SimpleValueType)j;
        setTruncStoreAction(VT, InnerVT, Expand);
      }
      setLoadExtAction(ISD::SEXTLOAD, VT, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, VT, Expand);
      setLoadExtAction(ISD::EXTLOAD, VT, Expand);
    }

    // We can custom expand any VECTOR_SHUFFLE to VPERM; many shuffles can be
    // handled more cheaply with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    setOperationAction(ISD::AND   , MVT::v4i32, Legal);
    setOperationAction(ISD::OR    , MVT::v4i32, Legal);
    setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
    setOperationAction(ISD::STORE , MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    // Altivec does not contain unordered floating-point compare instructions.
    setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUGT, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUGE, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETULT, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETULE, MVT::v4f32, Expand);
  }

  if (Subtarget->has64BitSupport()) {
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
    setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
  }

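  // Plain atomic loads and stores are expanded to ordinary loads and stores;
  // the required ordering is provided by the fences requested via
  // setInsertFencesForAtomic() below.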
  setOperationAction(ISD::ATOMIC_LOAD,  MVT::i32, Expand);
  setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
  setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?

  if (isPPC64) {
    setStackPointerRegisterToSaveRestore(PPC::X1);
    setExceptionPointerRegister(PPC::X3);
    setExceptionSelectorRegister(PPC::X4);
  } else {
    setStackPointerRegisterToSaveRestore(PPC::R1);
    setExceptionPointerRegister(PPC::R3);
    setExceptionSelectorRegister(PPC::R4);
  }

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);
  setTargetDAGCombine(ISD::BSWAP);

  // Darwin long double math library functions have $LDBL128 appended.
  if (Subtarget->isDarwin()) {
    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
    setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
    setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
    setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
    setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
    setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
  }

  setMinFunctionAlignment(2);
  if (PPCSubTarget.isDarwin())
    setPrefFunctionAlignment(4);

  if (isPPC64 && Subtarget->isJITCodeModel())
    // Temporary workaround for the inability of PPC64 JIT to handle jump
    // tables.
    setSupportJumpTables(false);

  setInsertFencesForAtomic(true);

  setSchedulingPreference(Sched::Hybrid);

  computeRegisterProperties();

  // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
  if (Subtarget->getDarwinDirective() == PPC::DIR_E500mc ||
      Subtarget->getDarwinDirective() == PPC::DIR_E5500) {
    MaxStoresPerMemset = 32;
    MaxStoresPerMemsetOptSize = 16;
    MaxStoresPerMemcpy = 32;
    MaxStoresPerMemcpyOptSize = 8;
    MaxStoresPerMemmove = 32;
    MaxStoresPerMemmoveOptSize = 8;

    setPrefFunctionAlignment(4);
    BenefitFromCodePlacementOpt = true;
  }
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
  const TargetMachine &TM = getTargetMachine();
  // Darwin passes everything on a 4-byte boundary.
  if (TM.getSubtarget<PPCSubtarget>().isDarwin())
    return 4;

  // 16-byte and wider vectors are passed on a 16-byte boundary.
  if (VectorType *VTy = dyn_cast<VectorType>(Ty))
    if (VTy->getBitWidth() >= 128)
      return 16;

  // Everything else is aligned to 8 bytes on PPC64 and to 4 bytes on PPC32.
  if (PPCSubTarget.isPPC64())
    return 8;

  return 4;
}

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case PPCISD::FSEL:            return "PPCISD::FSEL";
  case PPCISD::FCFID:           return "PPCISD::FCFID";
  case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
  case PPCISD::STFIWX:          return "PPCISD::STFIWX";
  case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM:           return "PPCISD::VPERM";
  case PPCISD::Hi:              return "PPCISD::Hi";
  case PPCISD::Lo:              return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
  case PPCISD::TOC_RESTORE:     return "PPCISD::TOC_RESTORE";
  case PPCISD::LOAD:            return "PPCISD::LOAD";
  case PPCISD::LOAD_TOC:        return "PPCISD::LOAD_TOC";
  case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
  case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL:             return "PPCISD::SRL";
  case PPCISD::SRA:             return "PPCISD::SRA";
  case PPCISD::SHL:             return "PPCISD::SHL";
  case PPCISD::EXTSW_32:        return "PPCISD::EXTSW_32";
  case PPCISD::STD_32:          return "PPCISD::STD_32";
  case PPCISD::CALL:            return "PPCISD::CALL";
  case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
  case PPCISD::MTCTR:           return "PPCISD::MTCTR";
  case PPCISD::BCTRL:           return "PPCISD::BCTRL";
  case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
  case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
  case PPCISD::MFCR:            return "PPCISD::MFCR";
  case PPCISD::VCMP:            return "PPCISD::VCMP";
  case PPCISD::VCMPo:           return "PPCISD::VCMPo";
  case PPCISD::LBRX:            return "PPCISD::LBRX";
  case PPCISD::STBRX:           return "PPCISD::STBRX";
  case PPCISD::LARX:            return "PPCISD::LARX";
  case PPCISD::STCX:            return "PPCISD::STCX";
  case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
  case PPCISD::MFFS:            return "PPCISD::MFFS";
  case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
  case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
  case PPCISD::CR6SET:          return "PPCISD::CR6SET";
  case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
  case PPCISD::ADDIS_TOC_HA:    return "PPCISD::ADDIS_TOC_HA";
  case PPCISD::LD_TOC_L:        return "PPCISD::LD_TOC_L";
  case PPCISD::ADDI_TOC_L:      return "PPCISD::ADDI_TOC_L";
  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
  case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
  case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
  case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
  case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
  case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
  case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
  case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
  case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
  case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
  case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
  }
}

EVT PPCTargetLowering::getSetCCResultType(EVT VT) const {
  if (!VT.isVector())
    return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
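/// (In the binary form this matches the mask <1,3,5,...,31>: the odd bytes,
/// i.e. the low-order byte of each halfword of the concatenated inputs.)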
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
  if (!isUnary) {
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i),  i*2+1))
        return false;
  } else {
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i),    i*2+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+1))
        return false;
  }
  return true;
}

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
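/// (In the binary form this matches the mask <2,3,6,7,...,30,31>, i.e. the
/// low-order halfword of each word of the concatenated inputs.)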
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
  if (!isUnary) {
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+3))
        return false;
  } else {
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+3) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+3))
        return false;
  }
  return true;
}

/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  assert(N->getValueType(0) == MVT::v16i8 &&
         "PPC only supports shuffles by bytes!");
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

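  // A merge interleaves 8 bytes from each input: result unit 2*i comes from
  // LHSStart + i*UnitSize and result unit 2*i+1 from RHSStart + i*UnitSize.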
  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1, 2 or 4 bytes).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             bool isUnary) {
  if (!isUnary)
    return isVMerge(N, UnitSize, 8, 24);
  return isVMerge(N, UnitSize, 8, 8);
}

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1, 2 or 4 bytes).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             bool isUnary) {
  if (!isUnary)
    return isVMerge(N, UnitSize, 0, 16);
  return isVMerge(N, UnitSize, 0, 0);
}

/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
  assert(N->getValueType(0) == MVT::v16i8 &&
         "PPC only supports shuffles by bytes!");

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;
  ShiftAmt -= i;

  if (!isUnary) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
        return -1;
  } else {
    // Check the rest of the elements to see if they are consecutive.
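    // (Indices are compared modulo 16 here because the unary form rotates a
    // single input vector around on itself.)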
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
        return -1;
  }
  return ShiftAmt;
}

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
  assert(N->getValueType(0) == MVT::v16i8 &&
         (EltSize == 1 || EltSize == 2 || EltSize == 4));

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = N->getMaskElt(0);

  // FIXME: Handle UNDEF elements too!
  if (ElementBase >= 16)
    return false;

  // Check that the indices are consecutive, in the case of a multi-byte element
  // splatted with a v16i8 mask.
  for (unsigned i = 1; i != EltSize; ++i)
    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
      return false;

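  // Every subsequent EltSize-byte group of the mask must repeat the first
  // group; a group whose leading entry is undef is skipped.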
  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getMaskElt(i) < 0) continue;
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getMaskElt(i+j) != N->getMaskElt(j))
        return false;
  }
  return true;
}

/// isAllNegativeZeroVector - Returns true if all elements of build_vector
/// are -0.0.
bool PPC::isAllNegativeZeroVector(SDNode *N) {
  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);

  APInt APVal, APUndef;
  unsigned BitSize;
  bool HasAnyUndefs;

  if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true))
    if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
      return CFP->getValueAPF().isNegZero();

  return false;
}

/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  assert(isSplatShuffleMask(SVOp, EltSize));
  return SVOp->getMaskElt(0) / EltSize;
}

/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted.  The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  SDValue OpVal(0, 0);

  // If ByteSize of the splat is bigger than the element size of the
  // build_vector, then we have a case where we are checking for a splat where
  // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
    SDValue UniquedVals[4];
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    // See if the corresponding elements of the buildvector agree across chunks.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
      // If the element isn't a constant, bail fully out.
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();

      if (UniquedVals[i&(Multiple-1)].getNode() == 0)
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
        return SDValue();  // no match.
    }

    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    // either constant or undef values that are identical for each chunk.  See
    // if these chunks can form into a larger vspltis*.

    // Check to see if all of the leading entries are either 0 or -1.  If
    // neither, then this won't fit into the immediate field.
    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (UniquedVals[i].getNode() == 0) continue;  // Must have been undefs.

      LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
      LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
    }
    // Finally, check the least significant entry.
    if (LeadingZero) {
      if (UniquedVals[Multiple-1].getNode() == 0)
        return DAG.getTargetConstant(0, MVT::i32);  // 0,0,0,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
      if (Val < 16)
        return DAG.getTargetConstant(Val, MVT::i32);  // 0,0,0,4 -> vspltisw(4)
    }
    if (LeadingOnes) {
      if (UniquedVals[Multiple-1].getNode() == 0)
        return DAG.getTargetConstant(~0U, MVT::i32);  // -1,-1,-1,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
      if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
        return DAG.getTargetConstant(Val, MVT::i32);
    }

    return SDValue();
  }

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return SDValue();
  }

  if (OpVal.getNode() == 0) return SDValue();  // All UNDEF: use implicit def.

  unsigned ValSizeInBytes = EltSize;
  uint64_t Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
    Value = CN->getZExtValue();
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = FloatToBits(CN->getValueAPF().convertToFloat());
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat.  The only case where the replicated bits could fit in our
  // immediate field is zero, and we prefer to use vxor for that.
  if (ValSizeInBytes < ByteSize) return SDValue();

  // If the element value is larger than the splat value, cut it in half and
  // check to see if the two halves are equal.  Continue doing this until we
  // get to ByteSize.  This allows us to handle 0x01010101 as 0x01.
  while (ValSizeInBytes > ByteSize) {
    ValSizeInBytes >>= 1;

    // If the top half equals the bottom half, we're still ok.
    if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
         (Value                        & ((1 << (8*ValSizeInBytes))-1)))
      return SDValue();
  }

  // Properly sign extend the value.
  int MaskVal = SignExtend32(Value, ByteSize * 8);

  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDValue();

  // Finally, if this value fits in a 5 bit sext field, return it.
  if (SignExtend32<5>(MaskVal) == MaskVal)
    return DAG.getTargetConstant(MaskVal, MVT::i32);
  return SDValue();
}

//===----------------------------------------------------------------------===//
//  Addressing Mode Selection
//===----------------------------------------------------------------------===//

/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
/// or 64-bit immediate, and if the value can be accurately represented as a
/// sign extension from a 16-bit value.  If so, this returns true and the
/// immediate.
static bool isIntS16Immediate(SDNode *N, short &Imm) {
  if (N->getOpcode() != ISD::Constant)
    return false;

  Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
  if (N->getValueType(0) == MVT::i32)
    return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
  else
    return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
}
static bool isIntS16Immediate(SDValue Op, short &Imm) {
  return isIntS16Immediate(Op.getNode(), Imm);
}

/// SelectAddressRegReg - Given the specified address, check to see if it
/// can be represented as an indexed [r+r] operation.  Returns false if it
/// can be more efficiently represented with [r+imm].
bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
                                            SDValue &Index,
                                            SelectionDAG &DAG) const {
  short imm = 0;
  if (N.getOpcode() == ISD::ADD) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i
    if (N.getOperand(1).getOpcode() == PPCISD::Lo)
      return false;    // r+i

    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  } else if (N.getOpcode() == ISD::OR) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i; let the [r+imm] form fold the immediate.

    // If this is an or of disjoint bitfields, we can codegen this as an add
    // (for better address arithmetic) if the LHS and RHS of the OR are provably
    // disjoint.
    APInt LHSKnownZero, LHSKnownOne;
    APInt RHSKnownZero, RHSKnownOne;
    DAG.ComputeMaskedBits(N.getOperand(0),
                          LHSKnownZero, LHSKnownOne);

    if (LHSKnownZero.getBoolValue()) {
      DAG.ComputeMaskedBits(N.getOperand(1),
                            RHSKnownZero, RHSKnownOne);
      // If all of the bits are known zero on the LHS or RHS, the add won't
      // carry.
      if (~(LHSKnownZero | RHSKnownZero) == 0) {
        Base = N.getOperand(0);
        Index = N.getOperand(1);
        return true;
      }
    }
  }

  return false;
}

/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
/// represented as reg+reg.
bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
                                            SDValue &Base,
                                            SelectionDAG &DAG) const {
  // FIXME dl should come from parent load or store, not from address
  DebugLoc dl = N.getDebugLoc();
  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Disp, Base, DAG))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm)) {
      Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      return true;  // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm)) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      APInt LHSKnownZero, LHSKnownOne;
      DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);

      if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        Base = N.getOperand(0);
        Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.

    // If this address fits entirely in a 16-bit sext immediate field, codegen
    // this as "d, 0"
    short Imm;
    if (isIntS16Immediate(CN, Imm)) {
      Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
      Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                             CN->getValueType(0));
      return true;
    }

    // Handle 32-bit sext immediates with LIS + addr mode.
    if (CN->getValueType(0) == MVT::i32 ||
        (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) {
      int Addr = (int)CN->getZExtValue();

      // Otherwise, break this down into an LIS + disp.
      Disp = DAG.getTargetConstant((short)Addr, MVT::i32);

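      // The low 16 bits are sign-extended when added back in, so the high
      // part is computed as (Addr - (signed short)Addr) >> 16 to compensate.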
      Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32);
      unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
      Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
      return true;
    }
  }

  Disp = DAG.getTargetConstant(0, getPointerTy());
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
  else
    Base = N;
  return true;      // [r+0]
}

/// SelectAddressRegRegOnly - Given the specified address, force it to be
/// represented as an indexed [r+r] operation.
bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
                                                SDValue &Index,
                                                SelectionDAG &DAG) const {
  // Check to see if we can easily represent this as an [r+r] address.  This
  // will fail if it thinks that the address is more profitably represented as
  // reg+imm, e.g. where imm = 0.
  if (SelectAddressRegReg(N, Base, Index, DAG))
    return true;

  // If the operand is an addition, always emit this as [r+r], since this is
  // better (for code size, and execution, as the memop does the add for free)
  // than emitting an explicit add.
  if (N.getOpcode() == ISD::ADD) {
    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  }

  // Otherwise, do it the hard way, using R0 as the base register.
  Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                         N.getValueType());
  Index = N;
  return true;
}

/// SelectAddressRegImmShift - Returns true if the address N can be
/// represented by a base register plus a signed 14-bit displacement
/// [r+imm*4].  Suitable for use by STD and friends.
bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp,
                                                 SDValue &Base,
                                                 SelectionDAG &DAG) const {
  // FIXME dl should come from the parent load or store, not the address
  DebugLoc dl = N.getDebugLoc();
  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Disp, Base, DAG))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
      Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      return true;  // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      APInt LHSKnownZero, LHSKnownOne;
      DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
      if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        Base = N.getOperand(0);
        Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.  Verify low two bits are clear.
    if ((CN->getZExtValue() & 3) == 0) {
      // If this address fits entirely in a 14-bit sext immediate field, codegen
      // this as "d, 0"
      short Imm;
      if (isIntS16Immediate(CN, Imm)) {
        Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy());
        Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                               CN->getValueType(0));
        return true;
      }

      // Fold the low-part of 32-bit absolute addresses into addr mode.
      if (CN->getValueType(0) == MVT::i32 ||
          (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) {
        int Addr = (int)CN->getZExtValue();

        // Otherwise, break this down into an LIS + disp.
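        // The displacement is stored pre-shifted (>> 2) since the DS-form
        // field encodes a multiple of 4; as above, the high part compensates
        // for the sign extension of the low 16 bits.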
        Disp = DAG.getTargetConstant((short)Addr >> 2, MVT::i32);
        Base = DAG.getTargetConstant((Addr-(signed short)Addr) >> 16, MVT::i32);
        unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
        Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base),0);
        return true;
      }
    }
  }

  Disp = DAG.getTargetConstant(0, getPointerTy());
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
  else
    Base = N;
  return true;      // [r+0]
}

/// getPreIndexedAddressParts - Returns true, and sets the base pointer,
/// offset pointer, and addressing mode by reference, if this node's address
/// can be legally represented as a pre-indexed load/store address.
bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                                  SDValue &Offset,
                                                  ISD::MemIndexedMode &AM,
                                                  SelectionDAG &DAG) const {
  if (DisablePPCPreinc) return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  unsigned Alignment;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    Ptr = LD->getBasePtr();
    VT = LD->getMemoryVT();
    Alignment = LD->getAlignment();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    Ptr = ST->getBasePtr();
    VT  = ST->getMemoryVT();
    Alignment = ST->getAlignment();
    isLoad = false;
  } else
    return false;

  // PowerPC doesn't have preinc load/store instructions for vectors.
  if (VT.isVector())
    return false;

  if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {

    // Common code will reject creating a pre-inc form if the base pointer
    // is a frame index, or if N is a store and the base pointer is either
    // the same as or a predecessor of the value being stored.  Check for
    // those situations here, and try with swapped Base/Offset instead.
    bool Swap = false;

    if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
      Swap = true;
    else if (!isLoad) {
      SDValue Val = cast<StoreSDNode>(N)->getValue();
      if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
        Swap = true;
    }

    if (Swap)
      std::swap(Base, Offset);

    AM = ISD::PRE_INC;
    return true;
  }

  // LDU/STU use reg+imm*4, others use reg+imm.
  if (VT != MVT::i64) {
    // reg + imm
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG))
      return false;
  } else {
    // LDU/STU need an address with at least 4-byte alignment.
    if (Alignment < 4)
      return false;

    // reg + imm * 4.
    if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG))
      return false;
  }

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
    // sext i32 to i64 when addr mode is r+i.
    if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
        LD->getExtensionType() == ISD::SEXTLOAD &&
        isa<ConstantSDNode>(Offset))
      return false;
  }

  AM = ISD::PRE_INC;
  return true;
}

1266//===----------------------------------------------------------------------===//
1267//  LowerOperation implementation
1268//===----------------------------------------------------------------------===//
1269
1270/// GetLabelAccessInfo - Return true if we should reference labels using a
1271/// PICBase; also set HiOpFlags and LoOpFlags to the target MO flags.
1272static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
1273                               unsigned &LoOpFlags, const GlobalValue *GV = 0) {
1274  HiOpFlags = PPCII::MO_HA16;
1275  LoOpFlags = PPCII::MO_LO16;
1276
1277  // Don't use the pic base if not in PIC relocation model.  Or if we are on a
1278  // non-darwin platform.  We don't support PIC on other platforms yet.
1279  bool isPIC = TM.getRelocationModel() == Reloc::PIC_ &&
1280               TM.getSubtarget<PPCSubtarget>().isDarwin();
1281  if (isPIC) {
1282    HiOpFlags |= PPCII::MO_PIC_FLAG;
1283    LoOpFlags |= PPCII::MO_PIC_FLAG;
1284  }
1285
1286  // If this is a reference to a global value that requires a non-lazy-ptr, make
1287  // sure that instruction lowering adds it.
1288  if (GV && TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV, TM)) {
1289    HiOpFlags |= PPCII::MO_NLP_FLAG;
1290    LoOpFlags |= PPCII::MO_NLP_FLAG;
1291
1292    if (GV->hasHiddenVisibility()) {
1293      HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
1294      LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
1295    }
1296  }
1297
1298  return isPIC;
1299}
1300
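// Illustrative note (not from the original source): PPCISD::Hi and PPCISD::Lo
// correspond to the ha16()/lo16() halves of an address, where
// ha16(x) = (x + 0x8000) >> 16 compensates for the sign extension of lo16(x).
// E.g. for x = 0x1234F000, lo16 = 0xF000 (-4096 signed) and ha16 = 0x1235,
// and 0x12350000 + (-4096) = 0x1234F000 as required.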
1301static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
1302                             SelectionDAG &DAG) {
1303  EVT PtrVT = HiPart.getValueType();
1304  SDValue Zero = DAG.getConstant(0, PtrVT);
1305  DebugLoc DL = HiPart.getDebugLoc();
1306
1307  SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
1308  SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
1309
1310  // With PIC, the first instruction is actually "GR+hi(&G)".
1311  if (isPIC)
1312    Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
1313                     DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
1314
1315  // Generate non-pic code that has direct accesses to the constant pool.
1316  // The address of the global is just (hi(&g)+lo(&g)).
1317  return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
1318}
1319
1320SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
1321                                             SelectionDAG &DAG) const {
1322  EVT PtrVT = Op.getValueType();
1323  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
1324  const Constant *C = CP->getConstVal();
1325
1326  // 64-bit SVR4 ABI code is always position-independent.
1327  // The actual address of the constant pool entry is stored in the TOC.
1328  if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
1329    SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
1330    return DAG.getNode(PPCISD::TOC_ENTRY, CP->getDebugLoc(), MVT::i64, GA,
1331                       DAG.getRegister(PPC::X2, MVT::i64));
1332  }
1333
1334  unsigned MOHiFlag, MOLoFlag;
1335  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
1336  SDValue CPIHi =
1337    DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
1338  SDValue CPILo =
1339    DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
1340  return LowerLabelRef(CPIHi, CPILo, isPIC, DAG);
1341}
1342
1343SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
1344  EVT PtrVT = Op.getValueType();
1345  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
1346
1347  // 64-bit SVR4 ABI code is always position-independent.
1348  // The actual address of the jump table is stored in the TOC.
1349  if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
1350    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
1351    return DAG.getNode(PPCISD::TOC_ENTRY, JT->getDebugLoc(), MVT::i64, GA,
1352                       DAG.getRegister(PPC::X2, MVT::i64));
1353  }
1354
1355  unsigned MOHiFlag, MOLoFlag;
1356  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
1357  SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
1358  SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
1359  return LowerLabelRef(JTIHi, JTILo, isPIC, DAG);
1360}
1361
1362SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
1363                                             SelectionDAG &DAG) const {
1364  EVT PtrVT = Op.getValueType();
1365
1366  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
1367
1368  unsigned MOHiFlag, MOLoFlag;
1369  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
1370  SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
1371  SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
1372  return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
1373}
1374
1375SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
1376                                              SelectionDAG &DAG) const {
1377
1378  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
1379  DebugLoc dl = GA->getDebugLoc();
1380  const GlobalValue *GV = GA->getGlobal();
1381  EVT PtrVT = getPointerTy();
1382  bool is64bit = PPCSubTarget.isPPC64();
1383
1384  TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
1385
1386  if (Model == TLSModel::LocalExec) {
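    // A hedged sketch of the sequence this should produce (standard ELF TLS
    // mnemonics; the thread register rT is r13 on ppc64, r2 on ppc32):
    //   addis rX, rT, sym@tprel@ha
    //   addi  rX, rX, sym@tprel@l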
1387    SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
1388                                               PPCII::MO_TPREL16_HA);
1389    SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
1390                                               PPCII::MO_TPREL16_LO);
1391    SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
1392                                     is64bit ? MVT::i64 : MVT::i32);
1393    SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
1394    return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
1395  }
1396
1397  if (!is64bit)
1398    llvm_unreachable("only local-exec is currently supported for ppc32");
1399
1400  if (Model == TLSModel::InitialExec) {
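    // Hedged sketch of the emitted 64-bit sequence:
    //   addis rX, r2, sym@got@tprel@ha
    //   ld    rX, sym@got@tprel@l(rX)
    //   add   rX, rX, sym@tls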
1401    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
1402    SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
1403    SDValue TPOffsetHi = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
1404                                     PtrVT, GOTReg, TGA);
1405    SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
1406                                   PtrVT, TGA, TPOffsetHi);
1407    return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGA);
1408  }
1409
1410  if (Model == TLSModel::GeneralDynamic) {
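    // Hedged sketch of the emitted 64-bit sequence:
    //   addis r3, r2, sym@got@tlsgd@ha
    //   addi  r3, r3, sym@got@tlsgd@l
    //   bl    __tls_get_addr(sym@tlsgd)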
1411    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
1412    SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
1413    SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
1414                                     GOTReg, TGA);
1415    SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT,
1416                                   GOTEntryHi, TGA);
1417
1418    // We need a chain node, and don't have one handy.  The underlying
1419    // call has no side effects, so using the function entry node
1420    // suffices.
1421    SDValue Chain = DAG.getEntryNode();
1422    Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry);
1423    SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64);
1424    SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLS_ADDR, dl,
1425                                  PtrVT, ParmReg, TGA);
1426    // The return value from GET_TLS_ADDR really is in X3 already, but
1427    // some hacks are needed here to tie everything together.  The extra
1428    // copies dissolve during subsequent transforms.
1429    Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr);
1430    return DAG.getCopyFromReg(Chain, dl, PPC::X3, PtrVT);
1431  }
1432
1433  if (Model == TLSModel::LocalDynamic) {
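    // Hedged sketch of the emitted 64-bit sequence:
    //   addis r3, r2, sym@got@tlsld@ha
    //   addi  r3, r3, sym@got@tlsld@l
    //   bl    __tls_get_addr(sym@tlsld)
    //   addis rX, r3, sym@dtprel@ha
    //   addi  rX, rX, sym@dtprel@l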
1434    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
1435    SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
1436    SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
1437                                     GOTReg, TGA);
1438    SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT,
1439                                   GOTEntryHi, TGA);
1440
1441    // We need a chain node, and don't have one handy.  The underlying
1442    // call has no side effects, so using the function entry node
1443    // suffices.
1444    SDValue Chain = DAG.getEntryNode();
1445    Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry);
1446    SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64);
1447    SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLSLD_ADDR, dl,
1448                                  PtrVT, ParmReg, TGA);
1449    // The return value from GET_TLSLD_ADDR really is in X3 already, but
1450    // some hacks are needed here to tie everything together.  The extra
1451    // copies dissolve during subsequent transforms.
1452    Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr);
1453    SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT,
1454                                      Chain, ParmReg, TGA);
1455    return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
1456  }
1457
1458  llvm_unreachable("Unknown TLS model!");
1459}
1460
1461SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
1462                                              SelectionDAG &DAG) const {
1463  EVT PtrVT = Op.getValueType();
1464  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
1465  DebugLoc DL = GSDN->getDebugLoc();
1466  const GlobalValue *GV = GSDN->getGlobal();
1467
1468  // 64-bit SVR4 ABI code is always position-independent.
1469  // The actual address of the GlobalValue is stored in the TOC.
1470  if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
1471    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
1472    return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA,
1473                       DAG.getRegister(PPC::X2, MVT::i64));
1474  }
1475
1476  unsigned MOHiFlag, MOLoFlag;
1477  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag, GV);
1478
1479  SDValue GAHi =
1480    DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
1481  SDValue GALo =
1482    DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
1483
1484  SDValue Ptr = LowerLabelRef(GAHi, GALo, isPIC, DAG);
1485
1486  // If the global reference is actually to a non-lazy-pointer, we have to do an
1487  // extra load to get the address of the global.
1488  if (MOHiFlag & PPCII::MO_NLP_FLAG)
1489    Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(),
1490                      false, false, false, 0);
1491  return Ptr;
1492}
1493
1494SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
1495  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1496  DebugLoc dl = Op.getDebugLoc();
1497
1498  // If we're comparing for equality to zero, expose the fact that this is
1499  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
1500  // fold the new nodes.
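  // Illustrative result for an i32 "x == 0" (not from the original comment):
  //   cntlzw rY, rX      ; yields 32 iff x == 0, else a value in 0..31
  //   srwi   rY, rY, 5   ; log2(32) == 5, so rY == 1 iff x == 0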
1501  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
1502    if (C->isNullValue() && CC == ISD::SETEQ) {
1503      EVT VT = Op.getOperand(0).getValueType();
1504      SDValue Zext = Op.getOperand(0);
1505      if (VT.bitsLT(MVT::i32)) {
1506        VT = MVT::i32;
1507        Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
1508      }
1509      unsigned Log2b = Log2_32(VT.getSizeInBits());
1510      SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
1511      SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
1512                                DAG.getConstant(Log2b, MVT::i32));
1513      return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
1514    }
1515    // Leave comparisons against 0 and -1 alone for now, since they're usually
1516    // optimized.  FIXME: revisit this when we can custom lower all setcc
1517    // optimizations.
1518    if (C->isAllOnesValue() || C->isNullValue())
1519      return SDValue();
1520  }
1521
1522  // If we have an integer seteq/setne, turn it into a compare against zero
1523  // by xor'ing the rhs with the lhs, which is faster than setting a
1524  // condition register, reading it back out, and masking the correct bit.  The
1525  // normal approach here uses sub to do this instead of xor.  Using xor exposes
1526  // the result to other bit-twiddling opportunities.
1527  EVT LHSVT = Op.getOperand(0).getValueType();
1528  if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
1529    EVT VT = Op.getValueType();
1530    SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
1531                                Op.getOperand(1));
1532    return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC);
1533  }
1534  return SDValue();
1535}
1536
1537SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
1538                                      const PPCSubtarget &Subtarget) const {
1539  SDNode *Node = Op.getNode();
1540  EVT VT = Node->getValueType(0);
1541  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1542  SDValue InChain = Node->getOperand(0);
1543  SDValue VAListPtr = Node->getOperand(1);
1544  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
1545  DebugLoc dl = Node->getDebugLoc();
1546
1547  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
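  // Assumed va_list layout (matches the struct documented in LowerVASTART
  // below): byte 0 = gpr index, byte 1 = fpr index, bytes 4-7 =
  // overflow_arg_area, bytes 8-11 = reg_save_area.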
1548
1549  // gpr_index
1550  SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
1551                                    VAListPtr, MachinePointerInfo(SV), MVT::i8,
1552                                    false, false, 0);
1553  InChain = GprIndex.getValue(1);
1554
1555  if (VT == MVT::i64) {
1556    // Check whether GprIndex is odd (i64 args must start at an even index)
1557    SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
1558                                 DAG.getConstant(1, MVT::i32));
1559    SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
1560                                DAG.getConstant(0, MVT::i32), ISD::SETNE);
1561    SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
1562                                          DAG.getConstant(1, MVT::i32));
1563    // Align GprIndex to be even if it isn't
1564    GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
1565                           GprIndex);
1566  }
1567
1568  // fpr index is 1 byte after gpr
1569  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
1570                               DAG.getConstant(1, MVT::i32));
1571
1572  // fpr
1573  SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
1574                                    FprPtr, MachinePointerInfo(SV), MVT::i8,
1575                                    false, false, 0);
1576  InChain = FprIndex.getValue(1);
1577
1578  SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
1579                                       DAG.getConstant(8, MVT::i32));
1580
1581  SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
1582                                        DAG.getConstant(4, MVT::i32));
1583
1584  // areas
1585  SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr,
1586                                     MachinePointerInfo(), false, false,
1587                                     false, 0);
1588  InChain = OverflowArea.getValue(1);
1589
1590  SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr,
1591                                    MachinePointerInfo(), false, false,
1592                                    false, 0);
1593  InChain = RegSaveArea.getValue(1);
1594
1595  // select overflow_area if index >= 8 (the register save area is full)
1596  SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
1597                            DAG.getConstant(8, MVT::i32), ISD::SETLT);
1598
1599  // adjustment constant gpr_index * 4/8
1600  SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
1601                                    VT.isInteger() ? GprIndex : FprIndex,
1602                                    DAG.getConstant(VT.isInteger() ? 4 : 8,
1603                                                    MVT::i32));
1604
1605  // OurReg = RegSaveArea + RegConstant
1606  SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
1607                               RegConstant);
1608
1609  // Floating types start 32 bytes into RegSaveArea (past the 8 x 4-byte GPRs)
1610  if (VT.isFloatingPoint())
1611    OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
1612                         DAG.getConstant(32, MVT::i32));
1613
1614  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
1615  SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
1616                                   VT.isInteger() ? GprIndex : FprIndex,
1617                                   DAG.getConstant(VT == MVT::i64 ? 2 : 1,
1618                                                   MVT::i32));
1619
1620  InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
1621                              VT.isInteger() ? VAListPtr : FprPtr,
1622                              MachinePointerInfo(SV),
1623                              MVT::i8, false, false, 0);
1624
1625  // determine if we should load from reg_save_area or overflow_area
1626  SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
1627
1628  // increase overflow_area by 4/8 if gpr/fpr index >= 8
1629  SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
1630                                          DAG.getConstant(VT.isInteger() ? 4 : 8,
1631                                          MVT::i32));
1632
1633  OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
1634                             OverflowAreaPlusN);
1635
1636  InChain = DAG.getTruncStore(InChain, dl, OverflowArea,
1637                              OverflowAreaPtr,
1638                              MachinePointerInfo(),
1639                              MVT::i32, false, false, 0);
1640
1641  return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(),
1642                     false, false, false, 0);
1643}
1644
1645SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
1646                                                  SelectionDAG &DAG) const {
1647  return Op.getOperand(0);
1648}
1649
1650SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
1651                                                SelectionDAG &DAG) const {
1652  SDValue Chain = Op.getOperand(0);
1653  SDValue Trmp = Op.getOperand(1); // trampoline
1654  SDValue FPtr = Op.getOperand(2); // nested function
1655  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
1656  DebugLoc dl = Op.getDebugLoc();
1657
1658  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1659  bool isPPC64 = (PtrVT == MVT::i64);
1660  Type *IntPtrTy =
1661    DAG.getTargetLoweringInfo().getDataLayout()->getIntPtrType(
1662                                                             *DAG.getContext());
1663
1664  TargetLowering::ArgListTy Args;
1665  TargetLowering::ArgListEntry Entry;
1666
1667  Entry.Ty = IntPtrTy;
1668  Entry.Node = Trmp; Args.push_back(Entry);
1669
1670  // TrampSize == (isPPC64 ? 48 : 40);
1671  Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40,
1672                               isPPC64 ? MVT::i64 : MVT::i32);
1673  Args.push_back(Entry);
1674
1675  Entry.Node = FPtr; Args.push_back(Entry);
1676  Entry.Node = Nest; Args.push_back(Entry);
1677
1678  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
1679  TargetLowering::CallLoweringInfo CLI(Chain,
1680                                       Type::getVoidTy(*DAG.getContext()),
1681                                       false, false, false, false, 0,
1682                                       CallingConv::C,
1683                /*isTailCall=*/false,
1684                                       /*doesNotRet=*/false,
1685                                       /*isReturnValueUsed=*/true,
1686                DAG.getExternalSymbol("__trampoline_setup", PtrVT),
1687                Args, DAG, dl);
1688  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
1689
1690  return CallResult.second;
1691}
1692
1693SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
1694                                        const PPCSubtarget &Subtarget) const {
1695  MachineFunction &MF = DAG.getMachineFunction();
1696  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
1697
1698  DebugLoc dl = Op.getDebugLoc();
1699
1700  if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
1701    // vastart just stores the address of the VarArgsFrameIndex slot into the
1702    // memory location argument.
1703    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1704    SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
1705    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1706    return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
1707                        MachinePointerInfo(SV),
1708                        false, false, 0);
1709  }
1710
1711  // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
1712  // We assume the given va_list is already allocated.
1713  //
1714  // typedef struct {
1715  //  char gpr;     /* index into the array of 8 GPRs
1716  //                 * stored in the register save area
1717  //                 * gpr=0 corresponds to r3,
1718  //                 * gpr=1 to r4, etc.
1719  //                 */
1720  //  char fpr;     /* index into the array of 8 FPRs
1721  //                 * stored in the register save area
1722  //                 * fpr=0 corresponds to f1,
1723  //                 * fpr=1 to f2, etc.
1724  //                 */
1725  //  char *overflow_arg_area;
1726  //                /* location on stack that holds
1727  //                 * the next overflow argument
1728  //                 */
1729  //  char *reg_save_area;
1730  //               /* where r3:r10 and f1:f8 (if saved)
1731  //                * are stored
1732  //                */
1733  // } va_list[1];
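  //
  // With 4-byte pointers the fields land at offsets 0 (gpr), 1 (fpr),
  // 4 (overflow_arg_area) and 8 (reg_save_area); the constants below
  // (FPROffset = 1, StackOffset = 3, FrameOffset = 4) step the store pointer
  // from one field to the next.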
1734
1735
1736  SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), MVT::i32);
1737  SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), MVT::i32);
1738
1739
1740  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1741
1742  SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
1743                                            PtrVT);
1744  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
1745                                 PtrVT);
1746
1747  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
1748  SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT);
1749
1750  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
1751  SDValue ConstStackOffset = DAG.getConstant(StackOffset, PtrVT);
1752
1753  uint64_t FPROffset = 1;
1754  SDValue ConstFPROffset = DAG.getConstant(FPROffset, PtrVT);
1755
1756  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1757
1758  // Store first byte: number of int regs
1759  SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR,
1760                                         Op.getOperand(1),
1761                                         MachinePointerInfo(SV),
1762                                         MVT::i8, false, false, 0);
1763  uint64_t nextOffset = FPROffset;
1764  SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
1765                                  ConstFPROffset);
1766
1767  // Store second byte: number of float regs
1768  SDValue secondStore =
1769    DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
1770                      MachinePointerInfo(SV, nextOffset), MVT::i8,
1771                      false, false, 0);
1772  nextOffset += StackOffset;
1773  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
1774
1775  // Store second word: arguments given on stack
1776  SDValue thirdStore =
1777    DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
1778                 MachinePointerInfo(SV, nextOffset),
1779                 false, false, 0);
1780  nextOffset += FrameOffset;
1781  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
1782
1783  // Store third word: arguments given in registers
1784  return DAG.getStore(thirdStore, dl, FR, nextPtr,
1785                      MachinePointerInfo(SV, nextOffset),
1786                      false, false, 0);
1787
1788}
1789
1790#include "PPCGenCallingConv.inc"
1791
1792static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
1793                                       CCValAssign::LocInfo &LocInfo,
1794                                       ISD::ArgFlagsTy &ArgFlags,
1795                                       CCState &State) {
1796  return true;
1797}
1798
1799static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
1800                                              MVT &LocVT,
1801                                              CCValAssign::LocInfo &LocInfo,
1802                                              ISD::ArgFlagsTy &ArgFlags,
1803                                              CCState &State) {
1804  static const uint16_t ArgRegs[] = {
1805    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
1806    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
1807  };
1808  const unsigned NumArgRegs = array_lengthof(ArgRegs);
1809
1810  unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
1811
1812  // Skip one register if the first unallocated register has an even register
1813  // number and there are still argument registers available. RegNum is an
1814  // index into ArgRegs, and ArgRegs[0] is R3, so skipping when RegNum is odd
1815  // keeps 8-byte arguments in pairs starting at an odd-numbered register.
1816  if (RegNum != NumArgRegs && RegNum % 2 == 1) {
1817    State.AllocateReg(ArgRegs[RegNum]);
1818  }
1819
1820  // Always return false here, as this function only makes sure that the first
1821  // unallocated register has an odd register number and does not actually
1822  // allocate a register for the current argument.
1823  return false;
1824}
1825
1826static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
1827                                                MVT &LocVT,
1828                                                CCValAssign::LocInfo &LocInfo,
1829                                                ISD::ArgFlagsTy &ArgFlags,
1830                                                CCState &State) {
1831  static const uint16_t ArgRegs[] = {
1832    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
1833    PPC::F8
1834  };
1835
1836  const unsigned NumArgRegs = array_lengthof(ArgRegs);
1837
1838  unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
1839
1840  // If there is only one Floating-point register left we need to put both f64
1841  // values of a split ppc_fp128 value on the stack.
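  // Illustrative example: with F1-F7 already allocated, a ppc_fp128 argument
  // cannot be split between F8 and the stack, so F8 is allocated here and
  // both f64 halves are passed on the stack.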
1842  if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
1843    State.AllocateReg(ArgRegs[RegNum]);
1844  }
1845
1846  // Always return false here, as this function only makes sure that the two f64
1847  // values a ppc_fp128 value is split into are both passed in registers or both
1848  // passed on the stack and does not actually allocate a register for the
1849  // current argument.
1850  return false;
1851}
1852
1853/// GetFPR - Get the set of FP registers that should be allocated for
1854/// arguments on Darwin.
1855static const uint16_t *GetFPR() {
1856  static const uint16_t FPR[] = {
1857    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
1858    PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
1859  };
1860
1861  return FPR;
1862}
1863
1864/// CalculateStackSlotSize - Calculates the size reserved for this argument on
1865/// the stack.
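/// For example (illustrative), a 13-byte byval argument with an 8-byte
/// pointer size reserves ceil(13/8) * 8 = 16 bytes.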
1866static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
1867                                       unsigned PtrByteSize) {
1868  unsigned ArgSize = ArgVT.getSizeInBits()/8;
1869  if (Flags.isByVal())
1870    ArgSize = Flags.getByValSize();
1871  ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
1872
1873  return ArgSize;
1874}
1875
1876SDValue
1877PPCTargetLowering::LowerFormalArguments(SDValue Chain,
1878                                        CallingConv::ID CallConv, bool isVarArg,
1879                                        const SmallVectorImpl<ISD::InputArg>
1880                                          &Ins,
1881                                        DebugLoc dl, SelectionDAG &DAG,
1882                                        SmallVectorImpl<SDValue> &InVals)
1883                                          const {
1884  if (PPCSubTarget.isSVR4ABI()) {
1885    if (PPCSubTarget.isPPC64())
1886      return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
1887                                         dl, DAG, InVals);
1888    else
1889      return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
1890                                         dl, DAG, InVals);
1891  } else {
1892    return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
1893                                       dl, DAG, InVals);
1894  }
1895}
1896
1897SDValue
1898PPCTargetLowering::LowerFormalArguments_32SVR4(
1899                                      SDValue Chain,
1900                                      CallingConv::ID CallConv, bool isVarArg,
1901                                      const SmallVectorImpl<ISD::InputArg>
1902                                        &Ins,
1903                                      DebugLoc dl, SelectionDAG &DAG,
1904                                      SmallVectorImpl<SDValue> &InVals) const {
1905
1906  // 32-bit SVR4 ABI Stack Frame Layout:
1907  //              +-----------------------------------+
1908  //        +-->  |            Back chain             |
1909  //        |     +-----------------------------------+
1910  //        |     | Floating-point register save area |
1911  //        |     +-----------------------------------+
1912  //        |     |    General register save area     |
1913  //        |     +-----------------------------------+
1914  //        |     |          CR save word             |
1915  //        |     +-----------------------------------+
1916  //        |     |         VRSAVE save word          |
1917  //        |     +-----------------------------------+
1918  //        |     |         Alignment padding         |
1919  //        |     +-----------------------------------+
1920  //        |     |     Vector register save area     |
1921  //        |     +-----------------------------------+
1922  //        |     |       Local variable space        |
1923  //        |     +-----------------------------------+
1924  //        |     |        Parameter list area        |
1925  //        |     +-----------------------------------+
1926  //        |     |           LR save word            |
1927  //        |     +-----------------------------------+
1928  // SP-->  +---  |            Back chain             |
1929  //              +-----------------------------------+
1930  //
1931  // Specifications:
1932  //   System V Application Binary Interface PowerPC Processor Supplement
1933  //   AltiVec Technology Programming Interface Manual
1934
1935  MachineFunction &MF = DAG.getMachineFunction();
1936  MachineFrameInfo *MFI = MF.getFrameInfo();
1937  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
1938
1939  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1940  // Potential tail calls could cause overwriting of argument stack slots.
1941  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
1942                       (CallConv == CallingConv::Fast));
1943  unsigned PtrByteSize = 4;
1944
1945  // Assign locations to all of the incoming arguments.
1946  SmallVector<CCValAssign, 16> ArgLocs;
1947  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1948                 getTargetMachine(), ArgLocs, *DAG.getContext());
1949
1950  // Reserve space for the linkage area on the stack.
1951  CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
1952
1953  CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
1954
1955  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1956    CCValAssign &VA = ArgLocs[i];
1957
1958    // Arguments stored in registers.
1959    if (VA.isRegLoc()) {
1960      const TargetRegisterClass *RC;
1961      EVT ValVT = VA.getValVT();
1962
1963      switch (ValVT.getSimpleVT().SimpleTy) {
1964        default:
1965          llvm_unreachable("ValVT not supported by formal arguments Lowering");
1966        case MVT::i32:
1967          RC = &PPC::GPRCRegClass;
1968          break;
1969        case MVT::f32:
1970          RC = &PPC::F4RCRegClass;
1971          break;
1972        case MVT::f64:
1973          RC = &PPC::F8RCRegClass;
1974          break;
1975        case MVT::v16i8:
1976        case MVT::v8i16:
1977        case MVT::v4i32:
1978        case MVT::v4f32:
1979          RC = &PPC::VRRCRegClass;
1980          break;
1981      }
1982
1983      // Transform the arguments stored in physical registers into virtual ones.
1984      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
1985      SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT);
1986
1987      InVals.push_back(ArgValue);
1988    } else {
1989      // Argument stored in memory.
1990      assert(VA.isMemLoc());
1991
1992      unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8;
1993      int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
1994                                      isImmutable);
1995
1996      // Create load nodes to retrieve arguments from the stack.
1997      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1998      InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
1999                                   MachinePointerInfo(),
2000                                   false, false, false, 0));
2001    }
2002  }
2003
2004  // Assign locations to all of the incoming aggregate by value arguments.
2005  // Aggregates passed by value are stored in the local variable space of the
2006  // caller's stack frame, right above the parameter list area.
2007  SmallVector<CCValAssign, 16> ByValArgLocs;
2008  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
2009                      getTargetMachine(), ByValArgLocs, *DAG.getContext());
2010
2011  // Reserve stack space for the allocations in CCInfo.
2012  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
2013
2014  CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
2015
2016  // Area that is at least reserved in the caller of this function.
2017  unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
2018
2019  // Set the size that is at least reserved in the caller of this function.  Tail
2020  // call optimized function's reserved stack space needs to be aligned so that
2021  // taking the difference between two stack areas will result in an aligned
2022  // stack.
2023  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
2024
2025  MinReservedArea =
2026    std::max(MinReservedArea,
2027             PPCFrameLowering::getMinCallFrameSize(false, false));
2028
2029  unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
2030    getStackAlignment();
2031  unsigned AlignMask = TargetAlign-1;
2032  MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
2033
2034  FI->setMinReservedArea(MinReservedArea);
2035
2036  SmallVector<SDValue, 8> MemOps;
2037
2038  // If the function takes a variable number of arguments, make a frame index for
2039  // the start of the first vararg value... for expansion of llvm.va_start.
2040  if (isVarArg) {
2041    static const uint16_t GPArgRegs[] = {
2042      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
2043      PPC::R7, PPC::R8, PPC::R9, PPC::R10,
2044    };
2045    const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
2046
2047    static const uint16_t FPArgRegs[] = {
2048      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
2049      PPC::F8
2050    };
2051    const unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
2052
2053    FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs,
2054                                                          NumGPArgRegs));
2055    FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs,
2056                                                          NumFPArgRegs));
2057
2058    // Make room for NumGPArgRegs and NumFPArgRegs.
2059    int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
2060                NumFPArgRegs * EVT(MVT::f64).getSizeInBits()/8;
2061
2062    FuncInfo->setVarArgsStackOffset(
2063      MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
2064                             CCInfo.getNextStackOffset(), true));
2065
2066    FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false));
2067    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
2068
2069    // The fixed integer arguments of a variadic function are stored to the
2070    // VarArgsFrameIndex on the stack so that they may be loaded by dereferencing
2071    // the result of va_next.
2072    for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
2073      // Get an existing live-in vreg, or add a new one.
2074      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
2075      if (!VReg)
2076        VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
2077
2078      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
2079      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
2080                                   MachinePointerInfo(), false, false, 0);
2081      MemOps.push_back(Store);
2082      // Increment the address by four for the next argument to store
2083      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
2084      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
2085    }
2086
2087    // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
2088    // is set.
2089    // The double arguments are stored to the VarArgsFrameIndex
2090    // on the stack.
2091    for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
2092      // Get an existing live-in vreg, or add a new one.
2093      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
2094      if (!VReg)
2095        VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
2096
2097      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
2098      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
2099                                   MachinePointerInfo(), false, false, 0);
2100      MemOps.push_back(Store);
2101      // Increment the address by eight for the next argument to store
2102      SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8,
2103                                         PtrVT);
2104      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
2105    }
2106  }
2107
2108  if (!MemOps.empty())
2109    Chain = DAG.getNode(ISD::TokenFactor, dl,
2110                        MVT::Other, &MemOps[0], MemOps.size());
2111
2112  return Chain;
2113}
2114
2115// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
2116// the value to MVT::i64 and then truncate to the correct register size.
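// For example (illustrative), a signext i32 argument arriving in an i64 GPR
// becomes AssertSext(i64 value, i32) followed by a TRUNCATE back to i32.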
2117SDValue
2118PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
2119                                     SelectionDAG &DAG, SDValue ArgVal,
2120                                     DebugLoc dl) const {
2121  if (Flags.isSExt())
2122    ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
2123                         DAG.getValueType(ObjectVT));
2124  else if (Flags.isZExt())
2125    ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
2126                         DAG.getValueType(ObjectVT));
2127
2128  return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
2129}
2130
2131// Set the size that is at least reserved in the caller of this function.  Tail
2132// call optimized functions' reserved stack space needs to be aligned so that
2133// taking the difference between two stack areas will result in an aligned
2134// stack.
2135void
2136PPCTargetLowering::setMinReservedArea(MachineFunction &MF, SelectionDAG &DAG,
2137                                      unsigned nAltivecParamsAtEnd,
2138                                      unsigned MinReservedArea,
2139                                      bool isPPC64) const {
2140  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
2141  // Add the Altivec parameters at the end, if needed.
2142  if (nAltivecParamsAtEnd) {
2143    MinReservedArea = ((MinReservedArea+15)/16)*16;
2144    MinReservedArea += 16*nAltivecParamsAtEnd;
2145  }
2146  MinReservedArea =
2147    std::max(MinReservedArea,
2148             PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
2149  unsigned TargetAlign
2150    = DAG.getMachineFunction().getTarget().getFrameLowering()->
2151        getStackAlignment();
2152  unsigned AlignMask = TargetAlign-1;
2153  MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
2154  FI->setMinReservedArea(MinReservedArea);
2155}
2156
2157SDValue
2158PPCTargetLowering::LowerFormalArguments_64SVR4(
2159                                      SDValue Chain,
2160                                      CallingConv::ID CallConv, bool isVarArg,
2161                                      const SmallVectorImpl<ISD::InputArg>
2162                                        &Ins,
2163                                      DebugLoc dl, SelectionDAG &DAG,
2164                                      SmallVectorImpl<SDValue> &InVals) const {
2165  // TODO: add description of PPC stack frame format, or at least some docs.
2166  //
2167  MachineFunction &MF = DAG.getMachineFunction();
2168  MachineFrameInfo *MFI = MF.getFrameInfo();
2169  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2170
2171  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2172  // Potential tail calls could cause overwriting of argument stack slots.
2173  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
2174                       (CallConv == CallingConv::Fast));
2175  unsigned PtrByteSize = 8;
2176
2177  unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true);
2178  // Area that is at least reserved in the caller of this function.
2179  unsigned MinReservedArea = ArgOffset;
2180
2181  static const uint16_t GPR[] = {
2182    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
2183    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
2184  };
2185
2186  static const uint16_t *FPR = GetFPR();
2187
2188  static const uint16_t VR[] = {
2189    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
2190    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
2191  };
2192
2193  const unsigned Num_GPR_Regs = array_lengthof(GPR);
2194  const unsigned Num_FPR_Regs = 13;
2195  const unsigned Num_VR_Regs  = array_lengthof(VR);
2196
2197  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
2198
2199  // Add DAG nodes to load the arguments or copy them out of registers.  On
2200  // entry to a function on PPC, the arguments start after the linkage area,
2201  // although the first ones are often in registers.
2202
2203  SmallVector<SDValue, 8> MemOps;
2204  unsigned nAltivecParamsAtEnd = 0;
2205  Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
2206  unsigned CurArgIdx = 0;
2207  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
2208    SDValue ArgVal;
2209    bool needsLoad = false;
2210    EVT ObjectVT = Ins[ArgNo].VT;
2211    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
2212    unsigned ArgSize = ObjSize;
2213    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
2214    std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
2215    CurArgIdx = Ins[ArgNo].OrigArgIndex;
2216
2217    unsigned CurArgOffset = ArgOffset;
2218
2219    // Varargs or 64-bit Altivec parameters are padded to a 16-byte boundary.
2220    if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
2221        ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
2222      if (isVarArg) {
2223        MinReservedArea = ((MinReservedArea+15)/16)*16;
2224        MinReservedArea += CalculateStackSlotSize(ObjectVT,
2225                                                  Flags,
2226                                                  PtrByteSize);
2227      } else
2228        nAltivecParamsAtEnd++;
2229    } else
2230      // Calculate min reserved area.
2231      MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
2232                                                Flags,
2233                                                PtrByteSize);
2234
2235    // FIXME the codegen can be much improved in some cases.
2236    // We do not have to keep everything in memory.
2237    if (Flags.isByVal()) {
2238      // ObjSize is the true size; ArgSize is ObjSize rounded up to whole registers.
2239      ObjSize = Flags.getByValSize();
2240      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
2241      // Empty aggregate parameters do not take up registers.  Examples:
2242      //   struct { } a;
2243      //   union  { } b;
2244      //   int c[0];
2245      // etc.  However, we have to provide a place-holder in InVals, so
2246      // pretend we have an 8-byte item at the current address for that
2247      // purpose.
2248      if (!ObjSize) {
2249        int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
2250        SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2251        InVals.push_back(FIN);
2252        continue;
2253      }
2254      // All aggregates smaller than 8 bytes must be passed right-justified.
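      // E.g. (illustrative) a 3-byte struct occupies the high-address bytes
      // of its doubleword slot, so its address is the slot start plus 5.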
2255      if (ObjSize < PtrByteSize)
2256        CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize);
2257      // The value of the object is its address.
2258      int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
2259      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2260      InVals.push_back(FIN);
2261
2262      if (ObjSize < 8) {
2263        if (GPR_idx != Num_GPR_Regs) {
2264          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
2265          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
2266          SDValue Store;
2267
2268          if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
2269            EVT ObjType = (ObjSize == 1 ? MVT::i8 :
2270                           (ObjSize == 2 ? MVT::i16 : MVT::i32));
2271            Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
2272                                      MachinePointerInfo(FuncArg, CurArgOffset),
2273                                      ObjType, false, false, 0);
2274          } else {
2275            // For sizes that don't fit a truncating store (3, 5, 6, 7),
2276            // store the whole register as-is to the parameter save area
2277            // slot.  The address of the parameter was already calculated
2278            // above (InVals.push_back(FIN)) to be the right-justified
2279            // offset within the slot.  For this store, we need a new
2280            // frame index that points at the beginning of the slot.
2281            int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
2282            SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2283            Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
2284                                 MachinePointerInfo(FuncArg, ArgOffset),
2285                                 false, false, 0);
2286          }
2287
2288          MemOps.push_back(Store);
2289          ++GPR_idx;
2290        }
2291        // Whether we copied from a register or not, advance the offset
2292        // into the parameter save area by a full doubleword.
2293        ArgOffset += PtrByteSize;
2294        continue;
2295      }
2296
2297      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
2298        // Store whatever pieces of the object are in registers
2299        // to memory.  ArgOffset will be the address of the beginning
2300        // of the object.
2301        if (GPR_idx != Num_GPR_Regs) {
2302          unsigned VReg;
2303          VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
2304          int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
2305          SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2306          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
2307          SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
2308                                       MachinePointerInfo(FuncArg, ArgOffset),
2309                                       false, false, 0);
2310          MemOps.push_back(Store);
2311          ++GPR_idx;
2312          ArgOffset += PtrByteSize;
2313        } else {
2314          ArgOffset += ArgSize - j;
2315          break;
2316        }
2317      }
2318      continue;
2319    }
2320
2321    switch (ObjectVT.getSimpleVT().SimpleTy) {
2322    default: llvm_unreachable("Unhandled argument type!");
2323    case MVT::i32:
2324    case MVT::i64:
2325      if (GPR_idx != Num_GPR_Regs) {
2326        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
2327        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
2328
2329        if (ObjectVT == MVT::i32)
2330          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
2331          // value to MVT::i64 and then truncate to the correct register size.
2332          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
2333
2334        ++GPR_idx;
2335      } else {
2336        needsLoad = true;
2337        ArgSize = PtrByteSize;
2338      }
2339      ArgOffset += 8;
2340      break;
2341
2342    case MVT::f32:
2343    case MVT::f64:
2344      // Every 8 bytes of argument space consumes one of the GPRs available for
2345      // argument passing.
2346      if (GPR_idx != Num_GPR_Regs) {
2347        ++GPR_idx;
2348      }
2349      if (FPR_idx != Num_FPR_Regs) {
2350        unsigned VReg;
2351
2352        if (ObjectVT == MVT::f32)
2353          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
2354        else
2355          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
2356
2357        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
2358        ++FPR_idx;
2359      } else {
2360        needsLoad = true;
2361        ArgSize = PtrByteSize;
2362      }
2363
2364      ArgOffset += 8;
2365      break;
2366    case MVT::v4f32:
2367    case MVT::v4i32:
2368    case MVT::v8i16:
2369    case MVT::v16i8:
2370      // Note that vector arguments in registers don't reserve stack space,
2371      // except in varargs functions.
2372      if (VR_idx != Num_VR_Regs) {
2373        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
2374        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
2375        if (isVarArg) {
2376          while ((ArgOffset % 16) != 0) {
2377            ArgOffset += PtrByteSize;
2378            if (GPR_idx != Num_GPR_Regs)
2379              GPR_idx++;
2380          }
2381          ArgOffset += 16;
2382          GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
2383        }
2384        ++VR_idx;
2385      } else {
2386        // Vectors are aligned.
2387        ArgOffset = ((ArgOffset+15)/16)*16;
2388        CurArgOffset = ArgOffset;
2389        ArgOffset += 16;
2390        needsLoad = true;
2391      }
2392      break;
2393    }
2394
2395    // We need to load the argument to a virtual register if we determined
2396    // above that we ran out of physical registers of the appropriate type.
2397    if (needsLoad) {
2398      int FI = MFI->CreateFixedObject(ObjSize,
2399                                      CurArgOffset + (ArgSize - ObjSize),
2400                                      isImmutable);
2401      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2402      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
2403                           false, false, false, 0);
2404    }
2405
2406    InVals.push_back(ArgVal);
2407  }
2408
2409  // Set the size that is at least reserved in the caller of this function.  Tail
2410  // call optimized functions' reserved stack space needs to be aligned so that
2411  // taking the difference between two stack areas will result in an aligned
2412  // stack.
2413  setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, true);
2414
2415  // If the function takes a variable number of arguments, make a frame index for
2416  // the start of the first vararg value... for expansion of llvm.va_start.
2417  if (isVarArg) {
2418    int Depth = ArgOffset;
2419
2420    FuncInfo->setVarArgsFrameIndex(
2421      MFI->CreateFixedObject(PtrByteSize, Depth, true));
2422    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
2423
2424    // If this function is vararg, store any remaining integer argument regs
2425    // to their spots on the stack so that they may be loaded by dereferencing the
2426    // result of va_next.
2427    for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
2428      unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
2429      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
2430      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
2431                                   MachinePointerInfo(), false, false, 0);
2432      MemOps.push_back(Store);
2433      // Increment the address by eight for the next argument to store
2434      SDValue PtrOff = DAG.getConstant(PtrByteSize, PtrVT);
2435      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
2436    }
2437  }
2438
2439  if (!MemOps.empty())
2440    Chain = DAG.getNode(ISD::TokenFactor, dl,
2441                        MVT::Other, &MemOps[0], MemOps.size());
2442
2443  return Chain;
2444}
2445
2446SDValue
2447PPCTargetLowering::LowerFormalArguments_Darwin(
2448                                      SDValue Chain,
2449                                      CallingConv::ID CallConv, bool isVarArg,
2450                                      const SmallVectorImpl<ISD::InputArg>
2451                                        &Ins,
2452                                      DebugLoc dl, SelectionDAG &DAG,
2453                                      SmallVectorImpl<SDValue> &InVals) const {
2454  // TODO: add description of PPC stack frame format, or at least some docs.
2455  //
2456  MachineFunction &MF = DAG.getMachineFunction();
2457  MachineFrameInfo *MFI = MF.getFrameInfo();
2458  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2459
2460  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2461  bool isPPC64 = PtrVT == MVT::i64;
2462  // Potential tail calls could cause overwriting of argument stack slots.
2463  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
2464                       (CallConv == CallingConv::Fast));
2465  unsigned PtrByteSize = isPPC64 ? 8 : 4;
2466
2467  unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
2468  // Area that is at least reserved in the caller of this function.
2469  unsigned MinReservedArea = ArgOffset;
2470
2471  static const uint16_t GPR_32[] = {           // 32-bit registers.
2472    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
2473    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
2474  };
2475  static const uint16_t GPR_64[] = {           // 64-bit registers.
2476    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
2477    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
2478  };
2479
2480  static const uint16_t *FPR = GetFPR();
2481
2482  static const uint16_t VR[] = {
2483    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
2484    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
2485  };
2486
2487  const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
2488  const unsigned Num_FPR_Regs = 13;
2489  const unsigned Num_VR_Regs  = array_lengthof(VR);
2490
2491  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
2492
2493  const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32;
2494
2495  // In 32-bit non-varargs functions, the stack space for vectors is after the
2496  // stack space for non-vectors.  We do not use this space unless we have
2497  // too many vectors to fit in registers, something that only occurs in
2498  // constructed examples, but we have to walk the arglist to figure
2499  // that out... for the pathological case, compute VecArgOffset as the
2500  // start of the vector parameter area.  Computing VecArgOffset is the
2501  // entire point of the following loop.
2502  unsigned VecArgOffset = ArgOffset;
2503  if (!isVarArg && !isPPC64) {
2504    for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
2505         ++ArgNo) {
2506      EVT ObjectVT = Ins[ArgNo].VT;
2507      ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
2508
2509      if (Flags.isByVal()) {
2510        // ObjSize is the true size; ArgSize is ObjSize rounded up to whole regs.
2511        unsigned ObjSize = Flags.getByValSize();
2512        unsigned ArgSize =
2513                ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
2514        VecArgOffset += ArgSize;
2515        continue;
2516      }
2517
2518      switch(ObjectVT.getSimpleVT().SimpleTy) {
2519      default: llvm_unreachable("Unhandled argument type!");
2520      case MVT::i32:
2521      case MVT::f32:
2522        VecArgOffset += 4;
2523        break;
2524      case MVT::i64:  // PPC64
2525      case MVT::f64:
2526        // FIXME: We are guaranteed to be !isPPC64 at this point.
2527        // Does MVT::i64 apply?
2528        VecArgOffset += 8;
2529        break;
2530      case MVT::v4f32:
2531      case MVT::v4i32:
2532      case MVT::v8i16:
2533      case MVT::v16i8:
2534        // Nothing to do, we're only looking at non-vector args here.
2535        break;
2536      }
2537    }
2538  }
2539  // We've found where the vector parameter area in memory is.  Skip the
2540  // first 12 parameters; these don't use that memory.
2541  VecArgOffset = ((VecArgOffset+15)/16)*16;
2542  VecArgOffset += 12*16;
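
  // A worked example of the computation above (illustrative): with the
  // 24-byte 32-bit linkage area, a non-varargs function taking
  // (int, double, <4 x float>) accumulates VecArgOffset = 24 + 4 + 8 = 36 in
  // the loop, the round-up makes that 48, and adding the 12*16 bytes that
  // shadow vectors passed in V2..V13 puts the first memory-passed vector at
  // offset 240.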
2543
2544  // Add DAG nodes to load the arguments or copy them out of registers.  On
2545  // entry to a function on PPC, the arguments start after the linkage area,
2546  // although the first ones are often in registers.
2547
2548  SmallVector<SDValue, 8> MemOps;
2549  unsigned nAltivecParamsAtEnd = 0;
2550  // FIXME: FuncArg and Ins[ArgNo] must reference the same argument.
2551  // When passing anonymous aggregates, this is currently not true.
2552  // See LowerFormalArguments_64SVR4 for a fix.
2553  Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
2554  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) {
2555    SDValue ArgVal;
2556    bool needsLoad = false;
2557    EVT ObjectVT = Ins[ArgNo].VT;
2558    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
2559    unsigned ArgSize = ObjSize;
2560    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
2561
2562    unsigned CurArgOffset = ArgOffset;
2563
2564    // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
2565    if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
2566        ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
2567      if (isVarArg || isPPC64) {
2568        MinReservedArea = ((MinReservedArea+15)/16)*16;
2569        MinReservedArea += CalculateStackSlotSize(ObjectVT,
2570                                                  Flags,
2571                                                  PtrByteSize);
2572      } else nAltivecParamsAtEnd++;
2573    } else
2574      // Calculate min reserved area.
2575      MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
2576                                                Flags,
2577                                                PtrByteSize);
2578
2579    // FIXME the codegen can be much improved in some cases.
2580    // We do not have to keep everything in memory.
2581    if (Flags.isByVal()) {
2582      // ObjSize is the true size; ArgSize is ObjSize rounded up to whole registers.
2583      ObjSize = Flags.getByValSize();
2584      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
2585      // Objects of size 1 and 2 are right justified, everything else is
2586      // left justified.  This means the memory address is adjusted forwards.
2587      if (ObjSize==1 || ObjSize==2) {
2588        CurArgOffset = CurArgOffset + (4 - ObjSize);
2589      }
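      // E.g. (illustrative): a 1-byte byval object is right justified within
      // its 4-byte slot, so its address becomes the slot base plus 3, while a
      // 2-byte object lands at the slot base plus 2.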
2590      // The value of the object is its address.
2591      int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
2592      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2593      InVals.push_back(FIN);
2594      if (ObjSize==1 || ObjSize==2) {
2595        if (GPR_idx != Num_GPR_Regs) {
2596          unsigned VReg;
2597          if (isPPC64)
2598            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
2599          else
2600            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
2601          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
2602          EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
2603          SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
2604                                            MachinePointerInfo(FuncArg,
2605                                              CurArgOffset),
2606                                            ObjType, false, false, 0);
2607          MemOps.push_back(Store);
2608          ++GPR_idx;
2609        }
2610
2611        ArgOffset += PtrByteSize;
2612
2613        continue;
2614      }
2615      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
2616        // Store whatever pieces of the object are in registers
2617        // to memory.  ArgOffset will be the address of the beginning
2618        // of the object.
2619        if (GPR_idx != Num_GPR_Regs) {
2620          unsigned VReg;
2621          if (isPPC64)
2622            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
2623          else
2624            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
2625          int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
2626          SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2627          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
2628          SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
2629                                       MachinePointerInfo(FuncArg, ArgOffset),
2630                                       false, false, 0);
2631          MemOps.push_back(Store);
2632          ++GPR_idx;
2633          ArgOffset += PtrByteSize;
2634        } else {
2635          ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
2636          break;
2637        }
2638      }
2639      continue;
2640    }
2641
2642    switch (ObjectVT.getSimpleVT().SimpleTy) {
2643    default: llvm_unreachable("Unhandled argument type!");
2644    case MVT::i32:
2645      if (!isPPC64) {
2646        if (GPR_idx != Num_GPR_Regs) {
2647          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
2648          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
2649          ++GPR_idx;
2650        } else {
2651          needsLoad = true;
2652          ArgSize = PtrByteSize;
2653        }
2654        // All int arguments reserve stack space in the Darwin ABI.
2655        ArgOffset += PtrByteSize;
2656        break;
2657      }
2658      // FALLTHROUGH
2659    case MVT::i64:  // PPC64
2660      if (GPR_idx != Num_GPR_Regs) {
2661        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
2662        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
2663
2664        if (ObjectVT == MVT::i32)
2665          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
2666          // value to MVT::i64 and then truncate to the correct register size.
2667          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
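          // A sketch of the resulting DAG (assumed from the comment above;
          // extendArgForPPC64 is defined elsewhere in this file): for a
          // sign-extended i32 the result is roughly
          //   (i32 (truncate (AssertSext i64:ArgVal, i32)))
          // i.e. the extension is asserted on the i64 copy before truncating
          // back to the declared type.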
2668
2669        ++GPR_idx;
2670      } else {
2671        needsLoad = true;
2672        ArgSize = PtrByteSize;
2673      }
2674      // All int arguments reserve stack space in the Darwin ABI.
2675      ArgOffset += 8;
2676      break;
2677
2678    case MVT::f32:
2679    case MVT::f64:
2680      // Every 4 bytes of argument space consumes one of the GPRs available for
2681      // argument passing.
2682      if (GPR_idx != Num_GPR_Regs) {
2683        ++GPR_idx;
2684        if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
2685          ++GPR_idx;
2686      }
2687      if (FPR_idx != Num_FPR_Regs) {
2688        unsigned VReg;
2689
2690        if (ObjectVT == MVT::f32)
2691          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
2692        else
2693          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
2694
2695        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
2696        ++FPR_idx;
2697      } else {
2698        needsLoad = true;
2699      }
2700
2701      // All FP arguments reserve stack space in the Darwin ABI.
2702      ArgOffset += isPPC64 ? 8 : ObjSize;
2703      break;
2704    case MVT::v4f32:
2705    case MVT::v4i32:
2706    case MVT::v8i16:
2707    case MVT::v16i8:
2708      // Note that vector arguments in registers don't reserve stack space,
2709      // except in varargs functions.
2710      if (VR_idx != Num_VR_Regs) {
2711        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
2712        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
2713        if (isVarArg) {
2714          while ((ArgOffset % 16) != 0) {
2715            ArgOffset += PtrByteSize;
2716            if (GPR_idx != Num_GPR_Regs)
2717              GPR_idx++;
2718          }
2719          ArgOffset += 16;
2720          GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
2721        }
2722        ++VR_idx;
2723      } else {
2724        if (!isVarArg && !isPPC64) {
2725          // Vectors go after all the nonvectors.
2726          CurArgOffset = VecArgOffset;
2727          VecArgOffset += 16;
2728        } else {
2729          // Vectors are aligned.
2730          ArgOffset = ((ArgOffset+15)/16)*16;
2731          CurArgOffset = ArgOffset;
2732          ArgOffset += 16;
2733        }
2734        needsLoad = true;
2735      }
2736      break;
2737    }
2738
2739    // We need to load the argument to a virtual register if we determined above
2740    // that we ran out of physical registers of the appropriate type.
2741    if (needsLoad) {
2742      int FI = MFI->CreateFixedObject(ObjSize,
2743                                      CurArgOffset + (ArgSize - ObjSize),
2744                                      isImmutable);
2745      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2746      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
2747                           false, false, false, 0);
2748    }
2749
2750    InVals.push_back(ArgVal);
2751  }
2752
2753  // Set the size that is at least reserved in the caller of this function.  Tail
2754  // call optimized functions' reserved stack space needs to be aligned so that
2755  // taking the difference between two stack areas will result in an aligned
2756  // stack.
2757  setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, isPPC64);
2758
2759  // If the function takes variable number of arguments, make a frame index for
2760  // the start of the first vararg value... for expansion of llvm.va_start.
2761  if (isVarArg) {
2762    int Depth = ArgOffset;
2763
2764    FuncInfo->setVarArgsFrameIndex(
2765      MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
2766                             Depth, true));
2767    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
2768
2769    // If this function is vararg, store any remaining integer argument regs
2770    // to their spots on the stack so that they may be loaded by dereferencing
2771    // the result of va_next.
2772    for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
2773      unsigned VReg;
2774
2775      if (isPPC64)
2776        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
2777      else
2778        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
2779
2780      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
2781      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
2782                                   MachinePointerInfo(), false, false, 0);
2783      MemOps.push_back(Store);
2784      // Increment the address by the pointer size for the next argument to store
2785      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
2786      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
2787    }
2788  }
2789
2790  if (!MemOps.empty())
2791    Chain = DAG.getNode(ISD::TokenFactor, dl,
2792                        MVT::Other, &MemOps[0], MemOps.size());
2793
2794  return Chain;
2795}
2796
2797/// CalculateParameterAndLinkageAreaSize - Get the size of the parameter plus
2798/// linkage area for the Darwin ABI, or the 64-bit SVR4 ABI.
2799static unsigned
2800CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
2801                                     bool isPPC64,
2802                                     bool isVarArg,
2803                                     unsigned CC,
2804                                     const SmallVectorImpl<ISD::OutputArg>
2805                                       &Outs,
2806                                     const SmallVectorImpl<SDValue> &OutVals,
2807                                     unsigned &nAltivecParamsAtEnd) {
2808  // Count how many bytes are to be pushed on the stack, including the linkage
2809  // area, and parameter passing area.  We start with 24/48 bytes, which is
2810  // prereserved space for [SP][CR][LR][3 x unused].
2811  unsigned NumBytes = PPCFrameLowering::getLinkageSize(isPPC64, true);
2812  unsigned NumOps = Outs.size();
2813  unsigned PtrByteSize = isPPC64 ? 8 : 4;
2814
2815  // Add up all the space actually used.
2816  // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
2817  // they all go in registers, but we must reserve stack space for them for
2818  // possible use by the caller.  In varargs or 64-bit calls, parameters are
2819  // assigned stack space in order, with padding so Altivec parameters are
2820  // 16-byte aligned.
2821  nAltivecParamsAtEnd = 0;
2822  for (unsigned i = 0; i != NumOps; ++i) {
2823    ISD::ArgFlagsTy Flags = Outs[i].Flags;
2824    EVT ArgVT = Outs[i].VT;
2825    // Varargs and 64-bit Altivec parameters are padded to a 16-byte boundary.
2826    if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||
2827        ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) {
2828      if (!isVarArg && !isPPC64) {
2829        // Non-varargs Altivec parameters go after all the non-Altivec
2830        // parameters; handle those later so we know how much padding we need.
2831        nAltivecParamsAtEnd++;
2832        continue;
2833      }
2834      // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
2835      NumBytes = ((NumBytes+15)/16)*16;
2836    }
2837    NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
2838  }
2839
2840  // Allow for Altivec parameters at the end, if needed.
2841  if (nAltivecParamsAtEnd) {
2842    NumBytes = ((NumBytes+15)/16)*16;
2843    NumBytes += 16*nAltivecParamsAtEnd;
2844  }
2845
2846  // The prolog code of the callee may store up to 8 GPR argument registers to
2847  // the stack, allowing va_start to index over them in memory if it is varargs.
2848  // Because we cannot tell if this is needed on the caller side, we have to
2849  // conservatively assume that it is needed.  As such, make sure we have at
2850  // least enough stack space for the caller to store the 8 GPRs.
2851  NumBytes = std::max(NumBytes,
2852                      PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
2853
2854  // Tail call needs the stack to be aligned.
2855  if (CC == CallingConv::Fast && DAG.getTarget().Options.GuaranteedTailCallOpt){
2856    unsigned TargetAlign = DAG.getMachineFunction().getTarget().
2857      getFrameLowering()->getStackAlignment();
2858    unsigned AlignMask = TargetAlign-1;
2859    NumBytes = (NumBytes + AlignMask) & ~AlignMask;
2860  }
2861
2862  return NumBytes;
2863}
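
// A worked example (illustrative): a 64-bit call passing (i64, double,
// <4 x i32>) starts from the 48-byte linkage area, adds 8 + 8 for the two
// scalars and 16 for the (already 16-byte aligned) vector, giving 80 bytes;
// the std::max against getMinCallFrameSize then raises this so the callee
// could still home all 8 GPR argument registers (presumably 48 + 8*8 = 112
// bytes here).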
2864
2865/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
2866/// adjusted to accommodate the arguments for the tail call.
2867static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
2868                                   unsigned ParamSize) {
2869
2870  if (!isTailCall) return 0;
2871
2872  PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
2873  unsigned CallerMinReservedArea = FI->getMinReservedArea();
2874  int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
2875  // Remember only if the new adjustment is bigger.
2876  if (SPDiff < FI->getTailCallSPDelta())
2877    FI->setTailCallSPDelta(SPDiff);
2878
2879  return SPDiff;
2880}
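
// Illustrative example: if the caller's MinReservedArea is 112 bytes but the
// tail call's ParamSize is 144, SPDiff is -32; being smaller than any
// previously recorded delta, it becomes the new TailCallSPDelta so frame
// lowering can grow the frame accordingly.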
2881
2882/// IsEligibleForTailCallOptimization - Check whether the call is eligible
2883/// for tail call optimization. Targets which want to do tail call
2884/// optimization should implement this function.
2885bool
2886PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
2887                                                     CallingConv::ID CalleeCC,
2888                                                     bool isVarArg,
2889                                      const SmallVectorImpl<ISD::InputArg> &Ins,
2890                                                     SelectionDAG& DAG) const {
2891  if (!getTargetMachine().Options.GuaranteedTailCallOpt)
2892    return false;
2893
2894  // Variable argument functions are not supported.
2895  if (isVarArg)
2896    return false;
2897
2898  MachineFunction &MF = DAG.getMachineFunction();
2899  CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
2900  if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
2901    // Functions containing byval parameters are not supported.
2902    for (unsigned i = 0; i != Ins.size(); i++) {
2903      ISD::ArgFlagsTy Flags = Ins[i].Flags;
2904      if (Flags.isByVal()) return false;
2905    }
2906
2907    // Non-PIC/GOT tail calls are supported.
2908    if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
2909      return true;
2910
2911    // At the moment we can only do local tail calls (in the same module, hidden
2912    // or protected) if we are generating PIC.
2913    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2914      return G->getGlobal()->hasHiddenVisibility()
2915          || G->getGlobal()->hasProtectedVisibility();
2916  }
2917
2918  return false;
2919}
2920
2921/// isBLACompatibleAddress - Return the immediate to use if the specified
2922/// 32-bit value is representable in the immediate field of a BxA instruction.
2923static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
2924  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
2925  if (!C) return 0;
2926
2927  int Addr = C->getZExtValue();
2928  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
2929      SignExtend32<26>(Addr) != Addr)
2930    return 0;  // Top 6 bits have to be sext of immediate.
2931
2932  return DAG.getConstant((int)C->getZExtValue() >> 2,
2933                         DAG.getTargetLoweringInfo().getPointerTy()).getNode();
2934}
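
// Worked example (illustrative): Op = 0x2000 has its low two bits clear and
// is unchanged by SignExtend32<26>, so this returns the constant
// 0x2000 >> 2 = 0x800 for the branch's shifted immediate field; 0x2001
// (misaligned) or 0x4000000 (out of signed 26-bit range) would return 0.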
2935
2936namespace {
2937
2938struct TailCallArgumentInfo {
2939  SDValue Arg;
2940  SDValue FrameIdxOp;
2941  int       FrameIdx;
2942
2943  TailCallArgumentInfo() : FrameIdx(0) {}
2944};
2945
2946}
2947
2948/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
2949static void
2950StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
2951                   SDValue Chain,
2952                   const SmallVector<TailCallArgumentInfo, 8> &TailCallArgs,
2953                   SmallVector<SDValue, 8> &MemOpChains,
2954                   DebugLoc dl) {
2955  for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
2956    SDValue Arg = TailCallArgs[i].Arg;
2957    SDValue FIN = TailCallArgs[i].FrameIdxOp;
2958    int FI = TailCallArgs[i].FrameIdx;
2959    // Store relative to the frame pointer.
2960    MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN,
2961                                       MachinePointerInfo::getFixedStack(FI),
2962                                       false, false, 0));
2963  }
2964}
2965
2966/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
2967/// the appropriate stack slot for the tail call optimized function call.
2968static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
2969                                               MachineFunction &MF,
2970                                               SDValue Chain,
2971                                               SDValue OldRetAddr,
2972                                               SDValue OldFP,
2973                                               int SPDiff,
2974                                               bool isPPC64,
2975                                               bool isDarwinABI,
2976                                               DebugLoc dl) {
2977  if (SPDiff) {
2978    // Calculate the new stack slot for the return address.
2979    int SlotSize = isPPC64 ? 8 : 4;
2980    int NewRetAddrLoc = SPDiff + PPCFrameLowering::getReturnSaveOffset(isPPC64,
2981                                                                   isDarwinABI);
2982    int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
2983                                                          NewRetAddrLoc, true);
2984    EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
2985    SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
2986    Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
2987                         MachinePointerInfo::getFixedStack(NewRetAddr),
2988                         false, false, 0);
2989
2990    // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
2991    // slot as the FP is never overwritten.
2992    if (isDarwinABI) {
2993      int NewFPLoc =
2994        SPDiff + PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
2995      int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc,
2996                                                          true);
2997      SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
2998      Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
2999                           MachinePointerInfo::getFixedStack(NewFPIdx),
3000                           false, false, 0);
3001    }
3002  }
3003  return Chain;
3004}
3005
3006/// CalculateTailCallArgDest - Remember the argument for later processing, and
3007/// calculate the position of the argument.
3008static void
3009CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
3010                         SDValue Arg, int SPDiff, unsigned ArgOffset,
3011                      SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) {
3012  int Offset = ArgOffset + SPDiff;
3013  uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
3014  int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
3015  EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
3016  SDValue FIN = DAG.getFrameIndex(FI, VT);
3017  TailCallArgumentInfo Info;
3018  Info.Arg = Arg;
3019  Info.FrameIdxOp = FIN;
3020  Info.FrameIdx = FI;
3021  TailCallArguments.push_back(Info);
3022}
3023
3024/// EmitTailCallLoadFPAndRetAddr - Emit loads from the frame pointer and return
3025/// address stack slots. Returns the chain as result and the loaded values in
3026/// LROpOut/FPOpOut. Used when tail calling.
3027SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
3028                                                        int SPDiff,
3029                                                        SDValue Chain,
3030                                                        SDValue &LROpOut,
3031                                                        SDValue &FPOpOut,
3032                                                        bool isDarwinABI,
3033                                                        DebugLoc dl) const {
3034  if (SPDiff) {
3035    // Load the LR and FP stack slot for later adjusting.
3036    EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
3037    LROpOut = getReturnAddrFrameIndex(DAG);
3038    LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(),
3039                          false, false, false, 0);
3040    Chain = SDValue(LROpOut.getNode(), 1);
3041
3042    // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
3043    // slot as the FP is never overwritten.
3044    if (isDarwinABI) {
3045      FPOpOut = getFramePointerFrameIndex(DAG);
3046      FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(),
3047                            false, false, false, 0);
3048      Chain = SDValue(FPOpOut.getNode(), 1);
3049    }
3050  }
3051  return Chain;
3052}
3053
3054/// CreateCopyOfByValArgument - Make a copy of an aggregate at the address
3055/// specified by "Src" to address "Dst" of size "Size".  Alignment information
3056/// is given by the specific parameter attribute. The copy will be passed as
3057/// a byval function parameter.
3058/// Sometimes what we are copying is the end of a larger object, the part that
3059/// does not fit in registers.
3060static SDValue
3061CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
3062                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
3063                          DebugLoc dl) {
3064  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
3065  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
3066                       false, false, MachinePointerInfo(0),
3067                       MachinePointerInfo(0));
3068}
3069
3070/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
3071/// tail calls.
3072static void
3073LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
3074                 SDValue Arg, SDValue PtrOff, int SPDiff,
3075                 unsigned ArgOffset, bool isPPC64, bool isTailCall,
3076                 bool isVector, SmallVector<SDValue, 8> &MemOpChains,
3077                 SmallVector<TailCallArgumentInfo, 8> &TailCallArguments,
3078                 DebugLoc dl) {
3079  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
3080  if (!isTailCall) {
3081    if (isVector) {
3082      SDValue StackPtr;
3083      if (isPPC64)
3084        StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
3085      else
3086        StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
3087      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
3088                           DAG.getConstant(ArgOffset, PtrVT));
3089    }
3090    MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
3091                                       MachinePointerInfo(), false, false, 0));
3092  // Calculate and remember argument location.
3093  } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
3094                                  TailCallArguments);
3095}
3096
3097static
3098void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
3099                     DebugLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
3100                     SDValue LROp, SDValue FPOp, bool isDarwinABI,
3101                     SmallVector<TailCallArgumentInfo, 8> &TailCallArguments) {
3102  MachineFunction &MF = DAG.getMachineFunction();
3103
3104  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
3105  // might overwrite each other in case of tail call optimization.
3106  SmallVector<SDValue, 8> MemOpChains2;
3107  // Do not flag preceding copytoreg stuff together with the following stuff.
3108  InFlag = SDValue();
3109  StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
3110                                    MemOpChains2, dl);
3111  if (!MemOpChains2.empty())
3112    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3113                        &MemOpChains2[0], MemOpChains2.size());
3114
3115  // Store the return address to the appropriate stack slot.
3116  Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
3117                                        isPPC64, isDarwinABI, dl);
3118
3119  // Emit callseq_end just before tailcall node.
3120  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
3121                             DAG.getIntPtrConstant(0, true), InFlag);
3122  InFlag = Chain.getValue(1);
3123}
3124
3125static
3126unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
3127                     SDValue &Chain, DebugLoc dl, int SPDiff, bool isTailCall,
3128                     SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
3129                     SmallVector<SDValue, 8> &Ops, std::vector<EVT> &NodeTys,
3130                     const PPCSubtarget &PPCSubTarget) {
3131
3132  bool isPPC64 = PPCSubTarget.isPPC64();
3133  bool isSVR4ABI = PPCSubTarget.isSVR4ABI();
3134
3135  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
3136  NodeTys.push_back(MVT::Other);   // Returns a chain
3137  NodeTys.push_back(MVT::Glue);    // Returns a flag for retval copy to use.
3138
3139  unsigned CallOpc = PPCISD::CALL;
3140
3141  bool needIndirectCall = true;
3142  if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
3143    // If this is an absolute destination address, use the munged value.
3144    Callee = SDValue(Dest, 0);
3145    needIndirectCall = false;
3146  }
3147
3148  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3149    // XXX Workaround for http://llvm.org/bugs/show_bug.cgi?id=5201
3150    // Use indirect calls for ALL function calls in JIT mode, since the
3151    // far-call stubs may be outside relocation limits for a BL instruction.
3152    if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) {
3153      unsigned OpFlags = 0;
3154      if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
3155          (PPCSubTarget.getTargetTriple().isMacOSX() &&
3156           PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
3157          (G->getGlobal()->isDeclaration() ||
3158           G->getGlobal()->isWeakForLinker())) {
3159        // PC-relative references to external symbols should go through $stub,
3160        // unless we're building with the leopard linker or later, which
3161        // automatically synthesizes these stubs.
3162        OpFlags = PPCII::MO_DARWIN_STUB;
3163      }
3164
3165      // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
3166      // every direct call is) turn it into a TargetGlobalAddress /
3167      // TargetExternalSymbol node so that legalize doesn't hack it.
3168      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
3169                                          Callee.getValueType(),
3170                                          0, OpFlags);
3171      needIndirectCall = false;
3172    }
3173  }
3174
3175  if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
3176    unsigned char OpFlags = 0;
3177
3178    if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
3179        (PPCSubTarget.getTargetTriple().isMacOSX() &&
3180         PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5))) {
3181      // PC-relative references to external symbols should go through $stub,
3182      // unless we're building with the leopard linker or later, which
3183      // automatically synthesizes these stubs.
3184      OpFlags = PPCII::MO_DARWIN_STUB;
3185    }
3186
3187    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
3188                                         OpFlags);
3189    needIndirectCall = false;
3190  }
3191
3192  if (needIndirectCall) {
3193    // Otherwise, this is an indirect call.  We have to use a MTCTR/BCTRL pair
3194    // to do the call; we can't use PPCISD::CALL.
3195    SDValue MTCTROps[] = {Chain, Callee, InFlag};
3196
3197    if (isSVR4ABI && isPPC64) {
3198      // Function pointers in the 64-bit SVR4 ABI do not point to the function
3199      // entry point, but to the function descriptor (the function entry point
3200      // address is part of the function descriptor though).
3201      // The function descriptor is a three doubleword structure with the
3202      // following fields: function entry point, TOC base address and
3203      // environment pointer.
3204      // Thus for a call through a function pointer, the following actions need
3205      // to be performed:
3206      //   1. Save the TOC of the caller in the TOC save area of its stack
3207      //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
3208      //   2. Load the address of the function entry point from the function
3209      //      descriptor.
3210      //   3. Load the TOC of the callee from the function descriptor into r2.
3211      //   4. Load the environment pointer from the function descriptor into
3212      //      r11.
3213      //   5. Branch to the function entry point address.
3214      //   6. On return of the callee, the TOC of the caller needs to be
3215      //      restored (this is done in FinishCall()).
3216      //
3217      // All those operations are flagged together to ensure that no other
3218      // operations can be scheduled in between. E.g. without flagging the
3219      // operations together, a TOC access in the caller could be scheduled
3220      // between the load of the callee TOC and the branch to the callee, which
3221      // results in the TOC access going through the TOC of the callee instead
3222      // of going through the TOC of the caller, which leads to incorrect code.
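      //
      // As a sketch (illustrative; no such type appears in this file), the
      // descriptor the callee pointer references can be pictured as:
      //   struct FuncDesc {
      //     void *EntryPoint; // loaded below and moved to CTR
      //     void *TOCBase;    // loaded into r2 via PPCISD::LOAD_TOC
      //     void *EnvPtr;     // at offset 16, loaded into r11
      //   };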
3223
3224      // Load the address of the function entry point from the function
3225      // descriptor.
3226      SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Glue);
3227      SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, MTCTROps,
3228                                        InFlag.getNode() ? 3 : 2);
3229      Chain = LoadFuncPtr.getValue(1);
3230      InFlag = LoadFuncPtr.getValue(2);
3231
3232      // Load environment pointer into r11.
3233      // Offset of the environment pointer within the function descriptor.
3234      SDValue PtrOff = DAG.getIntPtrConstant(16);
3235
3236      SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
3237      SDValue LoadEnvPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, Chain, AddPtr,
3238                                       InFlag);
3239      Chain = LoadEnvPtr.getValue(1);
3240      InFlag = LoadEnvPtr.getValue(2);
3241
3242      SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
3243                                        InFlag);
3244      Chain = EnvVal.getValue(0);
3245      InFlag = EnvVal.getValue(1);
3246
3247      // Load TOC of the callee into r2. We are using a target-specific load
3248      // with r2 hard coded, because the result of a target-independent load
3249      // would never go directly into r2, since r2 is a reserved register (which
3250      // prevents the register allocator from allocating it), resulting in an
3251      // additional register being allocated and an unnecessary move instruction
3252      // being generated.
3253      VTs = DAG.getVTList(MVT::Other, MVT::Glue);
3254      SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain,
3255                                       Callee, InFlag);
3256      Chain = LoadTOCPtr.getValue(0);
3257      InFlag = LoadTOCPtr.getValue(1);
3258
3259      MTCTROps[0] = Chain;
3260      MTCTROps[1] = LoadFuncPtr;
3261      MTCTROps[2] = InFlag;
3262    }
3263
3264    Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps,
3265                        2 + (InFlag.getNode() != 0));
3266    InFlag = Chain.getValue(1);
3267
3268    NodeTys.clear();
3269    NodeTys.push_back(MVT::Other);
3270    NodeTys.push_back(MVT::Glue);
3271    Ops.push_back(Chain);
3272    CallOpc = PPCISD::BCTRL;
3273    Callee.setNode(0);
3274    // Add use of X11 (holding environment pointer)
3275    if (isSVR4ABI && isPPC64)
3276      Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
3277    // Add CTR register as callee so a bctr can be emitted later.
3278    if (isTailCall)
3279      Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
3280  }
3281
3282  // If this is a direct call, pass the chain and the callee.
3283  if (Callee.getNode()) {
3284    Ops.push_back(Chain);
3285    Ops.push_back(Callee);
3286  }
3287  // If this is a tail call add stack pointer delta.
3288  if (isTailCall)
3289    Ops.push_back(DAG.getConstant(SPDiff, MVT::i32));
3290
3291  // Add argument registers to the end of the list so that they are known live
3292  // into the call.
3293  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
3294    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
3295                                  RegsToPass[i].second.getValueType()));
3296
3297  return CallOpc;
3298}
3299
3300static bool isLocalCall(const SDValue &Callee) {
3303  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
3304    return !G->getGlobal()->isDeclaration() &&
3305           !G->getGlobal()->isWeakForLinker();
3306  return false;
3307}
3308
3309SDValue
3310PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
3311                                   CallingConv::ID CallConv, bool isVarArg,
3312                                   const SmallVectorImpl<ISD::InputArg> &Ins,
3313                                   DebugLoc dl, SelectionDAG &DAG,
3314                                   SmallVectorImpl<SDValue> &InVals) const {
3315
3316  SmallVector<CCValAssign, 16> RVLocs;
3317  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
3318                    getTargetMachine(), RVLocs, *DAG.getContext());
3319  CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);
3320
3321  // Copy all of the result registers out of their specified physreg.
3322  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
3323    CCValAssign &VA = RVLocs[i];
3324    assert(VA.isRegLoc() && "Can only return in registers!");
3325
3326    SDValue Val = DAG.getCopyFromReg(Chain, dl,
3327                                     VA.getLocReg(), VA.getLocVT(), InFlag);
3328    Chain = Val.getValue(1);
3329    InFlag = Val.getValue(2);
3330
3331    switch (VA.getLocInfo()) {
3332    default: llvm_unreachable("Unknown loc info!");
3333    case CCValAssign::Full: break;
3334    case CCValAssign::AExt:
3335      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
3336      break;
3337    case CCValAssign::ZExt:
3338      Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
3339                        DAG.getValueType(VA.getValVT()));
3340      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
3341      break;
3342    case CCValAssign::SExt:
3343      Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
3344                        DAG.getValueType(VA.getValVT()));
3345      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
3346      break;
3347    }
3348
3349    InVals.push_back(Val);
3350  }
3351
3352  return Chain;
3353}
3354
3355SDValue
3356PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
3357                              bool isTailCall, bool isVarArg,
3358                              SelectionDAG &DAG,
3359                              SmallVector<std::pair<unsigned, SDValue>, 8>
3360                                &RegsToPass,
3361                              SDValue InFlag, SDValue Chain,
3362                              SDValue &Callee,
3363                              int SPDiff, unsigned NumBytes,
3364                              const SmallVectorImpl<ISD::InputArg> &Ins,
3365                              SmallVectorImpl<SDValue> &InVals) const {
3366  std::vector<EVT> NodeTys;
3367  SmallVector<SDValue, 8> Ops;
3368  unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
3369                                 isTailCall, RegsToPass, Ops, NodeTys,
3370                                 PPCSubTarget);
3371
3372  // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
3373  if (isVarArg && PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64())
3374    Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
3375
3376  // When performing tail call optimization the callee pops its arguments off
3377  // the stack. Account for this here so these bytes can be pushed back on in
3378  // PPCFrameLowering::eliminateCallFramePseudoInstr.
3379  int BytesCalleePops =
3380    (CallConv == CallingConv::Fast &&
3381     getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
3382
3383  // Add a register mask operand representing the call-preserved registers.
3384  const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
3385  const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
3386  assert(Mask && "Missing call preserved mask for calling convention");
3387  Ops.push_back(DAG.getRegisterMask(Mask));
3388
3389  if (InFlag.getNode())
3390    Ops.push_back(InFlag);
3391
3392  // Emit tail call.
3393  if (isTailCall) {
3394    assert(((Callee.getOpcode() == ISD::Register &&
3395             cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
3396            Callee.getOpcode() == ISD::TargetExternalSymbol ||
3397            Callee.getOpcode() == ISD::TargetGlobalAddress ||
3398            isa<ConstantSDNode>(Callee)) &&
3399    "Expecting an global address, external symbol, absolute value or register");
3400
3401    return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size());
3402  }
3403
3404  // Add a NOP immediately after the branch instruction when using the 64-bit
3405  // SVR4 ABI. At link time, if caller and callee are in different modules and
3406  // thus have a different TOC, the call will be replaced with a call to a stub
3407  // function which saves the current TOC, loads the TOC of the callee and
3408  // branches to the callee. The NOP will be replaced with a load instruction
3409  // which restores the TOC of the caller from the TOC save slot of the current
3410  // stack frame. If caller and callee belong to the same module (and have the
3411  // same TOC), the NOP will remain unchanged.
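  //
  // Schematically (illustrative; actual stub names are linker-defined):
  //   bl foo    ->  bl foo_stub                 ; cross-module, rewritten
  //   nop       ->  ld r2, <TOC save slot>(r1)
  // A call known to be local instead skips the NOP entirely; see
  // isLocalCall() above.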
3412
3413  bool needsTOCRestore = false;
3414  if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) {
3415    if (CallOpc == PPCISD::BCTRL) {
3416      // This is a call through a function pointer.
3417      // Restore the caller TOC from the save area into R2.
3418      // See PrepareCall() for more information about calls through function
3419      // pointers in the 64-bit SVR4 ABI.
3420      // We are using a target-specific load with r2 hard coded, because the
3421      // result of a target-independent load would never go directly into r2,
3422      // since r2 is a reserved register (which prevents the register allocator
3423      // from allocating it), resulting in an additional register being
3424      // allocated and an unnecessary move instruction being generated.
3425      needsTOCRestore = true;
3426    } else if ((CallOpc == PPCISD::CALL) && !isLocalCall(Callee)) {
3427      // Otherwise insert NOP for non-local calls.
3428      CallOpc = PPCISD::CALL_NOP;
3429    }
3430  }
3431
3432  Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
3433  InFlag = Chain.getValue(1);
3434
3435  if (needsTOCRestore) {
3436    SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
3437    Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag);
3438    InFlag = Chain.getValue(1);
3439  }
3440
3441  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
3442                             DAG.getIntPtrConstant(BytesCalleePops, true),
3443                             InFlag);
3444  if (!Ins.empty())
3445    InFlag = Chain.getValue(1);
3446
3447  return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
3448                         Ins, dl, DAG, InVals);
3449}
3450
3451SDValue
3452PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
3453                             SmallVectorImpl<SDValue> &InVals) const {
3454  SelectionDAG &DAG                     = CLI.DAG;
3455  DebugLoc &dl                          = CLI.DL;
3456  SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
3457  SmallVector<SDValue, 32> &OutVals     = CLI.OutVals;
3458  SmallVector<ISD::InputArg, 32> &Ins   = CLI.Ins;
3459  SDValue Chain                         = CLI.Chain;
3460  SDValue Callee                        = CLI.Callee;
3461  bool &isTailCall                      = CLI.IsTailCall;
3462  CallingConv::ID CallConv              = CLI.CallConv;
3463  bool isVarArg                         = CLI.IsVarArg;
3464
3465  if (isTailCall)
3466    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
3467                                                   Ins, DAG);
3468
3469  if (PPCSubTarget.isSVR4ABI()) {
3470    if (PPCSubTarget.isPPC64())
3471      return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
3472                              isTailCall, Outs, OutVals, Ins,
3473                              dl, DAG, InVals);
3474    else
3475      return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
3476                              isTailCall, Outs, OutVals, Ins,
3477                              dl, DAG, InVals);
3478  }
3479
3480  return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
3481                          isTailCall, Outs, OutVals, Ins,
3482                          dl, DAG, InVals);
3483}
3484
3485SDValue
3486PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
3487                                    CallingConv::ID CallConv, bool isVarArg,
3488                                    bool isTailCall,
3489                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
3490                                    const SmallVectorImpl<SDValue> &OutVals,
3491                                    const SmallVectorImpl<ISD::InputArg> &Ins,
3492                                    DebugLoc dl, SelectionDAG &DAG,
3493                                    SmallVectorImpl<SDValue> &InVals) const {
3494  // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
3495  // of the 32-bit SVR4 ABI stack frame layout.
3496
3497  assert((CallConv == CallingConv::C ||
3498          CallConv == CallingConv::Fast) && "Unknown calling convention!");
3499
3500  unsigned PtrByteSize = 4;
3501
3502  MachineFunction &MF = DAG.getMachineFunction();
3503
3504  // Mark this function as potentially containing a function that contains a
3505  // tail call. As a consequence the frame pointer will be used for dynamic
3506  // allocation and for restoring the caller's stack pointer in this function's
3507  // epilogue. This is done because, by tail calling, the called function might
3508  // overwrite the value in this function's (MF) stack pointer stack slot 0(SP).
3509  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
3510      CallConv == CallingConv::Fast)
3511    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
3512
3513  // Count how many bytes are to be pushed on the stack, including the linkage
3514  // area, parameter list area and the part of the local variable space which
3515  // contains copies of aggregates which are passed by value.
3516
3517  // Assign locations to all of the outgoing arguments.
3518  SmallVector<CCValAssign, 16> ArgLocs;
3519  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
3520                 getTargetMachine(), ArgLocs, *DAG.getContext());
3521
3522  // Reserve space for the linkage area on the stack.
3523  CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
3524
3525  if (isVarArg) {
3526    // Handle fixed and variable vector arguments differently.
3527    // Fixed vector arguments go into registers as long as registers are
3528    // available. Variable vector arguments always go into memory.
3529    unsigned NumArgs = Outs.size();
3530
3531    for (unsigned i = 0; i != NumArgs; ++i) {
3532      MVT ArgVT = Outs[i].VT;
3533      ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
3534      bool Result;
3535
3536      if (Outs[i].IsFixed) {
3537        Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
3538                               CCInfo);
3539      } else {
3540        Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
3541                                      ArgFlags, CCInfo);
3542      }
3543
3544      if (Result) {
3545#ifndef NDEBUG
3546        errs() << "Call operand #" << i << " has unhandled type "
3547             << EVT(ArgVT).getEVTString() << "\n";
3548#endif
3549        llvm_unreachable(0);
3550      }
3551    }
3552  } else {
3553    // All arguments are treated the same.
3554    CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
3555  }
3556
3557  // Assign locations to all of the outgoing aggregate by value arguments.
3558  SmallVector<CCValAssign, 16> ByValArgLocs;
3559  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
3560                      getTargetMachine(), ByValArgLocs, *DAG.getContext());
3561
3562  // Reserve stack space for the allocations in CCInfo.
3563  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
3564
3565  CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
3566
3567  // Size of the linkage area, parameter list area and the part of the local
3568  // variable space where copies of aggregates which are passed by value are
3569  // stored.
3570  unsigned NumBytes = CCByValInfo.getNextStackOffset();
3571
3572  // Calculate by how many bytes the stack has to be adjusted in case of tail
3573  // call optimization.
3574  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
3575
3576  // Adjust the stack pointer for the new arguments...
3577  // These operations are automatically eliminated by the prolog/epilog pass
3578  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
3579  SDValue CallSeqStart = Chain;
3580
3581  // Load the return address and frame pointer so they can be moved somewhere
3582  // else later.
3583  SDValue LROp, FPOp;
3584  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false,
3585                                       dl);
3586
3587  // Set up a copy of the stack pointer for use loading and storing any
3588  // arguments that may not fit in the registers available for argument
3589  // passing.
3590  SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
3591
3592  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3593  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
3594  SmallVector<SDValue, 8> MemOpChains;
3595
3596  bool seenFloatArg = false;
3597  // Walk the register/memloc assignments, inserting copies/loads.
3598  for (unsigned i = 0, j = 0, e = ArgLocs.size();
3599       i != e;
3600       ++i) {
3601    CCValAssign &VA = ArgLocs[i];
3602    SDValue Arg = OutVals[i];
3603    ISD::ArgFlagsTy Flags = Outs[i].Flags;
3604
3605    if (Flags.isByVal()) {
3606      // Argument is an aggregate which is passed by value, thus we need to
3607      // create a copy of it in the local variable space of the current stack
3608      // frame (which is the stack frame of the caller) and pass the address of
3609      // this copy to the callee.
3610      assert((j < ByValArgLocs.size()) && "Index out of bounds!");
3611      CCValAssign &ByValVA = ByValArgLocs[j++];
3612      assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
3613
3614      // Memory reserved in the local variable space of the caller's stack frame.
3615      unsigned LocMemOffset = ByValVA.getLocMemOffset();
3616
3617      SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
3618      PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
3619
3620      // Create a copy of the argument in the local area of the current
3621      // stack frame.
3622      SDValue MemcpyCall =
3623        CreateCopyOfByValArgument(Arg, PtrOff,
3624                                  CallSeqStart.getNode()->getOperand(0),
3625                                  Flags, DAG, dl);
3626
3627      // This must go outside the CALLSEQ_START..END.
3628      SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
3629                           CallSeqStart.getNode()->getOperand(1));
3630      DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
3631                             NewCallSeqStart.getNode());
3632      Chain = CallSeqStart = NewCallSeqStart;
3633
3634      // Pass the address of the aggregate copy on the stack either in a
3635      // physical register or in the parameter list area of the current stack
3636      // frame to the callee.
3637      Arg = PtrOff;
3638    }
3639
3640    if (VA.isRegLoc()) {
3641      seenFloatArg |= VA.getLocVT().isFloatingPoint();
3642      // Put argument in a physical register.
3643      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
3644    } else {
3645      // Put argument in the parameter list area of the current stack frame.
3646      assert(VA.isMemLoc());
3647      unsigned LocMemOffset = VA.getLocMemOffset();
3648
3649      if (!isTailCall) {
3650        SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
3651        PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
3652
3653        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
3654                                           MachinePointerInfo(),
3655                                           false, false, 0));
3656      } else {
3657        // Calculate and remember argument location.
3658        CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
3659                                 TailCallArguments);
3660      }
3661    }
3662  }
3663
3664  if (!MemOpChains.empty())
3665    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3666                        &MemOpChains[0], MemOpChains.size());
3667
3668  // Build a sequence of copy-to-reg nodes chained together with token chain
3669  // and flag operands which copy the outgoing args into the appropriate regs.
3670  SDValue InFlag;
3671  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
3672    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
3673                             RegsToPass[i].second, InFlag);
3674    InFlag = Chain.getValue(1);
3675  }
3676
3677  // Set CR bit 6 to true if this is a vararg call with floating args passed in
3678  // registers.
3679  if (isVarArg) {
3680    SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
3681    SDValue Ops[] = { Chain, InFlag };
3682
3683    Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
3684                        dl, VTs, Ops, InFlag.getNode() ? 2 : 1);
3685
3686    InFlag = Chain.getValue(1);
3687  }
3688
3689  if (isTailCall)
3690    PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
3691                    false, TailCallArguments);
3692
3693  return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
3694                    RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
3695                    Ins, InVals);
3696}
3697
3698// Copy an argument into memory, being careful to do this outside the
3699// call sequence for the call to which the argument belongs.
3700SDValue
3701PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
3702                                              SDValue CallSeqStart,
3703                                              ISD::ArgFlagsTy Flags,
3704                                              SelectionDAG &DAG,
3705                                              DebugLoc dl) const {
3706  SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
3707                        CallSeqStart.getNode()->getOperand(0),
3708                        Flags, DAG, dl);
3709  // The MEMCPY must go outside the CALLSEQ_START..END.
3710  SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
3711                             CallSeqStart.getNode()->getOperand(1));
3712  DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
3713                         NewCallSeqStart.getNode());
3714  return NewCallSeqStart;
3715}
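
// Rationale for the splice above (restating the constraint): if the memcpy
// were chained inside the CALLSEQ_START..CALLSEQ_END region it could be
// scheduled after the stack adjustment and clobber the outgoing argument
// area; rebuilding CALLSEQ_START on top of the memcpy's chain keeps the copy
// strictly before the call sequence.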
3716
3717SDValue
3718PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
3719                                    CallingConv::ID CallConv, bool isVarArg,
3720                                    bool isTailCall,
3721                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
3722                                    const SmallVectorImpl<SDValue> &OutVals,
3723                                    const SmallVectorImpl<ISD::InputArg> &Ins,
3724                                    DebugLoc dl, SelectionDAG &DAG,
3725                                    SmallVectorImpl<SDValue> &InVals) const {
3726
3727  unsigned NumOps = Outs.size();
3728
3729  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
3730  unsigned PtrByteSize = 8;
3731
3732  MachineFunction &MF = DAG.getMachineFunction();
3733
3734  // Mark this function as potentially containing a tail call.  As a
3735  // consequence, the frame pointer will be used for dynamic stack allocation
3736  // and for restoring the caller's stack pointer in this function's epilogue.
3737  // This is necessary because the tail-called function might overwrite the
3738  // value in this function's (MF) stack pointer save slot, 0(SP).
3739  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
3740      CallConv == CallingConv::Fast)
3741    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
3742
3743  unsigned nAltivecParamsAtEnd = 0;
3744
3745  // Count how many bytes are to be pushed on the stack, including the linkage
3746  // area, and parameter passing area.  We start with at least 48 bytes, which
3747  // is reserved space for [SP][CR][LR][2 x reserved][TOC].
3748  // NOTE: For PPC64, nAltivecParamsAtEnd always remains zero as a result
3749  // of this call.
3750  unsigned NumBytes =
3751    CalculateParameterAndLinkageAreaSize(DAG, true, isVarArg, CallConv,
3752                                         Outs, OutVals, nAltivecParamsAtEnd);
3753
3754  // Calculate by how many bytes the stack has to be adjusted in case of tail
3755  // call optimization.
3756  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
3757
3758  // To protect arguments on the stack from being clobbered in a tail call,
3759  // force all the loads to happen before doing any other lowering.
3760  if (isTailCall)
3761    Chain = DAG.getStackArgumentTokenFactor(Chain);
3762
3763  // Adjust the stack pointer for the new arguments...
3764  // These operations are automatically eliminated by the prolog/epilog pass
3765  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
3766  SDValue CallSeqStart = Chain;
3767
3768  // Load the return address and frame pointer so they can be moved somewhere
3769  // else later.
3770  SDValue LROp, FPOp;
3771  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
3772                                       dl);
3773
3774  // Set up a copy of the stack pointer for use loading and storing any
3775  // arguments that may not fit in the registers available for argument
3776  // passing.
3777  SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
3778
3779  // Figure out which arguments are going to go in registers, and which in
3780  // memory.  Also, if this is a vararg function, floating point arguments
3781  // must be stored to our stack, and loaded into integer regs as well, if
3782  // any integer regs are available for argument passing.
3783  unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true);
3784  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
3785
3786  static const uint16_t GPR[] = {
3787    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3788    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3789  };
3790  static const uint16_t *FPR = GetFPR();
3791
3792  static const uint16_t VR[] = {
3793    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
3794    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3795  };
3796  const unsigned NumGPRs = array_lengthof(GPR);
3797  const unsigned NumFPRs = 13;
3798  const unsigned NumVRs  = array_lengthof(VR);
3799
3800  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3801  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
3802
3803  SmallVector<SDValue, 8> MemOpChains;
3804  for (unsigned i = 0; i != NumOps; ++i) {
3805    SDValue Arg = OutVals[i];
3806    ISD::ArgFlagsTy Flags = Outs[i].Flags;
3807
3808    // PtrOff will be used to store the current argument to the stack if a
3809    // register cannot be found for it.
3810    SDValue PtrOff;
3811
3812    PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
3813
3814    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
3815
3816    // Promote integers to 64-bit values.
3817    if (Arg.getValueType() == MVT::i32) {
3818      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
3819      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
3820      Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
3821    }
3822
3823    // FIXME memcpy is used way more than necessary.  Correctness first.
3824    // Note: "by value" is code for passing a structure by value, not
3825    // basic types.
3826    if (Flags.isByVal()) {
3827      // Note: Size includes alignment padding, so
3828      //   struct x { short a; char b; }
3829      // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
3830      // These are the proper values we need for right-justifying the
3831      // aggregate in a parameter register.
3832      unsigned Size = Flags.getByValSize();
3833
3834      // An empty aggregate parameter takes up no storage and no
3835      // registers.
3836      if (Size == 0)
3837        continue;
3838
3839      // All aggregates smaller than 8 bytes must be passed right-justified.
3840      if (Size==1 || Size==2 || Size==4) {
3841        EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
3842        if (GPR_idx != NumGPRs) {
3843          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
3844                                        MachinePointerInfo(), VT,
3845                                        false, false, 0);
3846          MemOpChains.push_back(Load.getValue(1));
3847          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
3848
3849          ArgOffset += PtrByteSize;
3850          continue;
3851        }
3852      }
3853
3854      if (GPR_idx == NumGPRs && Size < 8) {
3855        SDValue Const = DAG.getConstant(PtrByteSize - Size,
3856                                        PtrOff.getValueType());
3857        SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
3858        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
3859                                                          CallSeqStart,
3860                                                          Flags, DAG, dl);
3861        ArgOffset += PtrByteSize;
3862        continue;
3863      }
3864      // Copy entire object into memory.  There are cases where gcc-generated
3865      // code assumes it is there, even if it could be put entirely into
3866      // registers.  (This is not what the doc says.)
3867
3868      // FIXME: The above statement is likely due to a misunderstanding of the
3869      // documents.  All arguments must be copied into the parameter area BY
3870      // THE CALLEE in the event that the callee takes the address of any
3871      // formal argument.  That has not yet been implemented.  However, it is
3872      // reasonable to use the stack area as a staging area for the register
3873      // load.
3874
3875      // Skip this for small aggregates, as we will use the same slot for a
3876      // right-justified copy, below.
3877      if (Size >= 8)
3878        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
3879                                                          CallSeqStart,
3880                                                          Flags, DAG, dl);
3881
3882      // When a register is available, pass a small aggregate right-justified.
3883      if (Size < 8 && GPR_idx != NumGPRs) {
3884        // The easiest way to get this right-justified in a register
3885        // is to copy the structure into the rightmost portion of a
3886        // local variable slot, then load the whole slot into the
3887        // register.
3888        // FIXME: The memcpy seems to produce pretty awful code for
3889        // small aggregates, particularly for packed ones.
3890        // FIXME: It would be preferable to use the slot in the
3891        // parameter save area instead of a new local variable.
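        // Illustrative example: for Size == 3, AddPtr is PtrOff + 5, so the
        // memcpy places the three bytes at the high-address end of the
        // doubleword slot; on this big-endian target the load below then
        // yields the aggregate right-justified in the register.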
3892        SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType());
3893        SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
3894        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
3895                                                          CallSeqStart,
3896                                                          Flags, DAG, dl);
3897
3898        // Load the slot into the register.
3899        SDValue Load = DAG.getLoad(PtrVT, dl, Chain, PtrOff,
3900                                   MachinePointerInfo(),
3901                                   false, false, false, 0);
3902        MemOpChains.push_back(Load.getValue(1));
3903        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
3904
3905        // Done with this argument.
3906        ArgOffset += PtrByteSize;
3907        continue;
3908      }
3909
3910      // For aggregates larger than PtrByteSize, copy the pieces of the
3911      // object that fit into registers from the parameter save area.
3912      for (unsigned j=0; j<Size; j+=PtrByteSize) {
3913        SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
3914        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
3915        if (GPR_idx != NumGPRs) {
3916          SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
3917                                     MachinePointerInfo(),
3918                                     false, false, false, 0);
3919          MemOpChains.push_back(Load.getValue(1));
3920          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
3921          ArgOffset += PtrByteSize;
3922        } else {
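          // Out of registers: skip the remainder of the aggregate, rounding
          // the remaining size up to a multiple of PtrByteSize (e.g., with
          // Size - j == 13, ArgOffset advances by 16).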
3923          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
3924          break;
3925        }
3926      }
3927      continue;
3928    }
3929
3930    switch (Arg.getValueType().getSimpleVT().SimpleTy) {
3931    default: llvm_unreachable("Unexpected ValueType for argument!");
3932    case MVT::i32:
3933    case MVT::i64:
3934      if (GPR_idx != NumGPRs) {
3935        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
3936      } else {
3937        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
3938                         true, isTailCall, false, MemOpChains,
3939                         TailCallArguments, dl);
3940      }
3941      ArgOffset += PtrByteSize;
3942      break;
3943    case MVT::f32:
3944    case MVT::f64:
3945      if (FPR_idx != NumFPRs) {
3946        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
3947
3948        if (isVarArg) {
3949          // A single float or an aggregate containing only a single float
3950          // must be passed right-justified in the stack doubleword, and
3951          // in the GPR, if one is available.
3952          SDValue StoreOff;
3953          if (Arg.getValueType().getSimpleVT().SimpleTy == MVT::f32) {
3954            SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
3955            StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
3956          } else
3957            StoreOff = PtrOff;
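          // (On this big-endian target, storing the f32 at PtrOff + 4 places
          // it in the low-order, rightmost word of the parameter doubleword.)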
3958
3959          SDValue Store = DAG.getStore(Chain, dl, Arg, StoreOff,
3960                                       MachinePointerInfo(), false, false, 0);
3961          MemOpChains.push_back(Store);
3962
3963          // Float varargs are always shadowed in available integer registers
3964          if (GPR_idx != NumGPRs) {
3965            SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
3966                                       MachinePointerInfo(), false, false,
3967                                       false, 0);
3968            MemOpChains.push_back(Load.getValue(1));
3969            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
3970          }
3971        } else if (GPR_idx != NumGPRs)
3972          // If we have any FPRs remaining, we may also have GPRs remaining.
3973          ++GPR_idx;
3974      } else {
3975        // Single-precision floating-point values are mapped to the
3976        // second (rightmost) word of the stack doubleword.
3977        if (Arg.getValueType() == MVT::f32) {
3978          SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
3979          PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
3980        }
3981
3982        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
3983                         true, isTailCall, false, MemOpChains,
3984                         TailCallArguments, dl);
3985      }
3986      ArgOffset += 8;
3987      break;
3988    case MVT::v4f32:
3989    case MVT::v4i32:
3990    case MVT::v8i16:
3991    case MVT::v16i8:
3992      if (isVarArg) {
3993        // These go aligned on the stack, or in the corresponding R registers
3994        // when within range.  The Darwin PPC ABI doc claims they also go in
3995        // V registers; in fact gcc does this only for arguments that are
3996        // prototyped, not for those that match the "...".  We do it for all
3997        // arguments; it seems to work.
3998        while (ArgOffset % 16 != 0) {
3999          ArgOffset += PtrByteSize;
4000          if (GPR_idx != NumGPRs)
4001            GPR_idx++;
4002        }
4003        // We could elide this store in the case where the object fits
4004        // entirely in R registers.  Maybe later.
4005        PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
4006                            DAG.getConstant(ArgOffset, PtrVT));
4007        SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
4008                                     MachinePointerInfo(), false, false, 0);
4009        MemOpChains.push_back(Store);
4010        if (VR_idx != NumVRs) {
4011          SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
4012                                     MachinePointerInfo(),
4013                                     false, false, false, 0);
4014          MemOpChains.push_back(Load.getValue(1));
4015          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
4016        }
4017        ArgOffset += 16;
4018        for (unsigned i=0; i<16; i+=PtrByteSize) {
4019          if (GPR_idx == NumGPRs)
4020            break;
4021          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
4022                                  DAG.getConstant(i, PtrVT));
4023          SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
4024                                     false, false, false, 0);
4025          MemOpChains.push_back(Load.getValue(1));
4026          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
4027        }
4028        break;
4029      }
4030
4031      // Non-varargs Altivec params generally go in registers, but have
4032      // stack space allocated at the end.
4033      if (VR_idx != NumVRs) {
4034        // Doesn't have GPR space allocated.
4035        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
4036      } else {
4037        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
4038                         true, isTailCall, true, MemOpChains,
4039                         TailCallArguments, dl);
4040        ArgOffset += 16;
4041      }
4042      break;
4043    }
4044  }
4045
4046  if (!MemOpChains.empty())
4047    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
4048                        &MemOpChains[0], MemOpChains.size());
4049
4050  // Check if this is an indirect call (MTCTR/BCTRL).
4051  // See PrepareCall() for more information about calls through function
4052  // pointers in the 64-bit SVR4 ABI.
4053  if (!isTailCall &&
4054      !dyn_cast<GlobalAddressSDNode>(Callee) &&
4055      !dyn_cast<ExternalSymbolSDNode>(Callee) &&
4056      !isBLACompatibleAddress(Callee, DAG)) {
4057    // Load r2 into a virtual register and store it to the TOC save area.
4058    SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
4059    // TOC save area offset.
4060    SDValue PtrOff = DAG.getIntPtrConstant(40);
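    // (Offset 40 is the TOC save slot in the 64-bit SVR4 linkage area:
    // [SP][CR][LR][2 x reserved] occupy offsets 0 through 39.)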
4061    SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
4062    Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
4063                         false, false, 0);
4064    // R12 must contain the address of an indirect callee.  This does not
4065    // mean the MTCTR instruction must use R12; it's easier to model this
4066    // as an extra parameter, so do that.
4067    RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
4068  }
4069
4070  // Build a sequence of copy-to-reg nodes chained together with token chain
4071  // and flag operands which copy the outgoing args into the appropriate regs.
4072  SDValue InFlag;
4073  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
4074    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
4075                             RegsToPass[i].second, InFlag);
4076    InFlag = Chain.getValue(1);
4077  }
4078
4079  if (isTailCall)
4080    PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp,
4081                    FPOp, true, TailCallArguments);
4082
4083  return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
4084                    RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
4085                    Ins, InVals);
4086}
4087
4088SDValue
4089PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
4090                                    CallingConv::ID CallConv, bool isVarArg,
4091                                    bool isTailCall,
4092                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
4093                                    const SmallVectorImpl<SDValue> &OutVals,
4094                                    const SmallVectorImpl<ISD::InputArg> &Ins,
4095                                    DebugLoc dl, SelectionDAG &DAG,
4096                                    SmallVectorImpl<SDValue> &InVals) const {
4097
4098  unsigned NumOps = Outs.size();
4099
4100  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
4101  bool isPPC64 = PtrVT == MVT::i64;
4102  unsigned PtrByteSize = isPPC64 ? 8 : 4;
4103
4104  MachineFunction &MF = DAG.getMachineFunction();
4105
4106  // Mark this function as potentially containing a tail call.  As a
4107  // consequence, the frame pointer will be used for dynamic stack allocation
4108  // and for restoring the caller's stack pointer in this function's epilogue.
4109  // This is necessary because the tail-called function might overwrite the
4110  // value in this function's (MF) stack pointer save slot, 0(SP).
4111  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
4112      CallConv == CallingConv::Fast)
4113    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
4114
4115  unsigned nAltivecParamsAtEnd = 0;
4116
4117  // Count how many bytes are to be pushed on the stack, including the linkage
4118  // area, and parameter passing area.  We start with 24/48 bytes, which is
4119  // prereserved space for [SP][CR][LR][3 x unused].
4120  unsigned NumBytes =
4121    CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CallConv,
4122                                         Outs, OutVals,
4123                                         nAltivecParamsAtEnd);
4124
4125  // Calculate by how many bytes the stack has to be adjusted in case of tail
4126  // call optimization.
4127  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
4128
4129  // To protect arguments on the stack from being clobbered in a tail call,
4130  // force all the loads to happen before doing any other lowering.
4131  if (isTailCall)
4132    Chain = DAG.getStackArgumentTokenFactor(Chain);
4133
4134  // Adjust the stack pointer for the new arguments...
4135  // These operations are automatically eliminated by the prolog/epilog pass
4136  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
4137  SDValue CallSeqStart = Chain;
4138
4139  // Load the return address and frame pointer so they can be moved somewhere
4140  // else later.
4141  SDValue LROp, FPOp;
4142  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
4143                                       dl);
4144
4145  // Set up a copy of the stack pointer for use loading and storing any
4146  // arguments that may not fit in the registers available for argument
4147  // passing.
4148  SDValue StackPtr;
4149  if (isPPC64)
4150    StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
4151  else
4152    StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
4153
4154  // Figure out which arguments are going to go in registers, and which in
4155  // memory.  Also, if this is a vararg function, floating point arguments
4156  // must be stored to our stack, and loaded into integer regs as well, if
4157  // any integer regs are available for argument passing.
4158  unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
4159  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4160
4161  static const uint16_t GPR_32[] = {           // 32-bit registers.
4162    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4163    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4164  };
4165  static const uint16_t GPR_64[] = {           // 64-bit registers.
4166    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4167    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4168  };
4169  static const uint16_t *FPR = GetFPR();
4170
4171  static const uint16_t VR[] = {
4172    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4173    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4174  };
4175  const unsigned NumGPRs = array_lengthof(GPR_32);
4176  const unsigned NumFPRs = 13;
4177  const unsigned NumVRs  = array_lengthof(VR);
4178
4179  const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32;
4180
4181  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
4182  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
4183
4184  SmallVector<SDValue, 8> MemOpChains;
4185  for (unsigned i = 0; i != NumOps; ++i) {
4186    SDValue Arg = OutVals[i];
4187    ISD::ArgFlagsTy Flags = Outs[i].Flags;
4188
4189    // PtrOff will be used to store the current argument to the stack if a
4190    // register cannot be found for it.
4191    SDValue PtrOff;
4192
4193    PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
4194
4195    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
4196
4197    // On PPC64, promote integers to 64-bit values.
4198    if (isPPC64 && Arg.getValueType() == MVT::i32) {
4199      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
4200      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
4201      Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
4202    }
4203
4204    // FIXME memcpy is used way more than necessary.  Correctness first.
4205    // Note: "by value" is code for passing a structure by value, not
4206    // basic types.
4207    if (Flags.isByVal()) {
4208      unsigned Size = Flags.getByValSize();
4209      // Very small objects are passed right-justified.  Everything else is
4210      // passed left-justified.
4211      if (Size==1 || Size==2) {
4212        EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
4213        if (GPR_idx != NumGPRs) {
4214          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
4215                                        MachinePointerInfo(), VT,
4216                                        false, false, 0);
4217          MemOpChains.push_back(Load.getValue(1));
4218          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
4219
4220          ArgOffset += PtrByteSize;
4221        } else {
4222          SDValue Const = DAG.getConstant(PtrByteSize - Size,
4223                                          PtrOff.getValueType());
4224          SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
4225          Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
4226                                                            CallSeqStart,
4227                                                            Flags, DAG, dl);
4228          ArgOffset += PtrByteSize;
4229        }
4230        continue;
4231      }
4232      // Copy entire object into memory.  There are cases where gcc-generated
4233      // code assumes it is there, even if it could be put entirely into
4234      // registers.  (This is not what the doc says.)
4235      Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
4236                                                        CallSeqStart,
4237                                                        Flags, DAG, dl);
4238
4239      // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
4240      // copy the pieces of the object that fit into registers from the
4241      // parameter save area.
4242      for (unsigned j=0; j<Size; j+=PtrByteSize) {
4243        SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
4244        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
4245        if (GPR_idx != NumGPRs) {
4246          SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
4247                                     MachinePointerInfo(),
4248                                     false, false, false, 0);
4249          MemOpChains.push_back(Load.getValue(1));
4250          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
4251          ArgOffset += PtrByteSize;
4252        } else {
4253          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
4254          break;
4255        }
4256      }
4257      continue;
4258    }
4259
4260    switch (Arg.getValueType().getSimpleVT().SimpleTy) {
4261    default: llvm_unreachable("Unexpected ValueType for argument!");
4262    case MVT::i32:
4263    case MVT::i64:
4264      if (GPR_idx != NumGPRs) {
4265        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
4266      } else {
4267        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
4268                         isPPC64, isTailCall, false, MemOpChains,
4269                         TailCallArguments, dl);
4270      }
4271      ArgOffset += PtrByteSize;
4272      break;
4273    case MVT::f32:
4274    case MVT::f64:
4275      if (FPR_idx != NumFPRs) {
4276        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
4277
4278        if (isVarArg) {
4279          SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
4280                                       MachinePointerInfo(), false, false, 0);
4281          MemOpChains.push_back(Store);
4282
4283          // Float varargs are always shadowed in available integer registers
4284          if (GPR_idx != NumGPRs) {
4285            SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
4286                                       MachinePointerInfo(), false, false,
4287                                       false, 0);
4288            MemOpChains.push_back(Load.getValue(1));
4289            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
4290          }
4291          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
4292            SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
4293            PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
4294            SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
4295                                       MachinePointerInfo(),
4296                                       false, false, false, 0);
4297            MemOpChains.push_back(Load.getValue(1));
4298            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
4299          }
4300        } else {
4301          // If we have any FPRs remaining, we may also have GPRs remaining.
4302          // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
4303          // GPRs.
4304          if (GPR_idx != NumGPRs)
4305            ++GPR_idx;
4306          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
4307              !isPPC64)  // PPC64 has 64-bit GPRs, obviously :)
4308            ++GPR_idx;
4309        }
4310      } else
4311        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
4312                         isPPC64, isTailCall, false, MemOpChains,
4313                         TailCallArguments, dl);
4314      if (isPPC64)
4315        ArgOffset += 8;
4316      else
4317        ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
4318      break;
4319    case MVT::v4f32:
4320    case MVT::v4i32:
4321    case MVT::v8i16:
4322    case MVT::v16i8:
4323      if (isVarArg) {
4324        // These go aligned on the stack, or in the corresponding R registers
4325        // when within range.  The Darwin PPC ABI doc claims they also go in
4326        // V registers; in fact gcc does this only for arguments that are
4327        // prototyped, not for those that match the "...".  We do it for all
4328        // arguments; it seems to work.
4329        while (ArgOffset % 16 != 0) {
4330          ArgOffset += PtrByteSize;
4331          if (GPR_idx != NumGPRs)
4332            GPR_idx++;
4333        }
4334        // We could elide this store in the case where the object fits
4335        // entirely in R registers.  Maybe later.
4336        PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
4337                            DAG.getConstant(ArgOffset, PtrVT));
4338        SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
4339                                     MachinePointerInfo(), false, false, 0);
4340        MemOpChains.push_back(Store);
4341        if (VR_idx != NumVRs) {
4342          SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
4343                                     MachinePointerInfo(),
4344                                     false, false, false, 0);
4345          MemOpChains.push_back(Load.getValue(1));
4346          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
4347        }
4348        ArgOffset += 16;
4349        for (unsigned i=0; i<16; i+=PtrByteSize) {
4350          if (GPR_idx == NumGPRs)
4351            break;
4352          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
4353                                  DAG.getConstant(i, PtrVT));
4354          SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
4355                                     false, false, false, 0);
4356          MemOpChains.push_back(Load.getValue(1));
4357          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
4358        }
4359        break;
4360      }
4361
4362      // Non-varargs Altivec params generally go in registers, but have
4363      // stack space allocated at the end.
4364      if (VR_idx != NumVRs) {
4365        // Doesn't have GPR space allocated.
4366        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
4367      } else if (nAltivecParamsAtEnd==0) {
4368        // We are emitting Altivec params in order.
4369        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
4370                         isPPC64, isTailCall, true, MemOpChains,
4371                         TailCallArguments, dl);
4372        ArgOffset += 16;
4373      }
4374      break;
4375    }
4376  }
4377  // If all Altivec parameters fit in registers, as they usually do,
4378  // they get stack space following the non-Altivec parameters.  We
4379  // don't track this here because nobody below needs it.
4380  // If there are more Altivec parameters than fit in registers emit
4381  // the stores here.
4382  if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
4383    unsigned j = 0;
4384    // Offset is aligned; skip the first 12 params, which go in V registers.
4385    ArgOffset = ((ArgOffset+15)/16)*16;
4386    ArgOffset += 12*16;
4387    for (unsigned i = 0; i != NumOps; ++i) {
4388      SDValue Arg = OutVals[i];
4389      EVT ArgType = Outs[i].VT;
4390      if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
4391          ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
4392        if (++j > NumVRs) {
4393          SDValue PtrOff;
4394          // We are emitting Altivec params in order.
4395          LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
4396                           isPPC64, isTailCall, true, MemOpChains,
4397                           TailCallArguments, dl);
4398          ArgOffset += 16;
4399        }
4400      }
4401    }
4402  }
4403
4404  if (!MemOpChains.empty())
4405    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
4406                        &MemOpChains[0], MemOpChains.size());
4407
4408  // On Darwin, R12 must contain the address of an indirect callee.  This does
4409  // not mean the MTCTR instruction must use R12; it's easier to model this as
4410  // an extra parameter, so do that.
4411  if (!isTailCall &&
4412      !dyn_cast<GlobalAddressSDNode>(Callee) &&
4413      !dyn_cast<ExternalSymbolSDNode>(Callee) &&
4414      !isBLACompatibleAddress(Callee, DAG))
4415    RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
4416                                                   PPC::R12), Callee));
4417
4418  // Build a sequence of copy-to-reg nodes chained together with token chain
4419  // and flag operands which copy the outgoing args into the appropriate regs.
4420  SDValue InFlag;
4421  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
4422    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
4423                             RegsToPass[i].second, InFlag);
4424    InFlag = Chain.getValue(1);
4425  }
4426
4427  if (isTailCall)
4428    PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp,
4429                    FPOp, true, TailCallArguments);
4430
4431  return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
4432                    RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
4433                    Ins, InVals);
4434}
4435
4436bool
4437PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
4438                                  MachineFunction &MF, bool isVarArg,
4439                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
4440                                  LLVMContext &Context) const {
4441  SmallVector<CCValAssign, 16> RVLocs;
4442  CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
4443                 RVLocs, Context);
4444  return CCInfo.CheckReturn(Outs, RetCC_PPC);
4445}
4446
4447SDValue
4448PPCTargetLowering::LowerReturn(SDValue Chain,
4449                               CallingConv::ID CallConv, bool isVarArg,
4450                               const SmallVectorImpl<ISD::OutputArg> &Outs,
4451                               const SmallVectorImpl<SDValue> &OutVals,
4452                               DebugLoc dl, SelectionDAG &DAG) const {
4453
4454  SmallVector<CCValAssign, 16> RVLocs;
4455  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4456                 getTargetMachine(), RVLocs, *DAG.getContext());
4457  CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
4458
4459  SDValue Flag;
4460  SmallVector<SDValue, 4> RetOps(1, Chain);
4461
4462  // Copy the result values into the output registers.
4463  for (unsigned i = 0; i != RVLocs.size(); ++i) {
4464    CCValAssign &VA = RVLocs[i];
4465    assert(VA.isRegLoc() && "Can only return in registers!");
4466
4467    SDValue Arg = OutVals[i];
4468
4469    switch (VA.getLocInfo()) {
4470    default: llvm_unreachable("Unknown loc info!");
4471    case CCValAssign::Full: break;
4472    case CCValAssign::AExt:
4473      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
4474      break;
4475    case CCValAssign::ZExt:
4476      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
4477      break;
4478    case CCValAssign::SExt:
4479      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
4480      break;
4481    }
4482
4483    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
4484    Flag = Chain.getValue(1);
4485    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
4486  }
4487
4488  RetOps[0] = Chain;  // Update chain.
4489
4490  // Add the flag if we have it.
4491  if (Flag.getNode())
4492    RetOps.push_back(Flag);
4493
4494  return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other,
4495                     &RetOps[0], RetOps.size());
4496}
4497
4498SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
4499                                   const PPCSubtarget &Subtarget) const {
4500  // When we pop the dynamic allocation we need to restore the SP link.
4501  DebugLoc dl = Op.getDebugLoc();
4502
4503  // Get the correct type for pointers.
4504  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
4505
4506  // Construct the stack pointer operand.
4507  bool isPPC64 = Subtarget.isPPC64();
4508  unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
4509  SDValue StackPtr = DAG.getRegister(SP, PtrVT);
4510
4511  // Get the operands for the STACKRESTORE.
4512  SDValue Chain = Op.getOperand(0);
4513  SDValue SaveSP = Op.getOperand(1);
4514
4515  // Load the old link SP.
4516  SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr,
4517                                   MachinePointerInfo(),
4518                                   false, false, false, 0);
4519
4520  // Restore the stack pointer.
4521  Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
4522
4523  // Store the old link SP.
4524  return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo(),
4525                      false, false, 0);
4526}
4527
4528
4529
4530SDValue
4531PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
4532  MachineFunction &MF = DAG.getMachineFunction();
4533  bool isPPC64 = PPCSubTarget.isPPC64();
4534  bool isDarwinABI = PPCSubTarget.isDarwinABI();
4535  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
4536
4537  // Get the current return address save index.  This is the fixed frame
4538  // slot used to save the link register (LR).
4539  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
4540  int RASI = FI->getReturnAddrSaveIndex();
4541
4542  // If the frame pointer save index hasn't been defined yet.
4543  // If the return address save index hasn't been defined yet.
4544  if (!RASI) {
4545    // Find out the fixed offset of the return address save area.
4546    int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
4547    // Allocate the frame index for the return address save area.
4548    // Save the result.
4549    FI->setReturnAddrSaveIndex(RASI);
4550  }
4551  return DAG.getFrameIndex(RASI, PtrVT);
4552}
4553
4554SDValue
4555PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
4556  MachineFunction &MF = DAG.getMachineFunction();
4557  bool isPPC64 = PPCSubTarget.isPPC64();
4558  bool isDarwinABI = PPCSubTarget.isDarwinABI();
4559  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
4560
4561  // Get current frame pointer save index.  The users of this index will be
4562  // primarily DYNALLOC instructions.
4563  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
4564  int FPSI = FI->getFramePointerSaveIndex();
4565
4566  // If the frame pointer save index hasn't been defined yet.
4567  if (!FPSI) {
4568    // Find out the fixed offset of the frame pointer save area.
4569    int FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64,
4570                                                           isDarwinABI);
4571
4572    // Allocate the frame index for frame pointer save area.
4573    // Allocate the frame index for the frame pointer save area.
4574    // Save the result.
4575    FI->setFramePointerSaveIndex(FPSI);
4576  }
4577  return DAG.getFrameIndex(FPSI, PtrVT);
4578}
4579
4580SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
4581                                         SelectionDAG &DAG,
4582                                         const PPCSubtarget &Subtarget) const {
4583  // Get the inputs.
4584  SDValue Chain = Op.getOperand(0);
4585  SDValue Size  = Op.getOperand(1);
4586  DebugLoc dl = Op.getDebugLoc();
4587
4588  // Get the correct type for pointers.
4589  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
4590  // Negate the size.
4591  SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
4592                                  DAG.getConstant(0, PtrVT), Size);
4593  // Construct a node for the frame pointer save index.
4594  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
4595  // Build a DYNALLOC node.
4596  SDValue Ops[3] = { Chain, NegSize, FPSIdx };
4597  SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
4598  return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3);
4599}
4600
4601SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
4602                                               SelectionDAG &DAG) const {
4603  DebugLoc DL = Op.getDebugLoc();
4604  return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
4605                     DAG.getVTList(MVT::i32, MVT::Other),
4606                     Op.getOperand(0), Op.getOperand(1));
4607}
4608
4609SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
4610                                                SelectionDAG &DAG) const {
4611  DebugLoc DL = Op.getDebugLoc();
4612  return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
4613                     Op.getOperand(0), Op.getOperand(1));
4614}
4615
4616/// LowerSELECT_CC - Lower floating-point select_cc's into the fsel instruction
4617/// when possible.
4618SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
4619  // Not FP? Not a fsel.
4620  if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
4621      !Op.getOperand(2).getValueType().isFloatingPoint())
4622    return Op;
4623
4624  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4625
4626  // Cannot handle SETEQ/SETNE.
4627  if (CC == ISD::SETEQ || CC == ISD::SETNE) return Op;
4628
4629  EVT ResVT = Op.getValueType();
4630  EVT CmpVT = Op.getOperand(0).getValueType();
4631  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
4632  SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
4633  DebugLoc dl = Op.getDebugLoc();
4634
4635  // If the RHS of the comparison is a 0.0, we don't need to do the
4636  // subtraction at all.
4637  if (isFloatingPointZero(RHS))
4638    switch (CC) {
4639    default: break;       // SETUO etc aren't handled by fsel.
4640    case ISD::SETULT:
4641    case ISD::SETLT:
4642      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
4643    case ISD::SETOGE:
4644    case ISD::SETGE:
4645      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
4646        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
4647      return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
4648    case ISD::SETUGT:
4649    case ISD::SETGT:
4650      std::swap(TV, FV);  // fsel is natively setge, swap operands for setgt
4651    case ISD::SETOLE:
4652    case ISD::SETLE:
4653      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
4654        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
4655      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
4656                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
4657    }
4658
4659  SDValue Cmp;
4660  switch (CC) {
4661  default: break;       // SETUO etc aren't handled by fsel.
4662  case ISD::SETULT:
4663  case ISD::SETLT:
4664    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
4665    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
4666      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
4667    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
4668  case ISD::SETOGE:
4669  case ISD::SETGE:
4670    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
4671    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
4672      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
4673    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
4674  case ISD::SETUGT:
4675  case ISD::SETGT:
4676    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
4677    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
4678      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
4679    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
4680  case ISD::SETOLE:
4681  case ISD::SETLE:
4682    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
4683    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
4684      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
4685    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
4686  }
4687  return Op;
4688}
4689
4690// FIXME: Split this code up when LegalizeDAGTypes lands.
4691SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
4692                                           DebugLoc dl) const {
4693  assert(Op.getOperand(0).getValueType().isFloatingPoint());
4694  SDValue Src = Op.getOperand(0);
4695  if (Src.getValueType() == MVT::f32)
4696    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
4697
4698  SDValue Tmp;
4699  switch (Op.getValueType().getSimpleVT().SimpleTy) {
4700  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
4701  case MVT::i32:
4702    Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
4703                                                         PPCISD::FCTIDZ,
4704                      dl, MVT::f64, Src);
4705    break;
4706  case MVT::i64:
4707    Tmp = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Src);
4708    break;
4709  }
4710
4711  // Convert the FP value to an int value through memory.
4712  SDValue FIPtr = DAG.CreateStackTemporary(MVT::f64);
4713
4714  // Emit a store to the stack slot.
4715  SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
4716                               MachinePointerInfo(), false, false, 0);
4717
4718  // Result is a load from the stack slot.  If loading 4 bytes, make sure to
4719  // add in a bias.
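  // (FCTIWZ leaves the 32-bit result in the low half of the f64 value;
  // stored big-endian, that half sits 4 bytes into the 8-byte slot.)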
4720  if (Op.getValueType() == MVT::i32)
4721    FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
4722                        DAG.getConstant(4, FIPtr.getValueType()));
4723  return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MachinePointerInfo(),
4724                     false, false, false, 0);
4725}
4726
4727SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
4728                                           SelectionDAG &DAG) const {
4729  DebugLoc dl = Op.getDebugLoc();
4730  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
4731  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
4732    return SDValue();
4733
4734  if (Op.getOperand(0).getValueType() == MVT::i64) {
4735    SDValue SINT = Op.getOperand(0);
4736    // When converting to single-precision, we actually need to convert
4737    // to double-precision first and then round to single-precision.
4738    // To avoid double-rounding effects during that operation, we have
4739    // to prepare the input operand.  Bits that might be truncated when
4740    // converting to double-precision are replaced by a bit that won't
4741    // be lost at this stage, but is below the single-precision rounding
4742    // position.
4743    //
4744    // However, if -enable-unsafe-fp-math is in effect, accept double
4745    // rounding to avoid the extra overhead.
4746    if (Op.getValueType() == MVT::f32 &&
4747        !DAG.getTarget().Options.UnsafeFPMath) {
4748
4749      // Twiddle input to make sure the low 11 bits are zero.  (If this
4750      // is the case, we are guaranteed the value will fit into the 53 bit
4751      // mantissa of an IEEE double-precision value without rounding.)
4752      // If any of those low 11 bits were not zero originally, make sure
4753      // bit 12 (value 2048) is set instead, so that the final rounding
4754      // to single-precision gets the correct result.
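      // Illustrative example: SINT = 0x1005 becomes 0x1800 -- the low 11 bits
      // (0x005) are cleared and the sticky bit 0x800 is set in their place.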
4755      SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
4756                                  SINT, DAG.getConstant(2047, MVT::i64));
4757      Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
4758                          Round, DAG.getConstant(2047, MVT::i64));
4759      Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
4760      Round = DAG.getNode(ISD::AND, dl, MVT::i64,
4761                          Round, DAG.getConstant(-2048, MVT::i64));
4762
4763      // However, we cannot use that value unconditionally: if the magnitude
4764      // of the input value is small, the bit-twiddling we did above might
4765      // end up visibly changing the output.  Fortunately, in that case, we
4766      // don't need to twiddle bits since the original input will convert
4767      // exactly to double-precision floating-point already.  Therefore,
4768      // construct a conditional to use the original value if the top 11
4769      // bits are all sign-bit copies, and use the rounded value computed
4770      // above otherwise.
4771      SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
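      // (SINT >> 53 extracts the top 11 bits; adding 1 maps the all-zeros
      // case to 1 and the all-ones case to 0, so the unsigned > 1 test fires
      // exactly when those bits are not pure sign-bit copies.)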
4772                                 SINT, DAG.getConstant(53, MVT::i32));
4773      Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
4774                         Cond, DAG.getConstant(1, MVT::i64));
4775      Cond = DAG.getSetCC(dl, MVT::i32,
4776                          Cond, DAG.getConstant(1, MVT::i64), ISD::SETUGT);
4777
4778      SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
4779    }
4780    SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
4781    SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Bits);
4782    if (Op.getValueType() == MVT::f32)
4783      FP = DAG.getNode(ISD::FP_ROUND, dl,
4784                       MVT::f32, FP, DAG.getIntPtrConstant(0));
4785    return FP;
4786  }
4787
4788  assert(Op.getOperand(0).getValueType() == MVT::i32 &&
4789         "Unhandled SINT_TO_FP type in custom expander!");
4790  // Since we only generate this in 64-bit mode, we can take advantage of
4791  // 64-bit registers.  In particular, sign extend the input value into the
4792  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
4793  // then lfd it and fcfid it.
4794  MachineFunction &MF = DAG.getMachineFunction();
4795  MachineFrameInfo *FrameInfo = MF.getFrameInfo();
4796  int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
4797  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
4798  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
4799
4800  SDValue Ext64 = DAG.getNode(PPCISD::EXTSW_32, dl, MVT::i32,
4801                                Op.getOperand(0));
4802
4803  // STD the extended value into the stack slot.
4804  MachineMemOperand *MMO =
4805    MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
4806                            MachineMemOperand::MOStore, 8, 8);
4807  SDValue Ops[] = { DAG.getEntryNode(), Ext64, FIdx };
4808  SDValue Store =
4809    DAG.getMemIntrinsicNode(PPCISD::STD_32, dl, DAG.getVTList(MVT::Other),
4810                            Ops, 3, MVT::i64, MMO);
4811  // Load the value as a double.
4812  SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, MachinePointerInfo(),
4813                           false, false, false, 0);
4814
4815  // FCFID it and return it.
4816  SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld);
4817  if (Op.getValueType() == MVT::f32)
4818    FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0));
4819  return FP;
4820}
4821
4822SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
4823                                            SelectionDAG &DAG) const {
4824  DebugLoc dl = Op.getDebugLoc();
4825  /*
4826   The rounding mode is in bits 30:31 of FPSCR, and has the following
4827   settings:
4828     00 Round to nearest
4829     01 Round to 0
4830     10 Round to +inf
4831     11 Round to -inf
4832
4833  FLT_ROUNDS, on the other hand, expects the following:
4834    -1 Undefined
4835     0 Round to 0
4836     1 Round to nearest
4837     2 Round to +inf
4838     3 Round to -inf
4839
4840  To perform the conversion, we do:
4841    ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
4842  */
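  // For example, FPSCR RN = 0b10 (round to +inf) gives
  // (2 ^ ((~2 & 3) >> 1)) = (2 ^ 0) = 2, the FLT_ROUNDS value for +inf.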
4843
4844  MachineFunction &MF = DAG.getMachineFunction();
4845  EVT VT = Op.getValueType();
4846  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
4847  SDValue MFFSreg, InFlag;
4848
4849  // Save FP Control Word to register
4850  EVT NodeTys[] = {
4851    MVT::f64,    // return register
4852    MVT::Glue    // unused in this context
4853  };
4854  SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
4855
4856  // Save FP register to stack slot
4857  int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
4858  SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
4859  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain,
4860                               StackSlot, MachinePointerInfo(), false, false,0);
4861
4862  // Load FP Control Word from low 32 bits of stack slot.
4863  SDValue Four = DAG.getConstant(4, PtrVT);
4864  SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
4865  SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(),
4866                            false, false, false, 0);
4867
4868  // Transform as necessary
4869  SDValue CWD1 =
4870    DAG.getNode(ISD::AND, dl, MVT::i32,
4871                CWD, DAG.getConstant(3, MVT::i32));
4872  SDValue CWD2 =
4873    DAG.getNode(ISD::SRL, dl, MVT::i32,
4874                DAG.getNode(ISD::AND, dl, MVT::i32,
4875                            DAG.getNode(ISD::XOR, dl, MVT::i32,
4876                                        CWD, DAG.getConstant(3, MVT::i32)),
4877                            DAG.getConstant(3, MVT::i32)),
4878                DAG.getConstant(1, MVT::i32));
4879
4880  SDValue RetVal =
4881    DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
4882
4883  return DAG.getNode((VT.getSizeInBits() < 16 ?
4884                      ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
4885}
4886
4887SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
4888  EVT VT = Op.getValueType();
4889  unsigned BitWidth = VT.getSizeInBits();
4890  DebugLoc dl = Op.getDebugLoc();
4891  assert(Op.getNumOperands() == 3 &&
4892         VT == Op.getOperand(1).getValueType() &&
4893         "Unexpected SHL!");
4894
4895  // Expand into a bunch of logical ops.  Note that these ops
4896  // depend on the PPC behavior for oversized shift amounts.
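  // The expansion computes
  //   OutHi = (Hi << Amt) | (Lo >> (BitWidth - Amt)) | (Lo << (Amt - BitWidth))
  //   OutLo = Lo << Amt
  // where each shift by an out-of-range (negative or >= BitWidth) amount
  // contributes 0, so only the applicable Lo term survives.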
4897  SDValue Lo = Op.getOperand(0);
4898  SDValue Hi = Op.getOperand(1);
4899  SDValue Amt = Op.getOperand(2);
4900  EVT AmtVT = Amt.getValueType();
4901
4902  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
4903                             DAG.getConstant(BitWidth, AmtVT), Amt);
4904  SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
4905  SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
4906  SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
4907  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
4908                             DAG.getConstant(-BitWidth, AmtVT));
4909  SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
4910  SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
4911  SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
4912  SDValue OutOps[] = { OutLo, OutHi };
4913  return DAG.getMergeValues(OutOps, 2, dl);
4914}
4915
4916SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
4917  EVT VT = Op.getValueType();
4918  DebugLoc dl = Op.getDebugLoc();
4919  unsigned BitWidth = VT.getSizeInBits();
4920  assert(Op.getNumOperands() == 3 &&
4921         VT == Op.getOperand(1).getValueType() &&
4922         "Unexpected SRL!");
4923
4924  // Expand into a bunch of logical ops.  Note that these ops
4925  // depend on the PPC behavior for oversized shift amounts.
4926  SDValue Lo = Op.getOperand(0);
4927  SDValue Hi = Op.getOperand(1);
4928  SDValue Amt = Op.getOperand(2);
4929  EVT AmtVT = Amt.getValueType();
4930
4931  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
4932                             DAG.getConstant(BitWidth, AmtVT), Amt);
4933  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
4934  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
4935  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
4936  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
4937                             DAG.getConstant(-BitWidth, AmtVT));
4938  SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
4939  SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
4940  SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
4941  SDValue OutOps[] = { OutLo, OutHi };
4942  return DAG.getMergeValues(OutOps, 2, dl);
4943}
4944
4945SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
4946  DebugLoc dl = Op.getDebugLoc();
4947  EVT VT = Op.getValueType();
4948  unsigned BitWidth = VT.getSizeInBits();
4949  assert(Op.getNumOperands() == 3 &&
4950         VT == Op.getOperand(1).getValueType() &&
4951         "Unexpected SRA!");
4952
4953  // Expand into a bunch of logical ops, followed by a select_cc.
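  //
  // Unlike LowerSRL_PARTS, the low word needs a select_cc: for
  // Amt < BitWidth, Tmp6 below is an sra by an oversized amount, which on
  // PPC produces all copies of Hi's sign bit rather than 0, so it cannot
  // simply be OR'd into the result the way the SRL expansion does it.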
4954  SDValue Lo = Op.getOperand(0);
4955  SDValue Hi = Op.getOperand(1);
4956  SDValue Amt = Op.getOperand(2);
4957  EVT AmtVT = Amt.getValueType();
4958
4959  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
4960                             DAG.getConstant(BitWidth, AmtVT), Amt);
4961  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
4962  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
4963  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
4964  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
4965                             DAG.getConstant(-BitWidth, AmtVT));
4966  SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
4967  SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
4968  SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT),
4969                                  Tmp4, Tmp6, ISD::SETLE);
4970  SDValue OutOps[] = { OutLo, OutHi };
4971  return DAG.getMergeValues(OutOps, 2, dl);
4972}
4973
4974//===----------------------------------------------------------------------===//
4975// Vector related lowering.
4976//
4977
4978/// BuildSplatI - Build a canonical splati of Val with an element size of
4979/// SplatSize.  Cast the result to VT.
4980static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
4981                             SelectionDAG &DAG, DebugLoc dl) {
4982  assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
4983
4984  static const EVT VTys[] = { // canonical VT to use for each size.
4985    MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
4986  };
4987
4988  EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
4989
4990  // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
4991  if (Val == -1)
4992    SplatSize = 1;
4993
4994  EVT CanonicalVT = VTys[SplatSize-1];
4995
4996  // Build a canonical splat for this value.
4997  SDValue Elt = DAG.getConstant(Val, MVT::i32);
4998  SmallVector<SDValue, 8> Ops;
4999  Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
5000  SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT,
5001                              &Ops[0], Ops.size());
5002  return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res);
5003}
5004
5005/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
5006/// specified intrinsic ID.
5007static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
5008                                SelectionDAG &DAG, DebugLoc dl,
5009                                EVT DestVT = MVT::Other) {
5010  if (DestVT == MVT::Other) DestVT = LHS.getValueType();
5011  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
5012                     DAG.getConstant(IID, MVT::i32), LHS, RHS);
5013}
5014
5015/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
5016/// specified intrinsic ID.
5017static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
5018                                SDValue Op2, SelectionDAG &DAG,
5019                                DebugLoc dl, EVT DestVT = MVT::Other) {
5020  if (DestVT == MVT::Other) DestVT = Op0.getValueType();
5021  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
5022                     DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
5023}
5024
5025
5026/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
5027/// amount.  The result has the specified value type.
5028static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
5029                             EVT VT, SelectionDAG &DAG, DebugLoc dl) {
5030  // Force LHS/RHS to be the right type.
5031  LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
5032  RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
5033
5034  int Ops[16];
5035  for (unsigned i = 0; i != 16; ++i)
5036    Ops[i] = i + Amt;
5037  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
5038  return DAG.getNode(ISD::BITCAST, dl, VT, T);
5039}
5040
5041// If this is a case we can't handle, return null and let the default
5042// expansion code take care of it.  If we CAN select this case, and if it
5043// selects to a single instruction, return Op.  Otherwise, if we can codegen
5044// this case more efficiently than a constant pool load, lower it to the
5045// sequence of ops that should be used.
5046SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
5047                                             SelectionDAG &DAG) const {
5048  DebugLoc dl = Op.getDebugLoc();
5049  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
5050  assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
5051
5052  // Check if this is a splat of a constant value.
5053  APInt APSplatBits, APSplatUndef;
5054  unsigned SplatBitSize;
5055  bool HasAnyUndefs;
5056  if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
5057                             HasAnyUndefs, 0, true) || SplatBitSize > 32)
5058    return SDValue();
5059
5060  unsigned SplatBits = APSplatBits.getZExtValue();
5061  unsigned SplatUndef = APSplatUndef.getZExtValue();
5062  unsigned SplatSize = SplatBitSize / 8;
5063
5064  // First, handle single instruction cases.
5065
5066  // All zeros?
5067  if (SplatBits == 0) {
5068    // Canonicalize all zero vectors to be v4i32.
5069    if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
5070      SDValue Z = DAG.getConstant(0, MVT::i32);
5071      Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z);
5072      Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
5073    }
5074    return Op;
5075  }
5076
5077  // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
5078  int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
5079                    (32-SplatBitSize));
5080  if (SextVal >= -16 && SextVal <= 15)
5081    return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
5082
5083
5084  // Two instruction sequences.
5085
5086  // If this value is in the range [-32,30] and is even, use:
5087  //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
5088  // If this value is in the range [17,31] and is odd, use:
5089  //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
5090  // If this value is in the range [-31,-17] and is odd, use:
5091  //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
5092  // Note the last two are three-instruction sequences.
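  //
  // For example (illustrative values):
  //   splat of  24: vspltisw 12;  vadduwm               ->  12 + 12    =  24
  //   splat of  27: vspltisw 11;  vspltisw -16; vsubuwm ->  11 - (-16) =  27
  //   splat of -27: vspltisw -11; vspltisw -16; vadduwm -> -11 + (-16) = -27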
5093  if (SextVal >= -32 && SextVal <= 31) {
5094    // To avoid having these optimizations undone by constant folding,
5095    // we convert to a pseudo that will be expanded later into one of
5096    // the above forms.
5097    SDValue Elt = DAG.getConstant(SextVal, MVT::i32);
5098    EVT VT = Op.getValueType();
5099    int Size = VT == MVT::v16i8 ? 1 : (VT == MVT::v8i16 ? 2 : 4);
5100    SDValue EltSize = DAG.getConstant(Size, MVT::i32);
5101    return DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
5102  }
5103
5104  // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
5105  // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
5106  // for fneg/fabs.
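  //
  // The vslw trick works because each element supplies its own shift count
  // in its low 5 bits: vspltisw -1 gives 0xFFFFFFFF per word, and
  // 0xFFFFFFFF << (0xFFFFFFFF & 31) == 0xFFFFFFFF << 31 == 0x8000_0000.
  // XOR'ing with the same all-ones vector then yields 0x7FFF_FFFF.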
5107  if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
5108    // Make -1 and vspltisw -1:
5109    SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
5110
5111    // Make the VSLW intrinsic, computing 0x8000_0000.
5112    SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
5113                                   OnesV, DAG, dl);
5114
5115    // xor by OnesV to invert it.
5116    Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
5117    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
5118  }
5119
5120  // Check for the wide variety of 'vsplti* followed by a binop on itself' cases.
5121  static const signed char SplatCsts[] = {
5122    -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
5123    -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
5124  };
5125
5126  for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
5127    // Indirect through the SplatCsts array so that we favor 'vsplti -1'
5128    // for the cases which are ambiguous (e.g. formation of 0x8000_0000).
5129    int i = SplatCsts[idx];
5130
5131    // Figure out what shift amount will be used by altivec if shifted by i in
5132    // this splat size.
5133    unsigned TypeShiftAmt = i & (SplatBitSize-1);
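    // For example (byte splat, so SplatBitSize == 8): a splat of 0xE8
    // (-24) matches i == -6 in the 'shl self' case below, because
    // -6 & 7 == 2 and 0xFA << 2 == 0xE8 (mod 256).  It is then emitted as
    // vspltisb -6 followed by vslb, where each 0xFA byte supplies its own
    // shift count of 0xFA & 7 == 2.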
5134
5135    // vsplti + shl self.
5136    if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
5137      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
5138      static const unsigned IIDs[] = { // Intrinsic to use for each size.
5139        Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
5140        Intrinsic::ppc_altivec_vslw
5141      };
5142      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
5143      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
5144    }
5145
5146    // vsplti + srl self.
5147    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
5148      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
5149      static const unsigned IIDs[] = { // Intrinsic to use for each size.
5150        Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
5151        Intrinsic::ppc_altivec_vsrw
5152      };
5153      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
5154      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
5155    }
5156
5157    // vsplti + sra self.
5158    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
5159      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
5160      static const unsigned IIDs[] = { // Intrinsic to use for each size.
5161        Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
5162        Intrinsic::ppc_altivec_vsraw
5163      };
5164      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
5165      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
5166    }
5167
5168    // vsplti + rol self.
5169    if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
5170                         ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
5171      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
5172      static const unsigned IIDs[] = { // Intrinsic to use for each size.
5173        Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
5174        Intrinsic::ppc_altivec_vrlw
5175      };
5176      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
5177      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
5178    }
5179
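    // The vsldoi cases below catch splats whose bytes are a rotation of a
    // smaller splat.  For example, a v8i16 splat of 0xFEFF matches i == -2:
    // vspltish -2 gives the bytes FF FE FF FE ..., and vsldoi t, t, 1
    // rotates them to FE FF FE FF ..., i.e. halfwords of 0xFEFF.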
5180    // t = vsplti c, result = vsldoi t, t, 1
5181    if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
5182      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
5183      return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl);
5184    }
5185    // t = vsplti c, result = vsldoi t, t, 2
5186    if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
5187      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
5188      return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl);
5189    }
5190    // t = vsplti c, result = vsldoi t, t, 3
5191    if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
5192      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
5193      return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl);
5194    }
5195  }
5196
5197  return SDValue();
5198}
5199
5200/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
5201/// the specified operations to build the shuffle.
5202static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
5203                                      SDValue RHS, SelectionDAG &DAG,
5204                                      DebugLoc dl) {
5205  unsigned OpNum = (PFEntry >> 26) & 0x0F;
5206  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
5207  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
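  // A PFEntry packs: bits 31-30 = cost, 29-26 = operation, 25-13 = LHS ID,
  // 12-0 = RHS ID.  Each ID encodes four result elements as base-9 digits
  // (0-7 pick a source element, 8 means undef); e.g. (1*9+2)*9+3 is the
  // digit string <0,1,2,3>, i.e. the identity copy of the LHS tested below.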
5208
5209  enum {
5210    OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
5211    OP_VMRGHW,
5212    OP_VMRGLW,
5213    OP_VSPLTISW0,
5214    OP_VSPLTISW1,
5215    OP_VSPLTISW2,
5216    OP_VSPLTISW3,
5217    OP_VSLDOI4,
5218    OP_VSLDOI8,
5219    OP_VSLDOI12
5220  };
5221
5222  if (OpNum == OP_COPY) {
5223    if (LHSID == (1*9+2)*9+3) return LHS;
5224    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
5225    return RHS;
5226  }
5227
5228  SDValue OpLHS, OpRHS;
5229  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
5230  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
5231
5232  int ShufIdxs[16];
5233  switch (OpNum) {
5234  default: llvm_unreachable("Unknown i32 permute!");
5235  case OP_VMRGHW:
5236    ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
5237    ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
5238    ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
5239    ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
5240    break;
5241  case OP_VMRGLW:
5242    ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
5243    ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
5244    ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
5245    ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
5246    break;
5247  case OP_VSPLTISW0:
5248    for (unsigned i = 0; i != 16; ++i)
5249      ShufIdxs[i] = (i&3)+0;
5250    break;
5251  case OP_VSPLTISW1:
5252    for (unsigned i = 0; i != 16; ++i)
5253      ShufIdxs[i] = (i&3)+4;
5254    break;
5255  case OP_VSPLTISW2:
5256    for (unsigned i = 0; i != 16; ++i)
5257      ShufIdxs[i] = (i&3)+8;
5258    break;
5259  case OP_VSPLTISW3:
5260    for (unsigned i = 0; i != 16; ++i)
5261      ShufIdxs[i] = (i&3)+12;
5262    break;
5263  case OP_VSLDOI4:
5264    return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
5265  case OP_VSLDOI8:
5266    return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
5267  case OP_VSLDOI12:
5268    return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
5269  }
5270  EVT VT = OpLHS.getValueType();
5271  OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
5272  OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
5273  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
5274  return DAG.getNode(ISD::BITCAST, dl, VT, T);
5275}
5276
5277/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
5278/// is a shuffle we can handle in a single instruction, return it.  Otherwise,
5279/// return the code it can be lowered into.  Worst case, it can always be
5280/// lowered into a vperm.
5281SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
5282                                               SelectionDAG &DAG) const {
5283  DebugLoc dl = Op.getDebugLoc();
5284  SDValue V1 = Op.getOperand(0);
5285  SDValue V2 = Op.getOperand(1);
5286  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
5287  EVT VT = Op.getValueType();
5288
5289  // Cases that are handled by instructions that take permute immediates
5290  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
5291  // selected by the instruction selector.
5292  if (V2.getOpcode() == ISD::UNDEF) {
5293    if (PPC::isSplatShuffleMask(SVOp, 1) ||
5294        PPC::isSplatShuffleMask(SVOp, 2) ||
5295        PPC::isSplatShuffleMask(SVOp, 4) ||
5296        PPC::isVPKUWUMShuffleMask(SVOp, true) ||
5297        PPC::isVPKUHUMShuffleMask(SVOp, true) ||
5298        PPC::isVSLDOIShuffleMask(SVOp, true) != -1 ||
5299        PPC::isVMRGLShuffleMask(SVOp, 1, true) ||
5300        PPC::isVMRGLShuffleMask(SVOp, 2, true) ||
5301        PPC::isVMRGLShuffleMask(SVOp, 4, true) ||
5302        PPC::isVMRGHShuffleMask(SVOp, 1, true) ||
5303        PPC::isVMRGHShuffleMask(SVOp, 2, true) ||
5304        PPC::isVMRGHShuffleMask(SVOp, 4, true)) {
5305      return Op;
5306    }
5307  }
5308
5309  // Altivec has a variety of "shuffle immediates" that take two vector inputs
5310  // and produce a fixed permutation.  If any of these match, do not lower to
5311  // VPERM.
5312  if (PPC::isVPKUWUMShuffleMask(SVOp, false) ||
5313      PPC::isVPKUHUMShuffleMask(SVOp, false) ||
5314      PPC::isVSLDOIShuffleMask(SVOp, false) != -1 ||
5315      PPC::isVMRGLShuffleMask(SVOp, 1, false) ||
5316      PPC::isVMRGLShuffleMask(SVOp, 2, false) ||
5317      PPC::isVMRGLShuffleMask(SVOp, 4, false) ||
5318      PPC::isVMRGHShuffleMask(SVOp, 1, false) ||
5319      PPC::isVMRGHShuffleMask(SVOp, 2, false) ||
5320      PPC::isVMRGHShuffleMask(SVOp, 4, false))
5321    return Op;
5322
5323  // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
5324  // perfect shuffle table to emit an optimal matching sequence.
5325  ArrayRef<int> PermMask = SVOp->getMask();
5326
5327  unsigned PFIndexes[4];
5328  bool isFourElementShuffle = true;
5329  for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
5330    unsigned EltNo = 8;   // Start out undef.
5331    for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
5332      if (PermMask[i*4+j] < 0)
5333        continue;   // Undef, ignore it.
5334
5335      unsigned ByteSource = PermMask[i*4+j];
5336      if ((ByteSource & 3) != j) {
5337        isFourElementShuffle = false;
5338        break;
5339      }
5340
5341      if (EltNo == 8) {
5342        EltNo = ByteSource/4;
5343      } else if (EltNo != ByteSource/4) {
5344        isFourElementShuffle = false;
5345        break;
5346      }
5347    }
5348    PFIndexes[i] = EltNo;
5349  }
5350
5351  // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
5352  // perfect shuffle vector to determine if it is cost effective to do this as
5353  // discrete instructions, or whether we should use a vperm.
5354  if (isFourElementShuffle) {
5355    // Compute the index in the perfect shuffle table.
5356    unsigned PFTableIndex =
5357      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
5358
5359    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
5360    unsigned Cost  = (PFEntry >> 30);
5361
5362    // Determining when to avoid vperm is tricky.  Many things affect the cost
5363    // of vperm, particularly how many times the perm mask needs to be computed.
5364    // For example, if the perm mask can be hoisted out of a loop or is already
5365    // used (perhaps because there are multiple permutes with the same shuffle
5366    // mask?) the vperm has a cost of 1.  OTOH, hoisting the permute mask out of
5367    // the loop requires an extra register.
5368    //
5369    // As a compromise, we only emit discrete instructions if the shuffle can be
5370    // generated in 3 or fewer operations.  When we have loop information
5371    // available, if this block is within a loop, we should avoid using vperm
5372    // for 3-operation perms and use a constant pool load instead.
5373    if (Cost < 3)
5374      return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
5375  }
5376
5377  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
5378  // vector that will get spilled to the constant pool.
5379  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
5380
5381  // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
5382  // that it is in input element units, not in bytes.  Convert now.
5383  EVT EltVT = V1.getValueType().getVectorElementType();
5384  unsigned BytesPerElement = EltVT.getSizeInBits()/8;
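  // E.g. for v4i32 (BytesPerElement == 4), element index 5 expands to the
  // byte indices 20, 21, 22, 23 in the vperm control vector built below.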
5385
5386  SmallVector<SDValue, 16> ResultMask;
5387  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
5388    unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
5389
5390    for (unsigned j = 0; j != BytesPerElement; ++j)
5391      ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
5392                                           MVT::i32));
5393  }
5394
5395  SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
5396                                    &ResultMask[0], ResultMask.size());
5397  return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2, VPermMask);
5398}
5399
5400/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
5401/// altivec comparison.  If it is, return true and fill in CompareOpc/isDot
5402/// with information about the intrinsic.
5403static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
5404                                  bool &isDot) {
5405  unsigned IntrinsicID =
5406    cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
5407  CompareOpc = -1;
5408  isDot = false;
5409  switch (IntrinsicID) {
5410  default: return false;
5411    // Comparison predicates.
5412  case Intrinsic::ppc_altivec_vcmpbfp_p:  CompareOpc = 966; isDot = 1; break;
5413  case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
5414  case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc =   6; isDot = 1; break;
5415  case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc =  70; isDot = 1; break;
5416  case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
5417  case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
5418  case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
5419  case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
5420  case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
5421  case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
5422  case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
5423  case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
5424  case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
5425
5426    // Normal Comparisons.
5427  case Intrinsic::ppc_altivec_vcmpbfp:    CompareOpc = 966; isDot = 0; break;
5428  case Intrinsic::ppc_altivec_vcmpeqfp:   CompareOpc = 198; isDot = 0; break;
5429  case Intrinsic::ppc_altivec_vcmpequb:   CompareOpc =   6; isDot = 0; break;
5430  case Intrinsic::ppc_altivec_vcmpequh:   CompareOpc =  70; isDot = 0; break;
5431  case Intrinsic::ppc_altivec_vcmpequw:   CompareOpc = 134; isDot = 0; break;
5432  case Intrinsic::ppc_altivec_vcmpgefp:   CompareOpc = 454; isDot = 0; break;
5433  case Intrinsic::ppc_altivec_vcmpgtfp:   CompareOpc = 710; isDot = 0; break;
5434  case Intrinsic::ppc_altivec_vcmpgtsb:   CompareOpc = 774; isDot = 0; break;
5435  case Intrinsic::ppc_altivec_vcmpgtsh:   CompareOpc = 838; isDot = 0; break;
5436  case Intrinsic::ppc_altivec_vcmpgtsw:   CompareOpc = 902; isDot = 0; break;
5437  case Intrinsic::ppc_altivec_vcmpgtub:   CompareOpc = 518; isDot = 0; break;
5438  case Intrinsic::ppc_altivec_vcmpgtuh:   CompareOpc = 582; isDot = 0; break;
5439  case Intrinsic::ppc_altivec_vcmpgtuw:   CompareOpc = 646; isDot = 0; break;
5440  }
5441  return true;
5442}
5443
5444/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
5445/// lower, do it, otherwise return null.
5446SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
5447                                                   SelectionDAG &DAG) const {
5448  // If this is a lowered altivec predicate compare, CompareOpc is set to the
5449  // opcode number of the comparison.
5450  DebugLoc dl = Op.getDebugLoc();
5451  int CompareOpc;
5452  bool isDot;
5453  if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
5454    return SDValue();    // Don't custom lower most intrinsics.
5455
5456  // If this is a non-dot comparison, make the VCMP node and we are done.
5457  if (!isDot) {
5458    SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
5459                              Op.getOperand(1), Op.getOperand(2),
5460                              DAG.getConstant(CompareOpc, MVT::i32));
5461    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
5462  }
5463
5464  // Create the PPCISD altivec 'dot' comparison node.
5465  SDValue Ops[] = {
5466    Op.getOperand(2),  // LHS
5467    Op.getOperand(3),  // RHS
5468    DAG.getConstant(CompareOpc, MVT::i32)
5469  };
5470  EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
5471  SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
5472
5473  // Now that we have the comparison, emit a copy from the CR to a GPR.
5474  // This is flagged to the above dot comparison.
5475  SDValue Flags = DAG.getNode(PPCISD::MFCR, dl, MVT::i32,
5476                                DAG.getRegister(PPC::CR6, MVT::i32),
5477                                CompNode.getValue(1));
5478
5479  // Unpack the result based on how the target uses it.
5480  unsigned BitNo;   // Bit # of CR6.
5481  bool InvertBit;   // Invert result?
5482  switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
5483  default:  // Can't happen, don't crash on invalid number though.
5484  case 0:   // Return the value of the EQ bit of CR6.
5485    BitNo = 0; InvertBit = false;
5486    break;
5487  case 1:   // Return the inverted value of the EQ bit of CR6.
5488    BitNo = 0; InvertBit = true;
5489    break;
5490  case 2:   // Return the value of the LT bit of CR6.
5491    BitNo = 2; InvertBit = false;
5492    break;
5493  case 3:   // Return the inverted value of the LT bit of CR6.
5494    BitNo = 2; InvertBit = true;
5495    break;
5496  }
5497
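  // The shift amount below assumes MFCR leaves CR6's four bits in bits
  // 7..4 of the result (LT=7, GT=6, EQ=5, SO=4, counting from the LSB):
  // 8-(3-BitNo) is then 5 for the EQ bit and 7 for the LT bit.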
5498  // Shift the bit into the low position.
5499  Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
5500                      DAG.getConstant(8-(3-BitNo), MVT::i32));
5501  // Isolate the bit.
5502  Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
5503                      DAG.getConstant(1, MVT::i32));
5504
5505  // If we are supposed to, toggle the bit.
5506  if (InvertBit)
5507    Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
5508                        DAG.getConstant(1, MVT::i32));
5509  return Flags;
5510}
5511
5512SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
5513                                                   SelectionDAG &DAG) const {
5514  DebugLoc dl = Op.getDebugLoc();
5515  // Create a stack slot that is 16-byte aligned.
5516  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
5517  int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
5518  EVT PtrVT = getPointerTy();
5519  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
5520
5521  // Store the input value into Value#0 of the stack slot.
5522  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
5523                               Op.getOperand(0), FIdx, MachinePointerInfo(),
5524                               false, false, 0);
5525  // Load it out.
5526  return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(),
5527                     false, false, false, 0);
5528}
5529
5530SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
5531  DebugLoc dl = Op.getDebugLoc();
5532  if (Op.getValueType() == MVT::v4i32) {
5533    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
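
    // The expansion below computes, per 32-bit element with 16-bit halves
    // a = ah:al and b = bh:bl (illustrative naming):
    //   a * b (mod 2^32) == al*bl + ((al*bh + ah*bl) << 16)
    // vmulouh produces the al*bl products, and vmsumuhm of LHS with the
    // half-swapped RHS produces the al*bh + ah*bl cross terms.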
5534
5535    SDValue Zero  = BuildSplatI(  0, 1, MVT::v4i32, DAG, dl);
5536    SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt.
5537
5538    SDValue RHSSwap =   // = vrlw RHS, 16
5539      BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
5540
5541    // Shrinkify inputs to v8i16.
5542    LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
5543    RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
5544    RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
5545
5546    // Low parts multiplied together, generating 32-bit results (we ignore the
5547    // top parts).
5548    SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
5549                                        LHS, RHS, DAG, dl, MVT::v4i32);
5550
5551    SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
5552                                      LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
5553    // Shift the high parts up 16 bits.
5554    HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
5555                              Neg16, DAG, dl);
5556    return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
5557  } else if (Op.getValueType() == MVT::v8i16) {
5558    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
5559
5560    SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);
5561
5562    return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
5563                            LHS, RHS, Zero, DAG, dl);
5564  } else if (Op.getValueType() == MVT::v16i8) {
5565    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
5566
5567    // Multiply the even 8-bit parts, producing 16-bit sums.
5568    SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
5569                                           LHS, RHS, DAG, dl, MVT::v8i16);
5570    EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
5571
5572    // Multiply the odd 8-bit parts, producing 16-bit sums.
5573    SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
5574                                          LHS, RHS, DAG, dl, MVT::v8i16);
5575    OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
5576
5577    // Merge the results together.
5578    int Ops[16];
5579    for (unsigned i = 0; i != 8; ++i) {
5580      Ops[i*2  ] = 2*i+1;
5581      Ops[i*2+1] = 2*i+1+16;
5582    }
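    // Byte 2*i+1 is the low (big-endian) byte of 16-bit product i, so this
    // interleaves the even products' low bytes with the odd products' low
    // bytes, yielding all 16 per-byte products mod 256.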
5583    return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
5584  } else {
5585    llvm_unreachable("Unknown mul to lower!");
5586  }
5587}
5588
5589/// LowerOperation - Provide custom lowering hooks for some operations.
5590///
5591SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
5592  switch (Op.getOpcode()) {
5593  default: llvm_unreachable("Wasn't expecting to be able to lower this!");
5594  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
5595  case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
5596  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
5597  case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
5598  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
5599  case ISD::SETCC:              return LowerSETCC(Op, DAG);
5600  case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
5601  case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);
5602  case ISD::VASTART:
5603    return LowerVASTART(Op, DAG, PPCSubTarget);
5604
5605  case ISD::VAARG:
5606    return LowerVAARG(Op, DAG, PPCSubTarget);
5607
5608  case ISD::STACKRESTORE:       return LowerSTACKRESTORE(Op, DAG, PPCSubTarget);
5609  case ISD::DYNAMIC_STACKALLOC:
5610    return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
5611
5612  case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
5613  case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);
5614
5615  case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
5616  case ISD::FP_TO_UINT:
5617  case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG,
5618                                                       Op.getDebugLoc());
5619  case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
5620  case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);
5621
5622  // Lower 64-bit shifts.
5623  case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
5624  case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
5625  case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);
5626
5627  // Vector-related lowering.
5628  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
5629  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
5630  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
5631  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
5632  case ISD::MUL:                return LowerMUL(Op, DAG);
5633
5634  // Frame & Return address.
5635  case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
5636  case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
5637  }
5638}
5639
5640void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
5641                                           SmallVectorImpl<SDValue>&Results,
5642                                           SelectionDAG &DAG) const {
5643  const TargetMachine &TM = getTargetMachine();
5644  DebugLoc dl = N->getDebugLoc();
5645  switch (N->getOpcode()) {
5646  default:
5647    llvm_unreachable("Do not know how to custom type legalize this operation!");
5648  case ISD::VAARG: {
5649    if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
5650        || TM.getSubtarget<PPCSubtarget>().isPPC64())
5651      return;
5652
5653    EVT VT = N->getValueType(0);
5654
5655    if (VT == MVT::i64) {
5656      SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, PPCSubTarget);
5657
5658      Results.push_back(NewNode);
5659      Results.push_back(NewNode.getValue(1));
5660    }
5661    return;
5662  }
5663  case ISD::FP_ROUND_INREG: {
5664    assert(N->getValueType(0) == MVT::ppcf128);
5665    assert(N->getOperand(0).getValueType() == MVT::ppcf128);
5666    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
5667                             MVT::f64, N->getOperand(0),
5668                             DAG.getIntPtrConstant(0));
5669    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
5670                             MVT::f64, N->getOperand(0),
5671                             DAG.getIntPtrConstant(1));
5672
5673    // Add the two halves of the long double in round-to-zero mode.
5674    SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
5675
5676    // We know the low half is about to be thrown away, so just use something
5677    // convenient.
5678    Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
5679                                FPreg, FPreg));
5680    return;
5681  }
5682  case ISD::FP_TO_SINT:
5683    Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
5684    return;
5685  }
5686}
5687
5688
5689//===----------------------------------------------------------------------===//
5690//  Other Lowering Code
5691//===----------------------------------------------------------------------===//
5692
5693MachineBasicBlock *
5694PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
5695                                    bool is64bit, unsigned BinOpcode) const {
5696  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
5697  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
5698
5699  const BasicBlock *LLVM_BB = BB->getBasicBlock();
5700  MachineFunction *F = BB->getParent();
5701  MachineFunction::iterator It = BB;
5702  ++It;
5703
5704  unsigned dest = MI->getOperand(0).getReg();
5705  unsigned ptrA = MI->getOperand(1).getReg();
5706  unsigned ptrB = MI->getOperand(2).getReg();
5707  unsigned incr = MI->getOperand(3).getReg();
5708  DebugLoc dl = MI->getDebugLoc();
5709
5710  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
5711  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
5712  F->insert(It, loopMBB);
5713  F->insert(It, exitMBB);
5714  exitMBB->splice(exitMBB->begin(), BB,
5715                  llvm::next(MachineBasicBlock::iterator(MI)),
5716                  BB->end());
5717  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
5718
5719  MachineRegisterInfo &RegInfo = F->getRegInfo();
5720  unsigned TmpReg = (!BinOpcode) ? incr :
5721    RegInfo.createVirtualRegister(
5722       is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
5723                 (const TargetRegisterClass *) &PPC::GPRCRegClass);
5724
5725  //  thisMBB:
5726  //   ...
5727  //   fallthrough --> loopMBB
5728  BB->addSuccessor(loopMBB);
5729
5730  //  loopMBB:
5731  //   l[wd]arx dest, ptr
5732  //   add r0, dest, incr
5733  //   st[wd]cx. r0, ptr
5734  //   bne- loopMBB
5735  //   fallthrough --> exitMBB
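  //
  // The bne- re-runs the loop whenever the st[wd]cx. fails, i.e. whenever
  // the reservation established by the l[wd]arx has been lost, typically
  // because another processor wrote to the reservation granule.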
5736  BB = loopMBB;
5737  BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
5738    .addReg(ptrA).addReg(ptrB);
5739  if (BinOpcode)
5740    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
5741  BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
5742    .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
5743  BuildMI(BB, dl, TII->get(PPC::BCC))
5744    .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
5745  BB->addSuccessor(loopMBB);
5746  BB->addSuccessor(exitMBB);
5747
5748  //  exitMBB:
5749  //   ...
5750  BB = exitMBB;
5751  return BB;
5752}
5753
5754MachineBasicBlock *
5755PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
5756                                            MachineBasicBlock *BB,
5757                                            bool is8bit,    // operation
5758                                            unsigned BinOpcode) const {
5759  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
5760  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
5761  // In 64-bit mode we have to use 64-bit addresses, even though the
5762  // lwarx/stwcx. instructions are 32-bit.  With the 32-bit atomics we can
5763  // use address registers without caring whether they are 32 or 64 bits,
5764  // but here we are doing actual arithmetic on the addresses.
5765  bool is64bit = PPCSubTarget.isPPC64();
5766  unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
5767
5768  const BasicBlock *LLVM_BB = BB->getBasicBlock();
5769  MachineFunction *F = BB->getParent();
5770  MachineFunction::iterator It = BB;
5771  ++It;
5772
5773  unsigned dest = MI->getOperand(0).getReg();
5774  unsigned ptrA = MI->getOperand(1).getReg();
5775  unsigned ptrB = MI->getOperand(2).getReg();
5776  unsigned incr = MI->getOperand(3).getReg();
5777  DebugLoc dl = MI->getDebugLoc();
5778
5779  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
5780  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
5781  F->insert(It, loopMBB);
5782  F->insert(It, exitMBB);
5783  exitMBB->splice(exitMBB->begin(), BB,
5784                  llvm::next(MachineBasicBlock::iterator(MI)),
5785                  BB->end());
5786  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
5787
5788  MachineRegisterInfo &RegInfo = F->getRegInfo();
5789  const TargetRegisterClass *RC =
5790    is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
5791              (const TargetRegisterClass *) &PPC::GPRCRegClass;
5792  unsigned PtrReg = RegInfo.createVirtualRegister(RC);
5793  unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
5794  unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
5795  unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
5796  unsigned MaskReg = RegInfo.createVirtualRegister(RC);
5797  unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
5798  unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
5799  unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
5800  unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
5801  unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
5802  unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
5803  unsigned Ptr1Reg;
5804  unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);
5805
5806  //  thisMBB:
5807  //   ...
5808  //   fallthrough --> loopMBB
5809  BB->addSuccessor(loopMBB);
5810
5811  // The 4-byte load must be aligned, while a char or short may be
5812  // anywhere in the word.  Hence all this nasty bookkeeping code.
5813  //   add ptr1, ptrA, ptrB [copy if ptrA==0]
5814  //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
5815  //   xori shift, shift1, 24 [16]
5816  //   rlwinm ptr, ptr1, 0, 0, 29
5817  //   slw incr2, incr, shift
5818  //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
5819  //   slw mask, mask2, shift
5820  //  loopMBB:
5821  //   lwarx tmpDest, ptr
5822  //   add tmp, tmpDest, incr2
5823  //   andc tmp2, tmpDest, mask
5824  //   and tmp3, tmp, mask
5825  //   or tmp4, tmp3, tmp2
5826  //   stwcx. tmp4, ptr
5827  //   bne- loopMBB
5828  //   fallthrough --> exitMBB
5829  //   srw dest, tmpDest, shift
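  //
  // For example (big-endian, is8bit): for the byte at (ptr & 3) == 1, the
  // rlwinm gives shift1 == 8 and the xori by 24 flips it to shift == 16,
  // so incr2 and mask line up with bits 23..16 of the word, which is where
  // byte 1 lives in a big-endian word.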
5830  if (ptrA != ZeroReg) {
5831    Ptr1Reg = RegInfo.createVirtualRegister(RC);
5832    BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
5833      .addReg(ptrA).addReg(ptrB);
5834  } else {
5835    Ptr1Reg = ptrB;
5836  }
5837  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
5838      .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
5839  BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
5840      .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
5841  if (is64bit)
5842    BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
5843      .addReg(Ptr1Reg).addImm(0).addImm(61);
5844  else
5845    BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
5846      .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
5847  BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
5848      .addReg(incr).addReg(ShiftReg);
5849  if (is8bit)
5850    BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
5851  else {
5852    BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
5853    BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535);
5854  }
5855  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
5856      .addReg(Mask2Reg).addReg(ShiftReg);
5857
5858  BB = loopMBB;
5859  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
5860    .addReg(ZeroReg).addReg(PtrReg);
5861  if (BinOpcode)
5862    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
5863      .addReg(Incr2Reg).addReg(TmpDestReg);
5864  BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
5865    .addReg(TmpDestReg).addReg(MaskReg);
5866  BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
5867    .addReg(TmpReg).addReg(MaskReg);
5868  BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
5869    .addReg(Tmp3Reg).addReg(Tmp2Reg);
5870  BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
5871    .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
5872  BuildMI(BB, dl, TII->get(PPC::BCC))
5873    .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
5874  BB->addSuccessor(loopMBB);
5875  BB->addSuccessor(exitMBB);
5876
5877  //  exitMBB:
5878  //   ...
5879  BB = exitMBB;
5880  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg)
5881    .addReg(ShiftReg);
5882  return BB;
5883}
5884
5885llvm::MachineBasicBlock*
5886PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
5887                                    MachineBasicBlock *MBB) const {
5888  DebugLoc DL = MI->getDebugLoc();
5889  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
5890
5891  MachineFunction *MF = MBB->getParent();
5892  MachineRegisterInfo &MRI = MF->getRegInfo();
5893
5894  const BasicBlock *BB = MBB->getBasicBlock();
5895  MachineFunction::iterator I = MBB;
5896  ++I;
5897
5898  // Memory Reference
5899  MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
5900  MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
5901
5902  unsigned DstReg = MI->getOperand(0).getReg();
5903  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
5904  assert(RC->hasType(MVT::i32) && "Invalid destination!");
5905  unsigned mainDstReg = MRI.createVirtualRegister(RC);
5906  unsigned restoreDstReg = MRI.createVirtualRegister(RC);
5907
5908  MVT PVT = getPointerTy();
5909  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
5910         "Invalid Pointer Size!");
5911  // For v = setjmp(buf), we generate
5912  //
5913  // thisMBB:
5914  //  SjLjSetup mainMBB
5915  //  bl mainMBB
5916  //  v_restore = 1
5917  //  b sinkMBB
5918  //
5919  // mainMBB:
5920  //  buf[LabelOffset] = LR
5921  //  v_main = 0
5922  //
5923  // sinkMBB:
5924  //  v = phi(main, restore)
5925  //
5926
5927  MachineBasicBlock *thisMBB = MBB;
5928  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
5929  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
5930  MF->insert(I, mainMBB);
5931  MF->insert(I, sinkMBB);
5932
5933  MachineInstrBuilder MIB;
5934
5935  // Transfer the remainder of BB and its successor edges to sinkMBB.
5936  sinkMBB->splice(sinkMBB->begin(), MBB,
5937                  llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
5938  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
5939
5940  // Note that the structure of the jmp_buf used here is not compatible
5941  // with that used by libc, and is not designed to be. Specifically, it
5942  // stores only those 'reserved' registers that LLVM does not otherwise
5943  // understand how to spill. Also, by convention, by the time this
5944  // intrinsic is called, Clang has already stored the frame address in the
5945  // first slot of the buffer and stack address in the third. Following the
5946  // X86 target code, we'll store the jump address in the second slot. We also
5947  // need to save the TOC pointer (R2) to handle jumps between shared
5948  // libraries, and that will be stored in the fourth slot. The thread
5949  // identifier (R13) is not affected.
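  //
  // The resulting buffer layout, in pointer-sized slots, as used here and
  // in emitEHSjLjLongJmp below:
  //   slot 0: frame address          (stored by the front end)
  //   slot 1: jump (return) address, taken from LR
  //   slot 2: stack address          (stored by the front end)
  //   slot 3: TOC pointer (R2), 64-bit SVR4 only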
5950
5951  // thisMBB:
5952  const int64_t LabelOffset = 1 * PVT.getStoreSize();
5953  const int64_t TOCOffset   = 3 * PVT.getStoreSize();
5954
5955  // Prepare the IP in a register.
5956  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
5957  unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
5958  unsigned BufReg = MI->getOperand(1).getReg();
5959
5960  if (PPCSubTarget.isPPC64() && PPCSubTarget.isSVR4ABI()) {
5961    MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
5962            .addReg(PPC::X2)
5963            .addImm(TOCOffset / 4)
5964            .addReg(BufReg);
5965
5966    MIB.setMemRefs(MMOBegin, MMOEnd);
5967  }
5968
5969  // Setup
5970  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCL)).addMBB(mainMBB);
5971  MIB.addRegMask(PPCRegInfo->getNoPreservedMask());
5972
5973  BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
5974
5975  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
5976          .addMBB(mainMBB);
5977  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
5978
5979  thisMBB->addSuccessor(mainMBB, /* weight */ 0);
5980  thisMBB->addSuccessor(sinkMBB, /* weight */ 1);
5981
5982  // mainMBB:
5983  //  mainDstReg = 0
5984  MIB = BuildMI(mainMBB, DL,
5985    TII->get(PPCSubTarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
5986
5987  // Store IP
5988  if (PPCSubTarget.isPPC64()) {
5989    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
5990            .addReg(LabelReg)
5991            .addImm(LabelOffset / 4)
5992            .addReg(BufReg);
5993  } else {
5994    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
5995            .addReg(LabelReg)
5996            .addImm(LabelOffset)
5997            .addReg(BufReg);
5998  }
5999
6000  MIB.setMemRefs(MMOBegin, MMOEnd);
6001
6002  BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
6003  mainMBB->addSuccessor(sinkMBB);
6004
6005  // sinkMBB:
6006  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
6007          TII->get(PPC::PHI), DstReg)
6008    .addReg(mainDstReg).addMBB(mainMBB)
6009    .addReg(restoreDstReg).addMBB(thisMBB);
6010
6011  MI->eraseFromParent();
6012  return sinkMBB;
6013}
6014
6015MachineBasicBlock *
6016PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
6017                                     MachineBasicBlock *MBB) const {
6018  DebugLoc DL = MI->getDebugLoc();
6019  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
6020
6021  MachineFunction *MF = MBB->getParent();
6022  MachineRegisterInfo &MRI = MF->getRegInfo();
6023
6024  // Memory Reference
6025  MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
6026  MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
6027
6028  MVT PVT = getPointerTy();
6029  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
6030         "Invalid Pointer Size!");
6031
6032  const TargetRegisterClass *RC =
6033    (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
6034  unsigned Tmp = MRI.createVirtualRegister(RC);
6035  // Since FP is only updated here but NOT referenced, it's treated as GPR.
6036  unsigned FP  = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
6037  unsigned SP  = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
6038
6039  MachineInstrBuilder MIB;
6040
6041  const int64_t LabelOffset = 1 * PVT.getStoreSize();
6042  const int64_t SPOffset    = 2 * PVT.getStoreSize();
6043  const int64_t TOCOffset   = 3 * PVT.getStoreSize();
6044
6045  unsigned BufReg = MI->getOperand(0).getReg();
6046
6047  // Reload FP (the jumped-to function may not have had a
6048  // frame pointer, and if so, then its r31 will be restored
6049  // as necessary).
6050  if (PVT == MVT::i64) {
6051    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
6052            .addImm(0)
6053            .addReg(BufReg);
6054  } else {
6055    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
6056            .addImm(0)
6057            .addReg(BufReg);
6058  }
6059  MIB.setMemRefs(MMOBegin, MMOEnd);
6060
6061  // Reload IP
6062  if (PVT == MVT::i64) {
6063    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
6064            .addImm(LabelOffset / 4)
6065            .addReg(BufReg);
6066  } else {
6067    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
6068            .addImm(LabelOffset)
6069            .addReg(BufReg);
6070  }
6071  MIB.setMemRefs(MMOBegin, MMOEnd);
6072
6073  // Reload SP
6074  if (PVT == MVT::i64) {
6075    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
6076            .addImm(SPOffset / 4)
6077            .addReg(BufReg);
6078  } else {
6079    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
6080            .addImm(SPOffset)
6081            .addReg(BufReg);
6082  }
6083  MIB.setMemRefs(MMOBegin, MMOEnd);
6084
6085  // FIXME: When we also support base pointers, that register must also be
6086  // restored here.
6087
6088  // Reload TOC
6089  if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) {
6090    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
6091            .addImm(TOCOffset / 4)
6092            .addReg(BufReg);
6093
6094    MIB.setMemRefs(MMOBegin, MMOEnd);
6095  }
6096
6097  // Jump
6098  BuildMI(*MBB, MI, DL,
6099          TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
6100  BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
6101
6102  MI->eraseFromParent();
6103  return MBB;
6104}
6105
6106MachineBasicBlock *
6107PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
6108                                               MachineBasicBlock *BB) const {
6109  if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 ||
6110      MI->getOpcode() == PPC::EH_SjLj_SetJmp64) {
6111    return emitEHSjLjSetJmp(MI, BB);
6112  } else if (MI->getOpcode() == PPC::EH_SjLj_LongJmp32 ||
6113             MI->getOpcode() == PPC::EH_SjLj_LongJmp64) {
6114    return emitEHSjLjLongJmp(MI, BB);
6115  }
6116
6117  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
6118
6119  // To "insert" these instructions we actually have to insert their
6120  // control-flow patterns.
6121  const BasicBlock *LLVM_BB = BB->getBasicBlock();
6122  MachineFunction::iterator It = BB;
6123  ++It;
6124
6125  MachineFunction *F = BB->getParent();
6126
6127  if (PPCSubTarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
6128                                 MI->getOpcode() == PPC::SELECT_CC_I8)) {
6129    unsigned OpCode = MI->getOpcode() == PPC::SELECT_CC_I8 ?
6130                                         PPC::ISEL8 : PPC::ISEL;
6131    unsigned SelectPred = MI->getOperand(4).getImm();
6132    DebugLoc dl = MI->getDebugLoc();
6133
6134    unsigned SubIdx;
6135    bool SwapOps;
6136    switch (SelectPred) {
6137    default: llvm_unreachable("invalid predicate for isel");
6138    case PPC::PRED_EQ: SubIdx = PPC::sub_eq; SwapOps = false; break;
6139    case PPC::PRED_NE: SubIdx = PPC::sub_eq; SwapOps = true; break;
6140    case PPC::PRED_LT: SubIdx = PPC::sub_lt; SwapOps = false; break;
6141    case PPC::PRED_GE: SubIdx = PPC::sub_lt; SwapOps = true; break;
6142    case PPC::PRED_GT: SubIdx = PPC::sub_gt; SwapOps = false; break;
6143    case PPC::PRED_LE: SubIdx = PPC::sub_gt; SwapOps = true; break;
6144    case PPC::PRED_UN: SubIdx = PPC::sub_un; SwapOps = false; break;
6145    case PPC::PRED_NU: SubIdx = PPC::sub_un; SwapOps = true; break;
6146    }
6147
6148    BuildMI(*BB, MI, dl, TII->get(OpCode), MI->getOperand(0).getReg())
6149      .addReg(MI->getOperand(SwapOps? 3 : 2).getReg())
6150      .addReg(MI->getOperand(SwapOps? 2 : 3).getReg())
6151      .addReg(MI->getOperand(1).getReg(), 0, SubIdx);
6152  } else if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
6153             MI->getOpcode() == PPC::SELECT_CC_I8 ||
6154             MI->getOpcode() == PPC::SELECT_CC_F4 ||
6155             MI->getOpcode() == PPC::SELECT_CC_F8 ||
6156             MI->getOpcode() == PPC::SELECT_CC_VRRC) {
6157
6158
6159    // The incoming instruction knows the destination vreg to set, the
6160    // condition code register to branch on, the true/false values to
6161    // select between, and a branch opcode to use.
6162
6163    //  thisMBB:
6164    //  ...
6165    //   TrueVal = ...
6166    //   cmpTY ccX, r1, r2
6167    //   bCC copy1MBB
6168    //   fallthrough --> copy0MBB
6169    MachineBasicBlock *thisMBB = BB;
6170    MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
6171    MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
6172    unsigned SelectPred = MI->getOperand(4).getImm();
6173    DebugLoc dl = MI->getDebugLoc();
6174    F->insert(It, copy0MBB);
6175    F->insert(It, sinkMBB);
6176
6177    // Transfer the remainder of BB and its successor edges to sinkMBB.
6178    sinkMBB->splice(sinkMBB->begin(), BB,
6179                    llvm::next(MachineBasicBlock::iterator(MI)),
6180                    BB->end());
6181    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
6182
6183    // Next, add the true and fallthrough blocks as its successors.
6184    BB->addSuccessor(copy0MBB);
6185    BB->addSuccessor(sinkMBB);
6186
6187    BuildMI(BB, dl, TII->get(PPC::BCC))
6188      .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
6189
6190    //  copy0MBB:
6191    //   %FalseValue = ...
6192    //   # fallthrough to sinkMBB
6193    BB = copy0MBB;
6194
6195    // Update machine-CFG edges
6196    BB->addSuccessor(sinkMBB);
6197
6198    //  sinkMBB:
6199    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
6200    //  ...
6201    BB = sinkMBB;
6202    BuildMI(*BB, BB->begin(), dl,
6203            TII->get(PPC::PHI), MI->getOperand(0).getReg())
6204      .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
6205      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
6206  }
6207  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
6208    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
6209  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
6210    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
6211  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
6212    BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4);
6213  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
6214    BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8);
6215
6216  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
6217    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
6218  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
6219    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
6220  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
6221    BB = EmitAtomicBinary(MI, BB, false, PPC::AND);
6222  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
6223    BB = EmitAtomicBinary(MI, BB, true, PPC::AND8);
6224
6225  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
6226    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
6227  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
6228    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
6229  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
6230    BB = EmitAtomicBinary(MI, BB, false, PPC::OR);
6231  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
6232    BB = EmitAtomicBinary(MI, BB, true, PPC::OR8);
6233
6234  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
6235    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
6236  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
6237    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
6238  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
6239    BB = EmitAtomicBinary(MI, BB, false, PPC::XOR);
6240  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
6241    BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8);
6242
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
    BB = EmitAtomicBinary(MI, BB, false, PPC::NAND);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
    BB = EmitAtomicBinary(MI, BB, true, PPC::NAND8);

  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
    BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
    BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8);

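  // For the swap forms, a BinOpcode of 0 tells EmitAtomicBinary and
  // EmitPartwordAtomicBinary to store the incoming value directly instead of
  // combining it with the loaded value.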
  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32)
    BB = EmitAtomicBinary(MI, BB, false, 0);
  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64)
    BB = EmitAtomicBinary(MI, BB, true, 0);

  else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
           MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) {
    bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;

    unsigned dest   = MI->getOperand(0).getReg();
    unsigned ptrA   = MI->getOperand(1).getReg();
    unsigned ptrB   = MI->getOperand(2).getReg();
    unsigned oldval = MI->getOperand(3).getReg();
    unsigned newval = MI->getOperand(4).getReg();
    DebugLoc dl     = MI->getDebugLoc();

    MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
    F->insert(It, loop1MBB);
    F->insert(It, loop2MBB);
    F->insert(It, midMBB);
    F->insert(It, exitMBB);
    exitMBB->splice(exitMBB->begin(), BB,
                    llvm::next(MachineBasicBlock::iterator(MI)),
                    BB->end());
    exitMBB->transferSuccessorsAndUpdatePHIs(BB);

    //  thisMBB:
    //   ...
    //   fallthrough --> loop1MBB
    BB->addSuccessor(loop1MBB);

    // loop1MBB:
    //   l[wd]arx dest, ptr
    //   cmp[wd] dest, oldval
    //   bne- midMBB
    // loop2MBB:
    //   st[wd]cx. newval, ptr
    //   bne- loop1MBB
    //   b exitMBB
    // midMBB:
    //   st[wd]cx. dest, ptr
    // exitMBB:
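    // This is the classic larx/stcx. compare-and-swap loop: e.g. a
    // 'cmpxchg i32* %ptr, i32 %old, i32 %new seq_cst' reaches here as
    // ATOMIC_CMP_SWAP_I32.  The store in midMBB exists only to release the
    // reservation on the failure path.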
    BB = loop1MBB;
    BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
      .addReg(ptrA).addReg(ptrB);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
      .addReg(oldval).addReg(dest);
    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(midMBB);

    BB = loop2MBB;
    BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
      .addReg(newval).addReg(ptrA).addReg(ptrB);
    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
    BB->addSuccessor(loop1MBB);
    BB->addSuccessor(exitMBB);

    BB = midMBB;
    BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
      .addReg(dest).addReg(ptrA).addReg(ptrB);
    BB->addSuccessor(exitMBB);

    //  exitMBB:
    //   ...
    BB = exitMBB;
  } else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
             MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
    // We must use 64-bit registers for addresses when targeting 64-bit,
    // since we're actually doing arithmetic on them.  Other registers
    // can be 32-bit.
    bool is64bit = PPCSubTarget.isPPC64();
    bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;

    unsigned dest   = MI->getOperand(0).getReg();
    unsigned ptrA   = MI->getOperand(1).getReg();
    unsigned ptrB   = MI->getOperand(2).getReg();
    unsigned oldval = MI->getOperand(3).getReg();
    unsigned newval = MI->getOperand(4).getReg();
    DebugLoc dl     = MI->getDebugLoc();

    MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
    F->insert(It, loop1MBB);
    F->insert(It, loop2MBB);
    F->insert(It, midMBB);
    F->insert(It, exitMBB);
    exitMBB->splice(exitMBB->begin(), BB,
                    llvm::next(MachineBasicBlock::iterator(MI)),
                    BB->end());
    exitMBB->transferSuccessorsAndUpdatePHIs(BB);

    MachineRegisterInfo &RegInfo = F->getRegInfo();
    const TargetRegisterClass *RC =
      is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
                (const TargetRegisterClass *) &PPC::GPRCRegClass;
    unsigned PtrReg = RegInfo.createVirtualRegister(RC);
    unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
    unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
    unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
    unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
    unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
    unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
    unsigned MaskReg = RegInfo.createVirtualRegister(RC);
    unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
    unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
    unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
    unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
    unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
    unsigned Ptr1Reg;
    unsigned TmpReg = RegInfo.createVirtualRegister(RC);
    unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
    //  thisMBB:
    //   ...
    //   fallthrough --> loop1MBB
    BB->addSuccessor(loop1MBB);

    // The 4-byte load must be aligned, while a char or short may be
    // anywhere in the word.  Hence all this nasty bookkeeping code.
    //   add ptr1, ptrA, ptrB [copy if ptrA==0]
    //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
    //   xori shift, shift1, 24 [16]
    //   rlwinm ptr, ptr1, 0, 0, 29
    //   slw newval2, newval, shift
    //   slw oldval2, oldval, shift
    //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
    //   slw mask, mask2, shift
    //   and newval3, newval2, mask
    //   and oldval3, oldval2, mask
    // loop1MBB:
    //   lwarx tmpDest, ptr
    //   and tmp, tmpDest, mask
    //   cmpw tmp, oldval3
    //   bne- midMBB
    // loop2MBB:
    //   andc tmp2, tmpDest, mask
    //   or tmp4, tmp2, newval3
    //   stwcx. tmp4, ptr
    //   bne- loop1MBB
    //   b exitMBB
    // midMBB:
    //   stwcx. tmpDest, ptr
    // exitMBB:
    //   srw dest, tmp, shift
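    // Example (big-endian): for an i8 at ptr1 & 3 == 1, shift1 = 8 and
    // shift = 8 ^ 24 = 16, so the byte lives in bits 16..23 of the aligned
    // word, and the value and mask are shifted left by 16 before the loop.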
    if (ptrA != ZeroReg) {
      Ptr1Reg = RegInfo.createVirtualRegister(RC);
      BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
        .addReg(ptrA).addReg(ptrB);
    } else {
      Ptr1Reg = ptrB;
    }
    BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
        .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
        .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
    if (is64bit)
      BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
        .addReg(Ptr1Reg).addImm(0).addImm(61);
    else
      BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
        .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
    BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
        .addReg(newval).addReg(ShiftReg);
    BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
        .addReg(oldval).addReg(ShiftReg);
    if (is8bit)
      BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
    else {
      BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
      BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
        .addReg(Mask3Reg).addImm(65535);
    }
    BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
        .addReg(Mask2Reg).addReg(ShiftReg);
    BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
        .addReg(NewVal2Reg).addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
        .addReg(OldVal2Reg).addReg(MaskReg);

    BB = loop1MBB;
    BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
        .addReg(ZeroReg).addReg(PtrReg);
    BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
        .addReg(TmpDestReg).addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
        .addReg(TmpReg).addReg(OldVal3Reg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(midMBB);

    BB = loop2MBB;
    BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
        .addReg(TmpDestReg).addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
        .addReg(Tmp2Reg).addReg(NewVal3Reg);
    BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
        .addReg(ZeroReg).addReg(PtrReg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
    BB->addSuccessor(loop1MBB);
    BB->addSuccessor(exitMBB);

    BB = midMBB;
    BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
      .addReg(ZeroReg).addReg(PtrReg);
    BB->addSuccessor(exitMBB);

    //  exitMBB:
    //   ...
    BB = exitMBB;
    BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpReg)
      .addReg(ShiftReg);
  } else if (MI->getOpcode() == PPC::FADDrtz) {
    // This pseudo performs an FADD with rounding mode temporarily forced
    // to round-to-zero.  We emit this via custom inserter since the FPSCR
    // is not modeled at the SelectionDAG level.
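    // (FADDrtz is used, e.g., when lowering FP_ROUND_INREG of a ppc_fp128
    // value, where the two component doubles must be summed with
    // round-to-zero.)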
    unsigned Dest = MI->getOperand(0).getReg();
    unsigned Src1 = MI->getOperand(1).getReg();
    unsigned Src2 = MI->getOperand(2).getReg();
    DebugLoc dl   = MI->getDebugLoc();

    MachineRegisterInfo &RegInfo = F->getRegInfo();
    unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);

    // Save FPSCR value.
    BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);

    // Set rounding mode to round-to-zero.
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);
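    // FPSCR bits 30:31 (IBM numbering) form the RN field; setting bit 31 and
    // clearing bit 30 leaves RN = 0b01, i.e. round toward zero.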

    // Perform addition.
    BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);

    // Restore FPSCR value.
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg);
  } else {
    llvm_unreachable("Unexpected instr type to insert");
  }

  MI->eraseFromParent();   // The pseudo instruction is gone now.
  return BB;
}

//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//

SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
  const TargetMachine &TM = getTargetMachine();
  SelectionDAG &DAG = DCI.DAG;
  DebugLoc dl = N->getDebugLoc();
  switch (N->getOpcode()) {
  default: break;
  case PPCISD::SHL:
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      if (C->isNullValue())   // 0 << V -> 0.
        return N->getOperand(0);
    }
    break;
  case PPCISD::SRL:
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      if (C->isNullValue())   // 0 >>u V -> 0.
        return N->getOperand(0);
    }
    break;
  case PPCISD::SRA:
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      if (C->isNullValue() ||   //  0 >>s V -> 0.
          C->isAllOnesValue())    // -1 >>s V -> -1.
        return N->getOperand(0);
    }
    break;

  case ISD::SINT_TO_FP:
    if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
      if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
        // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
        // We allow the src/dst to be either f32/f64, but the intermediate
        // type must be i64.
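        // For example, 'd2 = (double)(long long)d1' round-trips through i64
        // and becomes fctidz+fcfid here, with no stack temporary.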
        if (N->getOperand(0).getValueType() == MVT::i64 &&
            N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) {
          SDValue Val = N->getOperand(0).getOperand(0);
          if (Val.getValueType() == MVT::f32) {
            Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
            DCI.AddToWorklist(Val.getNode());
          }

          Val = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Val);
          DCI.AddToWorklist(Val.getNode());
          Val = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Val);
          DCI.AddToWorklist(Val.getNode());
          if (N->getValueType(0) == MVT::f32) {
            Val = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Val,
                              DAG.getIntPtrConstant(0));
            DCI.AddToWorklist(Val.getNode());
          }
          return Val;
        } else if (N->getOperand(0).getValueType() == MVT::i32) {
          // If the intermediate type is i32, we can avoid the load/store here
          // too.
        }
      }
    }
    break;
  case ISD::STORE:
    // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
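    // e.g. '*ip = (int)f;' becomes fctiwz+stfiwx instead of converting
    // through a stack slot.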
    if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
        !cast<StoreSDNode>(N)->isTruncatingStore() &&
        N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
        N->getOperand(1).getValueType() == MVT::i32 &&
        N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
      SDValue Val = N->getOperand(1).getOperand(0);
      if (Val.getValueType() == MVT::f32) {
        Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
        DCI.AddToWorklist(Val.getNode());
      }
      Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
      DCI.AddToWorklist(Val.getNode());

      Val = DAG.getNode(PPCISD::STFIWX, dl, MVT::Other, N->getOperand(0), Val,
                        N->getOperand(2), N->getOperand(3));
      DCI.AddToWorklist(Val.getNode());
      return Val;
    }

    // Turn STORE (BSWAP) -> sthbrx/stwbrx.
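    // e.g. a __builtin_bswap32 whose only use is a store becomes a single
    // stwbrx.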
    if (cast<StoreSDNode>(N)->isUnindexed() &&
        N->getOperand(1).getOpcode() == ISD::BSWAP &&
        N->getOperand(1).getNode()->hasOneUse() &&
        (N->getOperand(1).getValueType() == MVT::i32 ||
         N->getOperand(1).getValueType() == MVT::i16 ||
         (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
          TM.getSubtarget<PPCSubtarget>().isPPC64() &&
          N->getOperand(1).getValueType() == MVT::i64))) {
      SDValue BSwapOp = N->getOperand(1).getOperand(0);
      // Do an any-extend to 32 bits if this is a half-word input.
      if (BSwapOp.getValueType() == MVT::i16)
        BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);

      SDValue Ops[] = {
        N->getOperand(0), BSwapOp, N->getOperand(2),
        DAG.getValueType(N->getOperand(1).getValueType())
      };
      return
        DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
                                Ops, array_lengthof(Ops),
                                cast<StoreSDNode>(N)->getMemoryVT(),
                                cast<StoreSDNode>(N)->getMemOperand());
    }
    break;
  case ISD::BSWAP:
    // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
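    // e.g. __builtin_bswap32(*p) becomes a single lwbrx when the load has no
    // other users.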
    if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
        N->getOperand(0).hasOneUse() &&
        (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
         (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
          TM.getSubtarget<PPCSubtarget>().isPPC64() &&
          N->getValueType(0) == MVT::i64))) {
      SDValue Load = N->getOperand(0);
      LoadSDNode *LD = cast<LoadSDNode>(Load);
      // Create the byte-swapping load.
      SDValue Ops[] = {
        LD->getChain(),    // Chain
        LD->getBasePtr(),  // Ptr
        DAG.getValueType(N->getValueType(0)) // VT
      };
      SDValue BSLoad =
        DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
                                DAG.getVTList(N->getValueType(0) == MVT::i64 ?
                                              MVT::i64 : MVT::i32, MVT::Other),
                                Ops, 3, LD->getMemoryVT(), LD->getMemOperand());

      // If this is an i16 load, insert the truncate.
      SDValue ResVal = BSLoad;
      if (N->getValueType(0) == MVT::i16)
        ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);

      // First, combine the bswap away.  This makes the value produced by the
      // load dead.
      DCI.CombineTo(N, ResVal);

      // Next, combine the load away; we give it a bogus result value but a
      // real chain result.  The result value is dead because the bswap is
      // dead.
      DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));

      // Return N so it doesn't get rechecked!
      return SDValue(N, 0);
    }

    break;
  case PPCISD::VCMP: {
    // If a VCMPo node already exists with exactly the same operands as this
    // node, use its result instead of this node (VCMPo computes both a CR6 and
    // a normal output).
    //
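    // This arises, e.g., when vec_cmpeq and vec_any_eq are used on the same
    // operands: one vcmpequw. can supply both the vector mask and the CR6
    // predicate.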
    if (!N->getOperand(0).hasOneUse() &&
        !N->getOperand(1).hasOneUse() &&
        !N->getOperand(2).hasOneUse()) {

      // Scan all of the users of the LHS, looking for VCMPo's that match.
      SDNode *VCMPoNode = 0;

      SDNode *LHSN = N->getOperand(0).getNode();
      for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
           UI != E; ++UI)
        if (UI->getOpcode() == PPCISD::VCMPo &&
            UI->getOperand(1) == N->getOperand(1) &&
            UI->getOperand(2) == N->getOperand(2) &&
            UI->getOperand(0) == N->getOperand(0)) {
          VCMPoNode = *UI;
          break;
        }

      // If there is no VCMPo node, or if its flag result is unused, don't
      // transform this.
      if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
        break;

      // Look at the (necessarily single) use of the flag value.  If it has a
      // chain, this transformation is more complex.  Note that multiple things
      // could use the value result, which we should ignore.
      SDNode *FlagUser = 0;
      for (SDNode::use_iterator UI = VCMPoNode->use_begin();
           FlagUser == 0; ++UI) {
        assert(UI != VCMPoNode->use_end() && "Didn't find user!");
        SDNode *User = *UI;
        for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
          if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
            FlagUser = User;
            break;
          }
        }
      }

      // If the user is an MFCR instruction, we know this is safe.  Otherwise
      // we give up for right now.
      if (FlagUser->getOpcode() == PPCISD::MFCR)
        return SDValue(VCMPoNode, 0);
    }
    break;
  }
  case ISD::BR_CC: {
    // If this is a branch on an altivec predicate comparison, lower this so
    // that we don't have to do an MFCR: instead, branch directly on CR6.  This
    // lowering is done pre-legalize, because the legalizer lowers the predicate
    // compare down to code that is difficult to reassemble.
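    // e.g. 'if (vec_any_eq(a, b)) ...' becomes a vcmpequw. followed by a
    // conditional branch on CR6, with no mfcr in between.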
    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
    SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
    int CompareOpc;
    bool isDot;

    if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
        isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
        getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
      assert(isDot && "Can't compare against a vector result!");

      // If this is a comparison against something other than 0/1, then we know
      // that the condition is never/always true.
      unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
      if (Val != 0 && Val != 1) {
        if (CC == ISD::SETEQ)      // Cond never true, remove branch.
          return N->getOperand(0);
        // Always !=, turn it into an unconditional branch.
        return DAG.getNode(ISD::BR, dl, MVT::Other,
                           N->getOperand(0), N->getOperand(4));
      }

      bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);

      // Create the PPCISD altivec 'dot' comparison node.
      SDValue Ops[] = {
        LHS.getOperand(2),  // LHS of compare
        LHS.getOperand(3),  // RHS of compare
        DAG.getConstant(CompareOpc, MVT::i32)
      };
      EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
      SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);

      // Unpack the result based on how the target uses it.
      PPC::Predicate CompOpc;
      switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
      default:  // Can't happen, don't crash on invalid number though.
      case 0:   // Branch on the value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
        break;
      case 1:   // Branch on the inverted value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
        break;
      case 2:   // Branch on the value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
        break;
      case 3:   // Branch on the inverted value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
        break;
      }

      return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
                         DAG.getConstant(CompOpc, MVT::i32),
                         DAG.getRegister(PPC::CR6, MVT::i32),
                         N->getOperand(4), CompNode.getValue(1));
    }
    break;
  }
  }

  return SDValue();
}

//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                       APInt &KnownZero,
                                                       APInt &KnownOne,
                                                       const SelectionDAG &DAG,
                                                       unsigned Depth) const {
  KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
  switch (Op.getOpcode()) {
  default: break;
  case PPCISD::LBRX: {
    // lhbrx is known to have the top bits cleared out.
    if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
      KnownZero = 0xFFFF0000;
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
    default: break;
    case Intrinsic::ppc_altivec_vcmpbfp_p:
    case Intrinsic::ppc_altivec_vcmpeqfp_p:
    case Intrinsic::ppc_altivec_vcmpequb_p:
    case Intrinsic::ppc_altivec_vcmpequh_p:
    case Intrinsic::ppc_altivec_vcmpequw_p:
    case Intrinsic::ppc_altivec_vcmpgefp_p:
    case Intrinsic::ppc_altivec_vcmpgtfp_p:
    case Intrinsic::ppc_altivec_vcmpgtsb_p:
    case Intrinsic::ppc_altivec_vcmpgtsh_p:
    case Intrinsic::ppc_altivec_vcmpgtsw_p:
    case Intrinsic::ppc_altivec_vcmpgtub_p:
    case Intrinsic::ppc_altivec_vcmpgtuh_p:
    case Intrinsic::ppc_altivec_vcmpgtuw_p:
      KnownZero = ~1U;  // All bits but the low one are known to be zero.
      break;
    }
  }
  }
}


/// getConstraintType - Given a constraint, return the type of
/// constraint it is for this target.
PPCTargetLowering::ConstraintType
PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'v':
    case 'y':
      return C_RegisterClass;
    case 'Z':
      // FIXME: While Z does indicate a memory constraint, it specifically
      // indicates an r+r address (used in conjunction with the 'y' modifier
      // in the replacement string). Currently, we're forcing the base
      // register to be r0 in the asm printer (which is interpreted as zero)
      // and forming the complete address in the second register. This is
      // suboptimal.
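      // The expected use is along the lines of
      //   asm("lwbrx %0,%y1" : "=r"(v) : "Z"(*p));
      // where the 'y' modifier prints the address as an rA,rB (indexed) pair.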
      return C_Memory;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
PPCTargetLowering::getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (CallOperandVal == NULL)
    return CW_Default;
  Type *type = CallOperandVal->getType();
  // Look at the constraint type.
  switch (*constraint) {
  default:
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    break;
  case 'b':
    if (type->isIntegerTy())
      weight = CW_Register;
    break;
  case 'f':
    if (type->isFloatTy())
      weight = CW_Register;
    break;
  case 'd':
    if (type->isDoubleTy())
      weight = CW_Register;
    break;
  case 'v':
    if (type->isVectorTy())
      weight = CW_Register;
    break;
  case 'y':
    weight = CW_Register;
    break;
  case 'Z':
    weight = CW_Memory;
    break;
  }
  return weight;
}

std::pair<unsigned, const TargetRegisterClass*>
PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                EVT VT) const {
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b':   // R1-R31
      if (VT == MVT::i64 && PPCSubTarget.isPPC64())
        return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
      return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
    case 'r':   // R0-R31
      if (VT == MVT::i64 && PPCSubTarget.isPPC64())
        return std::make_pair(0U, &PPC::G8RCRegClass);
      return std::make_pair(0U, &PPC::GPRCRegClass);
    case 'f':
      if (VT == MVT::f32 || VT == MVT::i32)
        return std::make_pair(0U, &PPC::F4RCRegClass);
      if (VT == MVT::f64 || VT == MVT::i64)
        return std::make_pair(0U, &PPC::F8RCRegClass);
      break;
    case 'v':
      return std::make_pair(0U, &PPC::VRRCRegClass);
    case 'y':   // crrc
      return std::make_pair(0U, &PPC::CRRCRegClass);
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}


/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector.  If it is invalid, don't add anything to Ops.
void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                     std::string &Constraint,
                                                     std::vector<SDValue> &Ops,
                                                     SelectionDAG &DAG) const {
  SDValue Result(0, 0);

  // Only support length 1 constraints.
  if (Constraint.length() > 1) return;

  char Letter = Constraint[0];
  switch (Letter) {
  default: break;
  case 'I':
  case 'J':
  case 'K':
  case 'L':
  case 'M':
  case 'N':
  case 'O':
  case 'P': {
    ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
    if (!CST) return; // Must be an immediate to match.
    unsigned Value = CST->getZExtValue();
    switch (Letter) {
    default: llvm_unreachable("Unknown constraint letter!");
    case 'I':  // "I" is a signed 16-bit constant.
      if ((short)Value == (int)Value)
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
    case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
      if ((short)Value == 0)
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
      if ((Value >> 16) == 0)
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    case 'M':  // "M" is a constant that is greater than 31.
      if (Value > 31)
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    case 'N':  // "N" is a positive constant that is an exact power of two.
      if ((int)Value > 0 && isPowerOf2_32(Value))
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    case 'O':  // "O" is the constant zero.
      if (Value == 0)
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
      if ((short)-Value == (int)-Value)
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    }
    break;
  }
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }

  // Handle standard constraint letters.
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                              Type *Ty) const {
  // FIXME: PPC does not allow r+i addressing modes for vectors!

  // PPC allows a sign-extended 16-bit immediate field.
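  // (This is the D field of D-form loads/stores, e.g. 'lwz r3, -4(r31)';
  // larger offsets must be materialized separately.)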
  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
    return false;

  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // PPC only supports r+r:
  switch (AM.Scale) {
  case 0:  // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
      return false;
    // Otherwise we have r+r or r+i.
    break;
  case 2:
    if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r  or  2*r+i is not allowed.
      return false;
    // Allow 2*r as r+r.
    break;
  default:
    // No other scales are supported.
    return false;
  }

  return true;
}

/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode for load / store of the
/// given type.
bool PPCTargetLowering::isLegalAddressImmediate(int64_t V, Type *Ty) const {
  // PPC allows a sign-extended 16-bit immediate field.
  return (V > -(1 << 16) && V < (1 << 16)-1);
}

bool PPCTargetLowering::isLegalAddressImmediate(GlobalValue* GV) const {
  return false;
}

SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
                                           SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MFI->setReturnAddressIsTaken(true);

  DebugLoc dl = Op.getDebugLoc();
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  // Make sure the function does not optimize away the store of the RA to
  // the stack.
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setLRStoreRequired();
  bool isPPC64 = PPCSubTarget.isPPC64();
  bool isDarwinABI = PPCSubTarget.isDarwinABI();

  if (Depth > 0) {
    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
    SDValue Offset =
      DAG.getConstant(PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI),
                      isPPC64 ? MVT::i64 : MVT::i32);
    return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, dl, getPointerTy(),
                                   FrameAddr, Offset),
                       MachinePointerInfo(), false, false, false, 0);
  }

  // Just load the return address off the stack.
  SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
  return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
                     RetAddrFI, MachinePointerInfo(), false, false, false, 0);
}

SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
                                          SelectionDAG &DAG) const {
  DebugLoc dl = Op.getDebugLoc();
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = PtrVT == MVT::i64;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MFI->setFrameAddressIsTaken(true);

  // Naked functions never have a frame pointer, and so we use r1. For all
  // other functions, this decision must be delayed until during PEI.
  unsigned FrameReg;
  if (MF.getFunction()->getAttributes().hasAttribute(
        AttributeSet::FunctionIndex, Attribute::Naked))
    FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
  else
    FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;

  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
                                         PtrVT);
  while (Depth--)
    FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
                            FrameAddr, MachinePointerInfo(), false, false,
                            false, 0);
  return FrameAddr;
}

bool
PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The PowerPC target isn't yet aware of offsets.
  return false;
}

/// getOptimalMemOpType - Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
/// lowering. If DstAlign is zero, that means the destination alignment can
/// satisfy any constraint. Similarly, if SrcAlign is zero it means there
/// isn't a need to check it against the alignment requirement, probably
/// because the source does not need to be loaded. If 'IsMemset' is
/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
/// source is constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
                                           unsigned DstAlign, unsigned SrcAlign,
                                           bool IsMemset, bool ZeroMemset,
                                           bool MemcpyStrSrc,
                                           MachineFunction &MF) const {
  return PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
}

bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
                                                      bool *Fast) const {
  if (DisablePPCUnaligned)
    return false;

  // PowerPC supports unaligned memory access for simple non-vector types.
  // Although accessing unaligned addresses is not as efficient as accessing
  // aligned addresses, it is generally more efficient than manual expansion,
  // and it generally traps into software emulation only when crossing page
  // boundaries.

  if (!VT.isSimple())
    return false;

  if (VT.getSimpleVT().isVector())
    return false;

  if (VT == MVT::ppcf128)
    return false;

  if (Fast)
    *Fast = true;

  return true;
}

/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
/// is expanded to mul + add.
bool PPCTargetLowering::isFMAFasterThanMulAndAdd(EVT VT) const {
  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
  case MVT::v4f32:
    return true;
  default:
    break;
  }

  return false;
}

Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
  if (DisableILPPref)
    return TargetLowering::getSchedulingPreference(N);

  return Sched::ILP;
}
