PPCISelLowering.cpp revision ebe69fe11e48d322045d5949c83283927a0d790b
//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCCallingConv.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCTargetMachine.h"
#include "PPCTargetObjectFile.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

// FIXME: Remove this once soft-float is supported.
static cl::opt<bool> DisablePPCFloatInVariadic("disable-ppc-float-in-variadic",
    cl::desc("disable saving float registers for va_start on PPC"),
    cl::Hidden);

static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
    cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
    cl::desc("disable setting the node scheduling preference to ILP on PPC"),
    cl::Hidden);

static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
    cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);

// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;

PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
                                     const PPCSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  bool isPPC64 = Subtarget.isPPC64();
  setMinStackArgumentAlignment(isPPC64 ? 8 : 4);

  // Set up the register classes.
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
  addRegisterClass(MVT::f64, &PPC::F8RCRegClass);

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
  }

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-inc loads and stores.
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
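  // For illustration (not from this file): a pre-incremented load such as
  // "lwzu r4, 4(r3)" loads from r3+4 and also writes r3+4 back into r3,
  // folding the pointer bump into the memory access; the Legal entries above
  // let ISel form these update-form instructions.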

  if (Subtarget.useCRBits()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

    if (isPPC64 || Subtarget.hasFPCVT()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
                         isPPC64 ? MVT::i64 : MVT::i32);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType (ISD::UINT_TO_FP, MVT::i1,
                         isPPC64 ? MVT::i64 : MVT::i32);
    } else {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
    }

    // PowerPC does not support direct load / store of condition registers
    setOperationAction(ISD::LOAD, MVT::i1, Custom);
    setOperationAction(ISD::STORE, MVT::i1, Custom);

    // FIXME: Remove this once the ANDI glue bug is fixed:
    if (ANDIGlueBug)
      setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);

    for (MVT VT : MVT::integer_valuetypes()) {
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
      setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
      setTruncStoreAction(VT, MVT::i1, Expand);
    }

    addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
  }

  // This is used in the ppcf128->int sequence.  Note it has different semantics
  // from FP_ROUND:  that rounds to nearest, this rounds to zero.
  setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);

  // We do not currently implement these libm ops for PowerPC.
  setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
  setOperationAction(ISD::FCEIL,  MVT::ppcf128, Expand);
  setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
  setOperationAction(ISD::FRINT,  MVT::ppcf128, Expand);
  setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FREM, MVT::ppcf128, Expand);

  // PowerPC has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod/pow
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FPOW , MVT::f64, Expand);
  setOperationAction(ISD::FMA  , MVT::f64, Legal);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);
  setOperationAction(ISD::FPOW , MVT::f32, Expand);
  setOperationAction(ISD::FMA  , MVT::f32, Legal);

  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

  // If we're enabling GP optimizations, use hardware square root
  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
        Subtarget.hasFRE()))
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);

  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
        Subtarget.hasFRES()))
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  if (Subtarget.hasFCPSGN()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
  } else {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  }

  if (Subtarget.hasFPRND()) {
    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
    setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
    setOperationAction(ISD::FROUND, MVT::f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
    setOperationAction(ISD::FROUND, MVT::f32, Legal);
  }

  // PowerPC does not have BSWAP, CTPOP or CTTZ
  setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
  setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);

  if (Subtarget.hasPOPCNTD()) {
    setOperationAction(ISD::CTPOP, MVT::i32  , Legal);
    setOperationAction(ISD::CTPOP, MVT::i64  , Legal);
  } else {
    setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
    setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
  }

  // PowerPC does not have ROTR
  setOperationAction(ISD::ROTR, MVT::i32   , Expand);
  setOperationAction(ISD::ROTR, MVT::i64   , Expand);

  if (!Subtarget.useCRBits()) {
    // PowerPC does not have Select
    setOperationAction(ISD::SELECT, MVT::i32, Expand);
    setOperationAction(ISD::SELECT, MVT::i64, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Expand);
  }

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND which requires SetCC
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT,  MVT::Other, Expand);

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

  // PowerPC does not have [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

  setOperationAction(ISD::BITCAST, MVT::f32, Expand);
  setOperationAction(ISD::BITCAST, MVT::i32, Expand);
  setOperationAction(ISD::BITCAST, MVT::i64, Expand);
  setOperationAction(ISD::BITCAST, MVT::f64, Expand);

  // We cannot sextinreg(i1).  Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
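  // For illustration (not from this file): with the Expand action above,
  // sext_inreg(x, i1) on i32 becomes the shift pair (x << 31) >> 31
  // (arithmetic), replicating bit 0 across the register.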

  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
  // SjLj exception handling but a light-weight setjmp/longjmp replacement to
  // support continuations, user-level threading, etc. As a result, no other
  // SjLj exception interfaces are implemented; please don't build your own
  // exception handling based on them.
  // LLVM/Clang supports zero-cost DWARF exception handling.
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::BlockAddress,  MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i64, Custom);
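  // For illustration (not from this file): the custom lowering typically
  // splits such an address into PPCISD::Hi/PPCISD::Lo halves; on 32-bit ELF
  // these select to an "addis rD, rA, sym@ha" / "addi rD, rD, sym@l" pair.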

  // TRAP is legal.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.
  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  if (Subtarget.isSVR4ABI()) {
    if (isPPC64) {
      // VAARG always uses double-word chunks, so promote anything smaller.
      setOperationAction(ISD::VAARG, MVT::i1, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i8, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i16, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i32, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::Other, Expand);
    } else {
      // VAARG is custom lowered with the 32-bit SVR4 ABI.
      setOperationAction(ISD::VAARG, MVT::Other, Custom);
      setOperationAction(ISD::VAARG, MVT::i64, Custom);
    }
  } else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  if (Subtarget.isSVR4ABI() && !isPPC64)
    // VACOPY is custom lowered with the 32-bit SVR4 ABI.
    setOperationAction(ISD::VACOPY            , MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY            , MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // To handle counter-based loop conditions.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);

  // Comparisons that require checking two conditions.
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
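  // For illustration (not from this file): fcmpu sets the lt/gt/eq/un bits of
  // a CR field, and a predicate like SETONE (ordered and not equal) needs two
  // of them, so Expand rewrites it as two supported setcc nodes joined by a
  // logical op.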

  if (Subtarget.has64BitSupport()) {
    // Implementations with 64-bit support also have instructions for
    // converting between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

    if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  // With the instructions enabled under FPCVT, we can do everything.
  if (Subtarget.hasFPCVT()) {
    if (Subtarget.has64BitSupport()) {
      setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
    }

    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  }

  if (Subtarget.use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }
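
  // For illustration (not from this file): the *_PARTS nodes above split a
  // wide shift into operations on the value's two halves, e.g. a 64-bit left
  // shift on PPC32 becomes shifts and an or across the (lo, hi) register
  // pair, with separate handling for amounts of 32 or more.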

  if (Subtarget.hasAltivec()) {
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (MVT VT : MVT::vector_valuetypes()) {
      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD , VT, Legal);
      setOperationAction(ISD::SUB , VT, Legal);

      // Vector instructions introduced in P8
      if (Subtarget.hasP8Altivec()) {
        setOperationAction(ISD::CTPOP, VT, Legal);
        setOperationAction(ISD::CTLZ, VT, Legal);
      } else {
        setOperationAction(ISD::CTPOP, VT, Expand);
        setOperationAction(ISD::CTLZ, VT, Expand);
      }

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND   , VT, Promote);
      AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
      setOperationAction(ISD::OR    , VT, Promote);
      AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
      setOperationAction(ISD::XOR   , VT, Promote);
      AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
      setOperationAction(ISD::LOAD  , VT, Promote);
      AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType (ISD::STORE, VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL , VT, Expand);
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::FDIV, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FNEG, VT, Expand);
      setOperationAction(ISD::FSQRT, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FABS, VT, Expand);
      setOperationAction(ISD::FPOWI, VT, Expand);
      setOperationAction(ISD::FFLOOR, VT, Expand);
      setOperationAction(ISD::FCEIL,  VT, Expand);
      setOperationAction(ISD::FTRUNC, VT, Expand);
      setOperationAction(ISD::FRINT,  VT, Expand);
      setOperationAction(ISD::FNEARBYINT, VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UDIVREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::BSWAP, VT, Expand);
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::CTTZ, VT, Expand);
      setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::VSELECT, VT, Expand);
      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);

      for (MVT InnerVT : MVT::vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM; others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    setOperationAction(ISD::AND   , MVT::v4i32, Legal);
    setOperationAction(ISD::OR    , MVT::v4i32, Legal);
    setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32,
                       Subtarget.useCRBits() ? Legal : Expand);
    setOperationAction(ISD::STORE , MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);

    if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    }

    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    // Altivec does not contain unordered floating-point compare instructions
    setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETO,   MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);

    if (Subtarget.hasVSX()) {
      setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);

      setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
      setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
      setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
      setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
      setOperationAction(ISD::FROUND, MVT::v2f64, Legal);

      setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

      setOperationAction(ISD::MUL, MVT::v2f64, Legal);
      setOperationAction(ISD::FMA, MVT::v2f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);

      setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
      setOperationAction(ISD::VSELECT, MVT::v8i16, Legal);
      setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
      setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
      setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);

      // Share the Altivec comparison restrictions.
      setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETO,   MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);

      setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
      setOperationAction(ISD::STORE, MVT::v2f64, Legal);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);

      addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);

      addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);

      // VSX v2i64 only supports non-arithmetic operations.
      setOperationAction(ISD::ADD, MVT::v2i64, Expand);
      setOperationAction(ISD::SUB, MVT::v2i64, Expand);

      setOperationAction(ISD::SHL, MVT::v2i64, Expand);
      setOperationAction(ISD::SRA, MVT::v2i64, Expand);
      setOperationAction(ISD::SRL, MVT::v2i64, Expand);

      setOperationAction(ISD::SETCC, MVT::v2i64, Custom);

      setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
      AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
      setOperationAction(ISD::STORE, MVT::v2i64, Promote);
      AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);

      setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);

      // Vector operation legalization checks the result type of
      // SIGN_EXTEND_INREG, overall legalization checks the inner type.
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);

      addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
    }

    if (Subtarget.hasP8Altivec())
      addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
  }

  if (Subtarget.hasQPX()) {
    setOperationAction(ISD::FADD, MVT::v4f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
    setOperationAction(ISD::FREM, MVT::v4f64, Expand);

    setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
    setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);

    setOperationAction(ISD::LOAD  , MVT::v4f64, Custom);
    setOperationAction(ISD::STORE , MVT::v4f64, Custom);

    setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
    setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand);
    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);

    setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal);
    setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand);

    setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal);
    setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);

    setOperationAction(ISD::FNEG , MVT::v4f64, Legal);
    setOperationAction(ISD::FABS , MVT::v4f64, Legal);
    setOperationAction(ISD::FSIN , MVT::v4f64, Expand);
    setOperationAction(ISD::FCOS , MVT::v4f64, Expand);
    setOperationAction(ISD::FPOWI , MVT::v4f64, Expand);
    setOperationAction(ISD::FPOW , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand);
    setOperationAction(ISD::FEXP , MVT::v4f64, Expand);
    setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand);

    setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal);

    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal);

    addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);

    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FREM, MVT::v4f32, Expand);

    setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
    setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);

    setOperationAction(ISD::LOAD  , MVT::v4f32, Custom);
    setOperationAction(ISD::STORE , MVT::v4f32, Custom);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand);
    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal);
    setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand);

    setOperationAction(ISD::FNEG , MVT::v4f32, Legal);
    setOperationAction(ISD::FABS , MVT::v4f32, Legal);
    setOperationAction(ISD::FSIN , MVT::v4f32, Expand);
    setOperationAction(ISD::FCOS , MVT::v4f32, Expand);
    setOperationAction(ISD::FPOWI , MVT::v4f32, Expand);
    setOperationAction(ISD::FPOW , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP , MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand);

    setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);

    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);

    setOperationAction(ISD::AND , MVT::v4i1, Legal);
    setOperationAction(ISD::OR , MVT::v4i1, Legal);
    setOperationAction(ISD::XOR , MVT::v4i1, Legal);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);

    setOperationAction(ISD::LOAD  , MVT::v4i1, Custom);
    setOperationAction(ISD::STORE , MVT::v4i1, Custom);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand);
    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);

    setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);

    addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);

    setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
    setOperationAction(ISD::FCEIL,  MVT::v4f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL,  MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

    setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);

    // These need to set FE_INEXACT, and so cannot be vectorized here.
    setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v4f32, Expand);

    if (TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    } else {
      setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
      setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);

      setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
    }
  }

  if (Subtarget.has64BitSupport())
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);

  if (!isPPC64) {
    setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
    setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasAltivec()) {
    // Altivec instructions set fields to all zeros or all ones.
    setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  }
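
  // For illustration (not from this file): under
  // ZeroOrNegativeOneBooleanContent, a compare like vcmpequw yields all-zeros
  // or all-ones per element, so the result can drive vsel or plain
  // and/andc/or logic directly.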

  if (!isPPC64) {
    // These libcalls are not available in 32-bit.
    setLibcallName(RTLIB::SHL_I128, nullptr);
    setLibcallName(RTLIB::SRL_I128, nullptr);
    setLibcallName(RTLIB::SRA_I128, nullptr);
  }

  if (isPPC64) {
    setStackPointerRegisterToSaveRestore(PPC::X1);
    setExceptionPointerRegister(PPC::X3);
    setExceptionSelectorRegister(PPC::X4);
  } else {
    setStackPointerRegisterToSaveRestore(PPC::R1);
    setExceptionPointerRegister(PPC::R3);
    setExceptionSelectorRegister(PPC::R4);
  }

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
  if (Subtarget.hasFPCVT())
    setTargetDAGCombine(ISD::UINT_TO_FP);
  setTargetDAGCombine(ISD::LOAD);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);
  if (Subtarget.useCRBits())
    setTargetDAGCombine(ISD::BRCOND);
  setTargetDAGCombine(ISD::BSWAP);
  setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_VOID);

  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  if (Subtarget.useCRBits()) {
    setTargetDAGCombine(ISD::TRUNCATE);
    setTargetDAGCombine(ISD::SETCC);
    setTargetDAGCombine(ISD::SELECT_CC);
  }

  // Use reciprocal estimates.
  if (TM.Options.UnsafeFPMath) {
    setTargetDAGCombine(ISD::FDIV);
    setTargetDAGCombine(ISD::FSQRT);
  }

  // Darwin long double math library functions have $LDBL128 appended.
  if (Subtarget.isDarwin()) {
    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
    setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
    setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
    setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
    setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
    setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
  }

  // With 32 condition bits, we don't need to sink (and duplicate) compares
  // aggressively in CodeGenPrep.
  if (Subtarget.useCRBits()) {
    setHasMultipleConditionRegisters();
    setJumpIsExpensive();
  }

  setMinFunctionAlignment(2);
  if (Subtarget.isDarwin())
    setPrefFunctionAlignment(4);

  switch (Subtarget.getDarwinDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_A2:
  case PPC::DIR_E500mc:
  case PPC::DIR_E5500:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
    setPrefFunctionAlignment(4);
    setPrefLoopAlignment(4);
    break;
  }

  setInsertFencesForAtomic(true);

  if (Subtarget.enableMachineScheduler())
    setSchedulingPreference(Sched::Source);
  else
    setSchedulingPreference(Sched::Hybrid);

  computeRegisterProperties(STI.getRegisterInfo());

  // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
  if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
      Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
    MaxStoresPerMemset = 32;
    MaxStoresPerMemsetOptSize = 16;
    MaxStoresPerMemcpy = 32;
    MaxStoresPerMemcpyOptSize = 8;
    MaxStoresPerMemmove = 32;
    MaxStoresPerMemmoveOptSize = 8;
  }
}

/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
                             unsigned MaxMaxAlign) {
  if (MaxAlign == MaxMaxAlign)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
      MaxAlign = 32;
    else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
      MaxAlign = 16;
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    unsigned EltAlign = 0;
    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      unsigned EltAlign = 0;
      getMaxByValAlign(STy->getElementType(i), EltAlign, MaxMaxAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == MaxMaxAlign)
        break;
    }
  }
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
  // Darwin passes everything on a 4-byte boundary.
  if (Subtarget.isDarwin())
    return 4;

  // 16-byte and wider vectors are passed on a 16-byte boundary. Everything
  // else is passed on an 8-byte boundary on PPC64 and a 4-byte boundary on
  // PPC32.
  unsigned Align = Subtarget.isPPC64() ? 8 : 4;
  if (Subtarget.hasAltivec() || Subtarget.hasQPX())
    getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
  return Align;
}
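
// For illustration (not from this file): given a struct containing a v4i32
// field on an Altivec subtarget, getMaxByValAlign raises the returned
// alignment from the 4/8-byte default to 16 so the byval copy keeps the
// vector field naturally aligned.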

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return nullptr;
  case PPCISD::FSEL:            return "PPCISD::FSEL";
  case PPCISD::FCFID:           return "PPCISD::FCFID";
  case PPCISD::FCFIDU:          return "PPCISD::FCFIDU";
  case PPCISD::FCFIDS:          return "PPCISD::FCFIDS";
  case PPCISD::FCFIDUS:         return "PPCISD::FCFIDUS";
  case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
  case PPCISD::FCTIDUZ:         return "PPCISD::FCTIDUZ";
  case PPCISD::FCTIWUZ:         return "PPCISD::FCTIWUZ";
  case PPCISD::FRE:             return "PPCISD::FRE";
  case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
  case PPCISD::STFIWX:          return "PPCISD::STFIWX";
  case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM:           return "PPCISD::VPERM";
  case PPCISD::CMPB:            return "PPCISD::CMPB";
  case PPCISD::Hi:              return "PPCISD::Hi";
  case PPCISD::Lo:              return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
  case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
  case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL:             return "PPCISD::SRL";
  case PPCISD::SRA:             return "PPCISD::SRA";
  case PPCISD::SHL:             return "PPCISD::SHL";
  case PPCISD::CALL:            return "PPCISD::CALL";
  case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
  case PPCISD::MTCTR:           return "PPCISD::MTCTR";
  case PPCISD::BCTRL:           return "PPCISD::BCTRL";
  case PPCISD::BCTRL_LOAD_TOC:  return "PPCISD::BCTRL_LOAD_TOC";
  case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
  case PPCISD::READ_TIME_BASE:  return "PPCISD::READ_TIME_BASE";
  case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
  case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
  case PPCISD::VCMP:            return "PPCISD::VCMP";
  case PPCISD::VCMPo:           return "PPCISD::VCMPo";
  case PPCISD::LBRX:            return "PPCISD::LBRX";
  case PPCISD::STBRX:           return "PPCISD::STBRX";
  case PPCISD::LFIWAX:          return "PPCISD::LFIWAX";
  case PPCISD::LFIWZX:          return "PPCISD::LFIWZX";
  case PPCISD::LARX:            return "PPCISD::LARX";
  case PPCISD::STCX:            return "PPCISD::STCX";
  case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
  case PPCISD::BDNZ:            return "PPCISD::BDNZ";
  case PPCISD::BDZ:             return "PPCISD::BDZ";
  case PPCISD::MFFS:            return "PPCISD::MFFS";
  case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
  case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
  case PPCISD::CR6SET:          return "PPCISD::CR6SET";
  case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
  case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
  case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
  case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
  case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
  case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
  case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
  case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
  case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
  case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
  case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
  case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
  case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
  case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
  case PPCISD::SC:              return "PPCISD::SC";
  case PPCISD::QVFPERM:         return "PPCISD::QVFPERM";
  case PPCISD::QVGPCI:          return "PPCISD::QVGPCI";
  case PPCISD::QVALIGNI:        return "PPCISD::QVALIGNI";
  case PPCISD::QVESPLATI:       return "PPCISD::QVESPLATI";
  case PPCISD::QBFLT:           return "PPCISD::QBFLT";
  case PPCISD::QVLFSb:          return "PPCISD::QVLFSb";
  }
}

EVT PPCTargetLowering::getSetCCResultType(LLVMContext &C, EVT VT) const {
  if (!VT.isVector())
    return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;

  if (Subtarget.hasQPX())
    return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());

  return VT.changeVectorElementTypeToInteger();
}

bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
  return true;
}

//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 1;
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i),    i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j))
        return false;
  }
  return true;
}
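
// For illustration (not from this file): on a big-endian subtarget the
// two-input VPKUHUM mask is {1,3,5,...,31}, i.e. the low-order byte of each
// halfword from both inputs, which is exactly the i*2+1 pattern checked
// above.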

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+3))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 2;
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1))
        return false;
  }
  return true;
}

/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}
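
// For illustration (not from this file): with UnitSize == 1, LHSStart == 8,
// and RHSStart == 24, isVMerge matches the big-endian vmrglb pattern
// {8,24,9,25,...,15,31}, interleaving the low halves of the two inputs.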

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2).  For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  }
}

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2).  For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  }
}

/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
/// The ShuffleKind distinguishes between big-endian operations with two
/// different inputs (0), either-endian operations with two identical inputs
/// (1), and little-endian operations with two different inputs (2).  For the
/// latter, the input operands are swapped (see PPCInstrAltivec.td).
int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
                             SelectionDAG &DAG) {
  if (N->getValueType(0) != MVT::v16i8)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;

  ShiftAmt -= i;
  bool isLE = DAG.getTarget().getDataLayout()->isLittleEndian();

  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
        return -1;
  } else if (ShuffleKind == 1) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
        return -1;
  } else
    return -1;

  if (ShuffleKind == 2 && isLE)
    ShiftAmt = 16 - ShiftAmt;

  return ShiftAmt;
}
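
// For illustration (not from this file): the big-endian two-input mask
// {4,5,6,...,19} starts at 4 and stays consecutive, so this returns 4, the
// byte shift amount for the corresponding vsldoi.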
1240
1241/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
1242/// specifies a splat of a single element that is suitable for input to
1243/// VSPLTB/VSPLTH/VSPLTW.
1244bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
1245  assert(N->getValueType(0) == MVT::v16i8 &&
1246         (EltSize == 1 || EltSize == 2 || EltSize == 4));
1247
1248  // This is a splat operation if each element of the permute is the same, and
1249  // if the value doesn't reference the second vector.
1250  unsigned ElementBase = N->getMaskElt(0);
1251
1252  // FIXME: Handle UNDEF elements too!
1253  if (ElementBase >= 16)
1254    return false;
1255
1256  // Check that the indices are consecutive, in the case of a multi-byte element
1257  // splatted with a v16i8 mask.
1258  for (unsigned i = 1; i != EltSize; ++i)
1259    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
1260      return false;
1261
1262  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
1263    if (N->getMaskElt(i) < 0) continue;
1264    for (unsigned j = 0; j != EltSize; ++j)
1265      if (N->getMaskElt(i+j) != N->getMaskElt(j))
1266        return false;
1267  }
1268  return true;
1269}
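
// For example (a sketch): with EltSize == 2, a splat of halfword 3 of the
// first input is the byte mask <6,7, 6,7, 6,7, 6,7, 6,7, 6,7, 6,7, 6,7>.
// The first loop checks that bytes 6 and 7 are consecutive; the second
// checks that every subsequent 2-byte chunk repeats the first one.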
1270
1271/// isAllNegativeZeroVector - Returns true if all elements of build_vector
1272/// are -0.0.
1273bool PPC::isAllNegativeZeroVector(SDNode *N) {
1274  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);
1275
1276  APInt APVal, APUndef;
1277  unsigned BitSize;
1278  bool HasAnyUndefs;
1279
1280  if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true))
1281    if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
1282      return CFP->getValueAPF().isNegZero();
1283
1284  return false;
1285}
1286
1287/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
1288/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
1289unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
1290                                SelectionDAG &DAG) {
1291  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1292  assert(isSplatShuffleMask(SVOp, EltSize));
1293  if (DAG.getTarget().getDataLayout()->isLittleEndian())
1294    return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
1295  else
1296    return SVOp->getMaskElt(0) / EltSize;
1297}
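
// Worked example (illustrative): for EltSize == 4 with mask element 0 equal
// to 8, big-endian returns 8/4 = 2 (vspltw selects word 2).  Little-endian
// numbers the elements from the other end, so the same shuffle yields
// (16/4) - 1 - 2 = 1.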
1298
1299/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
1300/// by using a vspltis[bhw] instruction of the specified element size, return
1301/// the constant being splatted.  The ByteSize field indicates the number of
1302/// bytes of each element [124] -> [bhw].
1303SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
1304  SDValue OpVal(nullptr, 0);
1305
1306  // If ByteSize of the splat is bigger than the element size of the
1307  // build_vector, then we have a case where we are checking for a splat where
1308  // multiple elements of the buildvector are folded together into a single
1309  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).

1310  unsigned EltSize = 16/N->getNumOperands();
1311  if (EltSize < ByteSize) {
1312    unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
1313    SDValue UniquedVals[4];
1314    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
1315
1316    // See if all of the elements in the buildvector agree across.
1317    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1318      if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1319      // If the element isn't a constant, bail out entirely.
1320      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
1321
1322
1323      if (!UniquedVals[i&(Multiple-1)].getNode())
1324        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
1325      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
1326        return SDValue();  // no match.
1327    }
1328
1329    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
1330    // either constant or undef values that are identical for each chunk.  See
1331    // if these chunks can form into a larger vspltis*.
1332
1333    // Check to see if all of the leading entries are either 0 or -1.  If
1334    // neither, then this won't fit into the immediate field.
1335    bool LeadingZero = true;
1336    bool LeadingOnes = true;
1337    for (unsigned i = 0; i != Multiple-1; ++i) {
1338      if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.
1339
1340      LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
1341      LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
1342    }
1343    // Finally, check the least significant entry.
1344    if (LeadingZero) {
1345      if (!UniquedVals[Multiple-1].getNode())
1346        return DAG.getTargetConstant(0, MVT::i32);  // 0,0,0,undef
1347      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
1348      if (Val < 16)
1349        return DAG.getTargetConstant(Val, MVT::i32);  // 0,0,0,4 -> vspltisw(4)
1350    }
1351    if (LeadingOnes) {
1352      if (!UniquedVals[Multiple-1].getNode())
1353        return DAG.getTargetConstant(~0U, MVT::i32);  // -1,-1,-1,undef
1354      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
1355      if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
1356        return DAG.getTargetConstant(Val, MVT::i32);
1357    }
1358
1359    return SDValue();
1360  }
1361
1362  // Check to see if this buildvec has a single non-undef value in its elements.
1363  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1364    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1365    if (!OpVal.getNode())
1366      OpVal = N->getOperand(i);
1367    else if (OpVal != N->getOperand(i))
1368      return SDValue();
1369  }
1370
1371  if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.
1372
1373  unsigned ValSizeInBytes = EltSize;
1374  uint64_t Value = 0;
1375  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1376    Value = CN->getZExtValue();
1377  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1378    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
1379    Value = FloatToBits(CN->getValueAPF().convertToFloat());
1380  }
1381
1382  // If the splat value is larger than the element value, then we can never do
1383  // this splat.  The only replicated value that would fit into the
1384  // immediate field is zero, and we prefer to use vxor for it.
1385  if (ValSizeInBytes < ByteSize) return SDValue();
1386
1387  // If the element value is larger than the splat value, cut it in half and
1388  // check to see if the two halves are equal.  Continue doing this until we
1389  // get to ByteSize.  This allows us to handle 0x01010101 as 0x01.
1390  while (ValSizeInBytes > ByteSize) {
1391    ValSizeInBytes >>= 1;
1392
1393    // If the top half equals the bottom half, we're still ok.
1394    if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
1395         (Value                        & ((1 << (8*ValSizeInBytes))-1)))
1396      return SDValue();
1397  }
1398
1399  // Properly sign extend the value.
1400  int MaskVal = SignExtend32(Value, ByteSize * 8);
1401
1402  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
1403  if (MaskVal == 0) return SDValue();
1404
1405  // Finally, if this value fits in a 5-bit sext field, return it.
1406  if (SignExtend32<5>(MaskVal) == MaskVal)
1407    return DAG.getTargetConstant(MaskVal, MVT::i32);
1408  return SDValue();
1409}
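
// Worked example (a sketch): for a v4i32 build_vector splatting 0x01010101
// with ByteSize == 1, the halving loop reduces 0x01010101 -> 0x0101 -> 0x01;
// each step succeeds because the two halves match.  The result 1 fits in the
// 5-bit signed field, so this returns 1 (i.e. vspltisb 1).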
1410
1411/// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
1412/// amount, otherwise return -1.
1413int PPC::isQVALIGNIShuffleMask(SDNode *N) {
1414  EVT VT = N->getValueType(0);
1415  if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
1416    return -1;
1417
1418  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1419
1420  // Find the first non-undef value in the shuffle mask.
1421  unsigned i;
1422  for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
1423    /*search*/;
1424
1425  if (i == 4) return -1;  // all undef.
1426
1427  // Otherwise, check to see if the rest of the elements are consecutively
1428  // numbered from this value.
1429  unsigned ShiftAmt = SVOp->getMaskElt(i);
1430  if (ShiftAmt < i) return -1;
1431  ShiftAmt -= i;
1432
1433  // Check the rest of the elements to see if they are consecutive.
1434  for (++i; i != 4; ++i)
1435    if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
1436      return -1;
1437
1438  return ShiftAmt;
1439}
1440
1441//===----------------------------------------------------------------------===//
1442//  Addressing Mode Selection
1443//===----------------------------------------------------------------------===//
1444
1445/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
1446/// or 64-bit immediate, and if the value can be accurately represented as a
1447/// sign extension from a 16-bit value.  If so, this returns true and stores
1448/// the immediate in Imm.
1449static bool isIntS16Immediate(SDNode *N, short &Imm) {
1450  if (!isa<ConstantSDNode>(N))
1451    return false;
1452
1453  Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
1454  if (N->getValueType(0) == MVT::i32)
1455    return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
1456  else
1457    return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
1458}
1459static bool isIntS16Immediate(SDValue Op, short &Imm) {
1460  return isIntS16Immediate(Op.getNode(), Imm);
1461}
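
// For example (illustrative): an i32 constant 0xFFFF8000 zero-extends to
// 0xFFFF8000, truncates to Imm == -32768, and -32768 == (int32_t)0xFFFF8000,
// so it is accepted.  The constant 0x00008000 also truncates to -32768, but
// -32768 != 32768, so it is rejected (it does not sign-extend from 16 bits).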
1462
1463
1464/// SelectAddressRegReg - Given the specified address, check to see if it
1465/// can be represented as an indexed [r+r] operation.  Returns false if it
1466/// can be more efficiently represented with [r+imm].
1467bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
1468                                            SDValue &Index,
1469                                            SelectionDAG &DAG) const {
1470  short imm = 0;
1471  if (N.getOpcode() == ISD::ADD) {
1472    if (isIntS16Immediate(N.getOperand(1), imm))
1473      return false;    // r+i
1474    if (N.getOperand(1).getOpcode() == PPCISD::Lo)
1475      return false;    // r+i
1476
1477    Base = N.getOperand(0);
1478    Index = N.getOperand(1);
1479    return true;
1480  } else if (N.getOpcode() == ISD::OR) {
1481    if (isIntS16Immediate(N.getOperand(1), imm))
1482      return false;    // r+i; fold the immediate if we can.
1483
1484    // If this is an or of disjoint bitfields, we can codegen this as an add
1485    // (for better address arithmetic) if the LHS and RHS of the OR are provably
1486    // disjoint.
1487    APInt LHSKnownZero, LHSKnownOne;
1488    APInt RHSKnownZero, RHSKnownOne;
1489    DAG.computeKnownBits(N.getOperand(0),
1490                         LHSKnownZero, LHSKnownOne);
1491
1492    if (LHSKnownZero.getBoolValue()) {
1493      DAG.computeKnownBits(N.getOperand(1),
1494                           RHSKnownZero, RHSKnownOne);
1495      // If all of the bits are known zero on the LHS or RHS, the add won't
1496      // carry.
1497      if (~(LHSKnownZero | RHSKnownZero) == 0) {
1498        Base = N.getOperand(0);
1499        Index = N.getOperand(1);
1500        return true;
1501      }
1502    }
1503  }
1504
1505  return false;
1506}
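
// Illustrative example (not from the test suite): for
//   N = (or (shl %x, 16), (and %y, 0xFFFF))
// the low 16 bits of the LHS are known zero and the high bits of the RHS are
// known zero, so ~(LHSKnownZero | RHSKnownZero) == 0 and the OR is selected
// as [r+r] with Base = (shl %x, 16) and Index = (and %y, 0xFFFF).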
1507
1508// If we happen to be doing an i64 load or store into a stack slot that has
1509// less than a 4-byte alignment, then the frame-index elimination may need to
1510// use an indexed load or store instruction (because the offset may not be a
1511// multiple of 4). The extra register needed to hold the offset comes from the
1512// register scavenger, and it is possible that the scavenger will need to use
1513// an emergency spill slot. As a result, we need to make sure that a spill slot
1514// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
1515// stack slot.
1516static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
1517  // FIXME: This does not handle the LWA case.
1518  if (VT != MVT::i64)
1519    return;
1520
1521  // NOTE: We'll exclude negative FIs here, which come from argument
1522  // lowering, because there are no known test cases triggering this problem
1523  // using packed structures (or similar). We can remove this exclusion if
1524  // we find such a test case. The reason why this is so test-case driven is
1525  // because this entire 'fixup' is only to prevent crashes (from the
1526  // register scavenger) on not-really-valid inputs. For example, if we have:
1527  //   %a = alloca i1
1528  //   %b = bitcast i1* %a to i64*
1529  //   store i64 0, i64* %b
1530  // then the store should really be marked as 'align 1', but is not. If it
1531  // were marked as 'align 1' then the indexed form would have been
1532  // instruction-selected initially, and the problem this 'fixup' is preventing
1533  // won't happen regardless.
1534  if (FrameIdx < 0)
1535    return;
1536
1537  MachineFunction &MF = DAG.getMachineFunction();
1538  MachineFrameInfo *MFI = MF.getFrameInfo();
1539
1540  unsigned Align = MFI->getObjectAlignment(FrameIdx);
1541  if (Align >= 4)
1542    return;
1543
1544  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
1545  FuncInfo->setHasNonRISpills();
1546}
1547
1548/// Returns true if the address N can be represented by a base register plus
1549/// a signed 16-bit displacement [r+imm], and if it is not better
1550/// represented as reg+reg.  If Aligned is true, only accept displacements
1551/// suitable for STD and friends, i.e. multiples of 4.
1552bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
1553                                            SDValue &Base,
1554                                            SelectionDAG &DAG,
1555                                            bool Aligned) const {
1556  // FIXME dl should come from parent load or store, not from address
1557  SDLoc dl(N);
1558  // If this can be more profitably realized as r+r, fail.
1559  if (SelectAddressRegReg(N, Disp, Base, DAG))
1560    return false;
1561
1562  if (N.getOpcode() == ISD::ADD) {
1563    short imm = 0;
1564    if (isIntS16Immediate(N.getOperand(1), imm) &&
1565        (!Aligned || (imm & 3) == 0)) {
1566      Disp = DAG.getTargetConstant(imm, N.getValueType());
1567      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
1568        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
1569        fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
1570      } else {
1571        Base = N.getOperand(0);
1572      }
1573      return true; // [r+i]
1574    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
1575      // Match LOAD (ADD (X, Lo(G))).
1576      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
1577             && "Cannot handle constant offsets yet!");
1578      Disp = N.getOperand(1).getOperand(0);  // The global address.
1579      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
1580             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
1581             Disp.getOpcode() == ISD::TargetConstantPool ||
1582             Disp.getOpcode() == ISD::TargetJumpTable);
1583      Base = N.getOperand(0);
1584      return true;  // [&g+r]
1585    }
1586  } else if (N.getOpcode() == ISD::OR) {
1587    short imm = 0;
1588    if (isIntS16Immediate(N.getOperand(1), imm) &&
1589        (!Aligned || (imm & 3) == 0)) {
1590      // If this is an or of disjoint bitfields, we can codegen this as an add
1591      // (for better address arithmetic) if the LHS and RHS of the OR are
1592      // provably disjoint.
1593      APInt LHSKnownZero, LHSKnownOne;
1594      DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
1595
1596      if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
1597        // If all of the bits are known zero on the LHS or RHS, the add won't
1598        // carry.
1599        if (FrameIndexSDNode *FI =
1600              dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
1601          Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
1602          fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
1603        } else {
1604          Base = N.getOperand(0);
1605        }
1606        Disp = DAG.getTargetConstant(imm, N.getValueType());
1607        return true;
1608      }
1609    }
1610  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
1611    // Loading from a constant address.
1612
1613    // If this address fits entirely in a 16-bit sext immediate field, codegen
1614    // this as "d, 0"
1615    short Imm;
1616    if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
1617      Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
1618      Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
1619                             CN->getValueType(0));
1620      return true;
1621    }
1622
1623    // Handle 32-bit sext immediates with LIS + addr mode.
1624    if ((CN->getValueType(0) == MVT::i32 ||
1625         (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
1626        (!Aligned || (CN->getZExtValue() & 3) == 0)) {
1627      int Addr = (int)CN->getZExtValue();
1628
1629      // Break this down into an LIS + disp.
1630      Disp = DAG.getTargetConstant((short)Addr, MVT::i32);
1631
1632      Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32);
1633      unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
1634      Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
1635      return true;
1636    }
1637  }
1638
1639  Disp = DAG.getTargetConstant(0, getPointerTy());
1640  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
1641    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
1642    fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
1643  } else
1644    Base = N;
1645  return true;      // [r+0]
1646}
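
// Worked example for the LIS + disp case (illustrative): for the constant
// address 0x12348000, Disp = (short)0x8000 = -32768 and
// Base = (0x12348000 - (-32768)) >> 16 = 0x1235.  The materialized
// "lis 0x1235" produces 0x12350000, and 0x12350000 + (-32768) = 0x12348000,
// so the sign extension of the low halfword is compensated for.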
1647
1648/// SelectAddressRegRegOnly - Given the specified address, force it to be
1649/// represented as an indexed [r+r] operation.
1650bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
1651                                                SDValue &Index,
1652                                                SelectionDAG &DAG) const {
1653  // Check to see if we can easily represent this as an [r+r] address.  This
1654  // will fail if it thinks that the address is more profitably represented as
1655  // reg+imm, e.g. where imm = 0.
1656  if (SelectAddressRegReg(N, Base, Index, DAG))
1657    return true;
1658
1659  // If the operand is an addition, always emit this as [r+r], since this is
1660  // better (for code size, and execution, as the memop does the add for free)
1661  // than emitting an explicit add.
1662  if (N.getOpcode() == ISD::ADD) {
1663    Base = N.getOperand(0);
1664    Index = N.getOperand(1);
1665    return true;
1666  }
1667
1668  // Otherwise, do it the hard way, using R0 as the base register.
1669  Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
1670                         N.getValueType());
1671  Index = N;
1672  return true;
1673}
1674
1675/// getPreIndexedAddressParts - Returns true if the node's address can be
1676/// legally represented as a pre-indexed load/store address, and if so,
1677/// returns the base pointer, offset, and addressing mode by reference.
1678bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
1679                                                  SDValue &Offset,
1680                                                  ISD::MemIndexedMode &AM,
1681                                                  SelectionDAG &DAG) const {
1682  if (DisablePPCPreinc) return false;
1683
1684  bool isLoad = true;
1685  SDValue Ptr;
1686  EVT VT;
1687  unsigned Alignment;
1688  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
1689    Ptr = LD->getBasePtr();
1690    VT = LD->getMemoryVT();
1691    Alignment = LD->getAlignment();
1692  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
1693    Ptr = ST->getBasePtr();
1694    VT  = ST->getMemoryVT();
1695    Alignment = ST->getAlignment();
1696    isLoad = false;
1697  } else
1698    return false;
1699
1700  // PowerPC doesn't have preinc load/store instructions for vectors (except
1701  // for QPX, which does have preinc r+r forms).
1702  if (VT.isVector()) {
1703    if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
1704      return false;
1705    } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
1706      AM = ISD::PRE_INC;
1707      return true;
1708    }
1709  }
1710
1711  if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
1712
1713    // Common code will reject creating a pre-inc form if the base pointer
1714    // is a frame index, or if N is a store and the base pointer is either
1715    // the same as or a predecessor of the value being stored.  Check for
1716    // those situations here, and try with swapped Base/Offset instead.
1717    bool Swap = false;
1718
1719    if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
1720      Swap = true;
1721    else if (!isLoad) {
1722      SDValue Val = cast<StoreSDNode>(N)->getValue();
1723      if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
1724        Swap = true;
1725    }
1726
1727    if (Swap)
1728      std::swap(Base, Offset);
1729
1730    AM = ISD::PRE_INC;
1731    return true;
1732  }
1733
1734  // LDU/STU can only handle immediates that are a multiple of 4.
1735  if (VT != MVT::i64) {
1736    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false))
1737      return false;
1738  } else {
1739    // LDU/STU need an address with at least 4-byte alignment.
1740    if (Alignment < 4)
1741      return false;
1742
1743    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true))
1744      return false;
1745  }
1746
1747  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
1748    // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
1749    // sext i32 to i64 when addr mode is r+i.
1750    if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
1751        LD->getExtensionType() == ISD::SEXTLOAD &&
1752        isa<ConstantSDNode>(Offset))
1753      return false;
1754  }
1755
1756  AM = ISD::PRE_INC;
1757  return true;
1758}
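
// For example (a sketch): a load from (add %r29, 8) can become
// "lwzu r3, 8(r29)", which both loads from r29 + 8 and writes the
// incremented address back into r29.  For i64 the r+i forms are ldu/stdu,
// whose displacement must be a multiple of 4 -- hence the alignment check
// above.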
1759
1760//===----------------------------------------------------------------------===//
1761//  LowerOperation implementation
1762//===----------------------------------------------------------------------===//
1763
1764/// GetLabelAccessInfo - Return true if we should reference labels using a
1765/// PICBase, and set HiOpFlags and LoOpFlags to the target MO flags.
1766static bool GetLabelAccessInfo(const TargetMachine &TM,
1767                               const PPCSubtarget &Subtarget,
1768                               unsigned &HiOpFlags, unsigned &LoOpFlags,
1769                               const GlobalValue *GV = nullptr) {
1770  HiOpFlags = PPCII::MO_HA;
1771  LoOpFlags = PPCII::MO_LO;
1772
1773  // Don't use the pic base if not in PIC relocation model.
1774  bool isPIC = TM.getRelocationModel() == Reloc::PIC_;
1775
1776  if (isPIC) {
1777    HiOpFlags |= PPCII::MO_PIC_FLAG;
1778    LoOpFlags |= PPCII::MO_PIC_FLAG;
1779  }
1780
1781  // If this is a reference to a global value that requires a non-lazy-ptr, make
1782  // sure that instruction lowering adds it.
1783  if (GV && Subtarget.hasLazyResolverStub(GV)) {
1784    HiOpFlags |= PPCII::MO_NLP_FLAG;
1785    LoOpFlags |= PPCII::MO_NLP_FLAG;
1786
1787    if (GV->hasHiddenVisibility()) {
1788      HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
1789      LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
1790    }
1791  }
1792
1793  return isPIC;
1794}
1795
1796static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
1797                             SelectionDAG &DAG) {
1798  EVT PtrVT = HiPart.getValueType();
1799  SDValue Zero = DAG.getConstant(0, PtrVT);
1800  SDLoc DL(HiPart);
1801
1802  SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
1803  SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
1804
1805  // With PIC, the first instruction is actually "GR+hi(&G)".
1806  if (isPIC)
1807    Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
1808                     DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
1809
1810  // Generate non-pic code that has direct accesses to the constant pool.
1811  // The address of the global is just (hi(&g)+lo(&g)).
1812  return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
1813}
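
// The sum above typically materializes as (a sketch of the non-PIC case):
//   lis  rT, sym@ha
//   addi rT, rT, sym@l
// where @ha is pre-adjusted so that (sym@ha << 16) + sign_ext(sym@l) equals
// the full address even when the low halfword is negative.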
1814
1815static void setUsesTOCBasePtr(MachineFunction &MF) {
1816  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
1817  FuncInfo->setUsesTOCBasePtr();
1818}
1819
1820static void setUsesTOCBasePtr(SelectionDAG &DAG) {
1821  setUsesTOCBasePtr(DAG.getMachineFunction());
1822}
1823
1824static SDValue getTOCEntry(SelectionDAG &DAG, SDLoc dl, bool Is64Bit,
1825                           SDValue GA) {
1826  EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
1827  SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) :
1828                DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
1829
1830  SDValue Ops[] = { GA, Reg };
1831  return DAG.getMemIntrinsicNode(PPCISD::TOC_ENTRY, dl,
1832                                 DAG.getVTList(VT, MVT::Other), Ops, VT,
1833                                 MachinePointerInfo::getGOT(), 0, false, true,
1834                                 false, 0);
1835}
1836
1837SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
1838                                             SelectionDAG &DAG) const {
1839  EVT PtrVT = Op.getValueType();
1840  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
1841  const Constant *C = CP->getConstVal();
1842
1843  // 64-bit SVR4 ABI code is always position-independent.
1844  // The actual address of the GlobalValue is stored in the TOC.
1845  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
1846    setUsesTOCBasePtr(DAG);
1847    SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
1848    return getTOCEntry(DAG, SDLoc(CP), true, GA);
1849  }
1850
1851  unsigned MOHiFlag, MOLoFlag;
1852  bool isPIC =
1853      GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag);
1854
1855  if (isPIC && Subtarget.isSVR4ABI()) {
1856    SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
1857                                           PPCII::MO_PIC_FLAG);
1858    return getTOCEntry(DAG, SDLoc(CP), false, GA);
1859  }
1860
1861  SDValue CPIHi =
1862    DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
1863  SDValue CPILo =
1864    DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
1865  return LowerLabelRef(CPIHi, CPILo, isPIC, DAG);
1866}
1867
1868SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
1869  EVT PtrVT = Op.getValueType();
1870  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
1871
1872  // 64-bit SVR4 ABI code is always position-independent.
1873  // The actual address of the GlobalValue is stored in the TOC.
1874  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
1875    setUsesTOCBasePtr(DAG);
1876    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
1877    return getTOCEntry(DAG, SDLoc(JT), true, GA);
1878  }
1879
1880  unsigned MOHiFlag, MOLoFlag;
1881  bool isPIC =
1882      GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag);
1883
1884  if (isPIC && Subtarget.isSVR4ABI()) {
1885    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
1886                                        PPCII::MO_PIC_FLAG);
1887    return getTOCEntry(DAG, SDLoc(GA), false, GA);
1888  }
1889
1890  SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
1891  SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
1892  return LowerLabelRef(JTIHi, JTILo, isPIC, DAG);
1893}
1894
1895SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
1896                                             SelectionDAG &DAG) const {
1897  EVT PtrVT = Op.getValueType();
1898  BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
1899  const BlockAddress *BA = BASDN->getBlockAddress();
1900
1901  // 64-bit SVR4 ABI code is always position-independent.
1902  // The actual BlockAddress is stored in the TOC.
1903  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
1904    setUsesTOCBasePtr(DAG);
1905    SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
1906    return getTOCEntry(DAG, SDLoc(BASDN), true, GA);
1907  }
1908
1909  unsigned MOHiFlag, MOLoFlag;
1910  bool isPIC =
1911      GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag);
1912  SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
1913  SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
1914  return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
1915}
1916
1917SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
1918                                              SelectionDAG &DAG) const {
1919
1920  // FIXME: TLS addresses currently use medium model code sequences,
1921  // which is the most useful form.  Eventually support for small and
1922  // large models could be added if users need it, at the cost of
1923  // additional complexity.
1924  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
1925  SDLoc dl(GA);
1926  const GlobalValue *GV = GA->getGlobal();
1927  EVT PtrVT = getPointerTy();
1928  bool is64bit = Subtarget.isPPC64();
1929  const Module *M = DAG.getMachineFunction().getFunction()->getParent();
1930  PICLevel::Level picLevel = M->getPICLevel();
1931
1932  TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
1933
1934  if (Model == TLSModel::LocalExec) {
1935    SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
1936                                               PPCII::MO_TPREL_HA);
1937    SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
1938                                               PPCII::MO_TPREL_LO);
1939    SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
1940                                     is64bit ? MVT::i64 : MVT::i32);
1941    SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
1942    return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
1943  }
1944
1945  if (Model == TLSModel::InitialExec) {
1946    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
1947    SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
1948                                                PPCII::MO_TLS);
1949    SDValue GOTPtr;
1950    if (is64bit) {
1951      setUsesTOCBasePtr(DAG);
1952      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
1953      GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
1954                           PtrVT, GOTReg, TGA);
1955    } else
1956      GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
1957    SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
1958                                   PtrVT, TGA, GOTPtr);
1959    return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
1960  }
1961
1962  if (Model == TLSModel::GeneralDynamic) {
1963    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
1964    SDValue GOTPtr;
1965    if (is64bit) {
1966      setUsesTOCBasePtr(DAG);
1967      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
1968      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
1969                                   GOTReg, TGA);
1970    } else {
1971      if (picLevel == PICLevel::Small)
1972        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
1973      else
1974        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
1975    }
1976    return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
1977                       GOTPtr, TGA, TGA);
1978  }
1979
1980  if (Model == TLSModel::LocalDynamic) {
1981    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
1982    SDValue GOTPtr;
1983    if (is64bit) {
1984      setUsesTOCBasePtr(DAG);
1985      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
1986      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
1987                           GOTReg, TGA);
1988    } else {
1989      if (picLevel == PICLevel::Small)
1990        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
1991      else
1992        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
1993    }
1994    SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
1995                                  PtrVT, GOTPtr, TGA, TGA);
1996    SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
1997                                      PtrVT, TLSAddr, TGA);
1998    return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
1999  }
2000
2001  llvm_unreachable("Unknown TLS model!");
2002}
2003
2004SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
2005                                              SelectionDAG &DAG) const {
2006  EVT PtrVT = Op.getValueType();
2007  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
2008  SDLoc DL(GSDN);
2009  const GlobalValue *GV = GSDN->getGlobal();
2010
2011  // 64-bit SVR4 ABI code is always position-independent.
2012  // The actual address of the GlobalValue is stored in the TOC.
2013  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2014    setUsesTOCBasePtr(DAG);
2015    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
2016    return getTOCEntry(DAG, DL, true, GA);
2017  }
2018
2019  unsigned MOHiFlag, MOLoFlag;
2020  bool isPIC =
2021      GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag, GV);
2022
2023  if (isPIC && Subtarget.isSVR4ABI()) {
2024    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
2025                                            GSDN->getOffset(),
2026                                            PPCII::MO_PIC_FLAG);
2027    return getTOCEntry(DAG, DL, false, GA);
2028  }
2029
2030  SDValue GAHi =
2031    DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
2032  SDValue GALo =
2033    DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
2034
2035  SDValue Ptr = LowerLabelRef(GAHi, GALo, isPIC, DAG);
2036
2037  // If the global reference is actually to a non-lazy-pointer, we have to do an
2038  // extra load to get the address of the global.
2039  if (MOHiFlag & PPCII::MO_NLP_FLAG)
2040    Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(),
2041                      false, false, false, 0);
2042  return Ptr;
2043}
2044
2045SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
2046  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
2047  SDLoc dl(Op);
2048
2049  if (Op.getValueType() == MVT::v2i64) {
2050    // When the operands themselves are v2i64 values, we need to do something
2051    // special because VSX has no underlying comparison operations for these.
2052    if (Op.getOperand(0).getValueType() == MVT::v2i64) {
2053      // Equality can be handled by casting to the legal type for Altivec
2054      // comparisons; everything else needs to be expanded.
2055      if (CC == ISD::SETEQ || CC == ISD::SETNE) {
2056        return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
2057                 DAG.getSetCC(dl, MVT::v4i32,
2058                   DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
2059                   DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
2060                   CC));
2061      }
2062
2063      return SDValue();
2064    }
2065
2066    // We handle most of these in the usual way.
2067    return Op;
2068  }
2069
2070  // If we're comparing for equality to zero, expose the fact that this is
2071  // implemented as a ctlz/srl pair on PPC, so that the dag combiner can
2072  // fold the new nodes.
2073  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
2074    if (C->isNullValue() && CC == ISD::SETEQ) {
2075      EVT VT = Op.getOperand(0).getValueType();
2076      SDValue Zext = Op.getOperand(0);
2077      if (VT.bitsLT(MVT::i32)) {
2078        VT = MVT::i32;
2079        Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
2080      }
2081      unsigned Log2b = Log2_32(VT.getSizeInBits());
2082      SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
2083      SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
2084                                DAG.getConstant(Log2b, MVT::i32));
2085      return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
2086    }
2087    // Leave comparisons against 0 and -1 alone for now, since they're usually
2088    // optimized.  FIXME: revisit this when we can custom lower all setcc
2089    // optimizations.
2090    if (C->isAllOnesValue() || C->isNullValue())
2091      return SDValue();
2092  }
2093
2094  // If we have an integer seteq/setne, turn it into a compare against zero
2095  // by xor'ing the rhs with the lhs, which is faster than setting a
2096  // condition register, reading it back out, and masking the correct bit.  The
2097  // normal approach here uses sub to do this instead of xor.  Using xor exposes
2098  // the result to other bit-twiddling opportunities.
2099  EVT LHSVT = Op.getOperand(0).getValueType();
2100  if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
2101    EVT VT = Op.getValueType();
2102    SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
2103                                Op.getOperand(1));
2104    return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC);
2105  }
2106  return SDValue();
2107}
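
// Worked example for the seteq-0 expansion (illustrative): for an i32 %x,
// the sequence is "cntlzw rT, %x; srwi rT, rT, 5".  cntlzw yields 32 only
// when %x == 0, and 32 >> 5 == 1, while any nonzero input gives a count
// below 32 that shifts down to 0.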
2108
2109SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
2110                                      const PPCSubtarget &Subtarget) const {
2111  SDNode *Node = Op.getNode();
2112  EVT VT = Node->getValueType(0);
2113  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2114  SDValue InChain = Node->getOperand(0);
2115  SDValue VAListPtr = Node->getOperand(1);
2116  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
2117  SDLoc dl(Node);
2118
2119  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
2120
2121  // gpr_index
2122  SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
2123                                    VAListPtr, MachinePointerInfo(SV), MVT::i8,
2124                                    false, false, false, 0);
2125  InChain = GprIndex.getValue(1);
2126
2127  if (VT == MVT::i64) {
2128    // Check whether GprIndex is odd (CC64 is true if so)
2129    SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
2130                                 DAG.getConstant(1, MVT::i32));
2131    SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
2132                                DAG.getConstant(0, MVT::i32), ISD::SETNE);
2133    SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
2134                                          DAG.getConstant(1, MVT::i32));
2135    // Align GprIndex to be even if it isn't
2136    GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
2137                           GprIndex);
2138  }
2139
2140  // fpr index is 1 byte after gpr
2141  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
2142                               DAG.getConstant(1, MVT::i32));
2143
2144  // fpr
2145  SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
2146                                    FprPtr, MachinePointerInfo(SV), MVT::i8,
2147                                    false, false, false, 0);
2148  InChain = FprIndex.getValue(1);
2149
2150  SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
2151                                       DAG.getConstant(8, MVT::i32));
2152
2153  SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
2154                                        DAG.getConstant(4, MVT::i32));
2155
2156  // areas
2157  SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr,
2158                                     MachinePointerInfo(), false, false,
2159                                     false, 0);
2160  InChain = OverflowArea.getValue(1);
2161
2162  SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr,
2163                                    MachinePointerInfo(), false, false,
2164                                    false, 0);
2165  InChain = RegSaveArea.getValue(1);
2166
2167  // select overflow_area if index >= 8
2168  SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
2169                            DAG.getConstant(8, MVT::i32), ISD::SETLT);
2170
2171  // adjustment constant gpr_index * 4/8
2172  SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
2173                                    VT.isInteger() ? GprIndex : FprIndex,
2174                                    DAG.getConstant(VT.isInteger() ? 4 : 8,
2175                                                    MVT::i32));
2176
2177  // OurReg = RegSaveArea + RegConstant
2178  SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
2179                               RegConstant);
2180
2181  // Floating types are 32 bytes into RegSaveArea
2182  if (VT.isFloatingPoint())
2183    OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
2184                         DAG.getConstant(32, MVT::i32));
2185
2186  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
2187  SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
2188                                   VT.isInteger() ? GprIndex : FprIndex,
2189                                   DAG.getConstant(VT == MVT::i64 ? 2 : 1,
2190                                                   MVT::i32));
2191
2192  InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
2193                              VT.isInteger() ? VAListPtr : FprPtr,
2194                              MachinePointerInfo(SV),
2195                              MVT::i8, false, false, 0);
2196
2197  // determine if we should load from reg_save_area or overflow_area
2198  SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
2199
2200  // increase overflow_area by 4/8 if gpr/fpr index >= 8
2201  SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
2202                                          DAG.getConstant(VT.isInteger() ? 4 : 8,
2203                                          MVT::i32));
2204
2205  OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
2206                             OverflowAreaPlusN);
2207
2208  InChain = DAG.getTruncStore(InChain, dl, OverflowArea,
2209                              OverflowAreaPtr,
2210                              MachinePointerInfo(),
2211                              MVT::i32, false, false, 0);
2212
2213  return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(),
2214                     false, false, false, 0);
2215}
2216
2217SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG,
2218                                       const PPCSubtarget &Subtarget) const {
2219  assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
2220
2221  // We have to copy the entire va_list struct:
2222  // 2*sizeof(char) + 2 bytes of padding + 2*sizeof(char*) = 12 bytes
2223  return DAG.getMemcpy(Op.getOperand(0), Op,
2224                       Op.getOperand(1), Op.getOperand(2),
2225                       DAG.getConstant(12, MVT::i32), 8, false, true,
2226                       MachinePointerInfo(), MachinePointerInfo());
2227}
2228
2229SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
2230                                                  SelectionDAG &DAG) const {
2231  return Op.getOperand(0);
2232}
2233
2234SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
2235                                                SelectionDAG &DAG) const {
2236  SDValue Chain = Op.getOperand(0);
2237  SDValue Trmp = Op.getOperand(1); // trampoline
2238  SDValue FPtr = Op.getOperand(2); // nested function
2239  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
2240  SDLoc dl(Op);
2241
2242  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2243  bool isPPC64 = (PtrVT == MVT::i64);
2244  Type *IntPtrTy =
2245    DAG.getTargetLoweringInfo().getDataLayout()->getIntPtrType(
2246                                                             *DAG.getContext());
2247
2248  TargetLowering::ArgListTy Args;
2249  TargetLowering::ArgListEntry Entry;
2250
2251  Entry.Ty = IntPtrTy;
2252  Entry.Node = Trmp; Args.push_back(Entry);
2253
2254  // TrampSize == (isPPC64 ? 48 : 40);
2255  Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40,
2256                               isPPC64 ? MVT::i64 : MVT::i32);
2257  Args.push_back(Entry);
2258
2259  Entry.Node = FPtr; Args.push_back(Entry);
2260  Entry.Node = Nest; Args.push_back(Entry);
2261
2262  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
2263  TargetLowering::CallLoweringInfo CLI(DAG);
2264  CLI.setDebugLoc(dl).setChain(Chain)
2265    .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
2266               DAG.getExternalSymbol("__trampoline_setup", PtrVT),
2267               std::move(Args), 0);
2268
2269  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2270  return CallResult.second;
2271}
2272
2273SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
2274                                        const PPCSubtarget &Subtarget) const {
2275  MachineFunction &MF = DAG.getMachineFunction();
2276  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2277
2278  SDLoc dl(Op);
2279
2280  if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
2281    // vastart just stores the address of the VarArgsFrameIndex slot into the
2282    // memory location argument.
2283    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2284    SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
2285    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2286    return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
2287                        MachinePointerInfo(SV),
2288                        false, false, 0);
2289  }
2290
2291  // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
2292  // We suppose the given va_list is already allocated.
2293  //
2294  // typedef struct {
2295  //  char gpr;     /* index into the array of 8 GPRs
2296  //                 * stored in the register save area
2297  //                 * gpr=0 corresponds to r3,
2298  //                 * gpr=1 to r4, etc.
2299  //                 */
2300  //  char fpr;     /* index into the array of 8 FPRs
2301  //                 * stored in the register save area
2302  //                 * fpr=0 corresponds to f1,
2303  //                 * fpr=1 to f2, etc.
2304  //                 */
2305  //  char *overflow_arg_area;
2306  //                /* location on stack that holds
2307  //                 * the next overflow argument
2308  //                 */
2309  //  char *reg_save_area;
2310  //               /* where r3:r10 and f1:f8 (if saved)
2311  //                * are stored
2312  //                */
2313  // } va_list[1];
2314
2315
2316  SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), MVT::i32);
2317  SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), MVT::i32);
2318
2319
2320  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2321
2322  SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
2323                                            PtrVT);
2324  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
2325                                 PtrVT);
2326
2327  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
2328  SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT);
2329
2330  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
2331  SDValue ConstStackOffset = DAG.getConstant(StackOffset, PtrVT);
2332
2333  uint64_t FPROffset = 1;
2334  SDValue ConstFPROffset = DAG.getConstant(FPROffset, PtrVT);
2335
2336  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2337
2338  // Store first byte : number of int regs
2339  SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR,
2340                                         Op.getOperand(1),
2341                                         MachinePointerInfo(SV),
2342                                         MVT::i8, false, false, 0);
2343  uint64_t nextOffset = FPROffset;
2344  SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
2345                                  ConstFPROffset);
2346
2347  // Store second byte : number of float regs
2348  SDValue secondStore =
2349    DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
2350                      MachinePointerInfo(SV, nextOffset), MVT::i8,
2351                      false, false, 0);
2352  nextOffset += StackOffset;
2353  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
2354
2355  // Store second word : arguments given on stack
2356  SDValue thirdStore =
2357    DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
2358                 MachinePointerInfo(SV, nextOffset),
2359                 false, false, 0);
2360  nextOffset += FrameOffset;
2361  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
2362
2363  // Store third word : arguments given in registers
2364  return DAG.getStore(thirdStore, dl, FR, nextPtr,
2365                      MachinePointerInfo(SV, nextOffset),
2366                      false, false, 0);
2367
2368}
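
// Resulting 32-bit SVR4 va_list layout written above (a sketch; PtrVT is
// i32, so FrameOffset == 4, StackOffset == 3, and FPROffset == 1):
//   byte  0     : gpr index
//   byte  1     : fpr index
//   bytes 4..7  : overflow_arg_area
//   bytes 8..11 : reg_save_area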
2369
2370#include "PPCGenCallingConv.inc"
2371
2372// Function whose sole purpose is to kill compiler warnings
2373// stemming from unused functions included from PPCGenCallingConv.inc.
2374CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
2375  return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
2376}
2377
2378bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
2379                                      CCValAssign::LocInfo &LocInfo,
2380                                      ISD::ArgFlagsTy &ArgFlags,
2381                                      CCState &State) {
2382  return true;
2383}
2384
2385bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
2386                                             MVT &LocVT,
2387                                             CCValAssign::LocInfo &LocInfo,
2388                                             ISD::ArgFlagsTy &ArgFlags,
2389                                             CCState &State) {
2390  static const MCPhysReg ArgRegs[] = {
2391    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
2392    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
2393  };
2394  const unsigned NumArgRegs = array_lengthof(ArgRegs);
2395
2396  unsigned RegNum = State.getFirstUnallocated(ArgRegs);
2397
2398  // Skip one register if the first unallocated register has an even register
2399  // number and there are still argument registers available which have not been
2400  // allocated yet. RegNum is actually an index into ArgRegs, which means we
2401  // need to skip a register if RegNum is odd.
2402  if (RegNum != NumArgRegs && RegNum % 2 == 1) {
2403    State.AllocateReg(ArgRegs[RegNum]);
2404  }
2405
2406  // Always return false here, as this function only makes sure that the first
2407  // unallocated register has an odd register number and does not actually
2408  // allocate a register for the current argument.
2409  return false;
2410}
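
// For example (illustrative): if R3 already holds an i32 argument, the first
// unallocated register is R4 (index 1, odd), so R4 is burned and a following
// i64 is passed in the aligned pair r5:r6 rather than split across r4:r5.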
2411
2412bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
2413                                               MVT &LocVT,
2414                                               CCValAssign::LocInfo &LocInfo,
2415                                               ISD::ArgFlagsTy &ArgFlags,
2416                                               CCState &State) {
2417  static const MCPhysReg ArgRegs[] = {
2418    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
2419    PPC::F8
2420  };
2421
2422  const unsigned NumArgRegs = array_lengthof(ArgRegs);
2423
2424  unsigned RegNum = State.getFirstUnallocated(ArgRegs);
2425
2426  // If there is only one Floating-point register left we need to put both f64
2427  // values of a split ppc_fp128 value on the stack.
2428  if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
2429    State.AllocateReg(ArgRegs[RegNum]);
2430  }
2431
2432  // Always return false here, as this function only makes sure that the two f64
2433  // values a ppc_fp128 value is split into are both passed in registers or both
2434  // passed on the stack and does not actually allocate a register for the
2435  // current argument.
2436  return false;
2437}
2438
2439/// GetFPR - Get the set of FP registers that should be allocated for
2440/// arguments on Darwin.
2441static const MCPhysReg *GetFPR() {
2442  static const MCPhysReg FPR[] = {
2443    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
2444    PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
2445  };
2446
2447  return FPR;
2448}
2449
2450/// GetQFPR - Get the set of QPX registers that should be allocated for
2451/// arguments.
2452static const MCPhysReg *GetQFPR() {
2453  static const MCPhysReg QFPR[] = {
2454    PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
2455    PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13
2456  };
2457
2458  return QFPR;
2459}
2460
2461/// CalculateStackSlotSize - Calculates the size reserved for this argument on
2462/// the stack.
2463static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
2464                                       unsigned PtrByteSize) {
2465  unsigned ArgSize = ArgVT.getStoreSize();
2466  if (Flags.isByVal())
2467    ArgSize = Flags.getByValSize();
2468
2469  // Round up to multiples of the pointer size, except for array members,
2470  // which are always packed.
2471  if (!Flags.isInConsecutiveRegs())
2472    ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
2473
2474  return ArgSize;
2475}
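
// For example (a sketch): a 13-byte by-value struct with PtrByteSize == 8
// reserves ((13 + 7) / 8) * 8 == 16 bytes, while an f32 array member passed
// in consecutive registers stays packed at its 4-byte store size.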
2476
2477/// CalculateStackSlotAlignment - Calculates the alignment of this argument
2478/// on the stack.
2479static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
2480                                            ISD::ArgFlagsTy Flags,
2481                                            unsigned PtrByteSize) {
2482  unsigned Align = PtrByteSize;
2483
2484  // Altivec parameters are padded to a 16 byte boundary.
2485  if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
2486      ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
2487      ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
2488    Align = 16;
2489  // QPX vector types stored in double-precision are padded to a 32 byte
2490  // boundary.
2491  else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
2492    Align = 32;
2493
2494  // ByVal parameters are aligned as requested.
2495  if (Flags.isByVal()) {
2496    unsigned BVAlign = Flags.getByValAlign();
2497    if (BVAlign > PtrByteSize) {
2498      if (BVAlign % PtrByteSize != 0)
2499        llvm_unreachable(
2500            "ByVal alignment is not a multiple of the pointer size");
2501
2502      Align = BVAlign;
2503    }
2504  }
2505
2506  // Array members are always packed to their original alignment.
2507  if (Flags.isInConsecutiveRegs()) {
2508    // If the array member was split into multiple registers, the first
2509    // needs to be aligned to the size of the full type.  (Except for
2510    // ppcf128, which is only aligned as its f64 components.)
2511    if (Flags.isSplit() && OrigVT != MVT::ppcf128)
2512      Align = OrigVT.getStoreSize();
2513    else
2514      Align = ArgVT.getStoreSize();
2515  }
2516
2517  return Align;
2518}
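
// For example (illustrative): an Altivec v4f32 argument aligns to 16 bytes,
// while a split f64 component of a ppc_fp128 array member keeps its 8-byte
// alignment, per the special case above.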
2519
2520/// CalculateStackSlotUsed - Return whether this argument will use its
2521/// stack slot (instead of being passed in registers).  ArgOffset,
2522/// AvailableFPRs, and AvailableVRs must hold the current argument
2523/// position, and will be updated to account for this argument.
2524static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
2525                                   ISD::ArgFlagsTy Flags,
2526                                   unsigned PtrByteSize,
2527                                   unsigned LinkageSize,
2528                                   unsigned ParamAreaSize,
2529                                   unsigned &ArgOffset,
2530                                   unsigned &AvailableFPRs,
2531                                   unsigned &AvailableVRs, bool HasQPX) {
2532  bool UseMemory = false;
2533
2534  // Respect alignment of argument on the stack.
2535  unsigned Align =
2536    CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
2537  ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
2538  // If there's no space left in the argument save area, we must
2539  // use memory (this check also catches zero-sized arguments).
2540  if (ArgOffset >= LinkageSize + ParamAreaSize)
2541    UseMemory = true;
2542
2543  // Allocate argument on the stack.
2544  ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
2545  if (Flags.isInConsecutiveRegsLast())
2546    ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
2547  // If we overran the argument save area, we must use memory
2548  // (this check catches arguments passed partially in memory)
2549  if (ArgOffset > LinkageSize + ParamAreaSize)
2550    UseMemory = true;
2551
2552  // However, if the argument is actually passed in an FPR or a VR,
2553  // we don't use memory after all.
2554  if (!Flags.isByVal()) {
2555    if (ArgVT == MVT::f32 || ArgVT == MVT::f64 ||
2556        // QPX registers overlap with the scalar FP registers.
2557        (HasQPX && (ArgVT == MVT::v4f32 ||
2558                    ArgVT == MVT::v4f64 ||
2559                    ArgVT == MVT::v4i1)))
2560      if (AvailableFPRs > 0) {
2561        --AvailableFPRs;
2562        return false;
2563      }
2564    if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
2565        ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
2566        ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
2567      if (AvailableVRs > 0) {
2568        --AvailableVRs;
2569        return false;
2570      }
2571  }
2572
2573  return UseMemory;
2574}
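
// Sketch of the interplay above (illustrative, not from the source): an f64
// argument whose aligned offset lands beyond LinkageSize + ParamAreaSize sets
// UseMemory, but if an FPR is still available the early return yields false,
// because the value is really passed in that register.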
2575
2576/// EnsureStackAlignment - Round stack frame size up from NumBytes to
2577/// ensure minimum alignment required for target.
2578static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
2579                                     unsigned NumBytes) {
2580  unsigned TargetAlign = Lowering->getStackAlignment();
2581  unsigned AlignMask = TargetAlign - 1;
2582  NumBytes = (NumBytes + AlignMask) & ~AlignMask;
2583  return NumBytes;
2584}
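
// The mask trick rounds up to the next multiple of TargetAlign, which is
// assumed to be a power of two.  Example values (illustrative only): with a
// 16-byte stack alignment, NumBytes == 100 becomes (100 + 15) & ~15 == 112.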
2585
2586SDValue
2587PPCTargetLowering::LowerFormalArguments(SDValue Chain,
2588                                        CallingConv::ID CallConv, bool isVarArg,
2589                                        const SmallVectorImpl<ISD::InputArg>
2590                                          &Ins,
2591                                        SDLoc dl, SelectionDAG &DAG,
2592                                        SmallVectorImpl<SDValue> &InVals)
2593                                          const {
2594  if (Subtarget.isSVR4ABI()) {
2595    if (Subtarget.isPPC64())
2596      return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
2597                                         dl, DAG, InVals);
2598    else
2599      return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
2600                                         dl, DAG, InVals);
2601  } else {
2602    return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
2603                                       dl, DAG, InVals);
2604  }
2605}
2606
2607SDValue
2608PPCTargetLowering::LowerFormalArguments_32SVR4(
2609                                      SDValue Chain,
2610                                      CallingConv::ID CallConv, bool isVarArg,
2611                                      const SmallVectorImpl<ISD::InputArg>
2612                                        &Ins,
2613                                      SDLoc dl, SelectionDAG &DAG,
2614                                      SmallVectorImpl<SDValue> &InVals) const {
2615
2616  // 32-bit SVR4 ABI Stack Frame Layout:
2617  //              +-----------------------------------+
2618  //        +-->  |            Back chain             |
2619  //        |     +-----------------------------------+
2620  //        |     | Floating-point register save area |
2621  //        |     +-----------------------------------+
2622  //        |     |    General register save area     |
2623  //        |     +-----------------------------------+
2624  //        |     |          CR save word             |
2625  //        |     +-----------------------------------+
2626  //        |     |         VRSAVE save word          |
2627  //        |     +-----------------------------------+
2628  //        |     |         Alignment padding         |
2629  //        |     +-----------------------------------+
2630  //        |     |     Vector register save area     |
2631  //        |     +-----------------------------------+
2632  //        |     |       Local variable space        |
2633  //        |     +-----------------------------------+
2634  //        |     |        Parameter list area        |
2635  //        |     +-----------------------------------+
2636  //        |     |           LR save word            |
2637  //        |     +-----------------------------------+
2638  // SP-->  +---  |            Back chain             |
2639  //              +-----------------------------------+
2640  //
2641  // Specifications:
2642  //   System V Application Binary Interface PowerPC Processor Supplement
2643  //   AltiVec Technology Programming Interface Manual
2644
2645  MachineFunction &MF = DAG.getMachineFunction();
2646  MachineFrameInfo *MFI = MF.getFrameInfo();
2647  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2648
2649  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2650  // Potential tail calls could cause overwriting of argument stack slots.
2651  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
2652                       (CallConv == CallingConv::Fast));
2653  unsigned PtrByteSize = 4;
2654
2655  // Assign locations to all of the incoming arguments.
2656  SmallVector<CCValAssign, 16> ArgLocs;
2657  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2658                 *DAG.getContext());
2659
2660  // Reserve space for the linkage area on the stack.
2661  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
2662  CCInfo.AllocateStack(LinkageSize, PtrByteSize);
2663
2664  CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
2665
2666  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2667    CCValAssign &VA = ArgLocs[i];
2668
2669    // Arguments stored in registers.
2670    if (VA.isRegLoc()) {
2671      const TargetRegisterClass *RC;
2672      EVT ValVT = VA.getValVT();
2673
2674      switch (ValVT.getSimpleVT().SimpleTy) {
2675        default:
2676          llvm_unreachable("ValVT not supported by formal arguments Lowering");
2677        case MVT::i1:
2678        case MVT::i32:
2679          RC = &PPC::GPRCRegClass;
2680          break;
2681        case MVT::f32:
2682          RC = &PPC::F4RCRegClass;
2683          break;
2684        case MVT::f64:
2685          if (Subtarget.hasVSX())
2686            RC = &PPC::VSFRCRegClass;
2687          else
2688            RC = &PPC::F8RCRegClass;
2689          break;
2690        case MVT::v16i8:
2691        case MVT::v8i16:
2692        case MVT::v4i32:
2693          RC = &PPC::VRRCRegClass;
2694          break;
2695        case MVT::v4f32:
2696          RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass;
2697          break;
2698        case MVT::v2f64:
2699        case MVT::v2i64:
2700          RC = &PPC::VSHRCRegClass;
2701          break;
2702        case MVT::v4f64:
2703          RC = &PPC::QFRCRegClass;
2704          break;
2705        case MVT::v4i1:
2706          RC = &PPC::QBRCRegClass;
2707          break;
2708      }
2709
2710      // Transform the arguments stored in physical registers into virtual ones.
2711      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2712      SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
2713                                            ValVT == MVT::i1 ? MVT::i32 : ValVT);
2714
2715      if (ValVT == MVT::i1)
2716        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
2717
2718      InVals.push_back(ArgValue);
2719    } else {
2720      // Argument stored in memory.
2721      assert(VA.isMemLoc());
2722
2723      unsigned ArgSize = VA.getLocVT().getStoreSize();
2724      int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
2725                                      isImmutable);
2726
2727      // Create load nodes to retrieve arguments from the stack.
2728      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2729      InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
2730                                   MachinePointerInfo(),
2731                                   false, false, false, 0));
2732    }
2733  }
2734
2735  // Assign locations to all of the incoming aggregate by value arguments.
2736  // Aggregates passed by value are stored in the local variable space of the
2737  // caller's stack frame, right above the parameter list area.
2738  SmallVector<CCValAssign, 16> ByValArgLocs;
2739  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
2740                      ByValArgLocs, *DAG.getContext());
2741
2742  // Reserve stack space for the allocations in CCInfo.
2743  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
2744
2745  CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
2746
2747  // Area that is at least reserved in the caller of this function.
2748  unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
2749  MinReservedArea = std::max(MinReservedArea, LinkageSize);
2750
2751  // Set the size that is at least reserved in the caller of this function.  A
2752  // tail call optimized function's reserved stack space needs to be aligned so
2753  // that taking the difference between two stack areas will result in an aligned
2754  // stack.
2755  MinReservedArea =
2756      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
2757  FuncInfo->setMinReservedArea(MinReservedArea);
2758
2759  SmallVector<SDValue, 8> MemOps;
2760
2761  // If the function takes a variable number of arguments, make a frame index for
2762  // the start of the first vararg value... for expansion of llvm.va_start.
2763  if (isVarArg) {
2764    static const MCPhysReg GPArgRegs[] = {
2765      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
2766      PPC::R7, PPC::R8, PPC::R9, PPC::R10,
2767    };
2768    const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
2769
2770    static const MCPhysReg FPArgRegs[] = {
2771      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
2772      PPC::F8
2773    };
2774    unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
2775    if (DisablePPCFloatInVariadic)
2776      NumFPArgRegs = 0;
2777
2778    FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
2779    FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
2780
2781    // Make room for NumGPArgRegs and NumFPArgRegs.
2782    int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
2783                NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
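    // Illustrative sizing (example values, not from the source): with 8 GPRs
    // of 4 bytes and 8 FPRs of 8 bytes, Depth == 32 + 64 == 96 bytes backing
    // the register save area read back by va_arg.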
2784
2785    FuncInfo->setVarArgsStackOffset(
2786      MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
2787                             CCInfo.getNextStackOffset(), true));
2788
2789    FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false));
2790    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
2791
2792    // The fixed integer arguments of a variadic function are stored to the
2793    // VarArgsFrameIndex on the stack so that they may be loaded by dereferencing
2794    // the result of va_next.
2795    for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
2796      // Get an existing live-in vreg, or add a new one.
2797      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
2798      if (!VReg)
2799        VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
2800
2801      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
2802      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
2803                                   MachinePointerInfo(), false, false, 0);
2804      MemOps.push_back(Store);
2805      // Increment the address by four for the next argument to store
2806      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
2807      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
2808    }
2809
2810    // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
2811    // is set.
2812    // The double arguments are stored to the VarArgsFrameIndex
2813    // on the stack.
2814    for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
2815      // Get an existing live-in vreg, or add a new one.
2816      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
2817      if (!VReg)
2818        VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
2819
2820      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
2821      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
2822                                   MachinePointerInfo(), false, false, 0);
2823      MemOps.push_back(Store);
2824      // Increment the address by eight for the next argument to store
2825      SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8,
2826                                         PtrVT);
2827      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
2828    }
2829  }
2830
2831  if (!MemOps.empty())
2832    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
2833
2834  return Chain;
2835}
2836
2837// PPC64 passes i8, i16, and i32 values in i64 registers. Assert the known
2838// sign/zero extension of the value, then truncate to the correct register size.
2839SDValue
2840PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
2841                                     SelectionDAG &DAG, SDValue ArgVal,
2842                                     SDLoc dl) const {
2843  if (Flags.isSExt())
2844    ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
2845                         DAG.getValueType(ObjectVT));
2846  else if (Flags.isZExt())
2847    ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
2848                         DAG.getValueType(ObjectVT));
2849
2850  return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
2851}
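
// Illustrative use (example values, not from the source): for an i32 argument
// carrying the zext flag, this emits AssertZext(<i64 value>, i32) followed by
// a TRUNCATE to i32, telling later passes that the high bits are already zero.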
2852
2853SDValue
2854PPCTargetLowering::LowerFormalArguments_64SVR4(
2855                                      SDValue Chain,
2856                                      CallingConv::ID CallConv, bool isVarArg,
2857                                      const SmallVectorImpl<ISD::InputArg>
2858                                        &Ins,
2859                                      SDLoc dl, SelectionDAG &DAG,
2860                                      SmallVectorImpl<SDValue> &InVals) const {
2861  // TODO: add description of PPC stack frame format, or at least some docs.
2862  //
2863  bool isELFv2ABI = Subtarget.isELFv2ABI();
2864  bool isLittleEndian = Subtarget.isLittleEndian();
2865  MachineFunction &MF = DAG.getMachineFunction();
2866  MachineFrameInfo *MFI = MF.getFrameInfo();
2867  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2868
2869  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
2870         "fastcc not supported on varargs functions");
2871
2872  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2873  // Potential tail calls could cause overwriting of argument stack slots.
2874  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
2875                       (CallConv == CallingConv::Fast));
2876  unsigned PtrByteSize = 8;
2877  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
2878
2879  static const MCPhysReg GPR[] = {
2880    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
2881    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
2882  };
2883
2884  static const MCPhysReg *FPR = GetFPR();
2885
2886  static const MCPhysReg VR[] = {
2887    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
2888    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
2889  };
2890  static const MCPhysReg VSRH[] = {
2891    PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
2892    PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
2893  };
2894
2895  static const MCPhysReg *QFPR = GetQFPR();
2896
2897  const unsigned Num_GPR_Regs = array_lengthof(GPR);
2898  const unsigned Num_FPR_Regs = 13;
2899  const unsigned Num_VR_Regs  = array_lengthof(VR);
2900  const unsigned Num_QFPR_Regs = Num_FPR_Regs;
2901
2902  // Do a first pass over the arguments to determine whether the ABI
2903  // guarantees that our caller has allocated the parameter save area
2904  // on its stack frame.  In the ELFv1 ABI, this is always the case;
2905  // in the ELFv2 ABI, it is true if this is a vararg function or if
2906  // any parameter is located in a stack slot.
2907
2908  bool HasParameterArea = !isELFv2ABI || isVarArg;
2909  unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
2910  unsigned NumBytes = LinkageSize;
2911  unsigned AvailableFPRs = Num_FPR_Regs;
2912  unsigned AvailableVRs = Num_VR_Regs;
2913  for (unsigned i = 0, e = Ins.size(); i != e; ++i)
2914    if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
2915                               PtrByteSize, LinkageSize, ParamAreaSize,
2916                               NumBytes, AvailableFPRs, AvailableVRs,
2917                               Subtarget.hasQPX()))
2918      HasParameterArea = true;
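  // Illustrative outcome (example signature, not from the source): under
  // ELFv2, a non-varargs function taking two i64s and one f64 fits entirely
  // in registers, so HasParameterArea stays false and the caller need not
  // allocate the 64-byte parameter save area.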
2919
2920  // Add DAG nodes to load the arguments or copy them out of registers.  On
2921  // entry to a function on PPC, the arguments start after the linkage area,
2922  // although the first ones are often in registers.
2923
2924  unsigned ArgOffset = LinkageSize;
2925  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
2926  unsigned &QFPR_idx = FPR_idx;
2927  SmallVector<SDValue, 8> MemOps;
2928  Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
2929  unsigned CurArgIdx = 0;
2930  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
2931    SDValue ArgVal;
2932    bool needsLoad = false;
2933    EVT ObjectVT = Ins[ArgNo].VT;
2934    EVT OrigVT = Ins[ArgNo].ArgVT;
2935    unsigned ObjSize = ObjectVT.getStoreSize();
2936    unsigned ArgSize = ObjSize;
2937    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
2938    if (Ins[ArgNo].isOrigArg()) {
2939      std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
2940      CurArgIdx = Ins[ArgNo].getOrigArgIndex();
2941    }
2942    // We re-align the argument offset for each argument.  Under the fast
2943    // calling convention, though, we defer this until we know the argument
2944    // will actually use a stack slot.
2945    unsigned CurArgOffset, Align;
2946    auto ComputeArgOffset = [&]() {
2947      /* Respect alignment of argument on the stack.  */
2948      Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
2949      ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
2950      CurArgOffset = ArgOffset;
2951    };
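    // Illustrative re-alignment (example values, not from the source): with
    // ArgOffset == 52 and a 16-byte-aligned vector argument, ComputeArgOffset()
    // rounds ArgOffset up to 64 before recording it in CurArgOffset.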
2952
2953    if (CallConv != CallingConv::Fast) {
2954      ComputeArgOffset();
2955
2956      /* Compute GPR index associated with argument offset.  */
2957      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
2958      GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
2959    }
2960
2961    // FIXME the codegen can be much improved in some cases.
2962    // We do not have to keep everything in memory.
2963    if (Flags.isByVal()) {
2964      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
2965
2966      if (CallConv == CallingConv::Fast)
2967        ComputeArgOffset();
2968
2969      // ObjSize is the true size; ArgSize is it rounded up to a register multiple.
2970      ObjSize = Flags.getByValSize();
2971      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
2972      // Empty aggregate parameters do not take up registers.  Examples:
2973      //   struct { } a;
2974      //   union  { } b;
2975      //   int c[0];
2976      // etc.  However, we have to provide a place-holder in InVals, so
2977      // pretend we have an 8-byte item at the current address for that
2978      // purpose.
2979      if (!ObjSize) {
2980        int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
2981        SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2982        InVals.push_back(FIN);
2983        continue;
2984      }
2985
2986      // Create a stack object covering all stack doublewords occupied
2987      // by the argument.  If the argument is (fully or partially) on
2988      // the stack, or if the argument is fully in registers but the
2989      // caller has allocated the parameter save area anyway, we can refer
2990      // directly to the caller's stack frame.  Otherwise, create a
2991      // local copy in our own frame.
2992      int FI;
2993      if (HasParameterArea ||
2994          ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
2995        FI = MFI->CreateFixedObject(ArgSize, ArgOffset, false, true);
2996      else
2997        FI = MFI->CreateStackObject(ArgSize, Align, false);
2998      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2999
3000      // Handle aggregates smaller than 8 bytes.
3001      if (ObjSize < PtrByteSize) {
3002        // The value of the object is its address, which differs from the
3003        // address of the enclosing doubleword on big-endian systems.
3004        SDValue Arg = FIN;
3005        if (!isLittleEndian) {
3006          SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, PtrVT);
3007          Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
3008        }
3009        InVals.push_back(Arg);
3010
3011        if (GPR_idx != Num_GPR_Regs) {
3012          unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3013          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3014          SDValue Store;
3015
3016          if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
3017            EVT ObjType = (ObjSize == 1 ? MVT::i8 :
3018                           (ObjSize == 2 ? MVT::i16 : MVT::i32));
3019            Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
3020                                      MachinePointerInfo(FuncArg),
3021                                      ObjType, false, false, 0);
3022          } else {
3023            // For sizes that don't fit a truncating store (3, 5, 6, 7),
3024            // store the whole register as-is to the parameter save area
3025            // slot.
3026            Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3027                                 MachinePointerInfo(FuncArg),
3028                                 false, false, 0);
3029          }
3030
3031          MemOps.push_back(Store);
3032        }
3033        // Whether we copied from a register or not, advance the offset
3034        // into the parameter save area by a full doubleword.
3035        ArgOffset += PtrByteSize;
3036        continue;
3037      }
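      // Illustrative layout (example values, not from the source): on a
      // big-endian target, a 3-byte byval in the doubleword at offset 48 is
      // addressed at 48 + (8 - 3) == 53, and the whole register is stored
      // because 3 bytes has no matching truncating-store type.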
3038
3039      // The value of the object is its address, which is the address of
3040      // its first stack doubleword.
3041      InVals.push_back(FIN);
3042
3043      // Store whatever pieces of the object are in registers to memory.
3044      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
3045        if (GPR_idx == Num_GPR_Regs)
3046          break;
3047
3048        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3049        SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3050        SDValue Addr = FIN;
3051        if (j) {
3052          SDValue Off = DAG.getConstant(j, PtrVT);
3053          Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
3054        }
3055        SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
3056                                     MachinePointerInfo(FuncArg, j),
3057                                     false, false, 0);
3058        MemOps.push_back(Store);
3059        ++GPR_idx;
3060      }
3061      ArgOffset += ArgSize;
3062      continue;
3063    }
3064
3065    switch (ObjectVT.getSimpleVT().SimpleTy) {
3066    default: llvm_unreachable("Unhandled argument type!");
3067    case MVT::i1:
3068    case MVT::i32:
3069    case MVT::i64:
3070      // These can be scalar arguments or elements of an integer array type
3071      // passed directly.  Clang may use those instead of "byval" aggregate
3072      // types to avoid forcing arguments to memory unnecessarily.
3073      if (GPR_idx != Num_GPR_Regs) {
3074        unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3075        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3076
3077        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
3078          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3079          // value to MVT::i64 and then truncate to the correct register size.
3080          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
3081      } else {
3082        if (CallConv == CallingConv::Fast)
3083          ComputeArgOffset();
3084
3085        needsLoad = true;
3086        ArgSize = PtrByteSize;
3087      }
3088      if (CallConv != CallingConv::Fast || needsLoad)
3089        ArgOffset += 8;
3090      break;
3091
3092    case MVT::f32:
3093    case MVT::f64:
3094      // These can be scalar arguments or elements of a float array type
3095      // passed directly.  The latter are used to implement ELFv2 homogenous
3096      // float aggregates.
3097      if (FPR_idx != Num_FPR_Regs) {
3098        unsigned VReg;
3099
3100        if (ObjectVT == MVT::f32)
3101          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
3102        else
3103          VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
3104                                                ? &PPC::VSFRCRegClass
3105                                                : &PPC::F8RCRegClass);
3106
3107        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3108        ++FPR_idx;
3109      } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
3110        // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
3111        // once we support fp <-> gpr moves.
3112
3113        // This can only ever happen in the presence of f32 array types,
3114        // since otherwise we never run out of FPRs before running out
3115        // of GPRs.
3116        unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3117        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3118
3119        if (ObjectVT == MVT::f32) {
3120          if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
3121            ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
3122                                 DAG.getConstant(32, MVT::i32));
3123          ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
3124        }
3125
3126        ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
3127      } else {
3128        if (CallConv == CallingConv::Fast)
3129          ComputeArgOffset();
3130
3131        needsLoad = true;
3132      }
3133
3134      // When passing an array of floats, the array occupies consecutive
3135      // space in the argument area; only round up to the next doubleword
3136      // at the end of the array.  Otherwise, each float takes 8 bytes.
3137      if (CallConv != CallingConv::Fast || needsLoad) {
3138        ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
3139        ArgOffset += ArgSize;
3140        if (Flags.isInConsecutiveRegsLast())
3141          ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3142      }
3143      break;
3144    case MVT::v4f32:
3145    case MVT::v4i32:
3146    case MVT::v8i16:
3147    case MVT::v16i8:
3148    case MVT::v2f64:
3149    case MVT::v2i64:
3150      if (!Subtarget.hasQPX()) {
3151      // These can be scalar arguments or elements of a vector array type
3152      // passed directly.  The latter are used to implement ELFv2 homogeneous
3153      // vector aggregates.
3154      if (VR_idx != Num_VR_Regs) {
3155        unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ?
3156                        MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) :
3157                        MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
3158        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3159        ++VR_idx;
3160      } else {
3161        if (CallConv == CallingConv::Fast)
3162          ComputeArgOffset();
3163
3164        needsLoad = true;
3165      }
3166      if (CallConv != CallingConv::Fast || needsLoad)
3167        ArgOffset += 16;
3168      break;
3169      } // not QPX
3170
3171      assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
3172             "Invalid QPX parameter type");
3173      /* fall through */
3174
3175    case MVT::v4f64:
3176    case MVT::v4i1:
3177      // QPX vectors are treated like their scalar floating-point subregisters
3178      // (except that they're larger).
3179      unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
3180      if (QFPR_idx != Num_QFPR_Regs) {
3181        const TargetRegisterClass *RC;
3182        switch (ObjectVT.getSimpleVT().SimpleTy) {
3183        case MVT::v4f64: RC = &PPC::QFRCRegClass; break;
3184        case MVT::v4f32: RC = &PPC::QSRCRegClass; break;
3185        default:         RC = &PPC::QBRCRegClass; break;
3186        }
3187
3188        unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC);
3189        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3190        ++QFPR_idx;
3191      } else {
3192        if (CallConv == CallingConv::Fast)
3193          ComputeArgOffset();
3194        needsLoad = true;
3195      }
3196      if (CallConv != CallingConv::Fast || needsLoad)
3197        ArgOffset += Sz;
3198      break;
3199    }
3200
3201    // We need to load the argument to a virtual register if we determined
3202    // above that we ran out of physical registers of the appropriate type.
3203    if (needsLoad) {
3204      if (ObjSize < ArgSize && !isLittleEndian)
3205        CurArgOffset += ArgSize - ObjSize;
3206      int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
3207      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3208      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
3209                           false, false, false, 0);
3210    }
3211
3212    InVals.push_back(ArgVal);
3213  }
3214
3215  // Area that is at least reserved in the caller of this function.
3216  unsigned MinReservedArea;
3217  if (HasParameterArea)
3218    MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
3219  else
3220    MinReservedArea = LinkageSize;
3221
3222  // Set the size that is at least reserved in caller of this function.  Tail
3223  // call optimized functions' reserved stack space needs to be aligned so that
3224  // taking the difference between two stack areas will result in an aligned
3225  // stack.
3226  MinReservedArea =
3227      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3228  FuncInfo->setMinReservedArea(MinReservedArea);
3229
3230  // If the function takes a variable number of arguments, make a frame index for
3231  // the start of the first vararg value... for expansion of llvm.va_start.
3232  if (isVarArg) {
3233    int Depth = ArgOffset;
3234
3235    FuncInfo->setVarArgsFrameIndex(
3236      MFI->CreateFixedObject(PtrByteSize, Depth, true));
3237    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3238
3239    // If this function is vararg, store any remaining integer argument regs
3240    // to their spots on the stack so that they may be loaded by dereferencing the
3241    // result of va_next.
3242    for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
3243         GPR_idx < Num_GPR_Regs; ++GPR_idx) {
3244      unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3245      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3246      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3247                                   MachinePointerInfo(), false, false, 0);
3248      MemOps.push_back(Store);
3249      // Increment the address by the pointer size for the next argument to store
3250      SDValue PtrOff = DAG.getConstant(PtrByteSize, PtrVT);
3251      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3252    }
3253  }
3254
3255  if (!MemOps.empty())
3256    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3257
3258  return Chain;
3259}
3260
3261SDValue
3262PPCTargetLowering::LowerFormalArguments_Darwin(
3263                                      SDValue Chain,
3264                                      CallingConv::ID CallConv, bool isVarArg,
3265                                      const SmallVectorImpl<ISD::InputArg>
3266                                        &Ins,
3267                                      SDLoc dl, SelectionDAG &DAG,
3268                                      SmallVectorImpl<SDValue> &InVals) const {
3269  // TODO: add description of PPC stack frame format, or at least some docs.
3270  //
3271  MachineFunction &MF = DAG.getMachineFunction();
3272  MachineFrameInfo *MFI = MF.getFrameInfo();
3273  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3274
3275  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
3276  bool isPPC64 = PtrVT == MVT::i64;
3277  // Potential tail calls could cause overwriting of argument stack slots.
3278  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3279                       (CallConv == CallingConv::Fast));
3280  unsigned PtrByteSize = isPPC64 ? 8 : 4;
3281  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3282  unsigned ArgOffset = LinkageSize;
3283  // Area that is at least reserved in caller of this function.
3284  unsigned MinReservedArea = ArgOffset;
3285
3286  static const MCPhysReg GPR_32[] = {           // 32-bit registers.
3287    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3288    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3289  };
3290  static const MCPhysReg GPR_64[] = {           // 64-bit registers.
3291    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3292    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3293  };
3294
3295  static const MCPhysReg *FPR = GetFPR();
3296
3297  static const MCPhysReg VR[] = {
3298    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
3299    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3300  };
3301
3302  const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
3303  const unsigned Num_FPR_Regs = 13;
3304  const unsigned Num_VR_Regs  = array_lengthof( VR);
3305
3306  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
3307
3308  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
3309
3310  // In 32-bit non-varargs functions, the stack space for vectors is after the
3311  // stack space for non-vectors.  We do not use this space unless we have
3312  // too many vectors to fit in registers, something that only occurs in
3313  // constructed examples, but we have to walk the arglist to figure
3314  // that out.  For the pathological case, compute VecArgOffset as the
3315  // start of the vector parameter area.  Computing VecArgOffset is the
3316  // entire point of the following loop.
3317  unsigned VecArgOffset = ArgOffset;
3318  if (!isVarArg && !isPPC64) {
3319    for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
3320         ++ArgNo) {
3321      EVT ObjectVT = Ins[ArgNo].VT;
3322      ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3323
3324      if (Flags.isByVal()) {
3325        // ObjSize is the true size; ArgSize is it rounded up to a multiple of regs.
3326        unsigned ObjSize = Flags.getByValSize();
3327        unsigned ArgSize =
3328                ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3329        VecArgOffset += ArgSize;
3330        continue;
3331      }
3332
3333      switch(ObjectVT.getSimpleVT().SimpleTy) {
3334      default: llvm_unreachable("Unhandled argument type!");
3335      case MVT::i1:
3336      case MVT::i32:
3337      case MVT::f32:
3338        VecArgOffset += 4;
3339        break;
3340      case MVT::i64:  // PPC64
3341      case MVT::f64:
3342        // FIXME: We are guaranteed to be !isPPC64 at this point.
3343        // Does MVT::i64 apply?
3344        VecArgOffset += 8;
3345        break;
3346      case MVT::v4f32:
3347      case MVT::v4i32:
3348      case MVT::v8i16:
3349      case MVT::v16i8:
3350        // Nothing to do, we're only looking at Nonvector args here.
3351        break;
3352      }
3353    }
3354  }
3355  // We've found where the vector parameter area in memory is.  Skip the
3356  // first 12 parameters; these don't use that memory.
3357  VecArgOffset = ((VecArgOffset+15)/16)*16;
3358  VecArgOffset += 12*16;
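  // Illustrative computation (example values, not from the source): if the
  // non-vector arguments end at offset 40, VecArgOffset rounds up to 48 and
  // then skips 12 * 16 == 192 bytes for the first 12 vector parameters,
  // placing the vector parameter area at offset 240.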
3359
3360  // Add DAG nodes to load the arguments or copy them out of registers.  On
3361  // entry to a function on PPC, the arguments start after the linkage area,
3362  // although the first ones are often in registers.
3363
3364  SmallVector<SDValue, 8> MemOps;
3365  unsigned nAltivecParamsAtEnd = 0;
3366  Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
3367  unsigned CurArgIdx = 0;
3368  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
3369    SDValue ArgVal;
3370    bool needsLoad = false;
3371    EVT ObjectVT = Ins[ArgNo].VT;
3372    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
3373    unsigned ArgSize = ObjSize;
3374    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3375    if (Ins[ArgNo].isOrigArg()) {
3376      std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
3377      CurArgIdx = Ins[ArgNo].getOrigArgIndex();
3378    }
3379    unsigned CurArgOffset = ArgOffset;
3380
3381    // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
3382    if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
3383        ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
3384      if (isVarArg || isPPC64) {
3385        MinReservedArea = ((MinReservedArea+15)/16)*16;
3386        MinReservedArea += CalculateStackSlotSize(ObjectVT,
3387                                                  Flags,
3388                                                  PtrByteSize);
3389      } else nAltivecParamsAtEnd++;
3390    } else
3391      // Calculate min reserved area.
3392      MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
3393                                                Flags,
3394                                                PtrByteSize);
3395
3396    // FIXME the codegen can be much improved in some cases.
3397    // We do not have to keep everything in memory.
3398    if (Flags.isByVal()) {
3399      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
3400
3401      // ObjSize is the true size; ArgSize is it rounded up to a register multiple.
3402      ObjSize = Flags.getByValSize();
3403      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3404      // Objects of size 1 and 2 are right-justified; everything else is
3405      // left-justified.  This means the memory address is adjusted forwards.
3406      if (ObjSize==1 || ObjSize==2) {
3407        CurArgOffset = CurArgOffset + (4 - ObjSize);
3408      }
3409      // The value of the object is its address.
3410      int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, false, true);
3411      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3412      InVals.push_back(FIN);
3413      if (ObjSize==1 || ObjSize==2) {
3414        if (GPR_idx != Num_GPR_Regs) {
3415          unsigned VReg;
3416          if (isPPC64)
3417            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3418          else
3419            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
3420          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3421          EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
3422          SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
3423                                            MachinePointerInfo(FuncArg),
3424                                            ObjType, false, false, 0);
3425          MemOps.push_back(Store);
3426          ++GPR_idx;
3427        }
3428
3429        ArgOffset += PtrByteSize;
3430
3431        continue;
3432      }
3433      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
3434        // Store whatever pieces of the object are in registers
3435        // to memory.  ArgOffset will be the address of the beginning
3436        // of the object.
3437        if (GPR_idx != Num_GPR_Regs) {
3438          unsigned VReg;
3439          if (isPPC64)
3440            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3441          else
3442            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
3443          int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
3444          SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3445          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3446          SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3447                                       MachinePointerInfo(FuncArg, j),
3448                                       false, false, 0);
3449          MemOps.push_back(Store);
3450          ++GPR_idx;
3451          ArgOffset += PtrByteSize;
3452        } else {
3453          ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
3454          break;
3455        }
3456      }
3457      continue;
3458    }
3459
3460    switch (ObjectVT.getSimpleVT().SimpleTy) {
3461    default: llvm_unreachable("Unhandled argument type!");
3462    case MVT::i1:
3463    case MVT::i32:
3464      if (!isPPC64) {
3465        if (GPR_idx != Num_GPR_Regs) {
3466          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
3467          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
3468
3469          if (ObjectVT == MVT::i1)
3470            ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);
3471
3472          ++GPR_idx;
3473        } else {
3474          needsLoad = true;
3475          ArgSize = PtrByteSize;
3476        }
3477        // All int arguments reserve stack space in the Darwin ABI.
3478        ArgOffset += PtrByteSize;
3479        break;
3480      }
3481      // FALLTHROUGH
3482    case MVT::i64:  // PPC64
3483      if (GPR_idx != Num_GPR_Regs) {
3484        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3485        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3486
3487        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
3488          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3489          // value to MVT::i64 and then truncate to the correct register size.
3490          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
3491
3492        ++GPR_idx;
3493      } else {
3494        needsLoad = true;
3495        ArgSize = PtrByteSize;
3496      }
3497      // All int arguments reserve stack space in the Darwin ABI.
3498      ArgOffset += 8;
3499      break;
3500
3501    case MVT::f32:
3502    case MVT::f64:
3503      // Every 4 bytes of argument space consumes one of the GPRs available for
3504      // argument passing.
3505      if (GPR_idx != Num_GPR_Regs) {
3506        ++GPR_idx;
3507        if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
3508          ++GPR_idx;
3509      }
3510      if (FPR_idx != Num_FPR_Regs) {
3511        unsigned VReg;
3512
3513        if (ObjectVT == MVT::f32)
3514          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
3515        else
3516          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
3517
3518        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3519        ++FPR_idx;
3520      } else {
3521        needsLoad = true;
3522      }
3523
3524      // All FP arguments reserve stack space in the Darwin ABI.
3525      ArgOffset += isPPC64 ? 8 : ObjSize;
3526      break;
3527    case MVT::v4f32:
3528    case MVT::v4i32:
3529    case MVT::v8i16:
3530    case MVT::v16i8:
3531      // Note that vector arguments in registers don't reserve stack space,
3532      // except in varargs functions.
3533      if (VR_idx != Num_VR_Regs) {
3534        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
3535        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3536        if (isVarArg) {
3537          while ((ArgOffset % 16) != 0) {
3538            ArgOffset += PtrByteSize;
3539            if (GPR_idx != Num_GPR_Regs)
3540              GPR_idx++;
3541          }
3542          ArgOffset += 16;
3543          GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
3544        }
3545        ++VR_idx;
3546      } else {
3547        if (!isVarArg && !isPPC64) {
3548          // Vectors go after all the nonvectors.
3549          CurArgOffset = VecArgOffset;
3550          VecArgOffset += 16;
3551        } else {
3552          // Vectors are aligned.
3553          ArgOffset = ((ArgOffset+15)/16)*16;
3554          CurArgOffset = ArgOffset;
3555          ArgOffset += 16;
3556        }
3557        needsLoad = true;
3558      }
3559      break;
3560    }
3561
3562    // We need to load the argument to a virtual register if we determined above
3563    // that we ran out of physical registers of the appropriate type.
3564    if (needsLoad) {
3565      int FI = MFI->CreateFixedObject(ObjSize,
3566                                      CurArgOffset + (ArgSize - ObjSize),
3567                                      isImmutable);
3568      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3569      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
3570                           false, false, false, 0);
3571    }
3572
3573    InVals.push_back(ArgVal);
3574  }
3575
3576  // Allow for Altivec parameters at the end, if needed.
3577  if (nAltivecParamsAtEnd) {
3578    MinReservedArea = ((MinReservedArea+15)/16)*16;
3579    MinReservedArea += 16*nAltivecParamsAtEnd;
3580  }
3581
3582  // Area that is at least reserved in the caller of this function.
3583  MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
3584
3585  // Set the size that is at least reserved in caller of this function.  Tail
3586  // call optimized functions' reserved stack space needs to be aligned so that
3587  // taking the difference between two stack areas will result in an aligned
3588  // stack.
3589  MinReservedArea =
3590      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3591  FuncInfo->setMinReservedArea(MinReservedArea);
3592
3593  // If the function takes a variable number of arguments, make a frame index for
3594  // the start of the first vararg value... for expansion of llvm.va_start.
3595  if (isVarArg) {
3596    int Depth = ArgOffset;
3597
3598    FuncInfo->setVarArgsFrameIndex(
3599      MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
3600                             Depth, true));
3601    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3602
3603    // If this function is vararg, store any remaining integer argument regs
3604    // to their spots on the stack so that they may be loaded by dereferencing the
3605    // result of va_next.
3606    for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
3607      unsigned VReg;
3608
3609      if (isPPC64)
3610        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3611      else
3612        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
3613
3614      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3615      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3616                                   MachinePointerInfo(), false, false, 0);
3617      MemOps.push_back(Store);
3618      // Increment the address by the pointer size for the next argument to store
3619      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
3620      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3621    }
3622  }
3623
3624  if (!MemOps.empty())
3625    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3626
3627  return Chain;
3628}
3629
3630/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
3631/// adjusted to accommodate the arguments for the tailcall.
3632static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
3633                                   unsigned ParamSize) {
3634
3635  if (!isTailCall) return 0;
3636
3637  PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
3638  unsigned CallerMinReservedArea = FI->getMinReservedArea();
3639  int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
3640  // Remember only if the new adjustment is bigger.
3641  if (SPDiff < FI->getTailCallSPDelta())
3642    FI->setTailCallSPDelta(SPDiff);
3643
3644  return SPDiff;
3645}
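
// Illustrative values (not from the source): if the caller reserved 112 bytes
// and the callee's parameters need 144, SPDiff == -32, so the stack pointer
// must move down by 32 bytes before the tail call; only the most negative
// delta seen so far is recorded.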
3646
3647/// IsEligibleForTailCallOptimization - Check whether the call is eligible
3648/// for tail call optimization. Targets which want to do tail call
3649/// optimization should implement this function.
3650bool
3651PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
3652                                                     CallingConv::ID CalleeCC,
3653                                                     bool isVarArg,
3654                                      const SmallVectorImpl<ISD::InputArg> &Ins,
3655                                                     SelectionDAG& DAG) const {
3656  if (!getTargetMachine().Options.GuaranteedTailCallOpt)
3657    return false;
3658
3659  // Variable argument functions are not supported.
3660  if (isVarArg)
3661    return false;
3662
3663  MachineFunction &MF = DAG.getMachineFunction();
3664  CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
3665  if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
3666    // Functions containing byval parameters are not supported.
3667    for (unsigned i = 0; i != Ins.size(); i++) {
3668       ISD::ArgFlagsTy Flags = Ins[i].Flags;
3669       if (Flags.isByVal()) return false;
3670    }
3671
3672    // Non-PIC/GOT tail calls are supported.
3673    if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
3674      return true;
3675
3676    // At the moment we can only do local tail calls (in same module, hidden
3677    // or protected) if we are generating PIC.
3678    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
3679      return G->getGlobal()->hasHiddenVisibility()
3680          || G->getGlobal()->hasProtectedVisibility();
3681  }
3682
3683  return false;
3684}
3685
3686/// isBLACompatibleAddress - Return the immediate to use if the specified
3687/// 32-bit value is representable in the immediate field of a BxA instruction.
3688static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
3689  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
3690  if (!C) return nullptr;
3691
3692  int Addr = C->getZExtValue();
3693  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
3694      SignExtend32<26>(Addr) != Addr)
3695    return nullptr;  // Addr must be a sign-extended 26-bit value.
3696
3697  return DAG.getConstant((int)C->getZExtValue() >> 2,
3698                         DAG.getTargetLoweringInfo().getPointerTy()).getNode();
3699}
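
// Illustrative encoding (example address, not from the source): a target at
// absolute address 0x1000 has its low 2 bits clear and survives
// SignExtend32<26>, so the returned immediate is 0x1000 >> 2 == 0x400.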
3700
3701namespace {
3702
3703struct TailCallArgumentInfo {
3704  SDValue Arg;
3705  SDValue FrameIdxOp;
3706  int       FrameIdx;
3707
3708  TailCallArgumentInfo() : FrameIdx(0) {}
3709};
3710
3711}
3712
3713/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
3714static void
3715StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
3716                                           SDValue Chain,
3717                   const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
3718                   SmallVectorImpl<SDValue> &MemOpChains,
3719                   SDLoc dl) {
3720  for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
3721    SDValue Arg = TailCallArgs[i].Arg;
3722    SDValue FIN = TailCallArgs[i].FrameIdxOp;
3723    int FI = TailCallArgs[i].FrameIdx;
3724    // Store relative to the frame pointer.
3725    MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN,
3726                                       MachinePointerInfo::getFixedStack(FI),
3727                                       false, false, 0));
3728  }
3729}
3730
3731/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
3732/// the appropriate stack slot for the tail call optimized function call.
3733static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
3734                                               MachineFunction &MF,
3735                                               SDValue Chain,
3736                                               SDValue OldRetAddr,
3737                                               SDValue OldFP,
3738                                               int SPDiff,
3739                                               bool isPPC64,
3740                                               bool isDarwinABI,
3741                                               SDLoc dl) {
3742  if (SPDiff) {
3743    // Calculate the new stack slot for the return address.
3744    int SlotSize = isPPC64 ? 8 : 4;
3745    const PPCFrameLowering *FL =
3746        MF.getSubtarget<PPCSubtarget>().getFrameLowering();
3747    int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
3748    int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
3749                                                          NewRetAddrLoc, true);
3750    EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
3751    SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
3752    Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
3753                         MachinePointerInfo::getFixedStack(NewRetAddr),
3754                         false, false, 0);
3755
3756    // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
3757    // slot as the FP is never overwritten.
3758    if (isDarwinABI) {
3759      int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset();
3760      int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc,
3761                                                          true);
3762      SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
3763      Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
3764                           MachinePointerInfo::getFixedStack(NewFPIdx),
3765                           false, false, 0);
3766    }
3767  }
3768  return Chain;
3769}
3770
3771/// CalculateTailCallArgDest - Remember the argument for later processing, and
3772/// calculate its position on the stack.
3773static void
3774CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
3775                         SDValue Arg, int SPDiff, unsigned ArgOffset,
3776                     SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
3777  int Offset = ArgOffset + SPDiff;
3778  uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
3779  int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
3780  EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
3781  SDValue FIN = DAG.getFrameIndex(FI, VT);
3782  TailCallArgumentInfo Info;
3783  Info.Arg = Arg;
3784  Info.FrameIdxOp = FIN;
3785  Info.FrameIdx = FI;
3786  TailCallArguments.push_back(Info);
3787}
3788
3789/// EmitTailCallLoadFPAndRetAddr - Emit loads of the return address and frame
3790/// pointer stack slots.  Returns the chain as result and the loaded values in
3791/// LROpOut/FPOpOut.  Used when tail calling.
3792SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
3793                                                        int SPDiff,
3794                                                        SDValue Chain,
3795                                                        SDValue &LROpOut,
3796                                                        SDValue &FPOpOut,
3797                                                        bool isDarwinABI,
3798                                                        SDLoc dl) const {
3799  if (SPDiff) {
3800    // Load the LR and FP stack slot for later adjusting.
3801    EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
3802    LROpOut = getReturnAddrFrameIndex(DAG);
3803    LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(),
3804                          false, false, false, 0);
3805    Chain = SDValue(LROpOut.getNode(), 1);
3806
3807    // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
3808    // slot as the FP is never overwritten.
3809    if (isDarwinABI) {
3810      FPOpOut = getFramePointerFrameIndex(DAG);
3811      FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(),
3812                            false, false, false, 0);
3813      Chain = SDValue(FPOpOut.getNode(), 1);
3814    }
3815  }
3816  return Chain;
3817}
3818
3819/// CreateCopyOfByValArgument - Make a copy of an aggregate at the address
3820/// specified by "Src" to address "Dst".  The size and alignment come from the
3821/// byval parameter attribute (Flags).  The copy will be passed as
3822/// a byval function parameter.
3823/// Sometimes what we are copying is the end of a larger object, the part that
3824/// does not fit in registers.
3825static SDValue
3826CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
3827                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
3828                          SDLoc dl) {
3829  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
3830  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
3831                       false, false, MachinePointerInfo(),
3832                       MachinePointerInfo());
3833}
3834
/// LowerMemOpCallTo - Store the argument to the stack, or remember it in the
/// case of a tail call.
3837static void
3838LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
3839                 SDValue Arg, SDValue PtrOff, int SPDiff,
3840                 unsigned ArgOffset, bool isPPC64, bool isTailCall,
3841                 bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
3842                 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments,
3843                 SDLoc dl) {
3844  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
3845  if (!isTailCall) {
3846    if (isVector) {
3847      SDValue StackPtr;
3848      if (isPPC64)
3849        StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
3850      else
3851        StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
3852      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
3853                           DAG.getConstant(ArgOffset, PtrVT));
3854    }
3855    MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
3856                                       MachinePointerInfo(), false, false, 0));
  } else {
    // Calculate and remember the argument location.
    CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
                             TailCallArguments);
  }
3860}
3861
3862static
3863void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
3864                     SDLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
3865                     SDValue LROp, SDValue FPOp, bool isDarwinABI,
3866                     SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
3867  MachineFunction &MF = DAG.getMachineFunction();
3868
3869  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
3870  // might overwrite each other in case of tail call optimization.
3871  SmallVector<SDValue, 8> MemOpChains2;
3872  // Do not flag preceding copytoreg stuff together with the following stuff.
3873  InFlag = SDValue();
3874  StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
3875                                    MemOpChains2, dl);
3876  if (!MemOpChains2.empty())
3877    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
3878
3879  // Store the return address to the appropriate stack slot.
3880  Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
3881                                        isPPC64, isDarwinABI, dl);
3882
3883  // Emit callseq_end just before tailcall node.
3884  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
3885                             DAG.getIntPtrConstant(0, true), InFlag, dl);
3886  InFlag = Chain.getValue(1);
3887}
3888
// Is this global address that of a function that can be called by name (as
// opposed to something that must hold a descriptor for an indirect call)?
3891static bool isFunctionGlobalAddress(SDValue Callee) {
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // A call to a TLS address is actually an indirect call to a
    // thread-specific pointer.
    if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
        Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
      return false;
3896
3897    return G->getGlobal()->getType()->getElementType()->isFunctionTy();
3898  }
3899
3900  return false;
3901}
3902
3903static
3904unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
3905                     SDValue &Chain, SDValue CallSeqStart, SDLoc dl, int SPDiff,
3906                     bool isTailCall, bool IsPatchPoint,
3907                     SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass,
3908                     SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
3909                     ImmutableCallSite *CS, const PPCSubtarget &Subtarget) {
3910
3911  bool isPPC64 = Subtarget.isPPC64();
3912  bool isSVR4ABI = Subtarget.isSVR4ABI();
3913  bool isELFv2ABI = Subtarget.isELFv2ABI();
3914
3915  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
3916  NodeTys.push_back(MVT::Other);   // Returns a chain
3917  NodeTys.push_back(MVT::Glue);    // Returns a flag for retval copy to use.
3918
3919  unsigned CallOpc = PPCISD::CALL;
3920
3921  bool needIndirectCall = true;
3922  if (!isSVR4ABI || !isPPC64)
3923    if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
3924      // If this is an absolute destination address, use the munged value.
3925      Callee = SDValue(Dest, 0);
3926      needIndirectCall = false;
3927    }
3928
3929  if (isFunctionGlobalAddress(Callee)) {
3930    GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
    unsigned OpFlags = 0;
3934    if ((DAG.getTarget().getRelocationModel() != Reloc::Static &&
3935         (Subtarget.getTargetTriple().isMacOSX() &&
3936          Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
3937         (G->getGlobal()->isDeclaration() ||
3938          G->getGlobal()->isWeakForLinker())) ||
3939        (Subtarget.isTargetELF() && !isPPC64 &&
3940         !G->getGlobal()->hasLocalLinkage() &&
3941         DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
      // PC-relative references to external symbols should go through $stub,
      // unless we're building with the Leopard linker or later, which
      // automatically synthesizes these stubs.
3945      OpFlags = PPCII::MO_PLT_OR_STUB;
3946    }
3947
    // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
    // every direct call is), turn it into a TargetGlobalAddress /
    // TargetExternalSymbol node so that legalize doesn't hack it.
3951    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
3952                                        Callee.getValueType(), 0, OpFlags);
3953    needIndirectCall = false;
3954  }
3955
3956  if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
3957    unsigned char OpFlags = 0;
3958
3959    if ((DAG.getTarget().getRelocationModel() != Reloc::Static &&
3960         (Subtarget.getTargetTriple().isMacOSX() &&
3961          Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) ||
3962        (Subtarget.isTargetELF() && !isPPC64 &&
3963         DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
      // PC-relative references to external symbols should go through $stub,
      // unless we're building with the Leopard linker or later, which
      // automatically synthesizes these stubs.
3967      OpFlags = PPCII::MO_PLT_OR_STUB;
3968    }
3969
3970    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
3971                                         OpFlags);
3972    needIndirectCall = false;
3973  }
3974
3975  if (IsPatchPoint) {
3976    // We'll form an invalid direct call when lowering a patchpoint; the full
3977    // sequence for an indirect call is complicated, and many of the
3978    // instructions introduced might have side effects (and, thus, can't be
3979    // removed later). The call itself will be removed as soon as the
3980    // argument/return lowering is complete, so the fact that it has the wrong
3981    // kind of operands should not really matter.
3982    needIndirectCall = false;
3983  }
3984
3985  if (needIndirectCall) {
    // Otherwise, this is an indirect call.  We have to use a MTCTR/BCTRL pair
    // to do the call; we can't use PPCISD::CALL.
3988    SDValue MTCTROps[] = {Chain, Callee, InFlag};
3989
3990    if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
3991      // Function pointers in the 64-bit SVR4 ABI do not point to the function
3992      // entry point, but to the function descriptor (the function entry point
3993      // address is part of the function descriptor though).
3994      // The function descriptor is a three doubleword structure with the
3995      // following fields: function entry point, TOC base address and
3996      // environment pointer.
3997      // Thus for a call through a function pointer, the following actions need
3998      // to be performed:
3999      //   1. Save the TOC of the caller in the TOC save area of its stack
4000      //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
4001      //   2. Load the address of the function entry point from the function
4002      //      descriptor.
4003      //   3. Load the TOC of the callee from the function descriptor into r2.
4004      //   4. Load the environment pointer from the function descriptor into
4005      //      r11.
4006      //   5. Branch to the function entry point address.
4007      //   6. On return of the callee, the TOC of the caller needs to be
4008      //      restored (this is done in FinishCall()).
4009      //
4010      // The loads are scheduled at the beginning of the call sequence, and the
4011      // register copies are flagged together to ensure that no other
4012      // operations can be scheduled in between. E.g. without flagging the
4013      // copies together, a TOC access in the caller could be scheduled between
4014      // the assignment of the callee TOC and the branch to the callee, which
4015      // results in the TOC access going through the TOC of the callee instead
4016      // of going through the TOC of the caller, which leads to incorrect code.
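      //
      // For illustration only (the exact schedule may differ), the code
      // eventually emitted for steps 2-6 looks roughly like:
      //   ld    r0, 0(fnptr)    # entry point
      //   ld    r2, 8(fnptr)    # callee TOC base
      //   ld    r11, 16(fnptr)  # environment pointer
      //   mtctr r0
      //   bctrl
      //   ld    r2, 40(r1)      # restore caller TOC from its save slot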
4017
4018      // Load the address of the function entry point from the function
4019      // descriptor.
4020      SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1);
4021      if (LDChain.getValueType() == MVT::Glue)
4022        LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2);
4023
4024      bool LoadsInv = Subtarget.hasInvariantFunctionDescriptors();
4025
4026      MachinePointerInfo MPI(CS ? CS->getCalledValue() : nullptr);
4027      SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
4028                                        false, false, LoadsInv, 8);
4029
4030      // Load environment pointer into r11.
4031      SDValue PtrOff = DAG.getIntPtrConstant(16);
4032      SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
4033      SDValue LoadEnvPtr = DAG.getLoad(MVT::i64, dl, LDChain, AddPtr,
4034                                       MPI.getWithOffset(16), false, false,
4035                                       LoadsInv, 8);
4036
4037      SDValue TOCOff = DAG.getIntPtrConstant(8);
4038      SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
4039      SDValue TOCPtr = DAG.getLoad(MVT::i64, dl, LDChain, AddTOC,
4040                                   MPI.getWithOffset(8), false, false,
4041                                   LoadsInv, 8);
4042
4043      setUsesTOCBasePtr(DAG);
4044      SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr,
4045                                        InFlag);
4046      Chain = TOCVal.getValue(0);
4047      InFlag = TOCVal.getValue(1);
4048
4049      SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
4050                                        InFlag);
4051
4052      Chain = EnvVal.getValue(0);
4053      InFlag = EnvVal.getValue(1);
4054
4055      MTCTROps[0] = Chain;
4056      MTCTROps[1] = LoadFuncPtr;
4057      MTCTROps[2] = InFlag;
4058    }
4059
4060    Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
4061                        makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
4062    InFlag = Chain.getValue(1);
4063
4064    NodeTys.clear();
4065    NodeTys.push_back(MVT::Other);
4066    NodeTys.push_back(MVT::Glue);
4067    Ops.push_back(Chain);
4068    CallOpc = PPCISD::BCTRL;
4069    Callee.setNode(nullptr);
4070    // Add use of X11 (holding environment pointer)
4071    if (isSVR4ABI && isPPC64 && !isELFv2ABI)
4072      Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
4073    // Add CTR register as callee so a bctr can be emitted later.
4074    if (isTailCall)
4075      Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
4076  }
4077
4078  // If this is a direct call, pass the chain and the callee.
4079  if (Callee.getNode()) {
4080    Ops.push_back(Chain);
4081    Ops.push_back(Callee);
4082  }
  // If this is a tail call, add the stack pointer delta.
4084  if (isTailCall)
4085    Ops.push_back(DAG.getConstant(SPDiff, MVT::i32));
4086
4087  // Add argument registers to the end of the list so that they are known live
4088  // into the call.
4089  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
4090    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
4091                                  RegsToPass[i].second.getValueType()));
4092
4093  // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live
4094  // into the call.
4095  if (isSVR4ABI && isPPC64 && !IsPatchPoint) {
4096    setUsesTOCBasePtr(DAG);
4097    Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
4098  }
4099
4100  return CallOpc;
4101}
4102
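// A call is "local" if it targets a definition in the current module that is
// not weak for the linker; only for such callees can we assume that the
// caller's TOC remains valid, so no TOC-restoring NOP is needed after the
// call (see FinishCall below).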
static bool isLocalCall(const SDValue &Callee) {
4106  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4107    return !G->getGlobal()->isDeclaration() &&
4108           !G->getGlobal()->isWeakForLinker();
4109  return false;
4110}
4111
4112SDValue
4113PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
4114                                   CallingConv::ID CallConv, bool isVarArg,
4115                                   const SmallVectorImpl<ISD::InputArg> &Ins,
4116                                   SDLoc dl, SelectionDAG &DAG,
4117                                   SmallVectorImpl<SDValue> &InVals) const {
4118
4119  SmallVector<CCValAssign, 16> RVLocs;
4120  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
4121                    *DAG.getContext());
4122  CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);
4123
4124  // Copy all of the result registers out of their specified physreg.
4125  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4126    CCValAssign &VA = RVLocs[i];
4127    assert(VA.isRegLoc() && "Can only return in registers!");
4128
4129    SDValue Val = DAG.getCopyFromReg(Chain, dl,
4130                                     VA.getLocReg(), VA.getLocVT(), InFlag);
4131    Chain = Val.getValue(1);
4132    InFlag = Val.getValue(2);
4133
4134    switch (VA.getLocInfo()) {
4135    default: llvm_unreachable("Unknown loc info!");
4136    case CCValAssign::Full: break;
4137    case CCValAssign::AExt:
4138      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4139      break;
4140    case CCValAssign::ZExt:
4141      Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
4142                        DAG.getValueType(VA.getValVT()));
4143      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4144      break;
4145    case CCValAssign::SExt:
4146      Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
4147                        DAG.getValueType(VA.getValVT()));
4148      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4149      break;
4150    }
4151
4152    InVals.push_back(Val);
4153  }
4154
4155  return Chain;
4156}
4157
4158SDValue
4159PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
4160                              bool isTailCall, bool isVarArg, bool IsPatchPoint,
4161                              SelectionDAG &DAG,
4162                              SmallVector<std::pair<unsigned, SDValue>, 8>
4163                                &RegsToPass,
4164                              SDValue InFlag, SDValue Chain,
4165                              SDValue CallSeqStart, SDValue &Callee,
4166                              int SPDiff, unsigned NumBytes,
4167                              const SmallVectorImpl<ISD::InputArg> &Ins,
4168                              SmallVectorImpl<SDValue> &InVals,
4169                              ImmutableCallSite *CS) const {
4170
4171  std::vector<EVT> NodeTys;
4172  SmallVector<SDValue, 8> Ops;
4173  unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
4174                                 SPDiff, isTailCall, IsPatchPoint, RegsToPass,
4175                                 Ops, NodeTys, CS, Subtarget);
4176
4177  // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
4178  if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
4179    Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
4180
4181  // When performing tail call optimization the callee pops its arguments off
4182  // the stack. Account for this here so these bytes can be pushed back on in
4183  // PPCFrameLowering::eliminateCallFramePseudoInstr.
4184  int BytesCalleePops =
4185    (CallConv == CallingConv::Fast &&
4186     getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
4187
4188  // Add a register mask operand representing the call-preserved registers.
4189  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4190  const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
4191  assert(Mask && "Missing call preserved mask for calling convention");
4192  Ops.push_back(DAG.getRegisterMask(Mask));
4193
4194  if (InFlag.getNode())
4195    Ops.push_back(InFlag);
4196
4197  // Emit tail call.
4198  if (isTailCall) {
4199    assert(((Callee.getOpcode() == ISD::Register &&
4200             cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
4201            Callee.getOpcode() == ISD::TargetExternalSymbol ||
4202            Callee.getOpcode() == ISD::TargetGlobalAddress ||
4203            isa<ConstantSDNode>(Callee)) &&
           "Expecting a global address, external symbol, absolute value or "
           "register");
4205
4206    return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
4207  }
4208
  // Add a NOP immediately after the branch instruction when using the 64-bit
  // SVR4 ABI. At link time, if the caller and callee are in different modules
  // and thus have different TOCs, the call will be replaced with a call to a
  // stub function which saves the current TOC, loads the TOC of the callee,
  // and branches to the callee. The NOP will be replaced with a load
  // instruction which restores the TOC of the caller from the TOC save slot
  // of the current stack frame. If the caller and callee belong to the same
  // module (and have the same TOC), the NOP will remain unchanged.
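  //
  // For illustration, a cross-module call emitted as
  //   bl callee
  //   nop
  // may be rewritten by the linker into
  //   bl <toc-switching stub>
  //   ld r2, 40(r1)   # reload the caller's TOC: offset 40 under ELFv1,
  //                   # 24 under ELFv2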
4217
  if (!isTailCall && Subtarget.isSVR4ABI() && Subtarget.isPPC64() &&
4219      !IsPatchPoint) {
4220    if (CallOpc == PPCISD::BCTRL) {
4221      // This is a call through a function pointer.
      // Restore the caller's TOC from the save area into R2.
4223      // See PrepareCall() for more information about calls through function
4224      // pointers in the 64-bit SVR4 ABI.
4225      // We are using a target-specific load with r2 hard coded, because the
4226      // result of a target-independent load would never go directly into r2,
4227      // since r2 is a reserved register (which prevents the register allocator
4228      // from allocating it), resulting in an additional register being
4229      // allocated and an unnecessary move instruction being generated.
4230      CallOpc = PPCISD::BCTRL_LOAD_TOC;
4231
4232      EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
4233      SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
4234      unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
4235      SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset);
4236      SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
4237
4238      // The address needs to go after the chain input but before the flag (or
4239      // any other variadic arguments).
4240      Ops.insert(std::next(Ops.begin()), AddTOC);
4241    } else if ((CallOpc == PPCISD::CALL) &&
4242               (!isLocalCall(Callee) ||
4243                DAG.getTarget().getRelocationModel() == Reloc::PIC_))
4244      // Otherwise insert NOP for non-local calls.
4245      CallOpc = PPCISD::CALL_NOP;
4246  }
4247
4248  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
4249  InFlag = Chain.getValue(1);
4250
4251  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
4252                             DAG.getIntPtrConstant(BytesCalleePops, true),
4253                             InFlag, dl);
4254  if (!Ins.empty())
4255    InFlag = Chain.getValue(1);
4256
4257  return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
4258                         Ins, dl, DAG, InVals);
4259}
4260
4261SDValue
4262PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
4263                             SmallVectorImpl<SDValue> &InVals) const {
4264  SelectionDAG &DAG                     = CLI.DAG;
4265  SDLoc &dl                             = CLI.DL;
4266  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
4267  SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
4268  SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
4269  SDValue Chain                         = CLI.Chain;
4270  SDValue Callee                        = CLI.Callee;
4271  bool &isTailCall                      = CLI.IsTailCall;
4272  CallingConv::ID CallConv              = CLI.CallConv;
4273  bool isVarArg                         = CLI.IsVarArg;
4274  bool IsPatchPoint                     = CLI.IsPatchPoint;
4275  ImmutableCallSite *CS                 = CLI.CS;
4276
4277  if (isTailCall)
4278    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
4279                                                   Ins, DAG);
4280
4281  if (!isTailCall && CS && CS->isMustTailCall())
4282    report_fatal_error("failed to perform tail call elimination on a call "
4283                       "site marked musttail");
4284
4285  if (Subtarget.isSVR4ABI()) {
4286    if (Subtarget.isPPC64())
4287      return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
4288                              isTailCall, IsPatchPoint, Outs, OutVals, Ins,
4289                              dl, DAG, InVals, CS);
4290    else
4291      return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
4292                              isTailCall, IsPatchPoint, Outs, OutVals, Ins,
4293                              dl, DAG, InVals, CS);
4294  }
4295
4296  return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
4297                          isTailCall, IsPatchPoint, Outs, OutVals, Ins,
4298                          dl, DAG, InVals, CS);
4299}
4300
4301SDValue
4302PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
4303                                    CallingConv::ID CallConv, bool isVarArg,
4304                                    bool isTailCall, bool IsPatchPoint,
4305                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
4306                                    const SmallVectorImpl<SDValue> &OutVals,
4307                                    const SmallVectorImpl<ISD::InputArg> &Ins,
4308                                    SDLoc dl, SelectionDAG &DAG,
4309                                    SmallVectorImpl<SDValue> &InVals,
4310                                    ImmutableCallSite *CS) const {
4311  // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
4312  // of the 32-bit SVR4 ABI stack frame layout.
4313
4314  assert((CallConv == CallingConv::C ||
4315          CallConv == CallingConv::Fast) && "Unknown calling convention!");
4316
4317  unsigned PtrByteSize = 4;
4318
4319  MachineFunction &MF = DAG.getMachineFunction();
4320
  // Mark this function as potentially containing a tail call. As a
  // consequence, the frame pointer will be used for dynamic stack allocation
  // and for restoring the caller's stack pointer in this function's epilog.
  // This is done because the tail-called function might overwrite the value
  // in this function's (MF) stack pointer stack slot 0(SP).
4326  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
4327      CallConv == CallingConv::Fast)
4328    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
4329
  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, the parameter list area, and the part of the local variable space
  // that contains copies of aggregates passed by value.
4333
4334  // Assign locations to all of the outgoing arguments.
4335  SmallVector<CCValAssign, 16> ArgLocs;
4336  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4337                 *DAG.getContext());
4338
4339  // Reserve space for the linkage area on the stack.
4340  CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
4341                       PtrByteSize);
4342
4343  if (isVarArg) {
4344    // Handle fixed and variable vector arguments differently.
4345    // Fixed vector arguments go into registers as long as registers are
4346    // available. Variable vector arguments always go into memory.
4347    unsigned NumArgs = Outs.size();
4348
4349    for (unsigned i = 0; i != NumArgs; ++i) {
4350      MVT ArgVT = Outs[i].VT;
4351      ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
4352      bool Result;
4353
4354      if (Outs[i].IsFixed) {
4355        Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
4356                               CCInfo);
4357      } else {
4358        Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
4359                                      ArgFlags, CCInfo);
4360      }
4361
4362      if (Result) {
4363#ifndef NDEBUG
        errs() << "Call operand #" << i << " has unhandled type "
               << EVT(ArgVT).getEVTString() << "\n";
4366#endif
4367        llvm_unreachable(nullptr);
4368      }
4369    }
4370  } else {
4371    // All arguments are treated the same.
4372    CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
4373  }
4374
4375  // Assign locations to all of the outgoing aggregate by value arguments.
4376  SmallVector<CCValAssign, 16> ByValArgLocs;
4377  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4378                      ByValArgLocs, *DAG.getContext());
4379
4380  // Reserve stack space for the allocations in CCInfo.
4381  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
4382
4383  CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
4384
  // Size of the linkage area, the parameter list area, and the part of the
  // local variable space where copies of aggregates passed by value are
  // stored.
4388  unsigned NumBytes = CCByValInfo.getNextStackOffset();
4389
4390  // Calculate by how many bytes the stack has to be adjusted in case of tail
4391  // call optimization.
4392  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
4393
4394  // Adjust the stack pointer for the new arguments...
4395  // These operations are automatically eliminated by the prolog/epilog pass
4396  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
4397                               dl);
4398  SDValue CallSeqStart = Chain;
4399
  // Load the return address and frame pointer so they can be moved somewhere
  // else later.
4402  SDValue LROp, FPOp;
4403  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false,
4404                                       dl);
4405
  // Set up a copy of the stack pointer for use when loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
4409  SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
4410
4411  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
4412  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
4413  SmallVector<SDValue, 8> MemOpChains;
4414
4415  bool seenFloatArg = false;
4416  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
4420    CCValAssign &VA = ArgLocs[i];
4421    SDValue Arg = OutVals[i];
4422    ISD::ArgFlagsTy Flags = Outs[i].Flags;
4423
4424    if (Flags.isByVal()) {
4425      // Argument is an aggregate which is passed by value, thus we need to
4426      // create a copy of it in the local variable space of the current stack
4427      // frame (which is the stack frame of the caller) and pass the address of
4428      // this copy to the callee.
4429      assert((j < ByValArgLocs.size()) && "Index out of bounds!");
4430      CCValAssign &ByValVA = ByValArgLocs[j++];
4431      assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
4432
      // Memory reserved in the local variable space of the caller's stack
      // frame.
4434      unsigned LocMemOffset = ByValVA.getLocMemOffset();
4435
4436      SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
4437      PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
4438
4439      // Create a copy of the argument in the local area of the current
4440      // stack frame.
4441      SDValue MemcpyCall =
4442        CreateCopyOfByValArgument(Arg, PtrOff,
4443                                  CallSeqStart.getNode()->getOperand(0),
4444                                  Flags, DAG, dl);
4445
4446      // This must go outside the CALLSEQ_START..END.
4447      SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
4448                           CallSeqStart.getNode()->getOperand(1),
4449                           SDLoc(MemcpyCall));
4450      DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
4451                             NewCallSeqStart.getNode());
4452      Chain = CallSeqStart = NewCallSeqStart;
4453
4454      // Pass the address of the aggregate copy on the stack either in a
4455      // physical register or in the parameter list area of the current stack
4456      // frame to the callee.
4457      Arg = PtrOff;
4458    }
4459
4460    if (VA.isRegLoc()) {
4461      if (Arg.getValueType() == MVT::i1)
4462        Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg);
4463
4464      seenFloatArg |= VA.getLocVT().isFloatingPoint();
4465      // Put argument in a physical register.
4466      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
4467    } else {
4468      // Put argument in the parameter list area of the current stack frame.
4469      assert(VA.isMemLoc());
4470      unsigned LocMemOffset = VA.getLocMemOffset();
4471
4472      if (!isTailCall) {
4473        SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
4474        PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
4475
4476        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
4477                                           MachinePointerInfo(),
4478                                           false, false, 0));
4479      } else {
4480        // Calculate and remember argument location.
4481        CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
4482                                 TailCallArguments);
4483      }
4484    }
4485  }
4486
4487  if (!MemOpChains.empty())
4488    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
4489
4490  // Build a sequence of copy-to-reg nodes chained together with token chain
4491  // and flag operands which copy the outgoing args into the appropriate regs.
4492  SDValue InFlag;
4493  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
4494    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
4495                             RegsToPass[i].second, InFlag);
4496    InFlag = Chain.getValue(1);
4497  }
4498
4499  // Set CR bit 6 to true if this is a vararg call with floating args passed in
4500  // registers.
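  // (Per the 32-bit SVR4 ABI, the callee of a varargs function inspects CR
  // bit 6 to decide whether its prologue must save the floating-point
  // argument registers for va_arg.)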
4501  if (isVarArg) {
4502    SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
4503    SDValue Ops[] = { Chain, InFlag };
4504
4505    Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
4506                        dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
4507
4508    InFlag = Chain.getValue(1);
4509  }
4510
4511  if (isTailCall)
4512    PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
4513                    false, TailCallArguments);
4514
4515  return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
4516                    RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
4517                    NumBytes, Ins, InVals, CS);
4518}
4519
4520// Copy an argument into memory, being careful to do this outside the
4521// call sequence for the call to which the argument belongs.
4522SDValue
4523PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
4524                                              SDValue CallSeqStart,
4525                                              ISD::ArgFlagsTy Flags,
4526                                              SelectionDAG &DAG,
4527                                              SDLoc dl) const {
4528  SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
4529                        CallSeqStart.getNode()->getOperand(0),
4530                        Flags, DAG, dl);
4531  // The MEMCPY must go outside the CALLSEQ_START..END.
4532  SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
4533                             CallSeqStart.getNode()->getOperand(1),
4534                             SDLoc(MemcpyCall));
4535  DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
4536                         NewCallSeqStart.getNode());
4537  return NewCallSeqStart;
4538}
4539
4540SDValue
4541PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
4542                                    CallingConv::ID CallConv, bool isVarArg,
4543                                    bool isTailCall, bool IsPatchPoint,
4544                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
4545                                    const SmallVectorImpl<SDValue> &OutVals,
4546                                    const SmallVectorImpl<ISD::InputArg> &Ins,
4547                                    SDLoc dl, SelectionDAG &DAG,
4548                                    SmallVectorImpl<SDValue> &InVals,
4549                                    ImmutableCallSite *CS) const {
4550
4551  bool isELFv2ABI = Subtarget.isELFv2ABI();
4552  bool isLittleEndian = Subtarget.isLittleEndian();
4553  unsigned NumOps = Outs.size();
4554
4555  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
4556  unsigned PtrByteSize = 8;
4557
4558  MachineFunction &MF = DAG.getMachineFunction();
4559
  // Mark this function as potentially containing a tail call. As a
  // consequence, the frame pointer will be used for dynamic stack allocation
  // and for restoring the caller's stack pointer in this function's epilog.
  // This is done because the tail-called function might overwrite the value
  // in this function's (MF) stack pointer stack slot 0(SP).
4565  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
4566      CallConv == CallingConv::Fast)
4567    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
4568
4569  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4570         "fastcc not supported on varargs functions");
4571
  // Count how many bytes are to be pushed on the stack, including the linkage
  // area and the parameter passing area.  On ELFv1, the linkage area is 48
  // bytes of reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, it
  // is 32 bytes of reserved space for [SP][CR][LR][TOC].
4576  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4577  unsigned NumBytes = LinkageSize;
4578  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4579  unsigned &QFPR_idx = FPR_idx;
4580
4581  static const MCPhysReg GPR[] = {
4582    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4583    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4584  };
4585  static const MCPhysReg *FPR = GetFPR();
4586
4587  static const MCPhysReg VR[] = {
4588    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4589    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4590  };
4591  static const MCPhysReg VSRH[] = {
4592    PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
4593    PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
4594  };
4595
4596  static const MCPhysReg *QFPR = GetQFPR();
4597
4598  const unsigned NumGPRs = array_lengthof(GPR);
4599  const unsigned NumFPRs = 13;
4600  const unsigned NumVRs  = array_lengthof(VR);
4601  const unsigned NumQFPRs = NumFPRs;
4602
4603  // When using the fast calling convention, we don't provide backing for
4604  // arguments that will be in registers.
4605  unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
4606
4607  // Add up all the space actually used.
4608  for (unsigned i = 0; i != NumOps; ++i) {
4609    ISD::ArgFlagsTy Flags = Outs[i].Flags;
4610    EVT ArgVT = Outs[i].VT;
4611    EVT OrigVT = Outs[i].ArgVT;
4612
4613    if (CallConv == CallingConv::Fast) {
4614      if (Flags.isByVal())
4615        NumGPRsUsed += (Flags.getByValSize()+7)/8;
4616      else
4617        switch (ArgVT.getSimpleVT().SimpleTy) {
4618        default: llvm_unreachable("Unexpected ValueType for argument!");
4619        case MVT::i1:
4620        case MVT::i32:
4621        case MVT::i64:
4622          if (++NumGPRsUsed <= NumGPRs)
4623            continue;
4624          break;
4625        case MVT::v4i32:
4626        case MVT::v8i16:
4627        case MVT::v16i8:
4628        case MVT::v2f64:
4629        case MVT::v2i64:
4630          if (++NumVRsUsed <= NumVRs)
4631            continue;
4632          break;
4633        case MVT::v4f32:
          // When using QPX, this is handled like an FP register; otherwise,
          // it is an Altivec register.
4636          if (Subtarget.hasQPX()) {
4637            if (++NumFPRsUsed <= NumFPRs)
4638              continue;
4639          } else {
4640            if (++NumVRsUsed <= NumVRs)
4641              continue;
4642          }
4643          break;
4644        case MVT::f32:
4645        case MVT::f64:
4646        case MVT::v4f64: // QPX
4647        case MVT::v4i1:  // QPX
4648          if (++NumFPRsUsed <= NumFPRs)
4649            continue;
4650          break;
4651        }
4652    }
4653
    // Respect alignment of argument on the stack.
4655    unsigned Align =
4656      CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4657    NumBytes = ((NumBytes + Align - 1) / Align) * Align;
4658
4659    NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4660    if (Flags.isInConsecutiveRegsLast())
4661      NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4662  }
4663
4664  unsigned NumBytesActuallyUsed = NumBytes;
4665
  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if the callee
  // is varargs.  Because we cannot tell if this is needed on the caller side,
  // we have to conservatively assume that it is needed.  As such, make sure
  // we have at least enough stack space for the caller to store the 8 GPRs.
4671  // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
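  // (E.g., with the 48-byte ELFv1 linkage area this reserves at least
  // 48 + 8*8 = 112 bytes; with the 32-byte ELFv2 linkage area, at least 96.)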
4672  NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
4673
4674  // Tail call needs the stack to be aligned.
4675  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
4676      CallConv == CallingConv::Fast)
4677    NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
4678
4679  // Calculate by how many bytes the stack has to be adjusted in case of tail
4680  // call optimization.
4681  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
4682
4683  // To protect arguments on the stack from being clobbered in a tail call,
4684  // force all the loads to happen before doing any other lowering.
4685  if (isTailCall)
4686    Chain = DAG.getStackArgumentTokenFactor(Chain);
4687
4688  // Adjust the stack pointer for the new arguments...
4689  // These operations are automatically eliminated by the prolog/epilog pass
4690  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
4691                               dl);
4692  SDValue CallSeqStart = Chain;
4693
  // Load the return address and frame pointer so they can be moved somewhere
  // else later.
4696  SDValue LROp, FPOp;
4697  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
4698                                       dl);
4699
  // Set up a copy of the stack pointer for use when loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
4703  SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
4704
4705  // Figure out which arguments are going to go in registers, and which in
4706  // memory.  Also, if this is a vararg function, floating point operations
4707  // must be stored to our stack, and loaded into integer regs as well, if
4708  // any integer regs are available for argument passing.
4709  unsigned ArgOffset = LinkageSize;
4710
4711  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
4712  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
4713
4714  SmallVector<SDValue, 8> MemOpChains;
4715  for (unsigned i = 0; i != NumOps; ++i) {
4716    SDValue Arg = OutVals[i];
4717    ISD::ArgFlagsTy Flags = Outs[i].Flags;
4718    EVT ArgVT = Outs[i].VT;
4719    EVT OrigVT = Outs[i].ArgVT;
4720
4721    // PtrOff will be used to store the current argument to the stack if a
4722    // register cannot be found for it.
4723    SDValue PtrOff;
4724
    // We re-align the argument offset for each argument, except when using
    // the fast calling convention, in which case we do so only when the
    // argument will actually use a stack slot.
4728    auto ComputePtrOff = [&]() {
      // Respect alignment of argument on the stack.
4730      unsigned Align =
4731        CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4732      ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
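      // E.g., an argument requiring 16-byte alignment at ArgOffset 52 is
      // placed at offset 64.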
4733
4734      PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
4735
4736      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
4737    };
4738
4739    if (CallConv != CallingConv::Fast) {
4740      ComputePtrOff();
4741
      // Compute the GPR index associated with the argument offset.
4743      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4744      GPR_idx = std::min(GPR_idx, NumGPRs);
4745    }
4746
4747    // Promote integers to 64-bit values.
4748    if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
4749      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
4750      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
4751      Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
4752    }
4753
4754    // FIXME memcpy is used way more than necessary.  Correctness first.
4755    // Note: "by value" is code for passing a structure by value, not
4756    // basic types.
4757    if (Flags.isByVal()) {
4758      // Note: Size includes alignment padding, so
4759      //   struct x { short a; char b; }
4760      // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
4761      // These are the proper values we need for right-justifying the
4762      // aggregate in a parameter register.
4763      unsigned Size = Flags.getByValSize();
4764
4765      // An empty aggregate parameter takes up no storage and no
4766      // registers.
4767      if (Size == 0)
4768        continue;
4769
4770      if (CallConv == CallingConv::Fast)
4771        ComputePtrOff();
4772
4773      // All aggregates smaller than 8 bytes must be passed right-justified.
4774      if (Size==1 || Size==2 || Size==4) {
4775        EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
4776        if (GPR_idx != NumGPRs) {
4777          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
4778                                        MachinePointerInfo(), VT,
4779                                        false, false, false, 0);
4780          MemOpChains.push_back(Load.getValue(1));
4781          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
4782
4783          ArgOffset += PtrByteSize;
4784          continue;
4785        }
4786      }
4787
4788      if (GPR_idx == NumGPRs && Size < 8) {
4789        SDValue AddPtr = PtrOff;
4790        if (!isLittleEndian) {
4791          SDValue Const = DAG.getConstant(PtrByteSize - Size,
4792                                          PtrOff.getValueType());
4793          AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
4794        }
4795        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
4796                                                          CallSeqStart,
4797                                                          Flags, DAG, dl);
4798        ArgOffset += PtrByteSize;
4799        continue;
4800      }
4801      // Copy entire object into memory.  There are cases where gcc-generated
4802      // code assumes it is there, even if it could be put entirely into
4803      // registers.  (This is not what the doc says.)
4804
4805      // FIXME: The above statement is likely due to a misunderstanding of the
4806      // documents.  All arguments must be copied into the parameter area BY
4807      // THE CALLEE in the event that the callee takes the address of any
4808      // formal argument.  That has not yet been implemented.  However, it is
4809      // reasonable to use the stack area as a staging area for the register
4810      // load.
4811
4812      // Skip this for small aggregates, as we will use the same slot for a
4813      // right-justified copy, below.
4814      if (Size >= 8)
4815        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
4816                                                          CallSeqStart,
4817                                                          Flags, DAG, dl);
4818
4819      // When a register is available, pass a small aggregate right-justified.
4820      if (Size < 8 && GPR_idx != NumGPRs) {
4821        // The easiest way to get this right-justified in a register
4822        // is to copy the structure into the rightmost portion of a
4823        // local variable slot, then load the whole slot into the
4824        // register.
4825        // FIXME: The memcpy seems to produce pretty awful code for
4826        // small aggregates, particularly for packed ones.
4827        // FIXME: It would be preferable to use the slot in the
4828        // parameter save area instead of a new local variable.
4829        SDValue AddPtr = PtrOff;
4830        if (!isLittleEndian) {
4831          SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType());
4832          AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
4833        }
4834        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
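        // (E.g., a 3-byte aggregate on a big-endian target is copied to
        // offset 8 - 3 = 5 within its doubleword slot, so the 8-byte load
        // below leaves it right-justified in the register.)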
4835                                                          CallSeqStart,
4836                                                          Flags, DAG, dl);
4837
4838        // Load the slot into the register.
4839        SDValue Load = DAG.getLoad(PtrVT, dl, Chain, PtrOff,
4840                                   MachinePointerInfo(),
4841                                   false, false, false, 0);
4842        MemOpChains.push_back(Load.getValue(1));
4843        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
4844
4845        // Done with this argument.
4846        ArgOffset += PtrByteSize;
4847        continue;
4848      }
4849
4850      // For aggregates larger than PtrByteSize, copy the pieces of the
4851      // object that fit into registers from the parameter save area.
4852      for (unsigned j=0; j<Size; j+=PtrByteSize) {
4853        SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
4854        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
4855        if (GPR_idx != NumGPRs) {
4856          SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
4857                                     MachinePointerInfo(),
4858                                     false, false, false, 0);
4859          MemOpChains.push_back(Load.getValue(1));
4860          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
4861          ArgOffset += PtrByteSize;
4862        } else {
4863          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
4864          break;
4865        }
4866      }
4867      continue;
4868    }
4869
4870    switch (Arg.getSimpleValueType().SimpleTy) {
4871    default: llvm_unreachable("Unexpected ValueType for argument!");
4872    case MVT::i1:
4873    case MVT::i32:
4874    case MVT::i64:
4875      // These can be scalar arguments or elements of an integer array type
4876      // passed directly.  Clang may use those instead of "byval" aggregate
4877      // types to avoid forcing arguments to memory unnecessarily.
4878      if (GPR_idx != NumGPRs) {
4879        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
4880      } else {
4881        if (CallConv == CallingConv::Fast)
4882          ComputePtrOff();
4883
4884        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
4885                         true, isTailCall, false, MemOpChains,
4886                         TailCallArguments, dl);
4887        if (CallConv == CallingConv::Fast)
4888          ArgOffset += PtrByteSize;
4889      }
4890      if (CallConv != CallingConv::Fast)
4891        ArgOffset += PtrByteSize;
4892      break;
4893    case MVT::f32:
4894    case MVT::f64: {
4895      // These can be scalar arguments or elements of a float array type
4896      // passed directly.  The latter are used to implement ELFv2 homogenous
4897      // float aggregates.
4898
4899      // Named arguments go into FPRs first, and once they overflow, the
4900      // remaining arguments go into GPRs and then the parameter save area.
4901      // Unnamed arguments for vararg functions always go to GPRs and
      // then the parameter save area.  For now, we always pass arguments to
      // vararg routines in both locations (FPR *and* GPR or stack slot).
4904      bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
4905      bool NeededLoad = false;
4906
4907      // First load the argument into the next available FPR.
4908      if (FPR_idx != NumFPRs)
4909        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
4910
4911      // Next, load the argument into GPR or stack slot if needed.
4912      if (!NeedGPROrStack)
4913        ;
4914      else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) {
4915        // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4916        // once we support fp <-> gpr moves.
4917
4918        // In the non-vararg case, this can only ever happen in the
4919        // presence of f32 array types, since otherwise we never run
4920        // out of FPRs before running out of GPRs.
4921        SDValue ArgVal;
4922
4923        // Double values are always passed in a single GPR.
4924        if (Arg.getValueType() != MVT::f32) {
4925          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
4926
4927        // Non-array float values are extended and passed in a GPR.
4928        } else if (!Flags.isInConsecutiveRegs()) {
4929          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
4930          ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
4931
4932        // If we have an array of floats, we collect every odd element
4933        // together with its predecessor into one GPR.
4934        } else if (ArgOffset % PtrByteSize != 0) {
4935          SDValue Lo, Hi;
4936          Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
4937          Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
4938          if (!isLittleEndian)
4939            std::swap(Lo, Hi);
4940          ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
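          // (On big-endian targets, the swap above places the earlier,
          // even-index element in the high word of the GPR, matching the
          // array's in-memory layout.)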
4941
4942        // The final element, if even, goes into the first half of a GPR.
4943        } else if (Flags.isInConsecutiveRegsLast()) {
4944          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
4945          ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
4946          if (!isLittleEndian)
4947            ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
4948                                 DAG.getConstant(32, MVT::i32));
4949
        // Non-final even elements are skipped; they will be handled
        // together with the subsequent argument on the next go-around.
4952        } else
4953          ArgVal = SDValue();
4954
4955        if (ArgVal.getNode())
4956          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
4957      } else {
4958        if (CallConv == CallingConv::Fast)
4959          ComputePtrOff();
4960
4961        // Single-precision floating-point values are mapped to the
4962        // second (rightmost) word of the stack doubleword.
4963        if (Arg.getValueType() == MVT::f32 &&
4964            !isLittleEndian && !Flags.isInConsecutiveRegs()) {
4965          SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
4966          PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
4967        }
4968
4969        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
4970                         true, isTailCall, false, MemOpChains,
4971                         TailCallArguments, dl);
4972
4973        NeededLoad = true;
4974      }
4975      // When passing an array of floats, the array occupies consecutive
4976      // space in the argument area; only round up to the next doubleword
4977      // at the end of the array.  Otherwise, each float takes 8 bytes.
4978      if (CallConv != CallingConv::Fast || NeededLoad) {
4979        ArgOffset += (Arg.getValueType() == MVT::f32 &&
4980                      Flags.isInConsecutiveRegs()) ? 4 : 8;
4981        if (Flags.isInConsecutiveRegsLast())
4982          ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4983      }
4984      break;
4985    }
4986    case MVT::v4f32:
4987    case MVT::v4i32:
4988    case MVT::v8i16:
4989    case MVT::v16i8:
4990    case MVT::v2f64:
4991    case MVT::v2i64:
4992      if (!Subtarget.hasQPX()) {
4993      // These can be scalar arguments or elements of a vector array type
4994      // passed directly.  The latter are used to implement ELFv2 homogenous
      // vector aggregates.

      // For a varargs call, named arguments go into VRs or on the stack as
      // usual; unnamed arguments always go to the stack or the corresponding
      // GPRs when within range.  For now, we always put the value in both
      // locations (or even all three).
      if (isVarArg) {
        // We could elide this store in the case where the object fits
        // entirely in R registers.  Maybe later.
        SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
                                     MachinePointerInfo(), false, false, 0);
        MemOpChains.push_back(Store);
        if (VR_idx != NumVRs) {
          SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
                                     MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));

          unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
                           Arg.getSimpleValueType() == MVT::v2i64) ?
                          VSRH[VR_idx] : VR[VR_idx];
          ++VR_idx;

          RegsToPass.push_back(std::make_pair(VReg, Load));
        }
        ArgOffset += 16;
        for (unsigned i=0; i<16; i+=PtrByteSize) {
          if (GPR_idx == NumGPRs)
            break;
          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                  DAG.getConstant(i, PtrVT));
          SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
        }
        break;
      }

      // Non-varargs Altivec params go into VRs or on the stack.
      if (VR_idx != NumVRs) {
        unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
                         Arg.getSimpleValueType() == MVT::v2i64) ?
                        VSRH[VR_idx] : VR[VR_idx];
        ++VR_idx;

        RegsToPass.push_back(std::make_pair(VReg, Arg));
      } else {
        if (CallConv == CallingConv::Fast)
          ComputePtrOff();

        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, isTailCall, true, MemOpChains,
                         TailCallArguments, dl);
        if (CallConv == CallingConv::Fast)
          ArgOffset += 16;
      }

      if (CallConv != CallingConv::Fast)
        ArgOffset += 16;
      break;
      } // not QPX

      assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 &&
             "Invalid QPX parameter type");

      /* fall through */
    case MVT::v4f64:
    case MVT::v4i1: {
      bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
      if (isVarArg) {
        // We could elide this store in the case where the object fits
        // entirely in R registers.  Maybe later.
        SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
                                     MachinePointerInfo(), false, false, 0);
        MemOpChains.push_back(Store);
        if (QFPR_idx != NumQFPRs) {
          SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl,
                                     Store, PtrOff, MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load));
        }
        ArgOffset += (IsF32 ? 16 : 32);
        for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) {
          if (GPR_idx == NumGPRs)
            break;
          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                  DAG.getConstant(i, PtrVT));
          SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
        }
        break;
      }

      // Non-varargs QPX params go into registers or on the stack.
      if (QFPR_idx != NumQFPRs) {
        RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg));
      } else {
        if (CallConv == CallingConv::Fast)
          ComputePtrOff();

        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, isTailCall, true, MemOpChains,
                         TailCallArguments, dl);
        if (CallConv == CallingConv::Fast)
          ArgOffset += (IsF32 ? 16 : 32);
      }

      if (CallConv != CallingConv::Fast)
        ArgOffset += (IsF32 ? 16 : 32);
      break;
      }
    }
  }

  assert(NumBytesActuallyUsed == ArgOffset);
  (void)NumBytesActuallyUsed;

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // Check if this is an indirect call (MTCTR/BCTRL).
  // See PrepareCall() for more information about calls through function
  // pointers in the 64-bit SVR4 ABI.
  if (!isTailCall && !IsPatchPoint &&
      !isFunctionGlobalAddress(Callee) &&
      !isa<ExternalSymbolSDNode>(Callee)) {
    // Load r2 into a virtual register and store it to the TOC save area.
    setUsesTOCBasePtr(DAG);
    SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
    // TOC save area offset.
    unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
    SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset);
    SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
    Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
                         MachinePointerInfo::getStack(TOCSaveOffset),
                         false, false, 0);
    // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
    // This does not mean the MTCTR instruction must use R12; it's easier
    // to model this as an extra parameter, so do that.
    if (isELFv2ABI && !IsPatchPoint)
      RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  if (isTailCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp,
                    FPOp, true, TailCallArguments);

  return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
                    RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
                    NumBytes, Ins, InVals, CS);
}

SDValue
PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
                                    CallingConv::ID CallConv, bool isVarArg,
                                    bool isTailCall, bool IsPatchPoint,
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                    const SmallVectorImpl<SDValue> &OutVals,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                    SDLoc dl, SelectionDAG &DAG,
                                    SmallVectorImpl<SDValue> &InVals,
                                    ImmutableCallSite *CS) const {

  unsigned NumOps = Outs.size();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = PtrVT == MVT::i64;
  unsigned PtrByteSize = isPPC64 ? 8 : 4;

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a tail call. As a
  // consequence, the frame pointer will be used for dynamic stack allocation
  // and for restoring the caller's stack pointer in this function's epilogue.
  // This is done because the tail-called function might overwrite the value
  // in this function's (MF) stack pointer stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, and parameter passing area.  We start with 24/48 bytes, which is
  // pre-reserved space for [SP][CR][LR][3 x unused].
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  unsigned NumBytes = LinkageSize;

  // Add up all the space actually used.
  // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
  // they all go in registers, but we must reserve stack space for them for
  // possible use by the caller.  In varargs or 64-bit calls, parameters are
  // assigned stack space in order, with padding so Altivec parameters are
  // 16-byte aligned.
  unsigned nAltivecParamsAtEnd = 0;
  for (unsigned i = 0; i != NumOps; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    EVT ArgVT = Outs[i].VT;
    // Varargs Altivec parameters are padded to a 16 byte boundary.
    if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
        ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
        ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
      if (!isVarArg && !isPPC64) {
        // Non-varargs Altivec parameters go after all the non-Altivec
        // parameters; handle those later so we know how much padding we need.
        nAltivecParamsAtEnd++;
        continue;
      }
      // Varargs and 64-bit Altivec parameters are padded to a 16-byte boundary.
      NumBytes = ((NumBytes+15)/16)*16;
    }
    NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
  }

  // Allow for Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    NumBytes = ((NumBytes+15)/16)*16;
    NumBytes += 16*nAltivecParamsAtEnd;
  }

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if it is varargs.
  // Because we cannot tell if this is needed on the caller side, we have to
  // conservatively assume that it is needed.  As such, make sure we have at
  // least enough stack space for the caller to store the 8 GPRs.
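  // (With the 24/48-byte linkage area noted above, this floor works out to
  // 24 + 8*4 = 56 bytes on PPC32 and 48 + 8*8 = 112 bytes on PPC64.)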
  NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);

  // Tail call needs the stack to be aligned.
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);

  // To protect arguments on the stack from being clobbered in a tail call,
  // force all the loads to happen before doing any other lowering.
  if (isTailCall)
    Chain = DAG.getStackArgumentTokenFactor(Chain);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
                               dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so they can be moved somewhere
  // else later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
                                       dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr;
  if (isPPC64)
    StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
  else
    StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.  Also, if this is a vararg function, floating point operations
  // must be stored to our stack, and loaded into integer regs as well, if
  // any integer regs are available for argument passing.
  unsigned ArgOffset = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  static const MCPhysReg GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const MCPhysReg GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg *FPR = GetFPR();

  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };
  const unsigned NumGPRs = array_lengthof(GPR_32);
  const unsigned NumFPRs = 13;
  const unsigned NumVRs  = array_lengthof(VR);

  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;

  SmallVector<SDValue, 8> MemOpChains;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff;

    PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());

    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    // On PPC64, promote integers to 64-bit values.
    if (isPPC64 && Arg.getValueType() == MVT::i32) {
      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
    }

    // FIXME memcpy is used way more than necessary.  Correctness first.
    // Note: "by value" is code for passing a structure by value, not
    // basic types.
    if (Flags.isByVal()) {
      unsigned Size = Flags.getByValSize();
      // Very small objects are passed right-justified.  Everything else is
      // passed left-justified.
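      // For example, a 1-byte aggregate on PPC32 is copied to PtrOff + 3,
      // right-justified within its 4-byte argument slot.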
      if (Size==1 || Size==2) {
        EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
        if (GPR_idx != NumGPRs) {
          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
                                        MachinePointerInfo(), VT,
                                        false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

          ArgOffset += PtrByteSize;
        } else {
          SDValue Const = DAG.getConstant(PtrByteSize - Size,
                                          PtrOff.getValueType());
          SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
          Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                            CallSeqStart,
                                                            Flags, DAG, dl);
          ArgOffset += PtrByteSize;
        }
        continue;
      }
      // Copy entire object into memory.  There are cases where gcc-generated
      // code assumes it is there, even if it could be put entirely into
      // registers.  (This is not what the doc says.)
      Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
                                                        CallSeqStart,
                                                        Flags, DAG, dl);

      // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
      // copy the pieces of the object that fit into registers from the
      // parameter save area.
      for (unsigned j=0; j<Size; j+=PtrByteSize) {
        SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
        if (GPR_idx != NumGPRs) {
          SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
                                     MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          ArgOffset += PtrByteSize;
        } else {
          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
          break;
        }
      }
      continue;
    }

    switch (Arg.getSimpleValueType().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (GPR_idx != NumGPRs) {
        if (Arg.getValueType() == MVT::i1)
          Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);

        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
      } else {
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);
      }
      ArgOffset += PtrByteSize;
      break;
    case MVT::f32:
    case MVT::f64:
      if (FPR_idx != NumFPRs) {
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

        if (isVarArg) {
          SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
                                       MachinePointerInfo(), false, false, 0);
          MemOpChains.push_back(Store);

          // Float varargs are always shadowed in available integer registers
          if (GPR_idx != NumGPRs) {
            SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
                                       MachinePointerInfo(), false, false,
                                       false, 0);
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          }
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
            SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
            PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
            SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
                                       MachinePointerInfo(),
                                       false, false, false, 0);
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          }
        } else {
          // If we have any FPRs remaining, we may also have GPRs remaining.
          // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
          // GPRs.
          if (GPR_idx != NumGPRs)
            ++GPR_idx;
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
              !isPPC64)  // PPC64 has 64-bit GPR's obviously :)
            ++GPR_idx;
        }
      } else
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);
      if (isPPC64)
        ArgOffset += 8;
      else
        ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (isVarArg) {
        // These go aligned on the stack, or in the corresponding R registers
        // when within range.  The Darwin PPC ABI doc claims they also go in
        // V registers; in fact gcc does this only for arguments that are
        // prototyped, not for those that match the "...".  We do it for all
        // arguments, and it seems to work.
        while (ArgOffset % 16 !=0) {
          ArgOffset += PtrByteSize;
          if (GPR_idx != NumGPRs)
            GPR_idx++;
        }
        // We could elide this store in the case where the object fits
        // entirely in R registers.  Maybe later.
        PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
                            DAG.getConstant(ArgOffset, PtrVT));
        SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
                                     MachinePointerInfo(), false, false, 0);
        MemOpChains.push_back(Store);
        if (VR_idx != NumVRs) {
          SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
                                     MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
        }
        ArgOffset += 16;
        for (unsigned i=0; i<16; i+=PtrByteSize) {
          if (GPR_idx == NumGPRs)
            break;
          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                  DAG.getConstant(i, PtrVT));
          SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
        }
        break;
      }

      // Non-varargs Altivec params generally go in registers, but have
      // stack space allocated at the end.
      if (VR_idx != NumVRs) {
        // Doesn't have GPR space allocated.
        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
      } else if (nAltivecParamsAtEnd==0) {
        // We are emitting Altivec params in order.
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, true, MemOpChains,
                         TailCallArguments, dl);
        ArgOffset += 16;
      }
      break;
    }
  }
  // If all Altivec parameters fit in registers, as they usually do,
  // they get stack space following the non-Altivec parameters.  We
  // don't track this here because nobody below needs it.
  // If there are more Altivec parameters than fit in registers, emit
  // the stores here.
  if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
    unsigned j = 0;
    // Offset is aligned; skip the first 12 params, which go in V registers.
    ArgOffset = ((ArgOffset+15)/16)*16;
    ArgOffset += 12*16;
    for (unsigned i = 0; i != NumOps; ++i) {
      SDValue Arg = OutVals[i];
      EVT ArgType = Outs[i].VT;
      if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
          ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
        if (++j > NumVRs) {
          SDValue PtrOff;
          // We are emitting Altivec params in order.
          LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                           isPPC64, isTailCall, true, MemOpChains,
                           TailCallArguments, dl);
          ArgOffset += 16;
        }
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // On Darwin, R12 must contain the address of an indirect callee.  This does
  // not mean the MTCTR instruction must use R12; it's easier to model this as
  // an extra parameter, so do that.
  if (!isTailCall &&
      !isFunctionGlobalAddress(Callee) &&
      !isa<ExternalSymbolSDNode>(Callee) &&
      !isBLACompatibleAddress(Callee, DAG))
    RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
                                                   PPC::R12), Callee));

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  if (isTailCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp,
                    FPOp, true, TailCallArguments);

  return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
                    RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
                    NumBytes, Ins, InVals, CS);
}

bool
PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
                                  MachineFunction &MF, bool isVarArg,
                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
                                  LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
  return CCInfo.CheckReturn(Outs, RetCC_PPC);
}

SDValue
PPCTargetLowering::LowerReturn(SDValue Chain,
                               CallingConv::ID CallConv, bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               SDLoc dl, SelectionDAG &DAG) const {

  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeReturn(Outs, RetCC_PPC);

  SDValue Flag;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Arg = OutVals[i];

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    }

    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
    Flag = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain;  // Update chain.

  // Add the flag if we have it.
  if (Flag.getNode())
    RetOps.push_back(Flag);

  return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
}

SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
                                   const PPCSubtarget &Subtarget) const {
  // When we pop the dynamic allocation we need to restore the SP link.
  SDLoc dl(Op);

  // Get the correct type for pointers.
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Construct the stack pointer operand.
  bool isPPC64 = Subtarget.isPPC64();
  unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
  SDValue StackPtr = DAG.getRegister(SP, PtrVT);

  // Get the operands for the STACKRESTORE.
  SDValue Chain = Op.getOperand(0);
  SDValue SaveSP = Op.getOperand(1);

  // Load the old link SP.
  SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr,
                                   MachinePointerInfo(),
                                   false, false, false, 0);

  // Restore the stack pointer.
  Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);

  // Store the old link SP.
  return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo(),
                      false, false, 0);
}



SDValue
PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool isPPC64 = Subtarget.isPPC64();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Get the current return address save index.  The users of this index are
  // primarily the RETURNADDR lowering.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int RASI = FI->getReturnAddrSaveIndex();

  // If the return address save index hasn't been defined yet, create it.
  if (!RASI) {
    // Find out the fixed offset of the return address save area.
    int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
    // Allocate the frame index for the return address save area.
    RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
    // Save the result.
    FI->setReturnAddrSaveIndex(RASI);
  }
  return DAG.getFrameIndex(RASI, PtrVT);
}

SDValue
PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool isPPC64 = Subtarget.isPPC64();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Get the current frame pointer save index.  The users of this index will
  // be primarily DYNALLOC instructions.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int FPSI = FI->getFramePointerSaveIndex();

  // If the frame pointer save index hasn't been defined yet, create it.
  if (!FPSI) {
    // Find out the fixed offset of the frame pointer save area.
    int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
    // Allocate the frame index for the frame pointer save area.
    FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
    // Save the result.
    FI->setFramePointerSaveIndex(FPSI);
  }
  return DAG.getFrameIndex(FPSI, PtrVT);
}

SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                         SelectionDAG &DAG,
                                         const PPCSubtarget &Subtarget) const {
  // Get the inputs.
  SDValue Chain = Op.getOperand(0);
  SDValue Size  = Op.getOperand(1);
  SDLoc dl(Op);

  // Get the correct type for pointers.
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Negate the size; the stack grows downward, so DYNALLOC adjusts the stack
  // pointer by a negative amount.
  SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
                                  DAG.getConstant(0, PtrVT), Size);
  // Construct a node for the frame pointer save index.
  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
  // Build a DYNALLOC node.
  SDValue Ops[3] = { Chain, NegSize, FPSIdx };
  SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
  return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
}

SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
                     DAG.getVTList(MVT::i32, MVT::Other),
                     Op.getOperand(0), Op.getOperand(1));
}

SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
                     Op.getOperand(0), Op.getOperand(1));
}

SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  if (Op.getValueType().isVector())
    return LowerVectorLoad(Op, DAG);

  assert(Op.getValueType() == MVT::i1 &&
         "Custom lowering only for i1 loads");

  // First, load the 8-bit value extended to the pointer width, then truncate
  // it to 1 bit.

  SDLoc dl(Op);
  LoadSDNode *LD = cast<LoadSDNode>(Op);

  SDValue Chain = LD->getChain();
  SDValue BasePtr = LD->getBasePtr();
  MachineMemOperand *MMO = LD->getMemOperand();

  SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(), Chain,
                                 BasePtr, MVT::i8, MMO);
  SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);

  SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
  return DAG.getMergeValues(Ops, dl);
}

SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  if (Op.getOperand(1).getValueType().isVector())
    return LowerVectorStore(Op, DAG);

  assert(Op.getOperand(1).getValueType() == MVT::i1 &&
         "Custom lowering only for i1 stores");

  // First, zero extend to the pointer width, then use a truncating store to
  // 8 bits.

  SDLoc dl(Op);
  StoreSDNode *ST = cast<StoreSDNode>(Op);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  MachineMemOperand *MMO = ST->getMemOperand();

  Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(), Value);
  return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
}

// FIXME: Remove this once the ANDI glue bug is fixed:
SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getValueType() == MVT::i1 &&
         "Custom lowering only for i1 results");

  SDLoc DL(Op);
  return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
                     Op.getOperand(0));
}

/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
/// possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  // Not FP? Not a fsel.
  if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
      !Op.getOperand(2).getValueType().isFloatingPoint())
    return Op;

  // We might be able to do better than this under some circumstances, but in
  // general, fsel-based lowering of select is a finite-math-only optimization.
  // For more information, see section F.3 of the 2.06 ISA specification.
  if (!DAG.getTarget().Options.NoInfsFPMath ||
      !DAG.getTarget().Options.NoNaNsFPMath)
    return Op;

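  // In the lowerings below, recall that PPCISD::FSEL(Cmp, TV, FV) selects TV
  // when Cmp >= 0.0 and FV otherwise; each condition code is rearranged into
  // that form by swapping TV and FV and/or negating the comparison value.
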
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();

  EVT ResVT = Op.getValueType();
  EVT CmpVT = Op.getOperand(0).getValueType();
  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
  SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
  SDLoc dl(Op);

  // If the RHS of the comparison is a 0.0, we don't need to do the
  // subtraction at all.
  SDValue Sel1;
  if (isFloatingPointZero(RHS))
    switch (CC) {
    default: break;       // SETUO etc aren't handled by fsel.
    case ISD::SETNE:
      std::swap(TV, FV);
    case ISD::SETEQ:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
      if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
        Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
    case ISD::SETULT:
    case ISD::SETLT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
    case ISD::SETOGE:
    case ISD::SETGE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
    case ISD::SETUGT:
    case ISD::SETGT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
    case ISD::SETOLE:
    case ISD::SETLE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
    }

  SDValue Cmp;
  switch (CC) {
  default: break;       // SETUO etc aren't handled by fsel.
  case ISD::SETNE:
    std::swap(TV, FV);
  case ISD::SETEQ:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
    if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                       DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
  case ISD::SETULT:
  case ISD::SETLT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOGE:
  case ISD::SETGE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  case ISD::SETUGT:
  case ISD::SETGT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOLE:
  case ISD::SETLE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  }
  return Op;
}

void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
                                               SelectionDAG &DAG,
                                               SDLoc dl) const {
  assert(Op.getOperand(0).getValueType().isFloatingPoint());
  SDValue Src = Op.getOperand(0);
  if (Src.getValueType() == MVT::f32)
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);

  SDValue Tmp;
  switch (Op.getSimpleValueType().SimpleTy) {
  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
  case MVT::i32:
    Tmp = DAG.getNode(
        Op.getOpcode() == ISD::FP_TO_SINT
            ? PPCISD::FCTIWZ
            : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
        dl, MVT::f64, Src);
    break;
  case MVT::i64:
    assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
           "i64 FP_TO_UINT is supported only with FPCVT");
    Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                        PPCISD::FCTIDUZ,
                      dl, MVT::f64, Src);
    break;
  }

  // Convert the FP value to an int value through memory.
  bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
    (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
  SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
  int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI);

  // Emit a store to the stack slot.
  SDValue Chain;
  if (i32Stack) {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MMO =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
    SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
    Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
              DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
  } else
    Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
                         MPI, false, false, 0);

  // Result is a load from the stack slot.  If loading 4 bytes, make sure to
  // add in a bias.
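  // (The value was stored as an 8-byte f64 in that case; on big-endian PPC
  // the 32-bit integer result occupies the high-addressed word of the slot,
  // hence the +4 below.)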
  if (Op.getValueType() == MVT::i32 && !i32Stack) {
    FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
                        DAG.getConstant(4, FIPtr.getValueType()));
    MPI = MPI.getWithOffset(4);
  }

  RLI.Chain = Chain;
  RLI.Ptr = FIPtr;
  RLI.MPI = MPI;
}

SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                                          SDLoc dl) const {
  ReuseLoadInfo RLI;
  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);

  return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI, false,
                     false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo,
                     RLI.Ranges);
}

// We're trying to insert a regular store, S, and then a load, L. If the
// incoming value, O, is a load, we might just be able to have our load use the
// address used by O. However, we don't know if anything else will store to
// that address before we can load from it. To prevent this situation, we need
// to insert our load, L, into the chain as a peer of O. To do this, we give L
// the same chain operand as O, we create a token factor from the chain results
// of O and L, and we replace all uses of O's chain result with that token
// factor (see spliceIntoChain below for this last part).
bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
                                            ReuseLoadInfo &RLI,
                                            SelectionDAG &DAG,
                                            ISD::LoadExtType ET) const {
  SDLoc dl(Op);
  if (ET == ISD::NON_EXTLOAD &&
      (Op.getOpcode() == ISD::FP_TO_UINT ||
       Op.getOpcode() == ISD::FP_TO_SINT) &&
      isOperationLegalOrCustom(Op.getOpcode(),
                               Op.getOperand(0).getValueType())) {

    LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
    return true;
  }

  LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
  if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
      LD->isNonTemporal())
    return false;
  if (LD->getMemoryVT() != MemVT)
    return false;

  RLI.Ptr = LD->getBasePtr();
  if (LD->isIndexed() && LD->getOffset().getOpcode() != ISD::UNDEF) {
    assert(LD->getAddressingMode() == ISD::PRE_INC &&
           "Non-pre-inc AM on PPC?");
    RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
                          LD->getOffset());
  }

  RLI.Chain = LD->getChain();
  RLI.MPI = LD->getPointerInfo();
  RLI.IsInvariant = LD->isInvariant();
  RLI.Alignment = LD->getAlignment();
  RLI.AAInfo = LD->getAAInfo();
  RLI.Ranges = LD->getRanges();

  RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
  return true;
}

// Given the head of the old chain, ResChain, insert a token factor containing
// it and NewResChain, and make users of ResChain now be users of that token
// factor.
void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
                                        SDValue NewResChain,
                                        SelectionDAG &DAG) const {
  if (!ResChain)
    return;

  SDLoc dl(NewResChain);

  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                           NewResChain, DAG.getUNDEF(MVT::Other));
  assert(TF.getNode() != NewResChain.getNode() &&
         "A new TF really is required here");

  DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
  DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
}

SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);

  if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) {
    if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64)
      return SDValue();

    SDValue Value = Op.getOperand(0);
    // The values are now known to be -1 (false) or 1 (true). To convert this
    // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
    // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
    Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);

    SDValue FPHalfs = DAG.getConstantFP(0.5, MVT::f64);
    FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
                          FPHalfs, FPHalfs, FPHalfs, FPHalfs);

    Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);

    if (Op.getValueType() != MVT::v4f64)
      Value = DAG.getNode(ISD::FP_ROUND, dl,
                          Op.getValueType(), Value, DAG.getIntPtrConstant(1));
    return Value;
  }

  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();

  if (Op.getOperand(0).getValueType() == MVT::i1)
    return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
                       DAG.getConstantFP(1.0, Op.getValueType()),
                       DAG.getConstantFP(0.0, Op.getValueType()));

  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                       ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
                                                            : PPCISD::FCFIDS)
                       : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
                                                            : PPCISD::FCFID);
  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                  ? MVT::f32
                  : MVT::f64;

  if (Op.getOperand(0).getValueType() == MVT::i64) {
    SDValue SINT = Op.getOperand(0);
    // When converting to single-precision, we actually need to convert
    // to double-precision first and then round to single-precision.
    // To avoid double-rounding effects during that operation, we have
    // to prepare the input operand.  Bits that might be truncated when
    // converting to double-precision are replaced by a bit that won't
    // be lost at this stage, but is below the single-precision rounding
    // position.
    //
    // However, if -enable-unsafe-fp-math is in effect, accept double
    // rounding to avoid the extra overhead.
    if (Op.getValueType() == MVT::f32 &&
        !Subtarget.hasFPCVT() &&
        !DAG.getTarget().Options.UnsafeFPMath) {

      // Twiddle input to make sure the low 11 bits are zero.  (If this
      // is the case, we are guaranteed the value will fit into the 53 bit
      // mantissa of an IEEE double-precision value without rounding.)
      // If any of those low 11 bits were not zero originally, make sure
      // bit 12 (value 2048) is set instead, so that the final rounding
      // to single-precision gets the correct result.
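      // Concretely: (SINT & 2047) + 2047 has bit 11 set exactly when some
      // low-11 bit of SINT is nonzero; OR'ing that back into SINT and then
      // masking with -2048 yields SINT with its low 11 bits cleared and
      // bit 11 acting as a sticky bit.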
      SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
                                  SINT, DAG.getConstant(2047, MVT::i64));
      Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
                          Round, DAG.getConstant(2047, MVT::i64));
      Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
      Round = DAG.getNode(ISD::AND, dl, MVT::i64,
                          Round, DAG.getConstant(-2048, MVT::i64));

      // However, we cannot use that value unconditionally: if the magnitude
      // of the input value is small, the bit-twiddling we did above might
      // end up visibly changing the output.  Fortunately, in that case, we
      // don't need to twiddle bits since the original input will convert
      // exactly to double-precision floating-point already.  Therefore,
      // construct a conditional to use the original value if the top 11
      // bits are all sign-bit copies, and use the rounded value computed
      // above otherwise.
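      // (The SRA by 53 leaves bits 53..63 sign-extended in the result: the
      // all-zeros and all-ones patterns become 0 and -1, which adding 1 maps
      // to 1 and 0, so the unsigned > 1 test catches every other pattern.)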
      SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
                                 SINT, DAG.getConstant(53, MVT::i32));
      Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
                         Cond, DAG.getConstant(1, MVT::i64));
      Cond = DAG.getSetCC(dl, MVT::i32,
                          Cond, DAG.getConstant(1, MVT::i64), ISD::SETUGT);

      SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
    }

    ReuseLoadInfo RLI;
    SDValue Bits;

    MachineFunction &MF = DAG.getMachineFunction();
    if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
      Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI, false,
                         false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo,
                         RLI.Ranges);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (Subtarget.hasLFIWAX() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (Subtarget.hasFPCVT() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (((Subtarget.hasLFIWAX() &&
                 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
                (Subtarget.hasFPCVT() &&
                 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
               SINT.getOperand(0).getValueType() == MVT::i32) {
      MachineFrameInfo *FrameInfo = MF.getFrameInfo();
      EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

      int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store =
        DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
                     MachinePointerInfo::getFixedStack(FrameIdx),
                     false, false, 0);

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Store;
      RLI.MPI = MachinePointerInfo::getFixedStack(FrameIdx);
      RLI.Alignment = 4;

      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
                                     PPCISD::LFIWZX : PPCISD::LFIWAX,
                                     dl, DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
    } else
      Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);

    SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);

    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
      FP = DAG.getNode(ISD::FP_ROUND, dl,
                       MVT::f32, FP, DAG.getIntPtrConstant(0));
    return FP;
  }

  assert(Op.getOperand(0).getValueType() == MVT::i32 &&
         "Unhandled INT_TO_FP type in custom expander!");
  // Since we only generate this in 64-bit mode, we can take advantage of
  // 64-bit registers.  In particular, sign extend the input value into the
  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
  // slot, then lfd it and fcfid it.
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *FrameInfo = MF.getFrameInfo();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  SDValue Ld;
  if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
    ReuseLoadInfo RLI;
    bool ReusingLoad;
    if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI,
                                            DAG))) {
      int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
                                   MachinePointerInfo::getFixedStack(FrameIdx),
                                   false, false, 0);

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Store;
      RLI.MPI = MachinePointerInfo::getFixedStack(FrameIdx);
      RLI.Alignment = 4;
    }

    MachineMemOperand *MMO =
      MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                              RLI.Alignment, RLI.AAInfo, RLI.Ranges);
    SDValue Ops[] = { RLI.Chain, RLI.Ptr };
    Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
                                   PPCISD::LFIWZX : PPCISD::LFIWAX,
                                 dl, DAG.getVTList(MVT::f64, MVT::Other),
                                 Ops, MVT::i32, MMO);
    if (ReusingLoad)
      spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
  } else {
    assert(Subtarget.isPPC64() &&
           "i32->FP without LFIWAX supported only on PPC64");

    int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

    SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
                                Op.getOperand(0));

    // STD the extended value into the stack slot.
    SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx,
                                 MachinePointerInfo::getFixedStack(FrameIdx),
                                 false, false, 0);

    // Load the value as a double.
    Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx,
                     MachinePointerInfo::getFixedStack(FrameIdx),
                     false, false, false, 0);
  }

  // FCFID it and return it.
  SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
    FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0));
  return FP;
}

SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  /*
   The rounding mode is in bits 30:31 of FPSCR, and has the following
   settings:
     00 Round to nearest
     01 Round to 0
     10 Round to +inf
     11 Round to -inf

  FLT_ROUNDS, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

  To perform the conversion, we do:
    ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
  */
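  // As a sanity check, the formula maps each of the four FPSCR settings to
  // the corresponding FLT_ROUNDS value:
  //   00: (0 ^ (3 >> 1)) = 0 ^ 1 = 1  (round to nearest)
  //   01: (1 ^ (2 >> 1)) = 1 ^ 1 = 0  (round to 0)
  //   10: (2 ^ (1 >> 1)) = 2 ^ 0 = 2  (round to +inf)
  //   11: (3 ^ (0 >> 1)) = 3 ^ 0 = 3  (round to -inf)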

  MachineFunction &MF = DAG.getMachineFunction();
  EVT VT = Op.getValueType();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Save FP Control Word to register
  EVT NodeTys[] = {
    MVT::f64,    // return register
    MVT::Glue    // unused in this context
  };
  SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);

  // Save FP register to stack slot
  int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
  SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain,
                               StackSlot, MachinePointerInfo(), false, false,0);

  // Load FP Control Word from low 32 bits of stack slot.
  SDValue Four = DAG.getConstant(4, PtrVT);
  SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
  SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(),
                            false, false, false, 0);

  // Transform as necessary
  SDValue CWD1 =
    DAG.getNode(ISD::AND, dl, MVT::i32,
                CWD, DAG.getConstant(3, MVT::i32));
  SDValue CWD2 =
    DAG.getNode(ISD::SRL, dl, MVT::i32,
                DAG.getNode(ISD::AND, dl, MVT::i32,
                            DAG.getNode(ISD::XOR, dl, MVT::i32,
                                        CWD, DAG.getConstant(3, MVT::i32)),
                            DAG.getConstant(3, MVT::i32)),
                DAG.getConstant(1, MVT::i32));

  SDValue RetVal =
    DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);

  return DAG.getNode((VT.getSizeInBits() < 16 ?
                      ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
}

SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();
  SDLoc dl(Op);
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SHL!");

  // Expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
6334  SDValue Lo = Op.getOperand(0);
6335  SDValue Hi = Op.getOperand(1);
6336  SDValue Amt = Op.getOperand(2);
6337  EVT AmtVT = Amt.getValueType();
6338
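  // For example, with BitWidth = 32 and Amt = 40 (an oversized amount), the
  // 32-bit PPC shifts return 0 for amounts in [32, 63]: Tmp2 and Tmp3 both
  // vanish, Tmp5 = 8 gives OutHi = Lo << 8, and OutLo = Lo << 40 = 0.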
6339  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
6340                             DAG.getConstant(BitWidth, AmtVT), Amt);
6341  SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
6342  SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
6343  SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
6344  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
6345                             DAG.getConstant(-BitWidth, AmtVT));
6346  SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
6347  SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
6348  SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
6349  SDValue OutOps[] = { OutLo, OutHi };
6350  return DAG.getMergeValues(OutOps, dl);
6351}
6352
6353SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
6354  EVT VT = Op.getValueType();
6355  SDLoc dl(Op);
6356  unsigned BitWidth = VT.getSizeInBits();
6357  assert(Op.getNumOperands() == 3 &&
6358         VT == Op.getOperand(1).getValueType() &&
6359         "Unexpected SRL!");
6360
6361  // Expand into a bunch of logical ops.  Note that these ops
6362  // depend on the PPC behavior for oversized shift amounts.
6363  SDValue Lo = Op.getOperand(0);
6364  SDValue Hi = Op.getOperand(1);
6365  SDValue Amt = Op.getOperand(2);
6366  EVT AmtVT = Amt.getValueType();
6367
6368  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
6369                             DAG.getConstant(BitWidth, AmtVT), Amt);
6370  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
6371  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
6372  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
6373  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
6374                             DAG.getConstant(-BitWidth, AmtVT));
6375  SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
6376  SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
6377  SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
6378  SDValue OutOps[] = { OutLo, OutHi };
6379  return DAG.getMergeValues(OutOps, dl);
6380}
6381
6382SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
6383  SDLoc dl(Op);
6384  EVT VT = Op.getValueType();
6385  unsigned BitWidth = VT.getSizeInBits();
6386  assert(Op.getNumOperands() == 3 &&
6387         VT == Op.getOperand(1).getValueType() &&
6388         "Unexpected SRA!");
6389
6390  // Expand into a bunch of logical ops, followed by a select_cc.
6391  SDValue Lo = Op.getOperand(0);
6392  SDValue Hi = Op.getOperand(1);
6393  SDValue Amt = Op.getOperand(2);
6394  EVT AmtVT = Amt.getValueType();
6395
6396  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
6397                             DAG.getConstant(BitWidth, AmtVT), Amt);
6398  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
6399  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
6400  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
6401  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
6402                             DAG.getConstant(-BitWidth, AmtVT));
6403  SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
6404  SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
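  // Tmp5 = Amt - BitWidth: for Amt <= BitWidth the OR-composed Tmp4 is the
  // correct low part, while for larger amounts the sign-filling arithmetic
  // shift in Tmp6 must be selected instead; hence the SETLE against zero.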
6405  SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT),
6406                                  Tmp4, Tmp6, ISD::SETLE);
6407  SDValue OutOps[] = { OutLo, OutHi };
6408  return DAG.getMergeValues(OutOps, dl);
6409}
6410
6411//===----------------------------------------------------------------------===//
6412// Vector related lowering.
6413//
6414
6415/// BuildSplatI - Build a canonical splati of Val with an element size of
6416/// SplatSize.  Cast the result to VT.
6417static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
6418                             SelectionDAG &DAG, SDLoc dl) {
6419  assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
6420
6421  static const EVT VTys[] = { // canonical VT to use for each size.
6422    MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
6423  };
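  // Indexed by SplatSize-1; SplatSize is 1, 2, or 4 bytes, so the third slot
  // (a would-be 24-bit element type) is only a placeholder.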
6424
6425  EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
6426
6427  // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
6428  if (Val == -1)
6429    SplatSize = 1;
6430
6431  EVT CanonicalVT = VTys[SplatSize-1];
6432
6433  // Build a canonical splat for this value.
6434  SDValue Elt = DAG.getConstant(Val, MVT::i32);
6435  SmallVector<SDValue, 8> Ops;
6436  Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
6437  SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, Ops);
6438  return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res);
6439}
6440
6441/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
6442/// specified intrinsic ID.
6443static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op,
6444                                SelectionDAG &DAG, SDLoc dl,
6445                                EVT DestVT = MVT::Other) {
6446  if (DestVT == MVT::Other) DestVT = Op.getValueType();
6447  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
6448                     DAG.getConstant(IID, MVT::i32), Op);
6449}
6450
6451/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
6452/// specified intrinsic ID.
6453static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
6454                                SelectionDAG &DAG, SDLoc dl,
6455                                EVT DestVT = MVT::Other) {
6456  if (DestVT == MVT::Other) DestVT = LHS.getValueType();
6457  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
6458                     DAG.getConstant(IID, MVT::i32), LHS, RHS);
6459}
6460
6461/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
6462/// specified intrinsic ID.
6463static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
6464                                SDValue Op2, SelectionDAG &DAG,
6465                                SDLoc dl, EVT DestVT = MVT::Other) {
6466  if (DestVT == MVT::Other) DestVT = Op0.getValueType();
6467  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
6468                     DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
6469}
6470
6471
6472/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
6473/// amount.  The result has the specified value type.
6474static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
6475                             EVT VT, SelectionDAG &DAG, SDLoc dl) {
6476  // Force LHS/RHS to be the right type.
6477  LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
6478  RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
6479
6480  int Ops[16];
6481  for (unsigned i = 0; i != 16; ++i)
6482    Ops[i] = i + Amt;
6483  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
6484  return DAG.getNode(ISD::BITCAST, dl, VT, T);
6485}
6486
6487// If this is a case we can't handle, return null and let the default
6488// expansion code take care of it.  If we CAN select this case, and if it
6489// selects to a single instruction, return Op.  Otherwise, if we can codegen
6490// this case more efficiently than a constant pool load, lower it to the
6491// sequence of ops that should be used.
6492SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
6493                                             SelectionDAG &DAG) const {
6494  SDLoc dl(Op);
6495  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
6496  assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
6497
6498  if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) {
6499    // We first build an i32 vector, load it into a QPX register,
6500    // then convert it to a floating-point vector and compare it
6501    // to a zero vector to get the boolean result.
6502    MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
6503    int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
6504    MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx);
6505    EVT PtrVT = getPointerTy();
6506    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
6507
6508    assert(BVN->getNumOperands() == 4 &&
6509      "BUILD_VECTOR for v4i1 does not have 4 operands");
6510
6511    bool IsConst = true;
6512    for (unsigned i = 0; i < 4; ++i) {
6513      if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) continue;
6514      if (!isa<ConstantSDNode>(BVN->getOperand(i))) {
6515        IsConst = false;
6516        break;
6517      }
6518    }
6519
6520    if (IsConst) {
6521      Constant *One =
6522        ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0);
6523      Constant *NegOne =
6524        ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0);
6525
6526      SmallVector<Constant*, 4> CV(4, NegOne);
6527      for (unsigned i = 0; i < 4; ++i) {
6528        if (BVN->getOperand(i).getOpcode() == ISD::UNDEF)
6529          CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext()));
6530        else if (cast<ConstantSDNode>(BVN->getOperand(i))->
6531                   getConstantIntValue()->isZero())
6532          continue;
6533        else
6534          CV[i] = One;
6535      }
6536
6537      Constant *CP = ConstantVector::get(CV);
6538      SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(),
6539                      16 /* alignment */);
6540
6541      SmallVector<SDValue, 2> Ops;
6542      Ops.push_back(DAG.getEntryNode());
6543      Ops.push_back(CPIdx);
6544
6545      SmallVector<EVT, 2> ValueVTs;
6546      ValueVTs.push_back(MVT::v4i1);
6547      ValueVTs.push_back(MVT::Other); // chain
6548      SDVTList VTs = DAG.getVTList(ValueVTs);
6549
6550      return DAG.getMemIntrinsicNode(PPCISD::QVLFSb,
6551        dl, VTs, Ops, MVT::v4f32,
6552        MachinePointerInfo::getConstantPool());
6553    }
6554
6555    SmallVector<SDValue, 4> Stores;
6556    for (unsigned i = 0; i < 4; ++i) {
6557      if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) continue;
6558
6559      unsigned Offset = 4*i;
6560      SDValue Idx = DAG.getConstant(Offset, FIdx.getValueType());
6561      Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
6562
6563      unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize();
6564      if (StoreSize > 4) {
6565        Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
6566                                           BVN->getOperand(i), Idx,
6567                                           PtrInfo.getWithOffset(Offset),
6568                                           MVT::i32, false, false, 0));
6569      } else {
6570        SDValue StoreValue = BVN->getOperand(i);
6571        if (StoreSize < 4)
6572          StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue);
6573
6574        Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl,
6575                                      StoreValue, Idx,
6576                                      PtrInfo.getWithOffset(Offset),
6577                                      false, false, 0));
6578      }
6579    }
6580
6581    SDValue StoreChain;
6582    if (!Stores.empty())
6583      StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
6584    else
6585      StoreChain = DAG.getEntryNode();
6586
6587    // Now load from v4i32 into the QPX register; this will extend it to
6588  // v4i64 but not yet convert it to floating point. Nevertheless, this
6589    // is typed as v4f64 because the QPX register integer states are not
6590    // explicitly represented.
6591
6592    SmallVector<SDValue, 2> Ops;
6593    Ops.push_back(StoreChain);
6594    Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, MVT::i32));
6595    Ops.push_back(FIdx);
6596
6597    SmallVector<EVT, 2> ValueVTs;
6598    ValueVTs.push_back(MVT::v4f64);
6599    ValueVTs.push_back(MVT::Other); // chain
6600    SDVTList VTs = DAG.getVTList(ValueVTs);
6601
6602    SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN,
6603      dl, VTs, Ops, MVT::v4i32, PtrInfo);
6604    LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
6605      DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, MVT::i32),
6606      LoadedVect);
6607
6608    SDValue FPZeros = DAG.getConstantFP(0.0, MVT::f64);
6609    FPZeros = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
6610                          FPZeros, FPZeros, FPZeros, FPZeros);
6611
6612    return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ);
6613  }
6614
6615  // All other QPX vectors are handled by generic code.
6616  if (Subtarget.hasQPX())
6617    return SDValue();
6618
6619  // Check if this is a splat of a constant value.
6620  APInt APSplatBits, APSplatUndef;
6621  unsigned SplatBitSize;
6622  bool HasAnyUndefs;
6623  if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
6624                             HasAnyUndefs, 0, true) || SplatBitSize > 32)
6625    return SDValue();
6626
6627  unsigned SplatBits = APSplatBits.getZExtValue();
6628  unsigned SplatUndef = APSplatUndef.getZExtValue();
6629  unsigned SplatSize = SplatBitSize / 8;
6630
6631  // First, handle single instruction cases.
6632
6633  // All zeros?
6634  if (SplatBits == 0) {
6635    // Canonicalize all zero vectors to be v4i32.
6636    if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
6637      SDValue Z = DAG.getConstant(0, MVT::i32);
6638      Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z);
6639      Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
6640    }
6641    return Op;
6642  }
6643
6644  // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
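  // For example, a 16-bit splat of 0xFFF0 gives SextVal = -16, which a single
  // vspltish materializes directly.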
6645  int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
6646                    (32-SplatBitSize));
6647  if (SextVal >= -16 && SextVal <= 15)
6648    return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
6649
6650
6651  // Two instruction sequences.
6652
6653  // If this value is in the range [-32,30] and is even, use:
6654  //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
6655  // If this value is in the range [17,31] and is odd, use:
6656  //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
6657  // If this value is in the range [-31,-17] and is odd, use:
6658  //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
6659  // Note the last two are three-instruction sequences.
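  // For example, 30 = vsplti(15) + vsplti(15), and
  // 27 = vsplti(11) - vsplti(-16).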
6660  if (SextVal >= -32 && SextVal <= 31) {
6661    // To avoid having these optimizations undone by constant folding,
6662    // we convert to a pseudo that will be expanded later into one of
6663    // the above forms.
6664    SDValue Elt = DAG.getConstant(SextVal, MVT::i32);
6665    EVT VT = (SplatSize == 1 ? MVT::v16i8 :
6666              (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
6667    SDValue EltSize = DAG.getConstant(SplatSize, MVT::i32);
6668    SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
6669    if (VT == Op.getValueType())
6670      return RetVal;
6671    else
6672      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
6673  }
6674
6675  // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
6676  // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
6677  // for fneg/fabs.
6678  if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
6679    // Materialize an all-ones splat with vspltisw -1:
6680    SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
6681
6682    // Make the VSLW intrinsic, computing 0x8000_0000.
6683    SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
6684                                   OnesV, DAG, dl);
6685
6686    // xor by OnesV to invert it.
6687    Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
6688    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
6689  }
6690
6691  // The remaining cases assume either big endian element order or
6692  // a splat-size that equates to the element size of the vector
6693  // to be built.  An example that doesn't work for little endian is
6694  // {0, -1, 0, -1, 0, -1, 0, -1} which has a splat size of 32 bits
6695  // and a vector element size of 16 bits.  The code below will
6696  // produce the vector in big endian element order, which for little
6697  // endian is {-1, 0, -1, 0, -1, 0, -1, 0}.
6698
6699  // For now, just avoid these optimizations in that case.
6700  // FIXME: Develop correct optimizations for LE with mismatched
6701  // splat and element sizes.
6702
6703  if (Subtarget.isLittleEndian() &&
6704      SplatSize != Op.getValueType().getVectorElementType().getSizeInBits())
6705    return SDValue();
6706
6707  // Check to see if this is a wide variety of vsplti*, binop self cases.
6708  static const signed char SplatCsts[] = {
6709    -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
6710    -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
6711  };
6712
6713  for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
6714    // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
6715    // ambiguous cases (e.g. forming 0x8000_0000): it canonicalizes to vspltisb -1.
6716    int i = SplatCsts[idx];
6717
6718    // Figure out what shift amount will be used by altivec if shifted by i in
6719    // this splat size.
6720    unsigned TypeShiftAmt = i & (SplatBitSize-1);
6721
6722    // vsplti + shl self.
6723    if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
6724      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
6725      static const unsigned IIDs[] = { // Intrinsic to use for each size.
6726        Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
6727        Intrinsic::ppc_altivec_vslw
6728      };
6729      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
6730      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
6731    }
6732
6733    // vsplti + srl self.
6734    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
6735      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
6736      static const unsigned IIDs[] = { // Intrinsic to use for each size.
6737        Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
6738        Intrinsic::ppc_altivec_vsrw
6739      };
6740      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
6741      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
6742    }
6743
6744    // vsplti + sra self.
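    // Note: this condition is identical to the srl check above, so that case
    // always matches first and this arm is unreachable as written; a true sra
    // test would use an arithmetic shift of i instead.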
6745    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
6746      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
6747      static const unsigned IIDs[] = { // Intrinsic to use for each size.
6748        Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
6749        Intrinsic::ppc_altivec_vsraw
6750      };
6751      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
6752      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
6753    }
6754
6755    // vsplti + rol self.
6756    if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
6757                         ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
6758      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
6759      static const unsigned IIDs[] = { // Intrinsic to use for each size.
6760        Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
6761        Intrinsic::ppc_altivec_vrlw
6762      };
6763      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
6764      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
6765    }
6766
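    // Example (big-endian element order): a halfword splat of 0xFAFF has
    // SextVal = 0xFFFFFAFF = ((-6 << 8) | 0xFF); vspltish(-6) gives the byte
    // pattern FF FA, and a vsldoi by 1 rotates it to FA FF, i.e. 0xFAFF.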
6767    // t = vsplti c, result = vsldoi t, t, 1
6768    if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
6769      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
6770      return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl);
6771    }
6772    // t = vsplti c, result = vsldoi t, t, 2
6773    if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
6774      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
6775      return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl);
6776    }
6777    // t = vsplti c, result = vsldoi t, t, 3
6778    if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
6779      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
6780      return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl);
6781    }
6782  }
6783
6784  return SDValue();
6785}
6786
6787/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
6788/// the specified operations to build the shuffle.
6789static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
6790                                      SDValue RHS, SelectionDAG &DAG,
6791                                      SDLoc dl) {
6792  unsigned OpNum = (PFEntry >> 26) & 0x0F;
6793  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
6794  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
6795
6796  enum {
6797    OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
6798    OP_VMRGHW,
6799    OP_VMRGLW,
6800    OP_VSPLTISW0,
6801    OP_VSPLTISW1,
6802    OP_VSPLTISW2,
6803    OP_VSPLTISW3,
6804    OP_VSLDOI4,
6805    OP_VSLDOI8,
6806    OP_VSLDOI12
6807  };
6808
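  // LHSID and RHSID each pack four source-element indices as base-9 digits
  // (0-7 select an element of the two concatenated inputs, 8 means undef),
  // so the OP_COPY checks below match the identities <0,1,2,3> and <4,5,6,7>.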
6809  if (OpNum == OP_COPY) {
6810    if (LHSID == (1*9+2)*9+3) return LHS;
6811    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
6812    return RHS;
6813  }
6814
6815  SDValue OpLHS, OpRHS;
6816  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
6817  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
6818
6819  int ShufIdxs[16];
6820  switch (OpNum) {
6821  default: llvm_unreachable("Unknown i32 permute!");
6822  case OP_VMRGHW:
6823    ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
6824    ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
6825    ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
6826    ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
6827    break;
6828  case OP_VMRGLW:
6829    ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
6830    ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
6831    ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
6832    ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
6833    break;
6834  case OP_VSPLTISW0:
6835    for (unsigned i = 0; i != 16; ++i)
6836      ShufIdxs[i] = (i&3)+0;
6837    break;
6838  case OP_VSPLTISW1:
6839    for (unsigned i = 0; i != 16; ++i)
6840      ShufIdxs[i] = (i&3)+4;
6841    break;
6842  case OP_VSPLTISW2:
6843    for (unsigned i = 0; i != 16; ++i)
6844      ShufIdxs[i] = (i&3)+8;
6845    break;
6846  case OP_VSPLTISW3:
6847    for (unsigned i = 0; i != 16; ++i)
6848      ShufIdxs[i] = (i&3)+12;
6849    break;
6850  case OP_VSLDOI4:
6851    return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
6852  case OP_VSLDOI8:
6853    return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
6854  case OP_VSLDOI12:
6855    return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
6856  }
6857  EVT VT = OpLHS.getValueType();
6858  OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
6859  OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
6860  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
6861  return DAG.getNode(ISD::BITCAST, dl, VT, T);
6862}
6863
6864/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
6865/// is a shuffle we can handle in a single instruction, return it.  Otherwise,
6866/// return the code it can be lowered into.  Worst case, it can always be
6867/// lowered into a vperm.
6868SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
6869                                               SelectionDAG &DAG) const {
6870  SDLoc dl(Op);
6871  SDValue V1 = Op.getOperand(0);
6872  SDValue V2 = Op.getOperand(1);
6873  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
6874  EVT VT = Op.getValueType();
6875  bool isLittleEndian = Subtarget.isLittleEndian();
6876
6877  if (Subtarget.hasQPX()) {
6878    if (VT.getVectorNumElements() != 4)
6879      return SDValue();
6880
6881    if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
6882
6883    int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp);
6884    if (AlignIdx != -1) {
6885      return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2,
6886                         DAG.getConstant(AlignIdx, MVT::i32));
6887    } else if (SVOp->isSplat()) {
6888      int SplatIdx = SVOp->getSplatIndex();
6889      if (SplatIdx >= 4) {
6890        std::swap(V1, V2);
6891        SplatIdx -= 4;
6892      }
6893
6894      // FIXME: If SplatIdx == 0 and the input came from a load, then there is
6895      // nothing to do.
6896
6897      return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1,
6898                         DAG.getConstant(SplatIdx, MVT::i32));
6899    }
6900
6901    // Lower this into a qvgpci/qvfperm pair.
6902
6903    // Compute the qvgpci literal
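    // Each of the four 3-bit fields selects a source element (0-7 across the
    // two concatenated inputs); element 0 occupies the most significant field.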
6904    unsigned idx = 0;
6905    for (unsigned i = 0; i < 4; ++i) {
6906      int m = SVOp->getMaskElt(i);
6907      unsigned mm = m >= 0 ? (unsigned) m : i;
6908      idx |= mm << (3-i)*3;
6909    }
6910
6911    SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64,
6912                             DAG.getConstant(idx, MVT::i32));
6913    return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3);
6914  }
6915
6916  // Cases that are handled by instructions that take permute immediates
6917  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
6918  // selected by the instruction selector.
6919  if (V2.getOpcode() == ISD::UNDEF) {
6920    if (PPC::isSplatShuffleMask(SVOp, 1) ||
6921        PPC::isSplatShuffleMask(SVOp, 2) ||
6922        PPC::isSplatShuffleMask(SVOp, 4) ||
6923        PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
6924        PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
6925        PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
6926        PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
6927        PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
6928        PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
6929        PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
6930        PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
6931        PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG)) {
6932      return Op;
6933    }
6934  }
6935
6936  // Altivec has a variety of "shuffle immediates" that take two vector inputs
6937  // and produce a fixed permutation.  If any of these match, do not lower to
6938  // VPERM.
6939  unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
6940  if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
6941      PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
6942      PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
6943      PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
6944      PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
6945      PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
6946      PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
6947      PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
6948      PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG))
6949    return Op;
6950
6951  // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
6952  // perfect shuffle table to emit an optimal matching sequence.
6953  ArrayRef<int> PermMask = SVOp->getMask();
6954
6955  unsigned PFIndexes[4];
6956  bool isFourElementShuffle = true;
6957  for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
6958    unsigned EltNo = 8;   // Start out undef.
6959    for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
6960      if (PermMask[i*4+j] < 0)
6961        continue;   // Undef, ignore it.
6962
6963      unsigned ByteSource = PermMask[i*4+j];
6964      if ((ByteSource & 3) != j) {
6965        isFourElementShuffle = false;
6966        break;
6967      }
6968
6969      if (EltNo == 8) {
6970        EltNo = ByteSource/4;
6971      } else if (EltNo != ByteSource/4) {
6972        isFourElementShuffle = false;
6973        break;
6974      }
6975    }
6976    PFIndexes[i] = EltNo;
6977  }
6978
6979  // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
6980  // perfect shuffle vector to determine if it is cost effective to do this as
6981  // discrete instructions, or whether we should use a vperm.
6982  // For now, we skip this for little endian until such time as we have a
6983  // little-endian perfect shuffle table.
6984  if (isFourElementShuffle && !isLittleEndian) {
6985    // Compute the index in the perfect shuffle table.
6986    unsigned PFTableIndex =
6987      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
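    // Four base-9 digits, one per element (8 encodes undef), matching the
    // encoding consumed by GeneratePerfectShuffle.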
6988
6989    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
6990    unsigned Cost  = (PFEntry >> 30);
6991
6992    // Determining when to avoid vperm is tricky.  Many things affect the cost
6993    // of vperm, particularly how many times the perm mask needs to be computed.
6994    // For example, if the perm mask can be hoisted out of a loop or is already
6995    // used (perhaps because there are multiple permutes with the same shuffle
6996    // mask?) the vperm has a cost of 1.  OTOH, hoisting the permute mask out of
6997    // the loop requires an extra register.
6998    //
6999    // As a compromise, we only emit discrete instructions if the shuffle can be
7000    // generated in 3 or fewer operations.  When we have loop information
7001    // available, if this block is within a loop, we should avoid using vperm
7002    // for 3-operation perms and use a constant pool load instead.
7003    if (Cost < 3)
7004      return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
7005  }
7006
7007  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
7008  // vector that will get spilled to the constant pool.
7009  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
7010
7011  // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
7012  // that it is in input element units, not in bytes.  Convert now.
7013
7014  // For little endian, the order of the input vectors is reversed, and
7015  // the permutation mask is complemented with respect to 31.  This is
7016  // necessary to produce proper semantics with the big-endian-biased vperm
7017  // instruction.
7018  EVT EltVT = V1.getValueType().getVectorElementType();
7019  unsigned BytesPerElement = EltVT.getSizeInBits()/8;
7020
7021  SmallVector<SDValue, 16> ResultMask;
7022  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
7023    unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
7024
7025    for (unsigned j = 0; j != BytesPerElement; ++j)
7026      if (isLittleEndian)
7027        ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement+j),
7028                                             MVT::i32));
7029      else
7030        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
7031                                             MVT::i32));
7032  }
7033
7034  SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
7035                                  ResultMask);
7036  if (isLittleEndian)
7037    return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
7038                       V2, V1, VPermMask);
7039  else
7040    return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
7041                       V1, V2, VPermMask);
7042}
7043
7044/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
7045  /// altivec comparison.  If it is, return true and fill in CompareOpc/isDot
7046/// information about the intrinsic.
7047static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
7048                                  bool &isDot) {
7049  unsigned IntrinsicID =
7050    cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
7051  CompareOpc = -1;
7052  isDot = false;
7053  switch (IntrinsicID) {
7054  default: return false;
7055    // Comparison predicates.
7056  case Intrinsic::ppc_altivec_vcmpbfp_p:  CompareOpc = 966; isDot = 1; break;
7057  case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
7058  case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc =   6; isDot = 1; break;
7059  case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc =  70; isDot = 1; break;
7060  case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
7061  case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
7062  case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
7063  case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
7064  case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
7065  case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
7066  case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
7067  case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
7068  case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
7069
7070    // Normal Comparisons.
7071  case Intrinsic::ppc_altivec_vcmpbfp:    CompareOpc = 966; isDot = 0; break;
7072  case Intrinsic::ppc_altivec_vcmpeqfp:   CompareOpc = 198; isDot = 0; break;
7073  case Intrinsic::ppc_altivec_vcmpequb:   CompareOpc =   6; isDot = 0; break;
7074  case Intrinsic::ppc_altivec_vcmpequh:   CompareOpc =  70; isDot = 0; break;
7075  case Intrinsic::ppc_altivec_vcmpequw:   CompareOpc = 134; isDot = 0; break;
7076  case Intrinsic::ppc_altivec_vcmpgefp:   CompareOpc = 454; isDot = 0; break;
7077  case Intrinsic::ppc_altivec_vcmpgtfp:   CompareOpc = 710; isDot = 0; break;
7078  case Intrinsic::ppc_altivec_vcmpgtsb:   CompareOpc = 774; isDot = 0; break;
7079  case Intrinsic::ppc_altivec_vcmpgtsh:   CompareOpc = 838; isDot = 0; break;
7080  case Intrinsic::ppc_altivec_vcmpgtsw:   CompareOpc = 902; isDot = 0; break;
7081  case Intrinsic::ppc_altivec_vcmpgtub:   CompareOpc = 518; isDot = 0; break;
7082  case Intrinsic::ppc_altivec_vcmpgtuh:   CompareOpc = 582; isDot = 0; break;
7083  case Intrinsic::ppc_altivec_vcmpgtuw:   CompareOpc = 646; isDot = 0; break;
7084  }
7085  return true;
7086}
7087
7088/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
7089/// lower, do it, otherwise return null.
7090SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
7091                                                   SelectionDAG &DAG) const {
7092  // If this is a lowered altivec predicate compare, CompareOpc is set to the
7093  // opcode number of the comparison.
7094  SDLoc dl(Op);
7095  int CompareOpc;
7096  bool isDot;
7097  if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
7098    return SDValue();    // Don't custom lower most intrinsics.
7099
7100  // If this is a non-dot comparison, make the VCMP node and we are done.
7101  if (!isDot) {
7102    SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
7103                              Op.getOperand(1), Op.getOperand(2),
7104                              DAG.getConstant(CompareOpc, MVT::i32));
7105    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
7106  }
7107
7108  // Create the PPCISD altivec 'dot' comparison node.
7109  SDValue Ops[] = {
7110    Op.getOperand(2),  // LHS
7111    Op.getOperand(3),  // RHS
7112    DAG.getConstant(CompareOpc, MVT::i32)
7113  };
7114  EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
7115  SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
7116
7117  // Now that we have the comparison, emit a copy from the CR to a GPR.
7118  // This is flagged to the above dot comparison.
7119  SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
7120                                DAG.getRegister(PPC::CR6, MVT::i32),
7121                                CompNode.getValue(1));
7122
7123  // Unpack the result based on how the target uses it.
7124  unsigned BitNo;   // Bit # of CR6.
7125  bool InvertBit;   // Invert result?
7126  switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
7127  default:  // Can't happen, don't crash on invalid number though.
7128  case 0:   // Return the value of the EQ bit of CR6.
7129    BitNo = 0; InvertBit = false;
7130    break;
7131  case 1:   // Return the inverted value of the EQ bit of CR6.
7132    BitNo = 0; InvertBit = true;
7133    break;
7134  case 2:   // Return the value of the LT bit of CR6.
7135    BitNo = 2; InvertBit = false;
7136    break;
7137  case 3:   // Return the inverted value of the LT bit of CR6.
7138    BitNo = 2; InvertBit = true;
7139    break;
7140  }
7141
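  // After the MFOCRF, CR6 sits in bits 7:4 of the low byte (counting from the
  // lsb): LT=7, GT=6, EQ=5, SO=4. Shifting right by 8-(3-BitNo) = 5+BitNo thus
  // brings the requested bit (EQ for BitNo=0, LT for BitNo=2) down to bit 0.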
7142  // Shift the bit into the low position.
7143  Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
7144                      DAG.getConstant(8-(3-BitNo), MVT::i32));
7145  // Isolate the bit.
7146  Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
7147                      DAG.getConstant(1, MVT::i32));
7148
7149  // If we are supposed to, toggle the bit.
7150  if (InvertBit)
7151    Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
7152                        DAG.getConstant(1, MVT::i32));
7153  return Flags;
7154}
7155
7156SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
7157                                                  SelectionDAG &DAG) const {
7158  SDLoc dl(Op);
7159  // For v2i64 (VSX), we can pattern match the v2i32 case (using fp <-> int
7160  // instructions), but for smaller types, we need to first extend up to v2i32
7161  // before going any farther.
7162  if (Op.getValueType() == MVT::v2i64) {
7163    EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
7164    if (ExtVT != MVT::v2i32) {
7165      Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0));
7166      Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op,
7167                       DAG.getValueType(EVT::getVectorVT(*DAG.getContext(),
7168                                        ExtVT.getVectorElementType(), 4)));
7169      Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op);
7170      Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op,
7171                       DAG.getValueType(MVT::v2i32));
7172    }
7173
7174    return Op;
7175  }
7176
7177  return SDValue();
7178}
7179
7180SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
7181                                                   SelectionDAG &DAG) const {
7182  SDLoc dl(Op);
7183  // Create a stack slot that is 16-byte aligned.
7184  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
7185  int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
7186  EVT PtrVT = getPointerTy();
7187  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7188
7189  // Store the input value into Value#0 of the stack slot.
7190  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
7191                               Op.getOperand(0), FIdx, MachinePointerInfo(),
7192                               false, false, 0);
7193  // Load it out.
7194  return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(),
7195                     false, false, false, 0);
7196}
7197
7198SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
7199                                                   SelectionDAG &DAG) const {
7200  SDLoc dl(Op);
7201  SDNode *N = Op.getNode();
7202
7203  assert(N->getOperand(0).getValueType() == MVT::v4i1 &&
7204         "Unknown extract_vector_elt type");
7205
7206  SDValue Value = N->getOperand(0);
7207
7208  // The first part of this is like the store lowering except that we don't
7209  // need to track the chain.
7210
7211  // The values are now known to be -1 (false) or 1 (true). To convert this
7212  // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
7213  // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
7214  Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
7215
7216  // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
7217  // understand how to form the extending load.
7218  SDValue FPHalfs = DAG.getConstantFP(0.5, MVT::f64);
7219  FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
7220                        FPHalfs, FPHalfs, FPHalfs, FPHalfs);
7221
7222  Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
7223
7224  // Now convert to an integer and store.
7225  Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
7226    DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, MVT::i32),
7227    Value);
7228
7229  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
7230  int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
7231  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx);
7232  EVT PtrVT = getPointerTy();
7233  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7234
7235  SDValue StoreChain = DAG.getEntryNode();
7236  SmallVector<SDValue, 2> Ops;
7237  Ops.push_back(StoreChain);
7238  Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, MVT::i32));
7239  Ops.push_back(Value);
7240  Ops.push_back(FIdx);
7241
7242  SmallVector<EVT, 2> ValueVTs;
7243  ValueVTs.push_back(MVT::Other); // chain
7244  SDVTList VTs = DAG.getVTList(ValueVTs);
7245
7246  StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
7247    dl, VTs, Ops, MVT::v4i32, PtrInfo);
7248
7249  // Extract the value requested.
7250  unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
7251  SDValue Idx = DAG.getConstant(Offset, FIdx.getValueType());
7252  Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
7253
7254  SDValue IntVal = DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
7255                               PtrInfo.getWithOffset(Offset),
7256                               false, false, false, 0);
7257
7258  if (!Subtarget.useCRBits())
7259    return IntVal;
7260
7261  return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal);
7262}
7263
7264/// Lowering for QPX v4i1 loads
7265SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
7266                                           SelectionDAG &DAG) const {
7267  SDLoc dl(Op);
7268  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
7269  SDValue LoadChain = LN->getChain();
7270  SDValue BasePtr = LN->getBasePtr();
7271
7272  if (Op.getValueType() == MVT::v4f64 ||
7273      Op.getValueType() == MVT::v4f32) {
7274    EVT MemVT = LN->getMemoryVT();
7275    unsigned Alignment = LN->getAlignment();
7276
7277    // If this load is properly aligned, then it is legal.
7278    if (Alignment >= MemVT.getStoreSize())
7279      return Op;
7280
7281    EVT ScalarVT = Op.getValueType().getScalarType(),
7282        ScalarMemVT = MemVT.getScalarType();
7283    unsigned Stride = ScalarMemVT.getStoreSize();
7284
7285    SmallVector<SDValue, 8> Vals, LoadChains;
7286    for (unsigned Idx = 0; Idx < 4; ++Idx) {
7287      SDValue Load;
7288      if (ScalarVT != ScalarMemVT)
7289        Load =
7290          DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain,
7291                         BasePtr,
7292                         LN->getPointerInfo().getWithOffset(Idx*Stride),
7293                         ScalarMemVT, LN->isVolatile(), LN->isNonTemporal(),
7294                         LN->isInvariant(), MinAlign(Alignment, Idx*Stride),
7295                         LN->getAAInfo());
7296      else
7297        Load =
7298          DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr,
7299                       LN->getPointerInfo().getWithOffset(Idx*Stride),
7300                       LN->isVolatile(), LN->isNonTemporal(),
7301                       LN->isInvariant(), MinAlign(Alignment, Idx*Stride),
7302                       LN->getAAInfo());
7303
7304      if (Idx == 0 && LN->isIndexed()) {
7305        assert(LN->getAddressingMode() == ISD::PRE_INC &&
7306               "Unknown addressing mode on vector load");
7307        Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(),
7308                                  LN->getAddressingMode());
7309      }
7310
7311      Vals.push_back(Load);
7312      LoadChains.push_back(Load.getValue(1));
7313
7314      BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
7315                            DAG.getConstant(Stride, BasePtr.getValueType()));
7316    }
7317
7318    SDValue TF =  DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
7319    SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl,
7320                                   Op.getValueType(), Vals);
7321
7322    if (LN->isIndexed()) {
7323      SDValue RetOps[] = { Value, Vals[0].getValue(1), TF };
7324      return DAG.getMergeValues(RetOps, dl);
7325    }
7326
7327    SDValue RetOps[] = { Value, TF };
7328    return DAG.getMergeValues(RetOps, dl);
7329  }
7330
7331  assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower");
7332  assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported");
7333
7334  // To lower v4i1 from a byte array, we load the byte elements of the
7335  // vector and then reuse the BUILD_VECTOR logic.
7336
7337  SmallVector<SDValue, 4> VectElmts, VectElmtChains;
7338  for (unsigned i = 0; i < 4; ++i) {
7339    SDValue Idx = DAG.getConstant(i, BasePtr.getValueType());
7340    Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
7341
7342    VectElmts.push_back(DAG.getExtLoad(ISD::EXTLOAD,
7343                        dl, MVT::i32, LoadChain, Idx,
7344                        LN->getPointerInfo().getWithOffset(i),
7345                        MVT::i8 /* memory type */,
7346                        LN->isVolatile(), LN->isNonTemporal(),
7347                        LN->isInvariant(),
7348                        1 /* alignment */, LN->getAAInfo()));
7349    VectElmtChains.push_back(VectElmts[i].getValue(1));
7350  }
7351
7352  LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains);
7353  SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i1, VectElmts);
7354
7355  SDValue RVals[] = { Value, LoadChain };
7356  return DAG.getMergeValues(RVals, dl);
7357}
7358
7359/// Lowering for QPX v4i1 stores
7360SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
7361                                            SelectionDAG &DAG) const {
7362  SDLoc dl(Op);
7363  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
7364  SDValue StoreChain = SN->getChain();
7365  SDValue BasePtr = SN->getBasePtr();
7366  SDValue Value = SN->getValue();
7367
7368  if (Value.getValueType() == MVT::v4f64 ||
7369      Value.getValueType() == MVT::v4f32) {
7370    EVT MemVT = SN->getMemoryVT();
7371    unsigned Alignment = SN->getAlignment();
7372
7373    // If this store is properly aligned, then it is legal.
7374    if (Alignment >= MemVT.getStoreSize())
7375      return Op;
7376
7377    EVT ScalarVT = Value.getValueType().getScalarType(),
7378        ScalarMemVT = MemVT.getScalarType();
7379    unsigned Stride = ScalarMemVT.getStoreSize();
7380
7381    SmallVector<SDValue, 8> Stores;
7382    for (unsigned Idx = 0; Idx < 4; ++Idx) {
7383      SDValue Ex =
7384        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
7385                    DAG.getConstant(Idx, getVectorIdxTy()));
7386      SDValue Store;
7387      if (ScalarVT != ScalarMemVT)
7388        Store =
7389          DAG.getTruncStore(StoreChain, dl, Ex, BasePtr,
7390                            SN->getPointerInfo().getWithOffset(Idx*Stride),
7391                            ScalarMemVT, SN->isVolatile(), SN->isNonTemporal(),
7392                            MinAlign(Alignment, Idx*Stride), SN->getAAInfo());
7393      else
7394        Store =
7395          DAG.getStore(StoreChain, dl, Ex, BasePtr,
7396                       SN->getPointerInfo().getWithOffset(Idx*Stride),
7397                       SN->isVolatile(), SN->isNonTemporal(),
7398                       MinAlign(Alignment, Idx*Stride), SN->getAAInfo());
7399
7400      if (Idx == 0 && SN->isIndexed()) {
7401        assert(SN->getAddressingMode() == ISD::PRE_INC &&
7402               "Unknown addressing mode on vector store");
7403        Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(),
7404                                    SN->getAddressingMode());
7405      }
7406
7407      BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
7408                            DAG.getConstant(Stride, BasePtr.getValueType()));
7409      Stores.push_back(Store);
7410    }
7411
7412    SDValue TF =  DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
7413
7414    if (SN->isIndexed()) {
7415      SDValue RetOps[] = { TF, Stores[0].getValue(1) };
7416      return DAG.getMergeValues(RetOps, dl);
7417    }
7418
7419    return TF;
7420  }
7421
7422  assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported");
7423  assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower");
7424
7425  // The values are now known to be -1 (false) or 1 (true). To convert this
7426  // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
7427  // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
7428  Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
7429
7430  // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
7431  // understand how to form the extending load.
7432  SDValue FPHalfs = DAG.getConstantFP(0.5, MVT::f64);
7433  FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
7434                        FPHalfs, FPHalfs, FPHalfs, FPHalfs);
7435
7436  Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
7437
7438  // Now convert to an integer and store.
7439  Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
7440    DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, MVT::i32),
7441    Value);
7442
7443  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
7444  int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
7445  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx);
7446  EVT PtrVT = getPointerTy();
7447  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7448
7449  SmallVector<SDValue, 2> Ops;
7450  Ops.push_back(StoreChain);
7451  Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, MVT::i32));
7452  Ops.push_back(Value);
7453  Ops.push_back(FIdx);
7454
7455  SmallVector<EVT, 2> ValueVTs;
7456  ValueVTs.push_back(MVT::Other); // chain
7457  SDVTList VTs = DAG.getVTList(ValueVTs);
7458
7459  StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
7460    dl, VTs, Ops, MVT::v4i32, PtrInfo);
7461
7462  // Move data into the byte array.
7463  SmallVector<SDValue, 4> Loads, LoadChains;
7464  for (unsigned i = 0; i < 4; ++i) {
7465    unsigned Offset = 4*i;
7466    SDValue Idx = DAG.getConstant(Offset, FIdx.getValueType());
7467    Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
7468
7469    Loads.push_back(DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
7470                                   PtrInfo.getWithOffset(Offset),
7471                                   false, false, false, 0));
7472    LoadChains.push_back(Loads[i].getValue(1));
7473  }
7474
7475  StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
7476
7477  SmallVector<SDValue, 4> Stores;
7478  for (unsigned i = 0; i < 4; ++i) {
7479    SDValue Idx = DAG.getConstant(i, BasePtr.getValueType());
7480    Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
7481
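    // FIXME: The isNonTemporal/isVolatile argument order below is flipped
    // relative to the getTruncStore call earlier in this function; one of
    // these two call sites is passing the flags swapped.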
7482    Stores.push_back(DAG.getTruncStore(StoreChain, dl, Loads[i], Idx,
7483                                       SN->getPointerInfo().getWithOffset(i),
7484                                       MVT::i8 /* memory type */,
7485                                       SN->isNonTemporal(), SN->isVolatile(),
7486                                       1 /* alignment */, SN->getAAInfo()));
7487  }
7488
7489  StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
7490
7491  return StoreChain;
7492}
7493
7494SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
7495  SDLoc dl(Op);
7496  if (Op.getValueType() == MVT::v4i32) {
7497    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
7498
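    // A 32-bit product mod 2^32 is lo(L)*lo(R) + ((lo(L)*hi(R) + hi(L)*lo(R))
    // << 16); vmulouh supplies the first term, and vmsumuhm against the
    // half-swapped RHS supplies the parenthesized sum of cross products.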
7499    SDValue Zero  = BuildSplatI(  0, 1, MVT::v4i32, DAG, dl);
7500    SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl); // +16 as shift amt.
7501
7502    SDValue RHSSwap =   // = vrlw RHS, 16
7503      BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
7504
7505    // Shrinkify inputs to v8i16.
7506    LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
7507    RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
7508    RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
7509
7510    // Low parts multiplied together, generating 32-bit results (we ignore the
7511    // top parts).
7512    SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
7513                                        LHS, RHS, DAG, dl, MVT::v4i32);
7514
7515    SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
7516                                      LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
7517    // Shift the high parts up 16 bits.
7518    HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
7519                              Neg16, DAG, dl);
7520    return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
7521  } else if (Op.getValueType() == MVT::v8i16) {
7522    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
7523
7524    SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);
7525
7526    return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
7527                            LHS, RHS, Zero, DAG, dl);
7528  } else if (Op.getValueType() == MVT::v16i8) {
7529    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
7530    bool isLittleEndian = Subtarget.isLittleEndian();
7531
7532    // Multiply the even 8-bit parts, producing 16-bit sums.
7533    SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
7534                                           LHS, RHS, DAG, dl, MVT::v8i16);
7535    EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
7536
7537    // Multiply the odd 8-bit parts, producing 16-bit sums.
7538    SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
7539                                          LHS, RHS, DAG, dl, MVT::v8i16);
7540    OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
7541
7542    // Merge the results together.  Because vmuleub and vmuloub are
7543    // instructions with a big-endian bias, we must reverse the
7544    // element numbering and reverse the meaning of "odd" and "even"
7545    // when generating little endian code.
7546    int Ops[16];
7547    for (unsigned i = 0; i != 8; ++i) {
7548      if (isLittleEndian) {
7549        Ops[i*2  ] = 2*i;
7550        Ops[i*2+1] = 2*i+16;
7551      } else {
7552        Ops[i*2  ] = 2*i+1;
7553        Ops[i*2+1] = 2*i+1+16;
7554      }
7555    }
7556    if (isLittleEndian)
7557      return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
7558    else
7559      return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
7560  } else {
7561    llvm_unreachable("Unknown mul to lower!");
7562  }
7563}
7564
7565/// LowerOperation - Provide custom lowering hooks for some operations.
7566///
7567SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
7568  switch (Op.getOpcode()) {
7569  default: llvm_unreachable("Wasn't expecting to be able to lower this!");
7570  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
7571  case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
7572  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
7573  case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
7574  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
7575  case ISD::SETCC:              return LowerSETCC(Op, DAG);
7576  case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
7577  case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);
7578  case ISD::VASTART:
7579    return LowerVASTART(Op, DAG, Subtarget);
7580
7581  case ISD::VAARG:
7582    return LowerVAARG(Op, DAG, Subtarget);
7583
7584  case ISD::VACOPY:
7585    return LowerVACOPY(Op, DAG, Subtarget);
7586
7587  case ISD::STACKRESTORE:       return LowerSTACKRESTORE(Op, DAG, Subtarget);
7588  case ISD::DYNAMIC_STACKALLOC:
7589    return LowerDYNAMIC_STACKALLOC(Op, DAG, Subtarget);
7590
7591  case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
7592  case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);
7593
7594  case ISD::LOAD:               return LowerLOAD(Op, DAG);
7595  case ISD::STORE:              return LowerSTORE(Op, DAG);
7596  case ISD::TRUNCATE:           return LowerTRUNCATE(Op, DAG);
7597  case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
7598  case ISD::FP_TO_UINT:
7599  case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG,
7600                                                      SDLoc(Op));
7601  case ISD::UINT_TO_FP:
7602  case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
7603  case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);
7604
7605  // Lower 64-bit shifts.
7606  case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
7607  case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
7608  case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);
7609
7610  // Vector-related lowering.
7611  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
7612  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
7613  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
7614  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
7615  case ISD::SIGN_EXTEND_INREG:  return LowerSIGN_EXTEND_INREG(Op, DAG);
7616  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
7617  case ISD::MUL:                return LowerMUL(Op, DAG);
7618
7619  // For counter-based loop handling.
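  // Returning SDValue() provides no custom replacement here; the CTR-decrement
  // intrinsic only needs special handling during type legalization (see
  // ReplaceNodeResults below) and in the DAG combiner.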
7620  case ISD::INTRINSIC_W_CHAIN:  return SDValue();
7621
7622  // Frame & Return address.
7623  case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
7624  case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
7625  }
7626}
7627
7628void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
7629                                           SmallVectorImpl<SDValue>&Results,
7630                                           SelectionDAG &DAG) const {
7631  SDLoc dl(N);
7632  switch (N->getOpcode()) {
7633  default:
7634    llvm_unreachable("Do not know how to custom type legalize this operation!");
7635  case ISD::READCYCLECOUNTER: {
7636    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
7637    SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
7638
7639    Results.push_back(RTB);
7640    Results.push_back(RTB.getValue(1));
7641    Results.push_back(RTB.getValue(2));
7642    break;
7643  }
7644  case ISD::INTRINSIC_W_CHAIN: {
7645    if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
7646        Intrinsic::ppc_is_decremented_ctr_nonzero)
7647      break;
7648
7649    assert(N->getValueType(0) == MVT::i1 &&
7650           "Unexpected result type for CTR decrement intrinsic");
7651    EVT SVT = getSetCCResultType(*DAG.getContext(), N->getValueType(0));
7652    SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
7653    SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
7654                                 N->getOperand(1));
7655
7656    Results.push_back(NewInt);
7657    Results.push_back(NewInt.getValue(1));
7658    break;
7659  }
7660  case ISD::VAARG: {
7661    if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
7662      return;
7663
7664    EVT VT = N->getValueType(0);
7665
7666    if (VT == MVT::i64) {
7667      SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, Subtarget);
7668
7669      Results.push_back(NewNode);
7670      Results.push_back(NewNode.getValue(1));
7671    }
7672    return;
7673  }
7674  case ISD::FP_ROUND_INREG: {
7675    assert(N->getValueType(0) == MVT::ppcf128);
7676    assert(N->getOperand(0).getValueType() == MVT::ppcf128);
7677    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
7678                             MVT::f64, N->getOperand(0),
7679                             DAG.getIntPtrConstant(0));
7680    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
7681                             MVT::f64, N->getOperand(0),
7682                             DAG.getIntPtrConstant(1));
7683
7684    // Add the two halves of the long double in round-to-zero mode.
7685    SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
7686
7687    // We know the low half is about to be thrown away, so just use something
7688    // convenient.
7689    Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
7690                                FPreg, FPreg));
7691    return;
7692  }
7693  case ISD::FP_TO_SINT:
7694    // LowerFP_TO_INT() can only handle f32 and f64.
7695    if (N->getOperand(0).getValueType() == MVT::ppcf128)
7696      return;
7697    Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
7698    return;
7699  }
7700}
7701
7702
7703//===----------------------------------------------------------------------===//
7704//  Other Lowering Code
7705//===----------------------------------------------------------------------===//
7706
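// Emit a zero-argument call to the given intrinsic at the builder's current
// insertion point.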
7707static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
7708  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
7709  Function *Func = Intrinsic::getDeclaration(M, Id);
7710  return Builder.CreateCall(Func);
7711}
7712
7713// The mappings for emitLeadingFence/emitTrailingFence are taken from
7714// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
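// In short: a seq_cst access gets a full sync before it, a release (or
// acq_rel) access gets an lwsync before it, and an acquire (or acq_rel)
// load gets an lwsync after it.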
7715Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
7716                                         AtomicOrdering Ord, bool IsStore,
7717                                         bool IsLoad) const {
7718  if (Ord == SequentiallyConsistent)
7719    return callIntrinsic(Builder, Intrinsic::ppc_sync);
7720  else if (isAtLeastRelease(Ord))
7721    return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
7722  else
7723    return nullptr;
7724}
7725
7726Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
7727                                          AtomicOrdering Ord, bool IsStore,
7728                                          bool IsLoad) const {
7729  if (IsLoad && isAtLeastAcquire(Ord))
7730    return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
7731  // FIXME: this is too conservative, a dependent branch + isync is enough.
7732  // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
7733  // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
7734  // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
7735  else
7736    return nullptr;
7737}
7738
7739MachineBasicBlock *
7740PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
7741                                    bool is64bit, unsigned BinOpcode) const {
7742  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
7743  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7744
7745  const BasicBlock *LLVM_BB = BB->getBasicBlock();
7746  MachineFunction *F = BB->getParent();
7747  MachineFunction::iterator It = BB;
7748  ++It;
7749
7750  unsigned dest = MI->getOperand(0).getReg();
7751  unsigned ptrA = MI->getOperand(1).getReg();
7752  unsigned ptrB = MI->getOperand(2).getReg();
7753  unsigned incr = MI->getOperand(3).getReg();
7754  DebugLoc dl = MI->getDebugLoc();
7755
7756  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
7757  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
7758  F->insert(It, loopMBB);
7759  F->insert(It, exitMBB);
7760  exitMBB->splice(exitMBB->begin(), BB,
7761                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
7762  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
7763
7764  MachineRegisterInfo &RegInfo = F->getRegInfo();
7765  unsigned TmpReg = (!BinOpcode) ? incr :
7766    RegInfo.createVirtualRegister(is64bit ? &PPC::G8RCRegClass
7767                                          : &PPC::GPRCRegClass);
7768
7769  //  thisMBB:
7770  //   ...
7771  //   fallthrough --> loopMBB
7772  BB->addSuccessor(loopMBB);
7773
7774  //  loopMBB:
7775  //   l[wd]arx dest, ptr
7776  //   add r0, dest, incr
7777  //   st[wd]cx. r0, ptr
7778  //   bne- loopMBB
7779  //   fallthrough --> exitMBB
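  // The st[wd]cx. stores only if the reservation taken by the l[wd]arx is
  // still held; if it was lost, CR0 is set to "not equal" and we retry.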
7780  BB = loopMBB;
7781  BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
7782    .addReg(ptrA).addReg(ptrB);
7783  if (BinOpcode)
7784    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
7785  BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
7786    .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
7787  BuildMI(BB, dl, TII->get(PPC::BCC))
7788    .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
7789  BB->addSuccessor(loopMBB);
7790  BB->addSuccessor(exitMBB);
7791
7792  //  exitMBB:
7793  //   ...
7794  BB = exitMBB;
7795  return BB;
7796}
7797
7798MachineBasicBlock *
7799PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
7800                                            MachineBasicBlock *BB,
7801                                            bool is8bit, // true for 8-bit, false for 16-bit
7802                                            unsigned BinOpcode) const {
7803  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
7804  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7805  // In 64-bit mode we have to use 64-bit registers for addresses, even though
7806  // lwarx/stwcx. operate on 32-bit words.  With the 32-bit atomics we can use
7807  // address registers without caring whether they're 32 or 64 bits, but here
7808  // we're doing actual arithmetic on the addresses.
7809  bool is64bit = Subtarget.isPPC64();
7810  unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
7811
7812  const BasicBlock *LLVM_BB = BB->getBasicBlock();
7813  MachineFunction *F = BB->getParent();
7814  MachineFunction::iterator It = BB;
7815  ++It;
7816
7817  unsigned dest = MI->getOperand(0).getReg();
7818  unsigned ptrA = MI->getOperand(1).getReg();
7819  unsigned ptrB = MI->getOperand(2).getReg();
7820  unsigned incr = MI->getOperand(3).getReg();
7821  DebugLoc dl = MI->getDebugLoc();
7822
7823  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
7824  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
7825  F->insert(It, loopMBB);
7826  F->insert(It, exitMBB);
7827  exitMBB->splice(exitMBB->begin(), BB,
7828                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
7829  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
7830
7831  MachineRegisterInfo &RegInfo = F->getRegInfo();
7832  const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
7833                                          : &PPC::GPRCRegClass;
7834  unsigned PtrReg = RegInfo.createVirtualRegister(RC);
7835  unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
7836  unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
7837  unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
7838  unsigned MaskReg = RegInfo.createVirtualRegister(RC);
7839  unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
7840  unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
7841  unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
7842  unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
7843  unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
7844  unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
7845  unsigned Ptr1Reg;
7846  unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);
7847
7848  //  thisMBB:
7849  //   ...
7850  //   fallthrough --> loopMBB
7851  BB->addSuccessor(loopMBB);
7852
7853  // The 4-byte load must be aligned, while a char or short may be
7854  // anywhere in the word.  Hence all this nasty bookkeeping code.
7855  //   add ptr1, ptrA, ptrB [copy if ptrA==0]
7856  //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
7857  //   xori shift, shift1, 24 [16]
7858  //   rlwinm ptr, ptr1, 0, 0, 29
7859  //   slw incr2, incr, shift
7860  //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
7861  //   slw mask, mask2, shift
7862  //  loopMBB:
7863  //   lwarx tmpDest, ptr
7864  //   add tmp, tmpDest, incr2
7865  //   andc tmp2, tmpDest, mask
7866  //   and tmp3, tmp, mask
7867  //   or tmp4, tmp3, tmp2
7868  //   stwcx. tmp4, ptr
7869  //   bne- loopMBB
7870  //   fallthrough --> exitMBB
7871  //   srw dest, tmpDest, shift
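  // For example, a byte at big-endian offset 1 within its word gives
  // shift1 = 8 and shift = 8 ^ 24 = 16, so the byte occupies bits [23:16]
  // and the mask becomes 0x00FF0000.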
7872  if (ptrA != ZeroReg) {
7873    Ptr1Reg = RegInfo.createVirtualRegister(RC);
7874    BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
7875      .addReg(ptrA).addReg(ptrB);
7876  } else {
7877    Ptr1Reg = ptrB;
7878  }
7879  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
7880      .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
7881  BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
7882      .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
7883  if (is64bit)
7884    BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
7885      .addReg(Ptr1Reg).addImm(0).addImm(61);
7886  else
7887    BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
7888      .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
7889  BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
7890      .addReg(incr).addReg(ShiftReg);
7891  if (is8bit)
7892    BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
7893  else {
7894    BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
7895    BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg).addReg(Mask3Reg).addImm(65535);
7896  }
7897  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
7898      .addReg(Mask2Reg).addReg(ShiftReg);
7899
7900  BB = loopMBB;
7901  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
7902    .addReg(ZeroReg).addReg(PtrReg);
7903  if (BinOpcode)
7904    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
7905      .addReg(Incr2Reg).addReg(TmpDestReg);
7906  BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
7907    .addReg(TmpDestReg).addReg(MaskReg);
7908  BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
7909    .addReg(TmpReg).addReg(MaskReg);
7910  BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
7911    .addReg(Tmp3Reg).addReg(Tmp2Reg);
7912  BuildMI(BB, dl, TII->get(PPC::STWCX))
7913    .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
7914  BuildMI(BB, dl, TII->get(PPC::BCC))
7915    .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
7916  BB->addSuccessor(loopMBB);
7917  BB->addSuccessor(exitMBB);
7918
7919  //  exitMBB:
7920  //   ...
7921  BB = exitMBB;
7922  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg)
7923    .addReg(ShiftReg);
7924  return BB;
7925}
7926
7927llvm::MachineBasicBlock*
7928PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
7929                                    MachineBasicBlock *MBB) const {
7930  DebugLoc DL = MI->getDebugLoc();
7931  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7932
7933  MachineFunction *MF = MBB->getParent();
7934  MachineRegisterInfo &MRI = MF->getRegInfo();
7935
7936  const BasicBlock *BB = MBB->getBasicBlock();
7937  MachineFunction::iterator I = MBB;
7938  ++I;
7939
7940  // Memory Reference
7941  MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
7942  MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
7943
7944  unsigned DstReg = MI->getOperand(0).getReg();
7945  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
7946  assert(RC->hasType(MVT::i32) && "Invalid destination!");
7947  unsigned mainDstReg = MRI.createVirtualRegister(RC);
7948  unsigned restoreDstReg = MRI.createVirtualRegister(RC);
7949
7950  MVT PVT = getPointerTy();
7951  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
7952         "Invalid Pointer Size!");
7953  // For v = setjmp(buf), we generate
7954  //
7955  // thisMBB:
7956  //  SjLjSetup mainMBB
7957  //  bl mainMBB
7958  //  v_restore = 1
7959  //  b sinkMBB
7960  //
7961  // mainMBB:
7962  //  buf[LabelOffset] = LR
7963  //  v_main = 0
7964  //
7965  // sinkMBB:
7966  //  v = phi(main, restore)
7967  //
7968
7969  MachineBasicBlock *thisMBB = MBB;
7970  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
7971  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
7972  MF->insert(I, mainMBB);
7973  MF->insert(I, sinkMBB);
7974
7975  MachineInstrBuilder MIB;
7976
7977  // Transfer the remainder of BB and its successor edges to sinkMBB.
7978  sinkMBB->splice(sinkMBB->begin(), MBB,
7979                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
7980  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
7981
7982  // Note that the structure of the jmp_buf used here is not compatible
7983  // with that used by libc, and is not designed to be. Specifically, it
7984  // stores only those 'reserved' registers that LLVM does not otherwise
7985  // understand how to spill. Also, by convention, by the time this
7986  // intrinsic is called, Clang has already stored the frame address in the
7987  // first slot of the buffer and stack address in the third. Following the
7988  // X86 target code, we'll store the jump address in the second slot. We also
7989  // need to save the TOC pointer (R2) to handle jumps between shared
7990  // libraries, and that will be stored in the fourth slot. The thread
7991  // identifier (R13) is not affected.
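  // Resulting buffer layout, in pointer-sized slots:
  //   0: frame address, 1: jump (return) address, 2: stack pointer,
  //   3: TOC pointer (R2), 4: base pointer.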
7992
7993  // thisMBB:
7994  const int64_t LabelOffset = 1 * PVT.getStoreSize();
7995  const int64_t TOCOffset   = 3 * PVT.getStoreSize();
7996  const int64_t BPOffset    = 4 * PVT.getStoreSize();
7997
7998  // Prepare the IP (label address) in a register.
7999  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
8000  unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
8001  unsigned BufReg = MI->getOperand(1).getReg();
8002
8003  if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
8004    setUsesTOCBasePtr(*MBB->getParent());
8005    MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
8006            .addReg(PPC::X2)
8007            .addImm(TOCOffset)
8008            .addReg(BufReg);
8009    MIB.setMemRefs(MMOBegin, MMOEnd);
8010  }
8011
8012  // Naked functions never have a base pointer, and so we use r1. For all
8013  // other functions, this decision is deferred to prologue/epilogue insertion.
8014  unsigned BaseReg;
8015  if (MF->getFunction()->hasFnAttribute(Attribute::Naked))
8016    BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
8017  else
8018    BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
8019
8020  MIB = BuildMI(*thisMBB, MI, DL,
8021                TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
8022            .addReg(BaseReg)
8023            .addImm(BPOffset)
8024            .addReg(BufReg);
8025  MIB.setMemRefs(MMOBegin, MMOEnd);
8026
8027  // Setup
8028  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
8029  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
8030  MIB.addRegMask(TRI->getNoPreservedMask());
8031
8032  BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
8033
8034  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
8035          .addMBB(mainMBB);
8036  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
8037
8038  thisMBB->addSuccessor(mainMBB, /* weight */ 0);
8039  thisMBB->addSuccessor(sinkMBB, /* weight */ 1);
8040
8041  // mainMBB:
8042  //  mainDstReg = 0
8043  MIB =
8044      BuildMI(mainMBB, DL,
8045              TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
8046
8047  // Store IP
8048  if (Subtarget.isPPC64()) {
8049    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
8050            .addReg(LabelReg)
8051            .addImm(LabelOffset)
8052            .addReg(BufReg);
8053  } else {
8054    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
8055            .addReg(LabelReg)
8056            .addImm(LabelOffset)
8057            .addReg(BufReg);
8058  }
8059
8060  MIB.setMemRefs(MMOBegin, MMOEnd);
8061
8062  BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
8063  mainMBB->addSuccessor(sinkMBB);
8064
8065  // sinkMBB:
8066  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
8067          TII->get(PPC::PHI), DstReg)
8068    .addReg(mainDstReg).addMBB(mainMBB)
8069    .addReg(restoreDstReg).addMBB(thisMBB);
8070
8071  MI->eraseFromParent();
8072  return sinkMBB;
8073}
8074
8075MachineBasicBlock *
8076PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
8077                                     MachineBasicBlock *MBB) const {
8078  DebugLoc DL = MI->getDebugLoc();
8079  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8080
8081  MachineFunction *MF = MBB->getParent();
8082  MachineRegisterInfo &MRI = MF->getRegInfo();
8083
8084  // Memory Reference
8085  MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
8086  MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
8087
8088  MVT PVT = getPointerTy();
8089  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
8090         "Invalid Pointer Size!");
8091
8092  const TargetRegisterClass *RC =
8093    (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
8094  unsigned Tmp = MRI.createVirtualRegister(RC);
8095  // Since FP is only updated here but NOT referenced, it's treated as GPR.
8096  unsigned FP  = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
8097  unsigned SP  = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
8098  unsigned BP =
8099      (PVT == MVT::i64)
8100          ? PPC::X30
8101          : (Subtarget.isSVR4ABI() &&
8102                     MF->getTarget().getRelocationModel() == Reloc::PIC_
8103                 ? PPC::R29
8104                 : PPC::R30);
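  // (Under 32-bit SVR4 PIC, R30 serves as the PIC base register, so the
  // base pointer is kept in R29 instead.)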
8105
8106  MachineInstrBuilder MIB;
8107
8108  const int64_t LabelOffset = 1 * PVT.getStoreSize();
8109  const int64_t SPOffset    = 2 * PVT.getStoreSize();
8110  const int64_t TOCOffset   = 3 * PVT.getStoreSize();
8111  const int64_t BPOffset    = 4 * PVT.getStoreSize();
8112
8113  unsigned BufReg = MI->getOperand(0).getReg();
8114
8115  // Reload FP (the jumped-to function may not have had a
8116  // frame pointer, and if so, then its r31 will be restored
8117  // as necessary).
8118  if (PVT == MVT::i64) {
8119    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
8120            .addImm(0)
8121            .addReg(BufReg);
8122  } else {
8123    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
8124            .addImm(0)
8125            .addReg(BufReg);
8126  }
8127  MIB.setMemRefs(MMOBegin, MMOEnd);
8128
8129  // Reload IP
8130  if (PVT == MVT::i64) {
8131    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
8132            .addImm(LabelOffset)
8133            .addReg(BufReg);
8134  } else {
8135    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
8136            .addImm(LabelOffset)
8137            .addReg(BufReg);
8138  }
8139  MIB.setMemRefs(MMOBegin, MMOEnd);
8140
8141  // Reload SP
8142  if (PVT == MVT::i64) {
8143    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
8144            .addImm(SPOffset)
8145            .addReg(BufReg);
8146  } else {
8147    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
8148            .addImm(SPOffset)
8149            .addReg(BufReg);
8150  }
8151  MIB.setMemRefs(MMOBegin, MMOEnd);
8152
8153  // Reload BP
8154  if (PVT == MVT::i64) {
8155    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
8156            .addImm(BPOffset)
8157            .addReg(BufReg);
8158  } else {
8159    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
8160            .addImm(BPOffset)
8161            .addReg(BufReg);
8162  }
8163  MIB.setMemRefs(MMOBegin, MMOEnd);
8164
8165  // Reload TOC
8166  if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
8167    setUsesTOCBasePtr(*MBB->getParent());
8168    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
8169            .addImm(TOCOffset)
8170            .addReg(BufReg);
8171
8172    MIB.setMemRefs(MMOBegin, MMOEnd);
8173  }
8174
8175  // Jump
8176  BuildMI(*MBB, MI, DL,
8177          TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
8178  BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
8179
8180  MI->eraseFromParent();
8181  return MBB;
8182}
8183
8184MachineBasicBlock *
8185PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
8186                                               MachineBasicBlock *BB) const {
8187  if (MI->getOpcode() == TargetOpcode::STACKMAP ||
8188      MI->getOpcode() == TargetOpcode::PATCHPOINT) {
8189    if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() &&
8190        MI->getOpcode() == TargetOpcode::PATCHPOINT) {
8191      // Call lowering should have added an r2 operand to indicate a dependence
8192      // on the TOC base pointer value. It can't however, because there is no
8193      // way to mark the dependence as implicit there, and so the stackmap code
8194      // will confuse it with a regular operand. Instead, add the dependence
8195      // here.
8196      setUsesTOCBasePtr(*BB->getParent());
8197      MI->addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
8198    }
8199
8200    return emitPatchPoint(MI, BB);
8201  }
8202
8203  if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 ||
8204      MI->getOpcode() == PPC::EH_SjLj_SetJmp64) {
8205    return emitEHSjLjSetJmp(MI, BB);
8206  } else if (MI->getOpcode() == PPC::EH_SjLj_LongJmp32 ||
8207             MI->getOpcode() == PPC::EH_SjLj_LongJmp64) {
8208    return emitEHSjLjLongJmp(MI, BB);
8209  }
8210
8211  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8212
8213  // To "insert" these instructions we actually have to insert their
8214  // control-flow patterns.
8215  const BasicBlock *LLVM_BB = BB->getBasicBlock();
8216  MachineFunction::iterator It = BB;
8217  ++It;
8218
8219  MachineFunction *F = BB->getParent();
8220
8221  if (Subtarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
8222                              MI->getOpcode() == PPC::SELECT_CC_I8 ||
8223                              MI->getOpcode() == PPC::SELECT_I4 ||
8224                              MI->getOpcode() == PPC::SELECT_I8)) {
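    // With ISEL available, integer selects become a single isel instruction
    // instead of a branch-and-phi diamond.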
8225    SmallVector<MachineOperand, 2> Cond;
8226    if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
8227        MI->getOpcode() == PPC::SELECT_CC_I8)
8228      Cond.push_back(MI->getOperand(4));
8229    else
8230      Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
8231    Cond.push_back(MI->getOperand(1));
8232
8233    DebugLoc dl = MI->getDebugLoc();
8234    TII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(),
8235                      Cond, MI->getOperand(2).getReg(),
8236                      MI->getOperand(3).getReg());
8237  } else if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
8238             MI->getOpcode() == PPC::SELECT_CC_I8 ||
8239             MI->getOpcode() == PPC::SELECT_CC_F4 ||
8240             MI->getOpcode() == PPC::SELECT_CC_F8 ||
8241             MI->getOpcode() == PPC::SELECT_CC_QFRC ||
8242             MI->getOpcode() == PPC::SELECT_CC_QSRC ||
8243             MI->getOpcode() == PPC::SELECT_CC_QBRC ||
8244             MI->getOpcode() == PPC::SELECT_CC_VRRC ||
8245             MI->getOpcode() == PPC::SELECT_CC_VSFRC ||
8246             MI->getOpcode() == PPC::SELECT_CC_VSRC ||
8247             MI->getOpcode() == PPC::SELECT_I4 ||
8248             MI->getOpcode() == PPC::SELECT_I8 ||
8249             MI->getOpcode() == PPC::SELECT_F4 ||
8250             MI->getOpcode() == PPC::SELECT_F8 ||
8251             MI->getOpcode() == PPC::SELECT_QFRC ||
8252             MI->getOpcode() == PPC::SELECT_QSRC ||
8253             MI->getOpcode() == PPC::SELECT_QBRC ||
8254             MI->getOpcode() == PPC::SELECT_VRRC ||
8255             MI->getOpcode() == PPC::SELECT_VSFRC ||
8256             MI->getOpcode() == PPC::SELECT_VSRC) {
8257    // The incoming instruction knows the destination vreg to set, the
8258    // condition code register to branch on, the true/false values to
8259    // select between, and a branch opcode to use.
8260
8261    //  thisMBB:
8262    //  ...
8263    //   TrueVal = ...
8264    //   cmpTY ccX, r1, r2
8265    //   bCC copy1MBB
8266    //   fallthrough --> copy0MBB
8267    MachineBasicBlock *thisMBB = BB;
8268    MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
8269    MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
8270    DebugLoc dl = MI->getDebugLoc();
8271    F->insert(It, copy0MBB);
8272    F->insert(It, sinkMBB);
8273
8274    // Transfer the remainder of BB and its successor edges to sinkMBB.
8275    sinkMBB->splice(sinkMBB->begin(), BB,
8276                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
8277    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
8278
8279    // Next, add the true and fallthrough blocks as its successors.
8280    BB->addSuccessor(copy0MBB);
8281    BB->addSuccessor(sinkMBB);
8282
8283    if (MI->getOpcode() == PPC::SELECT_I4 ||
8284        MI->getOpcode() == PPC::SELECT_I8 ||
8285        MI->getOpcode() == PPC::SELECT_F4 ||
8286        MI->getOpcode() == PPC::SELECT_F8 ||
8287        MI->getOpcode() == PPC::SELECT_QFRC ||
8288        MI->getOpcode() == PPC::SELECT_QSRC ||
8289        MI->getOpcode() == PPC::SELECT_QBRC ||
8290        MI->getOpcode() == PPC::SELECT_VRRC ||
8291        MI->getOpcode() == PPC::SELECT_VSFRC ||
8292        MI->getOpcode() == PPC::SELECT_VSRC) {
8293      BuildMI(BB, dl, TII->get(PPC::BC))
8294        .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
8295    } else {
8296      unsigned SelectPred = MI->getOperand(4).getImm();
8297      BuildMI(BB, dl, TII->get(PPC::BCC))
8298        .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
8299    }
8300
8301    //  copy0MBB:
8302    //   %FalseValue = ...
8303    //   # fallthrough to sinkMBB
8304    BB = copy0MBB;
8305
8306    // Update machine-CFG edges
8307    BB->addSuccessor(sinkMBB);
8308
8309    //  sinkMBB:
8310    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
8311    //  ...
8312    BB = sinkMBB;
8313    BuildMI(*BB, BB->begin(), dl,
8314            TII->get(PPC::PHI), MI->getOperand(0).getReg())
8315      .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
8316      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
8317  } else if (MI->getOpcode() == PPC::ReadTB) {
8318    // To read the 64-bit time-base register on a 32-bit target, we read the
8319    // two halves. Should the counter have wrapped while it was being read, we
8320    // need to try again.
8321    // ...
8322    // readLoop:
8323    // mfspr Rx,TBU # load from TBU
8324    // mfspr Ry,TB  # load from TB
8325    // mfspr Rz,TBU # load from TBU
8326    // cmpw crX,Rx,Rz # check if 'old' == 'new'
8327    // bne readLoop   # branch if they're not equal
8328    // ...
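    // SPR 269 is TBU (the upper half of the time base) and SPR 268 is TB
    // (the lower half), hence the MFSPR immediates below.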
8329
8330    MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
8331    MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
8332    DebugLoc dl = MI->getDebugLoc();
8333    F->insert(It, readMBB);
8334    F->insert(It, sinkMBB);
8335
8336    // Transfer the remainder of BB and its successor edges to sinkMBB.
8337    sinkMBB->splice(sinkMBB->begin(), BB,
8338                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
8339    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
8340
8341    BB->addSuccessor(readMBB);
8342    BB = readMBB;
8343
8344    MachineRegisterInfo &RegInfo = F->getRegInfo();
8345    unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
8346    unsigned LoReg = MI->getOperand(0).getReg();
8347    unsigned HiReg = MI->getOperand(1).getReg();
8348
8349    BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
8350    BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
8351    BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
8352
8353    unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
8354
8355    BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
8356      .addReg(HiReg).addReg(ReadAgainReg);
8357    BuildMI(BB, dl, TII->get(PPC::BCC))
8358      .addImm(PPC::PRED_NE).addReg(CmpReg).addMBB(readMBB);
8359
8360    BB->addSuccessor(readMBB);
8361    BB->addSuccessor(sinkMBB);
8362  } else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
8364    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
8365  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
8366    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
8367  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
8368    BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4);
8369  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
8370    BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8);
8371
8372  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
8373    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
8374  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
8375    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
8376  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
8377    BB = EmitAtomicBinary(MI, BB, false, PPC::AND);
8378  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
8379    BB = EmitAtomicBinary(MI, BB, true, PPC::AND8);
8380
8381  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
8382    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
8383  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
8384    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
8385  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
8386    BB = EmitAtomicBinary(MI, BB, false, PPC::OR);
8387  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
8388    BB = EmitAtomicBinary(MI, BB, true, PPC::OR8);
8389
8390  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
8391    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
8392  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
8393    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
8394  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
8395    BB = EmitAtomicBinary(MI, BB, false, PPC::XOR);
8396  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
8397    BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8);
8398
8399  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
8400    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
8401  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
8402    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
8403  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
8404    BB = EmitAtomicBinary(MI, BB, false, PPC::NAND);
8405  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
8406    BB = EmitAtomicBinary(MI, BB, true, PPC::NAND8);
8407
8408  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
8409    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
8410  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
8411    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
8412  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
8413    BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF);
8414  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
8415    BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8);
8416
8417  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8)
8418    BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
8419  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16)
8420    BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
8421  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32)
8422    BB = EmitAtomicBinary(MI, BB, false, 0);
8423  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64)
8424    BB = EmitAtomicBinary(MI, BB, true, 0);
8425
8426  else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
8427           MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) {
8428    bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
8429
8430    unsigned dest   = MI->getOperand(0).getReg();
8431    unsigned ptrA   = MI->getOperand(1).getReg();
8432    unsigned ptrB   = MI->getOperand(2).getReg();
8433    unsigned oldval = MI->getOperand(3).getReg();
8434    unsigned newval = MI->getOperand(4).getReg();
8435    DebugLoc dl     = MI->getDebugLoc();
8436
8437    MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
8438    MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
8439    MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
8440    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
8441    F->insert(It, loop1MBB);
8442    F->insert(It, loop2MBB);
8443    F->insert(It, midMBB);
8444    F->insert(It, exitMBB);
8445    exitMBB->splice(exitMBB->begin(), BB,
8446                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
8447    exitMBB->transferSuccessorsAndUpdatePHIs(BB);
8448
8449    //  thisMBB:
8450    //   ...
8451    //   fallthrough --> loopMBB
8452    BB->addSuccessor(loop1MBB);
8453
8454    // loop1MBB:
8455    //   l[wd]arx dest, ptr
8456    //   cmp[wd] dest, oldval
8457    //   bne- midMBB
8458    // loop2MBB:
8459    //   st[wd]cx. newval, ptr
8460    //   bne- loopMBB
8461    //   b exitBB
8462    // midMBB:
8463    //   st[wd]cx. dest, ptr
8464    // exitBB:
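    // The st[wd]cx. in midMBB is there only to clear the outstanding
    // l[wd]arx reservation on the failure path; whether it succeeds is
    // irrelevant.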
8465    BB = loop1MBB;
8466    BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
8467      .addReg(ptrA).addReg(ptrB);
8468    BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
8469      .addReg(oldval).addReg(dest);
8470    BuildMI(BB, dl, TII->get(PPC::BCC))
8471      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
8472    BB->addSuccessor(loop2MBB);
8473    BB->addSuccessor(midMBB);
8474
8475    BB = loop2MBB;
8476    BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
8477      .addReg(newval).addReg(ptrA).addReg(ptrB);
8478    BuildMI(BB, dl, TII->get(PPC::BCC))
8479      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
8480    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
8481    BB->addSuccessor(loop1MBB);
8482    BB->addSuccessor(exitMBB);
8483
8484    BB = midMBB;
8485    BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
8486      .addReg(dest).addReg(ptrA).addReg(ptrB);
8487    BB->addSuccessor(exitMBB);
8488
8489    //  exitMBB:
8490    //   ...
8491    BB = exitMBB;
8492  } else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
8493             MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
8494    // We must use 64-bit registers for addresses when targeting 64-bit,
8495    // since we're actually doing arithmetic on them.  Other registers
8496    // can be 32-bit.
8497    bool is64bit = Subtarget.isPPC64();
8498    bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
8499
8500    unsigned dest   = MI->getOperand(0).getReg();
8501    unsigned ptrA   = MI->getOperand(1).getReg();
8502    unsigned ptrB   = MI->getOperand(2).getReg();
8503    unsigned oldval = MI->getOperand(3).getReg();
8504    unsigned newval = MI->getOperand(4).getReg();
8505    DebugLoc dl     = MI->getDebugLoc();
8506
8507    MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
8508    MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
8509    MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
8510    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
8511    F->insert(It, loop1MBB);
8512    F->insert(It, loop2MBB);
8513    F->insert(It, midMBB);
8514    F->insert(It, exitMBB);
8515    exitMBB->splice(exitMBB->begin(), BB,
8516                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
8517    exitMBB->transferSuccessorsAndUpdatePHIs(BB);
8518
8519    MachineRegisterInfo &RegInfo = F->getRegInfo();
8520    const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
8521                                            : &PPC::GPRCRegClass;
8522    unsigned PtrReg = RegInfo.createVirtualRegister(RC);
8523    unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
8524    unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
8525    unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
8526    unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
8527    unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
8528    unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
8529    unsigned MaskReg = RegInfo.createVirtualRegister(RC);
8530    unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
8531    unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
8532    unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
8533    unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
8534    unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
8535    unsigned Ptr1Reg;
8536    unsigned TmpReg = RegInfo.createVirtualRegister(RC);
8537    unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
8538    //  thisMBB:
8539    //   ...
8540    //   fallthrough --> loopMBB
8541    BB->addSuccessor(loop1MBB);
8542
8543    // The 4-byte load must be aligned, while a char or short may be
8544    // anywhere in the word.  Hence all this nasty bookkeeping code.
8545    //   add ptr1, ptrA, ptrB [copy if ptrA==0]
8546    //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
8547    //   xori shift, shift1, 24 [16]
8548    //   rlwinm ptr, ptr1, 0, 0, 29
8549    //   slw newval2, newval, shift
8550    //   slw oldval2, oldval, shift
8551    //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
8552    //   slw mask, mask2, shift
8553    //   and newval3, newval2, mask
8554    //   and oldval3, oldval2, mask
8555    // loop1MBB:
8556    //   lwarx tmpDest, ptr
8557    //   and tmp, tmpDest, mask
8558    //   cmpw tmp, oldval3
8559    //   bne- midMBB
8560    // loop2MBB:
8561    //   andc tmp2, tmpDest, mask
8562    //   or tmp4, tmp2, newval3
8563    //   stwcx. tmp4, ptr
8564    //   bne- loop1MBB
8565    //   b exitBB
8566    // midMBB:
8567    //   stwcx. tmpDest, ptr
8568    // exitBB:
8569    //   srw dest, tmpDest, shift
8570    if (ptrA != ZeroReg) {
8571      Ptr1Reg = RegInfo.createVirtualRegister(RC);
8572      BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
8573        .addReg(ptrA).addReg(ptrB);
8574    } else {
8575      Ptr1Reg = ptrB;
8576    }
8577    BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
8578        .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
8579    BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
8580        .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
8581    if (is64bit)
8582      BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
8583        .addReg(Ptr1Reg).addImm(0).addImm(61);
8584    else
8585      BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
8586        .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
8587    BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
8588        .addReg(newval).addReg(ShiftReg);
8589    BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
8590        .addReg(oldval).addReg(ShiftReg);
8591    if (is8bit)
8592      BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
8593    else {
8594      BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
8595      BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
8596        .addReg(Mask3Reg).addImm(65535);
8597    }
8598    BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
8599        .addReg(Mask2Reg).addReg(ShiftReg);
8600    BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
8601        .addReg(NewVal2Reg).addReg(MaskReg);
8602    BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
8603        .addReg(OldVal2Reg).addReg(MaskReg);
8604
8605    BB = loop1MBB;
8606    BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
8607        .addReg(ZeroReg).addReg(PtrReg);
8608    BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
8609        .addReg(TmpDestReg).addReg(MaskReg);
8610    BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
8611        .addReg(TmpReg).addReg(OldVal3Reg);
8612    BuildMI(BB, dl, TII->get(PPC::BCC))
8613        .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
8614    BB->addSuccessor(loop2MBB);
8615    BB->addSuccessor(midMBB);
8616
8617    BB = loop2MBB;
8618    BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
8619        .addReg(TmpDestReg).addReg(MaskReg);
8620    BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
8621        .addReg(Tmp2Reg).addReg(NewVal3Reg);
8622    BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
8623        .addReg(ZeroReg).addReg(PtrReg);
8624    BuildMI(BB, dl, TII->get(PPC::BCC))
8625      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
8626    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
8627    BB->addSuccessor(loop1MBB);
8628    BB->addSuccessor(exitMBB);
8629
8630    BB = midMBB;
8631    BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
8632      .addReg(ZeroReg).addReg(PtrReg);
8633    BB->addSuccessor(exitMBB);
8634
8635    //  exitMBB:
8636    //   ...
8637    BB = exitMBB;
8638    BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpReg)
8639      .addReg(ShiftReg);
8640  } else if (MI->getOpcode() == PPC::FADDrtz) {
8641    // This pseudo performs an FADD with rounding mode temporarily forced
8642    // to round-to-zero.  We emit this via custom inserter since the FPSCR
8643    // is not modeled at the SelectionDAG level.
8644    unsigned Dest = MI->getOperand(0).getReg();
8645    unsigned Src1 = MI->getOperand(1).getReg();
8646    unsigned Src2 = MI->getOperand(2).getReg();
8647    DebugLoc dl   = MI->getDebugLoc();
8648
8649    MachineRegisterInfo &RegInfo = F->getRegInfo();
8650    unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
8651
8652    // Save FPSCR value.
8653    BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
8654
8655    // Set rounding mode to round-to-zero.
8656    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
8657    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);
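    // FPSCR bits 30:31 form the rounding-control field RN; setting bit 31
    // and clearing bit 30 selects RN = 0b01, i.e. round toward zero.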
8658
8659    // Perform addition.
8660    BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
8661
8662    // Restore FPSCR value.
8663    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
8664  } else if (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT ||
8665             MI->getOpcode() == PPC::ANDIo_1_GT_BIT ||
8666             MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
8667             MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) {
8668    unsigned Opcode = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
8669                       MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) ?
8670                      PPC::ANDIo8 : PPC::ANDIo;
8671    bool isEQ = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT ||
8672                 MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8);
8673
8674    MachineRegisterInfo &RegInfo = F->getRegInfo();
8675    unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ?
8676                                                  &PPC::GPRCRegClass :
8677                                                  &PPC::G8RCRegClass);
8678
8679    DebugLoc dl   = MI->getDebugLoc();
8680    BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
8681      .addReg(MI->getOperand(1).getReg()).addImm(1);
8682    BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
8683            MI->getOperand(0).getReg())
8684      .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
8685  } else {
8686    llvm_unreachable("Unexpected instr type to insert");
8687  }
8688
8689  MI->eraseFromParent();   // The pseudo instruction is gone now.
8690  return BB;
8691}
8692
8693//===----------------------------------------------------------------------===//
8694// Target Optimization Hooks
8695//===----------------------------------------------------------------------===//
8696
8697SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
8698                                            DAGCombinerInfo &DCI,
8699                                            unsigned &RefinementSteps,
8700                                            bool &UseOneConstNR) const {
8701  EVT VT = Operand.getValueType();
8702  if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
8703      (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
8704      (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
8705      (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
8706      (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
8707      (VT == MVT::v4f64 && Subtarget.hasQPX())) {
8708    // Convergence is quadratic, so we essentially double the number of correct
8709    // bits after every iteration. For both FRE and FRSQRTE, the minimum
8710    // architected relative accuracy is 2^-5. When hasRecipPrec(), this is
8711    // 2^-14. IEEE float has a 23-bit fraction and double has a 52-bit fraction.
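    // Thus: with 2^-14 estimates, float needs one step (14 -> 28 >= 24 bits)
    // and double needs two (14 -> 28 -> 56 >= 53); with 2^-5 estimates, float
    // needs three steps (5 -> 10 -> 20 -> 40) and double needs four.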
8712    RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
8713    if (VT.getScalarType() == MVT::f64)
8714      ++RefinementSteps;
8715    UseOneConstNR = true;
8716    return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
8717  }
8718  return SDValue();
8719}
8720
8721SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
8722                                            DAGCombinerInfo &DCI,
8723                                            unsigned &RefinementSteps) const {
8724  EVT VT = Operand.getValueType();
8725  if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
8726      (VT == MVT::f64 && Subtarget.hasFRE()) ||
8727      (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
8728      (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
8729      (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
8730      (VT == MVT::v4f64 && Subtarget.hasQPX())) {
8731    // Convergence is quadratic, so we essentially double the number of correct
8732    // bits after every iteration. For both FRE and FRSQRTE, the minimum
8733    // architected relative accuracy is 2^-5. When hasRecipPrec(), this is
8734    // 2^-14. IEEE float has a 23-bit fraction and double has a 52-bit fraction.
8735    RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
8736    if (VT.getScalarType() == MVT::f64)
8737      ++RefinementSteps;
8738    return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
8739  }
8740  return SDValue();
8741}
8742
8743bool PPCTargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const {
8744  // Note: This functionality is used only when unsafe-fp-math is enabled, and
8745  // on cores with reciprocal estimates (which are used when unsafe-fp-math is
8746  // enabled for division), this functionality is redundant with the default
8747  // combiner logic (once the division -> reciprocal/multiply transformation
8748  // has taken place). As a result, this matters more for older cores than for
8749  // newer ones.
8750
8751  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
8752  // reciprocal if there are two or more FDIVs (for embedded cores with only
8753  // one FP pipeline) or three or more FDIVs (for generic OOO cores).
8754  switch (Subtarget.getDarwinDirective()) {
8755  default:
8756    return NumUsers > 2;
8757  case PPC::DIR_440:
8758  case PPC::DIR_A2:
8759  case PPC::DIR_E500mc:
8760  case PPC::DIR_E5500:
8761    return NumUsers > 1;
8762  }
8763}
8764
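// Return true if Loc addresses memory exactly Dist elements of the given
// byte size past Base's base pointer. This looks through frame indices,
// base-plus-constant-offset forms, and global addresses with offsets.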
8765static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
8766                            unsigned Bytes, int Dist,
8767                            SelectionDAG &DAG) {
8768  if (VT.getSizeInBits() / 8 != Bytes)
8769    return false;
8770
8771  SDValue BaseLoc = Base->getBasePtr();
8772  if (Loc.getOpcode() == ISD::FrameIndex) {
8773    if (BaseLoc.getOpcode() != ISD::FrameIndex)
8774      return false;
8775    const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
8776    int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
8777    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
8778    int FS  = MFI->getObjectSize(FI);
8779    int BFS = MFI->getObjectSize(BFI);
8780    if (FS != BFS || FS != (int)Bytes) return false;
8781    return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
8782  }
8783
8784  // Handle X+C
8785  if (DAG.isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc &&
8786      cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes)
8787    return true;
8788
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  const GlobalValue *GV1 = nullptr;
  const GlobalValue *GV2 = nullptr;
  int64_t Offset1 = 0;
  int64_t Offset2 = 0;
  bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
  bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
  if (isGA1 && isGA2 && GV1 == GV2)
    return Offset1 == (Offset2 + Dist*Bytes);
  return false;
}

// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
// not enforce equality of the chain operands.
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
                            unsigned Bytes, int Dist,
                            SelectionDAG &DAG) {
  if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
    EVT VT = LS->getMemoryVT();
    SDValue Loc = LS->getBasePtr();
    return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
  }

  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
    EVT VT;
    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
    default: return false;
    case Intrinsic::ppc_qpx_qvlfd:
    case Intrinsic::ppc_qpx_qvlfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvlfs:
    case Intrinsic::ppc_qpx_qvlfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvlfcd:
    case Intrinsic::ppc_qpx_qvlfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvlfcs:
    case Intrinsic::ppc_qpx_qvlfcsa:
      VT = MVT::v2f32;
      break;
    case Intrinsic::ppc_qpx_qvlfiwa:
    case Intrinsic::ppc_qpx_qvlfiwz:
    case Intrinsic::ppc_altivec_lvx:
    case Intrinsic::ppc_altivec_lvxl:
    case Intrinsic::ppc_vsx_lxvw4x:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_lxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_altivec_lvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_lvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_lvewx:
      VT = MVT::i32;
      break;
    }

    return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
  }

  if (N->getOpcode() == ISD::INTRINSIC_VOID) {
    EVT VT;
    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
    default: return false;
    case Intrinsic::ppc_qpx_qvstfd:
    case Intrinsic::ppc_qpx_qvstfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvstfs:
    case Intrinsic::ppc_qpx_qvstfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvstfcd:
    case Intrinsic::ppc_qpx_qvstfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfcs:
    case Intrinsic::ppc_qpx_qvstfcsa:
      VT = MVT::v2f32;
      break;
    case Intrinsic::ppc_qpx_qvstfiw:
    case Intrinsic::ppc_qpx_qvstfiwa:
    case Intrinsic::ppc_altivec_stvx:
    case Intrinsic::ppc_altivec_stvxl:
    case Intrinsic::ppc_vsx_stxvw4x:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_altivec_stvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_stvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_stvewx:
      VT = MVT::i32;
      break;
    }

    return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
  }

  return false;
}

// Return true if there is a nearby consecutive load to the one provided
// (regardless of alignment). We search up and down the chain, looking through
// token factors and other loads (but nothing else). As a result, a true result
// indicates that it is safe to create a new consecutive load adjacent to the
// load provided.
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
  SDValue Chain = LD->getChain();
  EVT VT = LD->getMemoryVT();

  SmallSet<SDNode *, 16> LoadRoots;
  SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
  SmallSet<SDNode *, 16> Visited;

  // First, search up the chain, branching to follow all token-factor operands.
  // If we find a consecutive load, then we're done; otherwise, record all
  // nodes just above the top-level loads and token factors.
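  // For example, if LD's chain is a TokenFactor merging the chains of several
  // other loads, each TokenFactor operand is followed in turn; any memory node
  // found this way is checked directly, and anything that is neither a memory
  // node nor a TokenFactor becomes a root for the downward search in the
  // second phase.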
  while (!Queue.empty()) {
    SDNode *ChainNext = Queue.pop_back_val();
    if (!Visited.insert(ChainNext).second)
      continue;

    if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
      if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
        return true;

      if (!Visited.count(ChainLD->getChain().getNode()))
        Queue.push_back(ChainLD->getChain().getNode());
    } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
      for (const SDUse &O : ChainNext->ops())
        if (!Visited.count(O.getNode()))
          Queue.push_back(O.getNode());
    } else
      LoadRoots.insert(ChainNext);
  }

  // Second, search down the chain, starting from the top-level nodes recorded
  // in the first phase. These top-level nodes are the nodes just above all
  // loads and token factors. Starting with their uses, recursively look
  // through all loads (just the chain uses) and token factors to find a
  // consecutive load.
  Visited.clear();
  Queue.clear();

  for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
       IE = LoadRoots.end(); I != IE; ++I) {
    Queue.push_back(*I);

    while (!Queue.empty()) {
      SDNode *LoadRoot = Queue.pop_back_val();
      if (!Visited.insert(LoadRoot).second)
        continue;

      if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
        if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
          return true;

      for (SDNode::use_iterator UI = LoadRoot->use_begin(),
           UE = LoadRoot->use_end(); UI != UE; ++UI)
        if (((isa<MemSDNode>(*UI) &&
            cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
            UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
          Queue.push_back(*UI);
    }
  }

  return false;
}

SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
  // If we're tracking CR bits, we need to be careful that we don't have:
  //   trunc(binary-ops(zext(x), zext(y)))
  // or
  //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
  // such that we're unnecessarily moving things into GPRs when it would be
  // better to keep them in CR bits.

  // Note that trunc here can be an actual i1 trunc, or can be the effective
  // truncation that comes from a setcc or select_cc.
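  // For example, a setcc produces an i1 result even when its operands are
  // i32 or i64, so for our purposes it truncates its inputs down to one
  // meaningful bit just like an explicit TRUNCATE to i1 would.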
  if (N->getOpcode() == ISD::TRUNCATE &&
      N->getValueType(0) != MVT::i1)
    return SDValue();

  if (N->getOperand(0).getValueType() != MVT::i32 &&
      N->getOperand(0).getValueType() != MVT::i64)
    return SDValue();

  if (N->getOpcode() == ISD::SETCC ||
      N->getOpcode() == ISD::SELECT_CC) {
    // If we're looking at a comparison, then we need to make sure that the
    // high bits (all except for the first) don't affect the result.
    ISD::CondCode CC =
      cast<CondCodeSDNode>(N->getOperand(
        N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
    unsigned OpBits = N->getOperand(0).getValueSizeInBits();

    if (ISD::isSignedIntSetCC(CC)) {
      if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
          DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
        return SDValue();
    } else if (ISD::isUnsignedIntSetCC(CC)) {
      if (!DAG.MaskedValueIsZero(N->getOperand(0),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
          !DAG.MaskedValueIsZero(N->getOperand(1),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)))
        return SDValue();
    } else {
      // This is neither a signed nor an unsigned comparison; just make sure
      // that the high bits are equal.
      APInt Op1Zero, Op1One;
      APInt Op2Zero, Op2One;
      DAG.computeKnownBits(N->getOperand(0), Op1Zero, Op1One);
      DAG.computeKnownBits(N->getOperand(1), Op2Zero, Op2One);

      // We don't really care about what is known about the first bit (if
      // anything), so clear it in all masks prior to comparing them.
      Op1Zero.clearBit(0); Op1One.clearBit(0);
      Op2Zero.clearBit(0); Op2One.clearBit(0);

      if (Op1Zero != Op2Zero || Op1One != Op2One)
        return SDValue();
    }
  }

  // We now know that the higher-order bits are irrelevant; we just need to
  // make sure that all of the intermediate operations are bit operations, and
  // all inputs are extensions.
  if (N->getOperand(0).getOpcode() != ISD::AND &&
      N->getOperand(0).getOpcode() != ISD::OR  &&
      N->getOperand(0).getOpcode() != ISD::XOR &&
      N->getOperand(0).getOpcode() != ISD::SELECT &&
      N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
      N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
      N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
      N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
      N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
    return SDValue();

  if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
      N->getOperand(1).getOpcode() != ISD::AND &&
      N->getOperand(1).getOpcode() != ISD::OR  &&
      N->getOperand(1).getOpcode() != ISD::XOR &&
      N->getOperand(1).getOpcode() != ISD::SELECT &&
      N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
      N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
      N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
      N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
      N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
    return SDValue();

  SmallVector<SDValue, 4> Inputs;
  SmallVector<SDValue, 8> BinOps, PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  for (unsigned i = 0; i < 2; ++i) {
    if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
          N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
          N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
          N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
        isa<ConstantSDNode>(N->getOperand(i)))
      Inputs.push_back(N->getOperand(i));
    else
      BinOps.push_back(N->getOperand(i));

    if (N->getOpcode() == ISD::TRUNCATE)
      break;
  }

  // Visit all inputs, collect all binary operations (and, or, xor and
  // select) that are all fed by extensions.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.back();
    BinOps.pop_back();

    if (!Visited.insert(BinOp.getNode()).second)
      continue;

    PromOps.push_back(BinOp);

    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition of the select is not promoted.
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
        continue;
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
            BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
            BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
           BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
          isa<ConstantSDNode>(BinOp.getOperand(i))) {
        Inputs.push_back(BinOp.getOperand(i));
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
                 BinOp.getOperand(i).getOpcode() == ISD::OR  ||
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
                 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
                 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
                 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
                 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
        BinOps.push_back(BinOp.getOperand(i));
      } else {
        // We have an input that is not an extension or another binary
        // operation; we'll abort this transformation.
        return SDValue();
      }
    }
  }

  // Make sure that this is a self-contained cluster of operations (which
  // is not quite the same thing as saying that everything has only one
  // use).
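  // For example, if one of the to-be-promoted extensions also feeds a store
  // that is not part of this cluster, replacing the extension with its i1
  // operand would change a value observable outside the cluster, so we must
  // give up; the use checks below enforce this.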
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
                              UE = Inputs[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // Make sure that we're not going to promote the non-output-value
      // operand(s) of SELECT or SELECT_CC.
      // FIXME: Although we could sometimes handle this, and it does occur in
      // practice that one of the condition inputs to the select is also one of
      // the outputs, we currently can't deal with this.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == Inputs[i])
          return SDValue();
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == Inputs[i] ||
            User->getOperand(1) == Inputs[i])
          return SDValue();
      }
    }
  }

  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
                              UE = PromOps[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // Make sure that we're not going to promote the non-output-value
      // operand(s) of SELECT or SELECT_CC.
      // FIXME: Although we could sometimes handle this, and it does occur in
      // practice that one of the condition inputs to the select is also one of
      // the outputs, we currently can't deal with this.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == PromOps[i])
          return SDValue();
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == PromOps[i] ||
            User->getOperand(1) == PromOps[i])
          return SDValue();
      }
    }
  }

  // Replace all inputs with the extension operand.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constants may have users outside the cluster of to-be-promoted nodes,
    // and so we need to replace those as we do the promotions.
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;
    else
      DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
  }

  // Replace all operations (these are all the same, but have a different
  // (i1) return type). DAG.getNode will validate that the types of
  // a binary operator match, so go through the list in reverse so that
  // we've likely promoted both operands first. Any intermediate truncations or
  // extensions disappear.
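  // For example, trunc(and(zext(a), zext(b))) ends up as and(a, b): once the
  // zext inputs have been replaced by a and b, the AND is rebuilt with an i1
  // result type, and the outer truncation then has nothing left to do.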
  while (!PromOps.empty()) {
    SDValue PromOp = PromOps.back();
    PromOps.pop_back();

    if (PromOp.getOpcode() == ISD::TRUNCATE ||
        PromOp.getOpcode() == ISD::SIGN_EXTEND ||
        PromOp.getOpcode() == ISD::ZERO_EXTEND ||
        PromOp.getOpcode() == ISD::ANY_EXTEND) {
      if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
          PromOp.getOperand(0).getValueType() != MVT::i1) {
        // The operand is not yet ready (see comment below).
        PromOps.insert(PromOps.begin(), PromOp);
        continue;
      }

      SDValue RepValue = PromOp.getOperand(0);
      if (isa<ConstantSDNode>(RepValue))
        RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);

      DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
      continue;
    }

    unsigned C;
    switch (PromOp.getOpcode()) {
    default:             C = 0; break;
    case ISD::SELECT:    C = 1; break;
    case ISD::SELECT_CC: C = 2; break;
    }

    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
         PromOp.getOperand(C).getValueType() != MVT::i1) ||
        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
         PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
      // The to-be-promoted operands of this node have not yet been
      // promoted (this should be rare because we're going through the
      // list backward, but if one of the operands has several users in
      // this cluster of to-be-promoted nodes, it is possible).
      PromOps.insert(PromOps.begin(), PromOp);
      continue;
    }

    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
                                PromOp.getNode()->op_end());

    // If there are any constant inputs, make sure they're replaced now.
    for (unsigned i = 0; i < 2; ++i)
      if (isa<ConstantSDNode>(Ops[C+i]))
        Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);

    DAG.ReplaceAllUsesOfValueWith(PromOp,
      DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
  }

  // Now we're left with the initial truncation itself.
  if (N->getOpcode() == ISD::TRUNCATE)
    return N->getOperand(0);

  // Otherwise, this is a comparison. The operands to be compared have just
  // changed type (to i1), but everything else is the same.
  return SDValue(N, 0);
}

SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  // If we're tracking CR bits, we need to be careful that we don't have:
  //   zext(binary-ops(trunc(x), trunc(y)))
  // or
  //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
  // such that we're unnecessarily moving things into CR bits that can more
  // efficiently stay in GPRs. Note that if we're not certain that the high
  // bits are set as required by the final extension, we still may need to do
  // some masking to get the proper behavior.

  // This same functionality is important on PPC64 when dealing with
  // 32-to-64-bit extensions; these occur often when 32-bit values are used as
  // the return values of functions. Because it is so similar, it is handled
  // here as well.
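  // For example, zext(xor(trunc(x), trunc(y))) can often become xor(x, y)
  // outright; if the high bits of x and y are not known to satisfy the
  // extension, the xor is still rebuilt in the wide type and the result is
  // masked (or shifted) at the end of this function instead.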

  if (N->getValueType(0) != MVT::i32 &&
      N->getValueType(0) != MVT::i64)
    return SDValue();

  if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
        (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
    return SDValue();

  if (N->getOperand(0).getOpcode() != ISD::AND &&
      N->getOperand(0).getOpcode() != ISD::OR  &&
      N->getOperand(0).getOpcode() != ISD::XOR &&
      N->getOperand(0).getOpcode() != ISD::SELECT &&
      N->getOperand(0).getOpcode() != ISD::SELECT_CC)
    return SDValue();

  SmallVector<SDValue, 4> Inputs;
  SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  // Visit all inputs, collect all binary operations (and, or, xor and
  // select) that are all fed by truncations.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.back();
    BinOps.pop_back();

    if (!Visited.insert(BinOp.getNode()).second)
      continue;

    PromOps.push_back(BinOp);

    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition of the select is not promoted.
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
        continue;
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
          isa<ConstantSDNode>(BinOp.getOperand(i))) {
        Inputs.push_back(BinOp.getOperand(i));
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
                 BinOp.getOperand(i).getOpcode() == ISD::OR  ||
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
        BinOps.push_back(BinOp.getOperand(i));
      } else {
        // We have an input that is not a truncation or another binary
        // operation; we'll abort this transformation.
        return SDValue();
      }
    }
  }

  // The operands of a select that must be truncated when the select is
  // promoted, because those operands are actually part of the to-be-promoted
  // set.
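  // For example, if a truncated value is used both as a select's condition
  // and as one of its arms, promoting the select must not widen the
  // condition; we record the condition's original type here and re-truncate
  // it when the select is rebuilt below.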
  DenseMap<SDNode *, EVT> SelectTruncOp[2];

  // Make sure that this is a self-contained cluster of operations (which
  // is not quite the same thing as saying that everything has only one
  // use).
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
                              UE = Inputs[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // If we're going to promote the non-output-value operand(s) of SELECT
      // or SELECT_CC, record them for truncation.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == Inputs[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == Inputs[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
        if (User->getOperand(1) == Inputs[i])
          SelectTruncOp[1].insert(std::make_pair(User,
                                    User->getOperand(1).getValueType()));
      }
    }
  }

  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
                              UE = PromOps[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // If we're going to promote the non-output-value operand(s) of SELECT
      // or SELECT_CC, record them for truncation.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == PromOps[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == PromOps[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
        if (User->getOperand(1) == PromOps[i])
          SelectTruncOp[1].insert(std::make_pair(User,
                                    User->getOperand(1).getValueType()));
      }
    }
  }

  unsigned PromBits = N->getOperand(0).getValueSizeInBits();
  bool ReallyNeedsExt = false;
  if (N->getOpcode() != ISD::ANY_EXTEND) {
    // If not all of the inputs are already sign/zero-extended, then
    // we'll still need to do that at the end.
    for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
      if (isa<ConstantSDNode>(Inputs[i]))
        continue;

      unsigned OpBits =
        Inputs[i].getOperand(0).getValueSizeInBits();
      assert(PromBits < OpBits && "Truncation not to a smaller bit count?");

      if ((N->getOpcode() == ISD::ZERO_EXTEND &&
           !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
                                  APInt::getHighBitsSet(OpBits,
                                                        OpBits-PromBits))) ||
          (N->getOpcode() == ISD::SIGN_EXTEND &&
           DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
             (OpBits-(PromBits-1)))) {
        ReallyNeedsExt = true;
        break;
      }
    }
  }
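  // For example, when the final node zero-extends but a truncation input's
  // source has upper bits that are not known to be zero, dropping the
  // truncation would expose those bits, so the masking at the end of this
  // function is still required.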

  // Replace all inputs, either with the truncation operand, or a
  // truncation or extension to the final output type.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constant inputs need to be replaced with the to-be-promoted nodes that
    // use them because they might have users outside of the cluster of
    // promoted nodes.
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    SDValue InSrc = Inputs[i].getOperand(0);
    if (Inputs[i].getValueType() == N->getValueType(0))
      DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
    else if (N->getOpcode() == ISD::SIGN_EXTEND)
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
    else if (N->getOpcode() == ISD::ZERO_EXTEND)
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
    else
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
  }

  // Replace all operations (these are all the same, but have a different
  // (promoted) return type). DAG.getNode will validate that the types of
  // a binary operator match, so go through the list in reverse so that
  // we've likely promoted both operands first.
  while (!PromOps.empty()) {
    SDValue PromOp = PromOps.back();
    PromOps.pop_back();

    unsigned C;
    switch (PromOp.getOpcode()) {
    default:             C = 0; break;
    case ISD::SELECT:    C = 1; break;
    case ISD::SELECT_CC: C = 2; break;
    }

    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
         PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
         PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
      // The to-be-promoted operands of this node have not yet been
      // promoted (this should be rare because we're going through the
      // list backward, but if one of the operands has several users in
      // this cluster of to-be-promoted nodes, it is possible).
      PromOps.insert(PromOps.begin(), PromOp);
      continue;
    }

    // For SELECT and SELECT_CC nodes, we do a similar check for any
    // to-be-promoted comparison inputs.
    if (PromOp.getOpcode() == ISD::SELECT ||
        PromOp.getOpcode() == ISD::SELECT_CC) {
      if ((SelectTruncOp[0].count(PromOp.getNode()) &&
           PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
          (SelectTruncOp[1].count(PromOp.getNode()) &&
           PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
        PromOps.insert(PromOps.begin(), PromOp);
        continue;
      }
    }

    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
                                PromOp.getNode()->op_end());

    // If this node has constant inputs, then they'll need to be promoted here.
    for (unsigned i = 0; i < 2; ++i) {
      if (!isa<ConstantSDNode>(Ops[C+i]))
        continue;
      if (Ops[C+i].getValueType() == N->getValueType(0))
        continue;

      if (N->getOpcode() == ISD::SIGN_EXTEND)
        Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
      else if (N->getOpcode() == ISD::ZERO_EXTEND)
        Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
      else
        Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
    }

    // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
    // truncate them again to the original value type.
    if (PromOp.getOpcode() == ISD::SELECT ||
        PromOp.getOpcode() == ISD::SELECT_CC) {
      auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
      if (SI0 != SelectTruncOp[0].end())
        Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
      auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
      if (SI1 != SelectTruncOp[1].end())
        Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
    }

    DAG.ReplaceAllUsesOfValueWith(PromOp,
      DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
  }

  // Now we're left with the initial extension itself.
  if (!ReallyNeedsExt)
    return N->getOperand(0);

  // To zero extend, just mask off everything except for the first bit (in the
  // i1 case).
  if (N->getOpcode() == ISD::ZERO_EXTEND)
    return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
                       DAG.getConstant(APInt::getLowBitsSet(
                                         N->getValueSizeInBits(0), PromBits),
                                       N->getValueType(0)));

  assert(N->getOpcode() == ISD::SIGN_EXTEND &&
         "Invalid extension type");
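  // Sign-extend from bit PromBits-1 by shifting the value all the way to the
  // left and arithmetic-shifting it back down: e.g., extending from 32 to 64
  // bits uses shl/sra by 64 - 32 = 32.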
  EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0));
  SDValue ShiftCst =
    DAG.getConstant(N->getValueSizeInBits(0)-PromBits, ShiftAmountTy);
  return DAG.getNode(ISD::SRA, dl, N->getValueType(0),
                     DAG.getNode(ISD::SHL, dl, N->getValueType(0),
                                 N->getOperand(0), ShiftCst), ShiftCst);
}

SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::SINT_TO_FP ||
          N->getOpcode() == ISD::UINT_TO_FP) &&
         "Need an int -> FP conversion node here");

  if (!Subtarget.has64BitSupport())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Op(N, 0);

  // Don't handle ppc_fp128 here or i1 conversions.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();
  if (Op.getOperand(0).getValueType() == MVT::i1)
    return SDValue();

  // For i32 intermediate values, unfortunately, the conversion functions
  // leave the upper 32 bits of the value undefined. Within the set of
  // scalar instructions, we have no method for zero- or sign-extending the
  // value. Thus, we cannot handle i32 intermediate values here.
  if (Op.getOperand(0).getValueType() == MVT::i32)
    return SDValue();

  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                       ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
                                                            : PPCISD::FCFIDS)
                       : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
                                                            : PPCISD::FCFID);
  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                  ? MVT::f32
                  : MVT::f64;
  // If we're converting from a float to an int and back to a float again,
  // then we don't need the store/load pair at all.
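  // For example, (f64 (sint_to_fp (fp_to_sint f64 X))) becomes
  // fcfid(fctidz(X)), keeping the value in floating-point registers the
  // whole time instead of going through a GPR store/load sequence.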
  if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
       Subtarget.hasFPCVT()) ||
      (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
    SDValue Src = Op.getOperand(0).getOperand(0);
    if (Src.getValueType() == MVT::f32) {
      Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
      DCI.AddToWorklist(Src.getNode());
    }

    unsigned FCTOp =
      Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                        PPCISD::FCTIDUZ;

    SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
    SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);

    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
      FP = DAG.getNode(ISD::FP_ROUND, dl,
                       MVT::f32, FP, DAG.getIntPtrConstant(0));
      DCI.AddToWorklist(FP.getNode());
    }

    return FP;
  }

  return SDValue();
}

// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
// builtins) into loads with swaps.
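// On little endian targets lxvd2x loads the two doublewords in the opposite
// of the expected element order, so the sequence built here is, roughly, an
// LXVD2X node followed by an XXSWAPD to restore array element order.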
SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Chain;
  SDValue Base;
  MachineMemOperand *MMO;

  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode for little endian VSX load");
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(N);
    Chain = LD->getChain();
    Base = LD->getBasePtr();
    MMO = LD->getMemOperand();
    // If the MMO suggests this isn't a load of a full vector, leave
    // things alone.  For a built-in, we have to make the change for
    // correctness, so if there is a size problem it will be a bug.
    if (MMO->getSize() < 16)
      return SDValue();
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
    Chain = Intrin->getChain();
    Base = Intrin->getBasePtr();
    MMO = Intrin->getMemOperand();
    break;
  }
  }

  MVT VecTy = N->getValueType(0).getSimpleVT();
  SDValue LoadOps[] = { Chain, Base };
  SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
                                         DAG.getVTList(VecTy, MVT::Other),
                                         LoadOps, VecTy, MMO);
  DCI.AddToWorklist(Load.getNode());
  Chain = Load.getValue(1);
  SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
                             DAG.getVTList(VecTy, MVT::Other), Chain, Load);
  DCI.AddToWorklist(Swap.getNode());
  return Swap;
}

// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
// builtins) into stores with swaps.
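// This mirrors expandVSXLoadForLE: the source value is first permuted with
// an XXSWAPD node into the doubleword order that stxvd2x expects, and the
// swapped value is then stored with an STXVD2X node.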
SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Chain;
  SDValue Base;
  unsigned SrcOpnd;
  MachineMemOperand *MMO;

  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode for little endian VSX store");
  case ISD::STORE: {
    StoreSDNode *ST = cast<StoreSDNode>(N);
    Chain = ST->getChain();
    Base = ST->getBasePtr();
    MMO = ST->getMemOperand();
    SrcOpnd = 1;
    // If the MMO suggests this isn't a store of a full vector, leave
    // things alone.  For a built-in, we have to make the change for
    // correctness, so if there is a size problem it will be a bug.
    if (MMO->getSize() < 16)
      return SDValue();
    break;
  }
  case ISD::INTRINSIC_VOID: {
    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
    Chain = Intrin->getChain();
    // Intrin->getBasePtr() oddly does not get what we want.
    Base = Intrin->getOperand(3);
    MMO = Intrin->getMemOperand();
    SrcOpnd = 2;
    break;
  }
  }

  SDValue Src = N->getOperand(SrcOpnd);
  MVT VecTy = Src.getValueType().getSimpleVT();
  SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
                             DAG.getVTList(VecTy, MVT::Other), Chain, Src);
  DCI.AddToWorklist(Swap.getNode());
  Chain = Swap.getValue(1);
  SDValue StoreOps[] = { Chain, Swap, Base };
  SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
                                          DAG.getVTList(MVT::Other),
                                          StoreOps, VecTy, MMO);
  DCI.AddToWorklist(Store.getNode());
  return Store;
}

SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  switch (N->getOpcode()) {
  default: break;
  case PPCISD::SHL:
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      if (C->isNullValue())   // 0 << V -> 0.
        return N->getOperand(0);
    }
    break;
  case PPCISD::SRL:
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      if (C->isNullValue())   // 0 >>u V -> 0.
        return N->getOperand(0);
    }
    break;
  case PPCISD::SRA:
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      if (C->isNullValue() ||   //  0 >>s V -> 0.
          C->isAllOnesValue())    // -1 >>s V -> -1.
        return N->getOperand(0);
    }
    break;
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND:
    return DAGCombineExtBoolTrunc(N, DCI);
  case ISD::TRUNCATE:
  case ISD::SETCC:
  case ISD::SELECT_CC:
    return DAGCombineTruncBoolExt(N, DCI);
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
    return combineFPToIntToFP(N, DCI);
  case ISD::STORE: {
    // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
    if (Subtarget.hasSTFIWX() && !cast<StoreSDNode>(N)->isTruncatingStore() &&
        N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
        N->getOperand(1).getValueType() == MVT::i32 &&
        N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
      SDValue Val = N->getOperand(1).getOperand(0);
      if (Val.getValueType() == MVT::f32) {
        Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
        DCI.AddToWorklist(Val.getNode());
      }
      Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
      DCI.AddToWorklist(Val.getNode());

      SDValue Ops[] = {
        N->getOperand(0), Val, N->getOperand(2),
        DAG.getValueType(N->getOperand(1).getValueType())
      };

      Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
              DAG.getVTList(MVT::Other), Ops,
              cast<StoreSDNode>(N)->getMemoryVT(),
              cast<StoreSDNode>(N)->getMemOperand());
      DCI.AddToWorklist(Val.getNode());
      return Val;
    }

    // Turn STORE (BSWAP) -> sthbrx/stwbrx.
    if (cast<StoreSDNode>(N)->isUnindexed() &&
        N->getOperand(1).getOpcode() == ISD::BSWAP &&
        N->getOperand(1).getNode()->hasOneUse() &&
        (N->getOperand(1).getValueType() == MVT::i32 ||
         N->getOperand(1).getValueType() == MVT::i16 ||
         (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
          N->getOperand(1).getValueType() == MVT::i64))) {
      SDValue BSwapOp = N->getOperand(1).getOperand(0);
      // Do an any-extend to 32-bits if this is a half-word input.
      if (BSwapOp.getValueType() == MVT::i16)
        BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);

      SDValue Ops[] = {
        N->getOperand(0), BSwapOp, N->getOperand(2),
        DAG.getValueType(N->getOperand(1).getValueType())
      };
      return
        DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
                                Ops, cast<StoreSDNode>(N)->getMemoryVT(),
                                cast<StoreSDNode>(N)->getMemOperand());
    }

    // For little endian, VSX stores require generating xxswapd/stxvd2x.
    EVT VT = N->getOperand(1).getValueType();
    if (VT.isSimple()) {
      MVT StoreVT = VT.getSimpleVT();
      if (Subtarget.hasVSX() && Subtarget.isLittleEndian() &&
          (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
           StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
        return expandVSXStoreForLE(N, DCI);
    }
    break;
  }
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT VT = LD->getValueType(0);

    // For little endian, VSX loads require generating lxvd2x/xxswapd.
    if (VT.isSimple()) {
      MVT LoadVT = VT.getSimpleVT();
      if (Subtarget.hasVSX() && Subtarget.isLittleEndian() &&
          (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
           LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
        return expandVSXLoadForLE(N, DCI);
    }

    EVT MemVT = LD->getMemoryVT();
    Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
    unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
    Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext());
    unsigned ScalarABIAlignment = getDataLayout()->getABITypeAlignment(STy);
    if (LD->isUnindexed() && VT.isVector() &&
        ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
          // P8 and later hardware should just use LOAD.
          !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 ||
                                       VT == MVT::v4i32 || VT == MVT::v4f32)) ||
         (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) &&
          LD->getAlignment() >= ScalarABIAlignment)) &&
        LD->getAlignment() < ABIAlignment) {
      // This is a type-legal unaligned Altivec or QPX load.
      SDValue Chain = LD->getChain();
      SDValue Ptr = LD->getBasePtr();
      bool isLittleEndian = Subtarget.isLittleEndian();

      // This implements the loading of unaligned vectors as described in
      // the venerable Apple Velocity Engine overview. Specifically:
      // https://developer.apple.com/hardwaredrivers/ve/alignment.html
      // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
      //
      // The general idea is to expand a sequence of one or more unaligned
      // loads into an alignment-based permutation-control instruction (lvsl
      // or lvsr), a series of regular vector loads (which always truncate
      // their input address to an aligned address), and a series of
      // permutations.  The results of these permutations are the requested
      // loaded values.  The trick is that the last "extra" load is not taken
      // from the address you might suspect (sizeof(vector) bytes after the
      // last requested load), but rather sizeof(vector) - 1 bytes after the
      // last requested vector. The point of this is to avoid a page fault if
      // the base address happened to be aligned. This works because if the
      // base address is aligned, then adding less than a full vector length
      // will cause the last vector in the sequence to be (re)loaded.
      // Otherwise, the next vector will be fetched, as you might expect,
      // because it really is needed.
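      //
      // Roughly, for a single unaligned Altivec load the expansion is
      // (register names here are purely illustrative):
      //   lvsl  vPerm, 0, rAddr       ; permute control from low address bits
      //   lvx   vLo, 0, rAddr         ; aligned load at/below the address
      //   lvx   vHi, rInc, rAddr      ; rInc = sizeof(vector) - 1 (see above)
      //   vperm vRes, vLo, vHi, vPerm ; select the 16 requested bytes
      // (on little endian targets lvsr is used and the vperm inputs are
      // swapped, as handled further down).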

      // We might be able to reuse the permutation generation from
      // a different base address offset from this one by an aligned amount.
      // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
      // optimization later.
      Intrinsic::ID Intr, IntrLD, IntrPerm;
      MVT PermCntlTy, PermTy, LDTy;
      if (Subtarget.hasAltivec()) {
        Intr = isLittleEndian ?  Intrinsic::ppc_altivec_lvsr :
                                 Intrinsic::ppc_altivec_lvsl;
        IntrLD = Intrinsic::ppc_altivec_lvx;
        IntrPerm = Intrinsic::ppc_altivec_vperm;
        PermCntlTy = MVT::v16i8;
        PermTy = MVT::v4i32;
        LDTy = MVT::v4i32;
      } else {
        Intr =   MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld :
                                       Intrinsic::ppc_qpx_qvlpcls;
        IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd :
                                       Intrinsic::ppc_qpx_qvlfs;
        IntrPerm = Intrinsic::ppc_qpx_qvfperm;
        PermCntlTy = MVT::v4f64;
        PermTy = MVT::v4f64;
        LDTy = MemVT.getSimpleVT();
      }

      SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);

      // Create the new MMO for the new base load. It is like the original MMO,
      // but represents an area in memory almost twice the vector size centered
      // on the original address. If the address is unaligned, we might start
      // reading up to (sizeof(vector)-1) bytes below the address of the
      // original unaligned load.
      MachineFunction &MF = DAG.getMachineFunction();
      MachineMemOperand *BaseMMO =
        MF.getMachineMemOperand(LD->getMemOperand(), -MemVT.getStoreSize()+1,
                                2*MemVT.getStoreSize()-1);

      // Create the new base load.
      SDValue LDXIntID = DAG.getTargetConstant(IntrLD, getPointerTy());
      SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
      SDValue BaseLoad =
        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
                                DAG.getVTList(PermTy, MVT::Other),
                                BaseLoadOps, LDTy, BaseMMO);

      // Note that the value of IncOffset (which is provided to the next
      // load's pointer info offset value, and thus used to calculate the
      // alignment), and the value of IncValue (which is actually used to
      // increment the pointer value) are different! This is because we
      // require the next load to appear to be aligned, even though it
      // is actually offset from the base pointer by a lesser amount.
      int IncOffset = VT.getSizeInBits() / 8;
      int IncValue = IncOffset;

      // Walk (both up and down) the chain looking for another load at the real
      // (aligned) offset (the alignment of the other load does not matter in
      // this case). If found, then do not use the offset reduction trick, as
      // that will prevent the loads from being later combined (as they would
      // otherwise be duplicates).
      if (!findConsecutiveLoad(LD, DAG))
        --IncValue;

      SDValue Increment = DAG.getConstant(IncValue, getPointerTy());
      Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);

      MachineMemOperand *ExtraMMO =
        MF.getMachineMemOperand(LD->getMemOperand(),
                                1, 2*MemVT.getStoreSize()-1);
      SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
      SDValue ExtraLoad =
        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
                                DAG.getVTList(PermTy, MVT::Other),
                                ExtraLoadOps, LDTy, ExtraMMO);

      SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
        BaseLoad.getValue(1), ExtraLoad.getValue(1));

      // Because vperm has a big-endian bias, we must reverse the order
      // of the input vectors and complement the permute control vector
      // when generating little endian code.  We have already handled the
      // latter by using lvsr instead of lvsl, so just reverse BaseLoad
      // and ExtraLoad here.
      SDValue Perm;
      if (isLittleEndian)
        Perm = BuildIntrinsicOp(IntrPerm,
                                ExtraLoad, BaseLoad, PermCntl, DAG, dl);
      else
        Perm = BuildIntrinsicOp(IntrPerm,
                                BaseLoad, ExtraLoad, PermCntl, DAG, dl);

      if (VT != PermTy)
        Perm = Subtarget.hasAltivec() ?
                 DAG.getNode(ISD::BITCAST, dl, VT, Perm) :
                 DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX
                               DAG.getTargetConstant(1, MVT::i64));
                               // second argument is 1 because this rounding
                               // is always exact.

      // The output of the permutation is our loaded result; the TokenFactor
      // is our new chain.
9945      return SDValue(N, 0);
9946    }
9947    }
9948    break;
9949    case ISD::INTRINSIC_WO_CHAIN: {
9950      bool isLittleEndian = Subtarget.isLittleEndian();
9951      unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
9952      Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
9953                                           : Intrinsic::ppc_altivec_lvsl);
9954      if ((IID == Intr ||
9955           IID == Intrinsic::ppc_qpx_qvlpcld  ||
9956           IID == Intrinsic::ppc_qpx_qvlpcls) &&
9957        N->getOperand(1)->getOpcode() == ISD::ADD) {
9958        SDValue Add = N->getOperand(1);
9959
9960        int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ?
9961                   5 /* 32 byte alignment */ : 4 /* 16 byte alignment */;
9962
9963        if (DAG.MaskedValueIsZero(
9964                Add->getOperand(1),
9965                APInt::getAllOnesValue(Bits /* alignment */)
9966                    .zext(
9967                        Add.getValueType().getScalarType().getSizeInBits()))) {
9968          SDNode *BasePtr = Add->getOperand(0).getNode();
9969          for (SDNode::use_iterator UI = BasePtr->use_begin(),
9970                                    UE = BasePtr->use_end();
9971               UI != UE; ++UI) {
9972            if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
9973                cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == IID) {
9974              // We've found another LVSL/LVSR, and this address is an aligned
9975              // multiple of that one. The results will be the same, so use the
9976              // one we've just found instead.
9977
9978              return SDValue(*UI, 0);
9979            }
9980          }
9981        }
9982
9983        if (isa<ConstantSDNode>(Add->getOperand(1))) {
9984          SDNode *BasePtr = Add->getOperand(0).getNode();
9985          for (SDNode::use_iterator UI = BasePtr->use_begin(),
9986               UE = BasePtr->use_end(); UI != UE; ++UI) {
9987            if (UI->getOpcode() == ISD::ADD &&
9988                isa<ConstantSDNode>(UI->getOperand(1)) &&
9989                (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
9990                 cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
9991                (1ULL << Bits) == 0) {
9992              SDNode *OtherAdd = *UI;
9993              for (SDNode::use_iterator VI = OtherAdd->use_begin(),
9994                   VE = OtherAdd->use_end(); VI != VE; ++VI) {
9995                if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
9996                    cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
9997                  return SDValue(*VI, 0);
9998                }
9999              }
10000            }
10001          }
10002        }
10003      }
10004    }
10005
10006    break;
  case ISD::INTRINSIC_W_CHAIN: {
    // For little endian, VSX loads require generating lxvd2x/xxswapd.
    if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) {
      switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
      default:
        break;
      case Intrinsic::ppc_vsx_lxvw4x:
      case Intrinsic::ppc_vsx_lxvd2x:
        return expandVSXLoadForLE(N, DCI);
      }
    }
    break;
  }
  case ISD::INTRINSIC_VOID: {
    // For little endian, VSX stores require generating xxswapd/stxvd2x.
    if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) {
      switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
      default:
        break;
      case Intrinsic::ppc_vsx_stxvw4x:
      case Intrinsic::ppc_vsx_stxvd2x:
        return expandVSXStoreForLE(N, DCI);
      }
    }
    break;
  }
  case ISD::BSWAP:
    // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
    if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
        N->getOperand(0).hasOneUse() &&
        (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
         (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
          N->getValueType(0) == MVT::i64))) {
      SDValue Load = N->getOperand(0);
      LoadSDNode *LD = cast<LoadSDNode>(Load);
      // Create the byte-swapping load.
      SDValue Ops[] = {
        LD->getChain(),    // Chain
        LD->getBasePtr(),  // Ptr
        DAG.getValueType(N->getValueType(0)) // VT
      };
      SDValue BSLoad =
        DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
                                DAG.getVTList(N->getValueType(0) == MVT::i64 ?
                                              MVT::i64 : MVT::i32, MVT::Other),
                                Ops, LD->getMemoryVT(), LD->getMemOperand());

      // If this is an i16 load, insert the truncate.
      SDValue ResVal = BSLoad;
      if (N->getValueType(0) == MVT::i16)
        ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);

      // First, combine the bswap away.  This makes the value produced by the
      // load dead.
      DCI.CombineTo(N, ResVal);

      // Next, combine the load away; we give it a bogus result value but a
      // real chain result.  The result value is dead because the bswap is dead.
      DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));

      // Return N so it doesn't get rechecked!
      return SDValue(N, 0);
    }

    break;
  case PPCISD::VCMP: {
    // If a VCMPo node already exists with exactly the same operands as this
    // node, use its result instead of this node (VCMPo computes both a CR6 and
    // a normal output).
    //
    if (!N->getOperand(0).hasOneUse() &&
        !N->getOperand(1).hasOneUse() &&
        !N->getOperand(2).hasOneUse()) {

      // Scan all of the users of the LHS, looking for VCMPo's that match.
      SDNode *VCMPoNode = nullptr;

      SDNode *LHSN = N->getOperand(0).getNode();
      for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
           UI != E; ++UI)
        if (UI->getOpcode() == PPCISD::VCMPo &&
            UI->getOperand(1) == N->getOperand(1) &&
            UI->getOperand(2) == N->getOperand(2) &&
            UI->getOperand(0) == N->getOperand(0)) {
          VCMPoNode = *UI;
          break;
        }

      // If there is no VCMPo node, or if its flag result is unused, don't
      // transform this.
      if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
        break;

      // Look at the (necessarily single) use of the flag value.  If it has a
      // chain, this transformation is more complex.  Note that multiple things
      // could use the value result, which we should ignore.
      SDNode *FlagUser = nullptr;
      for (SDNode::use_iterator UI = VCMPoNode->use_begin();
           FlagUser == nullptr; ++UI) {
        assert(UI != VCMPoNode->use_end() && "Didn't find user!");
        SDNode *User = *UI;
        for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
          if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
            FlagUser = User;
            break;
          }
        }
      }

      // If the user is a MFOCRF instruction, we know this is safe.
      // Otherwise we give up for right now.
      if (FlagUser->getOpcode() == PPCISD::MFOCRF)
        return SDValue(VCMPoNode, 0);
    }
    break;
  }
  case ISD::BRCOND: {
    SDValue Cond = N->getOperand(1);
    SDValue Target = N->getOperand(2);

    if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
          Intrinsic::ppc_is_decremented_ctr_nonzero) {

      // We now need to make the intrinsic dead (it cannot be instruction
      // selected).
      DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
      assert(Cond.getNode()->hasOneUse() &&
             "Counter decrement has more than one use");

      return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
                         N->getOperand(0), Target);
    }
  }
  break;
  case ISD::BR_CC: {
    // If this is a branch on an altivec predicate comparison, lower this so
    // that we don't have to do a MFOCRF: instead, branch directly on CR6.  This
    // lowering is done pre-legalize, because the legalizer lowers the predicate
    // compare down to code that is difficult to reassemble.
    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
    SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);

    // Sometimes the promoted value of the intrinsic is ANDed with some
    // non-zero value. If so, look through the AND to get to the intrinsic.
    if (LHS.getOpcode() == ISD::AND &&
        LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
          Intrinsic::ppc_is_decremented_ctr_nonzero &&
        isa<ConstantSDNode>(LHS.getOperand(1)) &&
        !cast<ConstantSDNode>(LHS.getOperand(1))->getConstantIntValue()->
          isZero())
      LHS = LHS.getOperand(0);

    if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
          Intrinsic::ppc_is_decremented_ctr_nonzero &&
        isa<ConstantSDNode>(RHS)) {
      assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
             "Counter decrement comparison is not EQ or NE");

      unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
      bool isBDNZ = (CC == ISD::SETEQ && Val) ||
                    (CC == ISD::SETNE && !Val);

      // We now need to make the intrinsic dead (it cannot be instruction
      // selected).
      DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
      assert(LHS.getNode()->hasOneUse() &&
             "Counter decrement has more than one use");

      return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
                         N->getOperand(0), N->getOperand(4));
    }

    int CompareOpc;
    bool isDot;

    if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
        isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
        getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
      assert(isDot && "Can't compare against a vector result!");

      // If this is a comparison against something other than 0/1, then we know
      // that the condition is never/always true.
      unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
      if (Val != 0 && Val != 1) {
        if (CC == ISD::SETEQ)      // Cond never true, remove branch.
          return N->getOperand(0);
        // Always !=, turn it into an unconditional branch.
        return DAG.getNode(ISD::BR, dl, MVT::Other,
                           N->getOperand(0), N->getOperand(4));
      }

      bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);

      // Create the PPCISD altivec 'dot' comparison node.
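      // The dot form of the compare (e.g. vcmpequw.) also sets CR6, so the
      // branch can test CR6 directly instead of copying it out via MFOCRF.
      // Illustrative (not verbatim) output:
      //   vcmpequw. v2, v3, v4
      //   beq cr6, target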
      SDValue Ops[] = {
        LHS.getOperand(2),  // LHS of compare
        LHS.getOperand(3),  // RHS of compare
        DAG.getConstant(CompareOpc, MVT::i32)
      };
      EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
      SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);

      // Unpack the result based on how the target uses it.
      PPC::Predicate CompOpc;
      switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
      default:  // Can't happen, don't crash on invalid number though.
      case 0:   // Branch on the value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
        break;
      case 1:   // Branch on the inverted value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
        break;
      case 2:   // Branch on the value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
        break;
      case 3:   // Branch on the inverted value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
        break;
      }

      return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
                         DAG.getConstant(CompOpc, MVT::i32),
                         DAG.getRegister(PPC::CR6, MVT::i32),
                         N->getOperand(4), CompNode.getValue(1));
    }
    break;
  }
  }

  return SDValue();
}

SDValue
PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                 SelectionDAG &DAG,
                                 std::vector<SDNode *> *Created) const {
  // fold (sdiv X, pow2)
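  // This lowers to a shift-right-algebraic plus add-carry sequence
  // (PPCISD::SRA_ADDZE). Illustrative (not verbatim) output for X/4:
  //   srawi r4, r3, 2   ; sets CA iff X was negative and a one bit shifted out
  //   addze r4, r4      ; add the carry to round the quotient toward zero
  // For a divisor that is a negative power of two, the result is then negated.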
  EVT VT = N->getValueType(0);
  if (VT == MVT::i64 && !Subtarget.isPPC64())
    return SDValue();
  if ((VT != MVT::i32 && VT != MVT::i64) ||
      !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
    return SDValue();

  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);

  bool IsNegPow2 = (-Divisor).isPowerOf2();
  unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
  SDValue ShiftAmt = DAG.getConstant(Lg2, VT);

  SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
  if (Created)
    Created->push_back(Op.getNode());

  if (IsNegPow2) {
    Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), Op);
    if (Created)
      Created->push_back(Op.getNode());
  }

  return Op;
}

//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                      APInt &KnownZero,
                                                      APInt &KnownOne,
                                                      const SelectionDAG &DAG,
                                                      unsigned Depth) const {
  KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
  switch (Op.getOpcode()) {
  default: break;
  case PPCISD::LBRX: {
    // lhbrx is known to have the top bits cleared out.
    if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
      KnownZero = 0xFFFF0000;
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
    default: break;
    case Intrinsic::ppc_altivec_vcmpbfp_p:
    case Intrinsic::ppc_altivec_vcmpeqfp_p:
    case Intrinsic::ppc_altivec_vcmpequb_p:
    case Intrinsic::ppc_altivec_vcmpequh_p:
    case Intrinsic::ppc_altivec_vcmpequw_p:
    case Intrinsic::ppc_altivec_vcmpgefp_p:
    case Intrinsic::ppc_altivec_vcmpgtfp_p:
    case Intrinsic::ppc_altivec_vcmpgtsb_p:
    case Intrinsic::ppc_altivec_vcmpgtsh_p:
    case Intrinsic::ppc_altivec_vcmpgtsw_p:
    case Intrinsic::ppc_altivec_vcmpgtub_p:
    case Intrinsic::ppc_altivec_vcmpgtuh_p:
    case Intrinsic::ppc_altivec_vcmpgtuw_p:
      KnownZero = ~1U;  // All bits but the low one are known to be zero.
      break;
    }
  }
  }
}

unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
  switch (Subtarget.getDarwinDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8: {
    if (!ML)
      break;

    const PPCInstrInfo *TII = Subtarget.getInstrInfo();

    // For small loops (between 5 and 8 instructions), align to a 32-byte
    // boundary so that the entire loop fits in one instruction-cache line.
    uint64_t LoopSize = 0;
    for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
      for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J)
        LoopSize += TII->GetInstSizeInBytes(J);

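    // Note that the value returned below is a log2 byte alignment: returning
    // 5 requests a 2^5 == 32-byte boundary.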
    if (LoopSize > 16 && LoopSize <= 32)
      return 5;

    break;
  }
  }

  return TargetLowering::getPrefLoopAlignment(ML);
}

/// getConstraintType - Given a constraint, return the type of
/// constraint it is for this target.
PPCTargetLowering::ConstraintType
PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'v':
    case 'y':
      return C_RegisterClass;
    case 'Z':
      // FIXME: While Z does indicate a memory constraint, it specifically
      // indicates an r+r address (used in conjunction with the 'y' modifier
      // in the replacement string). Currently, we're forcing the base
      // register to be r0 in the asm printer (which is interpreted as zero)
      // and forming the complete address in the second register. This is
      // suboptimal.
      return C_Memory;
    }
  } else if (Constraint == "wc") { // individual CR bits.
    return C_RegisterClass;
  } else if (Constraint == "wa" || Constraint == "wd" ||
             Constraint == "wf" || Constraint == "ws") {
    return C_RegisterClass; // VSX registers.
  }
  return TargetLowering::getConstraintType(Constraint);
}

/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
PPCTargetLowering::getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (!CallOperandVal)
    return CW_Default;
  Type *type = CallOperandVal->getType();

  // Look at the constraint type.
  if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
    return CW_Register; // an individual CR bit.
  else if ((StringRef(constraint) == "wa" ||
            StringRef(constraint) == "wd" ||
            StringRef(constraint) == "wf") &&
           type->isVectorTy())
    return CW_Register;
  else if (StringRef(constraint) == "ws" && type->isDoubleTy())
    return CW_Register;

  switch (*constraint) {
  default:
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    break;
  case 'b':
    if (type->isIntegerTy())
      weight = CW_Register;
    break;
  case 'f':
    if (type->isFloatTy())
      weight = CW_Register;
    break;
  case 'd':
    if (type->isDoubleTy())
      weight = CW_Register;
    break;
  case 'v':
    if (type->isVectorTy())
      weight = CW_Register;
    break;
  case 'y':
    weight = CW_Register;
    break;
  case 'Z':
    weight = CW_Memory;
    break;
  }
  return weight;
}

std::pair<unsigned, const TargetRegisterClass *>
PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                const std::string &Constraint,
                                                MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b':   // R1-R31
      if (VT == MVT::i64 && Subtarget.isPPC64())
        return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
      return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
    case 'r':   // R0-R31
      if (VT == MVT::i64 && Subtarget.isPPC64())
        return std::make_pair(0U, &PPC::G8RCRegClass);
      return std::make_pair(0U, &PPC::GPRCRegClass);
    case 'f':
      if (VT == MVT::f32 || VT == MVT::i32)
        return std::make_pair(0U, &PPC::F4RCRegClass);
      if (VT == MVT::f64 || VT == MVT::i64)
        return std::make_pair(0U, &PPC::F8RCRegClass);
      if (VT == MVT::v4f64 && Subtarget.hasQPX())
        return std::make_pair(0U, &PPC::QFRCRegClass);
      if (VT == MVT::v4f32 && Subtarget.hasQPX())
        return std::make_pair(0U, &PPC::QSRCRegClass);
      break;
    case 'v':
      if (VT == MVT::v4f64 && Subtarget.hasQPX())
        return std::make_pair(0U, &PPC::QFRCRegClass);
      if (VT == MVT::v4f32 && Subtarget.hasQPX())
        return std::make_pair(0U, &PPC::QSRCRegClass);
      return std::make_pair(0U, &PPC::VRRCRegClass);
    case 'y':   // crrc
      return std::make_pair(0U, &PPC::CRRCRegClass);
    }
  } else if (Constraint == "wc") { // an individual CR bit.
    return std::make_pair(0U, &PPC::CRBITRCRegClass);
  } else if (Constraint == "wa" || Constraint == "wd" ||
             Constraint == "wf") {
    return std::make_pair(0U, &PPC::VSRCRegClass);
  } else if (Constraint == "ws") {
    return std::make_pair(0U, &PPC::VSFRCRegClass);
  }

  std::pair<unsigned, const TargetRegisterClass *> R =
      TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);

  // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
  // (which we call X[0-9]+). If a 64-bit value has been requested, and a
  // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
  // register.
  // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
  // the AsmName field from *RegisterInfo.td, then this would not be necessary.
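  // For example (illustrative), a request for {r3} with a 64-bit operand on
  // PPC64 initially resolves to the 32-bit R3 and is upgraded here to X3.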
  if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
      PPC::GPRCRegClass.contains(R.first))
    return std::make_pair(TRI->getMatchingSuperReg(R.first,
                            PPC::sub_32, &PPC::G8RCRegClass),
                          &PPC::G8RCRegClass);

  // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
  if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
    R.first = PPC::CR0;
    R.second = &PPC::CRRCRegClass;
  }

  return R;
}


/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector.  If it is invalid, don't add anything to Ops.
void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                     std::string &Constraint,
                                                     std::vector<SDValue> &Ops,
                                                     SelectionDAG &DAG) const {
  SDValue Result;

  // Only support length 1 constraints.
  if (Constraint.length() > 1) return;

  char Letter = Constraint[0];
  switch (Letter) {
  default: break;
  case 'I':
  case 'J':
  case 'K':
  case 'L':
  case 'M':
  case 'N':
  case 'O':
  case 'P': {
    ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
    if (!CST) return; // Must be an immediate to match.
    int64_t Value = CST->getSExtValue();
    EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
                         // numbers are printed as such.
    switch (Letter) {
    default: llvm_unreachable("Unknown constraint letter!");
    case 'I':  // "I" is a signed 16-bit constant.
      if (isInt<16>(Value))
        Result = DAG.getTargetConstant(Value, TCVT);
      break;
    case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
      if (isShiftedUInt<16, 16>(Value))
        Result = DAG.getTargetConstant(Value, TCVT);
      break;
    case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
      if (isShiftedInt<16, 16>(Value))
        Result = DAG.getTargetConstant(Value, TCVT);
      break;
    case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
      if (isUInt<16>(Value))
        Result = DAG.getTargetConstant(Value, TCVT);
      break;
    case 'M':  // "M" is a constant that is greater than 31.
      if (Value > 31)
        Result = DAG.getTargetConstant(Value, TCVT);
      break;
    case 'N':  // "N" is a positive constant that is an exact power of two.
      if (Value > 0 && isPowerOf2_64(Value))
        Result = DAG.getTargetConstant(Value, TCVT);
      break;
    case 'O':  // "O" is the constant zero.
      if (Value == 0)
        Result = DAG.getTargetConstant(Value, TCVT);
      break;
    case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
      if (isInt<16>(-Value))
        Result = DAG.getTargetConstant(Value, TCVT);
      break;
    }
    break;
  }
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }

  // Handle standard constraint letters.
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                              Type *Ty) const {
  // PPC does not allow r+i addressing modes for vectors!
  if (Ty->isVectorTy() && AM.BaseOffs != 0)
    return false;

  // PPC allows a sign-extended 16-bit immediate field.
  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
    return false;

  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // Of the scaled addressing modes, PPC supports only r+r:
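  // e.g. (illustrative): lwz r3, 8(r1) is an r+i access and lwzx r3, r4, r5
  // is an r+r access; x86-style scaled-index forms do not exist on PPC.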
  switch (AM.Scale) {
  case 0:  // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
      return false;
    // Otherwise we have r+r or r+i.
    break;
  case 2:
    if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r  or  2*r+i is not allowed.
      return false;
    // Allow 2*r as r+r.
    break;
  default:
    // No other scales are supported.
    return false;
  }

  return true;
}

SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
                                           SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MFI->setReturnAddressIsTaken(true);

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  // Make sure the function does not optimize away the store of the RA to
  // the stack.
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setLRStoreRequired();
  bool isPPC64 = Subtarget.isPPC64();

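  // For a non-zero depth, walk up the frame chain and load the saved LR from
  // the ABI return-address slot of the caller's frame.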
  if (Depth > 0) {
    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
    SDValue Offset =
        DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(),
                        isPPC64 ? MVT::i64 : MVT::i32);
    return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, dl, getPointerTy(),
                                   FrameAddr, Offset),
                       MachinePointerInfo(), false, false, false, 0);
  }

  // Just load the return address off the stack.
  SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
  return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
                     RetAddrFI, MachinePointerInfo(), false, false, false, 0);
}

SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = PtrVT == MVT::i64;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MFI->setFrameAddressIsTaken(true);

  // Naked functions never have a frame pointer, and so we use r1. For all
  // other functions, this decision must be deferred until PEI.
  unsigned FrameReg;
  if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
    FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
  else
    FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;

  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
                                         PtrVT);
  while (Depth--)
    FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
                            FrameAddr, MachinePointerInfo(), false, false,
                            false, 0);
  return FrameAddr;
}

// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
unsigned PPCTargetLowering::getRegisterByName(const char* RegName,
                                              EVT VT) const {
  bool isPPC64 = Subtarget.isPPC64();
  bool isDarwinABI = Subtarget.isDarwinABI();

  if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
      (!isPPC64 && VT != MVT::i32))
    report_fatal_error("Invalid register global variable type");

  bool is64Bit = isPPC64 && VT == MVT::i64;
  unsigned Reg = StringSwitch<unsigned>(RegName)
                   .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
                   .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2)
                   .Case("r13", (!isPPC64 && isDarwinABI) ? 0 :
                                  (is64Bit ? PPC::X13 : PPC::R13))
                   .Default(0);

  if (Reg)
    return Reg;
  report_fatal_error("Invalid register name for global variable");
}

bool
PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The PowerPC target isn't yet aware of offsets.
  return false;
}

bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                           const CallInst &I,
                                           unsigned Intrinsic) const {

  switch (Intrinsic) {
  case Intrinsic::ppc_qpx_qvlfd:
  case Intrinsic::ppc_qpx_qvlfs:
  case Intrinsic::ppc_qpx_qvlfcd:
  case Intrinsic::ppc_qpx_qvlfcs:
  case Intrinsic::ppc_qpx_qvlfiwa:
  case Intrinsic::ppc_qpx_qvlfiwz:
  case Intrinsic::ppc_altivec_lvx:
  case Intrinsic::ppc_altivec_lvxl:
  case Intrinsic::ppc_altivec_lvebx:
  case Intrinsic::ppc_altivec_lvehx:
  case Intrinsic::ppc_altivec_lvewx:
  case Intrinsic::ppc_vsx_lxvd2x:
  case Intrinsic::ppc_vsx_lxvw4x: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_altivec_lvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_lvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_lvewx:
      VT = MVT::i32;
      break;
    case Intrinsic::ppc_vsx_lxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvlfd:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvlfs:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvlfcd:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvlfcs:
      VT = MVT::v2f32;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

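    // These loads implicitly align the pointer (the low-order address bits
    // are ignored), so the bytes actually touched may begin before the given
    // address; describe a conservative window of 2*size-1 bytes around it.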
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = -VT.getStoreSize()+1;
    Info.size = 2*VT.getStoreSize()-1;
    Info.align = 1;
    Info.vol = false;
    Info.readMem = true;
    Info.writeMem = false;
    return true;
  }
  case Intrinsic::ppc_qpx_qvlfda:
  case Intrinsic::ppc_qpx_qvlfsa:
  case Intrinsic::ppc_qpx_qvlfcda:
  case Intrinsic::ppc_qpx_qvlfcsa:
  case Intrinsic::ppc_qpx_qvlfiwaa:
  case Intrinsic::ppc_qpx_qvlfiwza: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_qpx_qvlfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvlfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvlfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvlfcsa:
      VT = MVT::v2f32;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.size = VT.getStoreSize();
    Info.align = 1;
    Info.vol = false;
    Info.readMem = true;
    Info.writeMem = false;
    return true;
  }
  case Intrinsic::ppc_qpx_qvstfd:
  case Intrinsic::ppc_qpx_qvstfs:
  case Intrinsic::ppc_qpx_qvstfcd:
  case Intrinsic::ppc_qpx_qvstfcs:
  case Intrinsic::ppc_qpx_qvstfiw:
  case Intrinsic::ppc_altivec_stvx:
  case Intrinsic::ppc_altivec_stvxl:
  case Intrinsic::ppc_altivec_stvebx:
  case Intrinsic::ppc_altivec_stvehx:
  case Intrinsic::ppc_altivec_stvewx:
  case Intrinsic::ppc_vsx_stxvd2x:
  case Intrinsic::ppc_vsx_stxvw4x: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_altivec_stvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_stvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_stvewx:
      VT = MVT::i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfd:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvstfs:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvstfcd:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfcs:
      VT = MVT::v2f32;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

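    // As with the loads above, these stores implicitly align the pointer, so
    // conservatively report a 2*size-1 byte window around the given address.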
    Info.opc = ISD::INTRINSIC_VOID;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(1);
    Info.offset = -VT.getStoreSize()+1;
    Info.size = 2*VT.getStoreSize()-1;
    Info.align = 1;
    Info.vol = false;
    Info.readMem = false;
    Info.writeMem = true;
    return true;
  }
  case Intrinsic::ppc_qpx_qvstfda:
  case Intrinsic::ppc_qpx_qvstfsa:
  case Intrinsic::ppc_qpx_qvstfcda:
  case Intrinsic::ppc_qpx_qvstfcsa:
  case Intrinsic::ppc_qpx_qvstfiwa: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_qpx_qvstfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvstfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvstfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfcsa:
      VT = MVT::v2f32;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_VOID;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(1);
    Info.offset = 0;
    Info.size = VT.getStoreSize();
    Info.align = 1;
    Info.vol = false;
    Info.readMem = false;
    Info.writeMem = true;
    return true;
  }
  default:
    break;
  }

  return false;
}

/// getOptimalMemOpType - Returns the target-specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
/// lowering. If DstAlign is zero, the destination alignment can satisfy any
/// constraint. Similarly, if SrcAlign is zero, there is no need to check it
/// against an alignment requirement, probably because the source does not
/// need to be loaded. If 'IsMemset' is true, that means it's expanding a
/// memset. If 'ZeroMemset' is true, that means it's a memset of zero.
/// 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
/// not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
                                           unsigned DstAlign, unsigned SrcAlign,
                                           bool IsMemset, bool ZeroMemset,
                                           bool MemcpyStrSrc,
                                           MachineFunction &MF) const {
  if (Subtarget.isPPC64())
    return MVT::i64;
  return MVT::i32;
}

/// \brief Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                                          Type *Ty) const {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0 || BitSize > 64)
    return false;
  return true;
}

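// Truncating i64 to i32 is free on PPC64: the i32 is simply the low half of
// the 64-bit GPR (its 32-bit subregister), so no instruction is needed.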
bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  return NumBits1 == 64 && NumBits2 == 32;
}

bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
  if (!VT1.isInteger() || !VT2.isInteger())
    return false;
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  return NumBits1 == 64 && NumBits2 == 32;
}

bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Generally speaking, zexts are not free, but they are free when they can be
  // folded with other operations.
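  // For example, lbz/lhz/lwz already zero the high-order bits, so a zext of
  // such a load requires no additional instruction.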
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
         (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  // FIXME: Add other cases...
  //  - 32-bit shifts with a zext to i64
  //  - zext after ctlz, bswap, etc.
  //  - zext after and by a constant mask

  return TargetLowering::isZExtFree(Val, VT2);
}

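// On PPC, f32 values are kept in the FPRs in double-precision format, so
// extending f32 to f64 requires no instruction.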
bool PPCTargetLowering::isFPExtFree(EVT VT) const {
  assert(VT.isFloatingPoint());
  return true;
}

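// cmpwi/cmpdi take a signed 16-bit immediate and cmplwi/cmpldi an unsigned
// 16-bit immediate, so either form is legal for a compare.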
bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<16>(Imm) || isUInt<16>(Imm);
}

bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<16>(Imm) || isUInt<16>(Imm);
}

bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                       unsigned,
                                                       unsigned,
                                                       bool *Fast) const {
  if (DisablePPCUnaligned)
    return false;

  // PowerPC supports unaligned memory access for simple non-vector types.
  // Although accessing unaligned addresses is not as efficient as accessing
  // aligned addresses, it is generally more efficient than manual expansion,
  // and generally only traps for software emulation when crossing page
  // boundaries.

  if (!VT.isSimple())
    return false;

  if (VT.getSimpleVT().isVector()) {
    if (Subtarget.hasVSX()) {
      if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
          VT != MVT::v4f32 && VT != MVT::v4i32)
        return false;
    } else {
      return false;
    }
  }

  if (VT == MVT::ppcf128)
    return false;

  if (Fast)
    *Fast = true;

  return true;
}

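// PPC has single-instruction fused multiply-add (fmadd/fmadds), so an FMA is
// at least as fast as a separate multiply and add for f32 and f64.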
bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
    return true;
  default:
    break;
  }

  return false;
}

const MCPhysReg *
PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
  // LR is a callee-save register, but we must treat it as clobbered by any call
  // site. Hence we include LR in the scratch registers, which are in turn added
  // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
  // to CTR, which is used by any indirect call.
  static const MCPhysReg ScratchRegs[] = {
    PPC::X12, PPC::LR8, PPC::CTR8, 0
  };

  return ScratchRegs;
}

bool
PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
                     EVT VT, unsigned DefinedValues) const {
  if (VT == MVT::v2i64)
    return false;

  if (Subtarget.hasQPX()) {
    if (VT == MVT::v4f32 || VT == MVT::v4f64 || VT == MVT::v4i1)
      return true;
  }

  return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
}

Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
  if (DisableILPPref || Subtarget.enableMachineScheduler())
    return TargetLowering::getSchedulingPreference(N);

  return Sched::ILP;
}

// Create a fast isel object.
FastISel *
PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
                                  const TargetLibraryInfo *LibInfo) const {
  return PPC::createFastISel(FuncInfo, LibInfo);
}
