//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCCallingConv.h"
#include "PPCCCState.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCTargetMachine.h"
#include "PPCTargetObjectFile.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
#include <list>

using namespace llvm;

#define DEBUG_TYPE "ppc-lowering"

static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);

static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableSCO("disable-ppc-sco",
cl::desc("disable sibling call optimization on ppc"), cl::Hidden);

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");

// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;

PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
                                     const PPCSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  bool isPPC64 = Subtarget.isPPC64();
  setMinStackArgumentAlignment(isPPC64 ? 8 : 4);

  // Set up the register classes.
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  if (!useSoftFloat()) {
    addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
    addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
  }

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
  }

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-inc loads and stores.
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);

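  // With CR-bit tracking, i1 values are carried directly in condition-register
  // bits (the CRBITRC register class added at the end of this block).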
  if (Subtarget.useCRBits()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

    if (isPPC64 || Subtarget.hasFPCVT()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
                         isPPC64 ? MVT::i64 : MVT::i32);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);
    } else {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
    }

    // PowerPC does not support direct load / store of condition registers
    setOperationAction(ISD::LOAD, MVT::i1, Custom);
    setOperationAction(ISD::STORE, MVT::i1, Custom);

    // FIXME: Remove this once the ANDI glue bug is fixed:
    if (ANDIGlueBug)
      setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);

    for (MVT VT : MVT::integer_valuetypes()) {
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
      setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
      setTruncStoreAction(VT, MVT::i1, Expand);
    }

    addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
  }

  // This is used in the ppcf128->int sequence.  Note it has different semantics
  // from FP_ROUND:  that rounds to nearest, this rounds to zero.
  setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);

  // We do not currently implement these libm ops for PowerPC.
  setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
  setOperationAction(ISD::FCEIL,  MVT::ppcf128, Expand);
  setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
  setOperationAction(ISD::FRINT,  MVT::ppcf128, Expand);
  setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FREM, MVT::ppcf128, Expand);

  // PowerPC has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod/pow
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FPOW , MVT::f64, Expand);
  setOperationAction(ISD::FMA  , MVT::f64, Legal);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);
  setOperationAction(ISD::FPOW , MVT::f32, Expand);
  setOperationAction(ISD::FMA  , MVT::f32, Legal);

  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

  // Expand FSQRT unless we have hardware square root, or can use an
  // unsafe-math reciprocal-estimate sequence (FRSQRTE/FRE and friends).
  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
        Subtarget.hasFRE()))
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);

  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
        Subtarget.hasFRES()))
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  if (Subtarget.hasFCPSGN()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
  } else {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  }

  if (Subtarget.hasFPRND()) {
    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
    setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
    setOperationAction(ISD::FROUND, MVT::f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
    setOperationAction(ISD::FROUND, MVT::f32, Legal);
  }

  // PowerPC does not have BSWAP or CTTZ; CTPOP is only treated as legal on
  // subtargets with fast POPCNTD (below).
  setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
  setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
  setOperationAction(ISD::CTTZ , MVT::i64  , Expand);

  if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
    setOperationAction(ISD::CTPOP, MVT::i32  , Legal);
    setOperationAction(ISD::CTPOP, MVT::i64  , Legal);
  } else {
    setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
    setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
  }

  // PowerPC does not have ROTR
  setOperationAction(ISD::ROTR, MVT::i32   , Expand);
  setOperationAction(ISD::ROTR, MVT::i64   , Expand);

  if (!Subtarget.useCRBits()) {
    // PowerPC does not have Select
    setOperationAction(ISD::SELECT, MVT::i32, Expand);
    setOperationAction(ISD::SELECT, MVT::i64, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Expand);
  }

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND which requires SetCC
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT,  MVT::Other, Expand);

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

  // PowerPC does not have [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

  if (Subtarget.hasDirectMove() && isPPC64) {
    setOperationAction(ISD::BITCAST, MVT::f32, Legal);
    setOperationAction(ISD::BITCAST, MVT::i32, Legal);
    setOperationAction(ISD::BITCAST, MVT::i64, Legal);
    setOperationAction(ISD::BITCAST, MVT::f64, Legal);
  } else {
    setOperationAction(ISD::BITCAST, MVT::f32, Expand);
    setOperationAction(ISD::BITCAST, MVT::i32, Expand);
    setOperationAction(ISD::BITCAST, MVT::i64, Expand);
    setOperationAction(ISD::BITCAST, MVT::f64, Expand);
  }

  // We cannot sextinreg(i1).  Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
  // SjLj exception handling, but is a light-weight setjmp/longjmp replacement
  // to support continuations, user-level threading, and so on. As a result,
  // no other SjLj exception interfaces are implemented; please don't build
  // your own exception handling on top of them.
  // LLVM/Clang supports zero-cost DWARF exception handling.
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::BlockAddress,  MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i64, Custom);

  // TRAP is legal.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.
  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  if (Subtarget.isSVR4ABI()) {
    if (isPPC64) {
      // VAARG always uses double-word chunks, so promote anything smaller.
      setOperationAction(ISD::VAARG, MVT::i1, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i8, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i16, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i32, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::Other, Expand);
    } else {
      // VAARG is custom lowered with the 32-bit SVR4 ABI.
      setOperationAction(ISD::VAARG, MVT::Other, Custom);
      setOperationAction(ISD::VAARG, MVT::i64, Custom);
    }
  } else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  if (Subtarget.isSVR4ABI() && !isPPC64)
    // VACOPY is custom lowered with the 32-bit SVR4 ABI.
    setOperationAction(ISD::VACOPY            , MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY            , MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // To handle counter-based loop conditions.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);

  // Comparisons that require checking two conditions.
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f64, Expand);

  if (Subtarget.has64BitSupport()) {
    // 64-bit-capable targets also have instructions for converting between
    // i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

    if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  // With the instructions enabled under FPCVT, we can do everything.
  if (Subtarget.hasFPCVT()) {
    if (Subtarget.has64BitSupport()) {
      setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
    }

    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  }

  if (Subtarget.use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }

  if (Subtarget.hasAltivec()) {
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (MVT VT : MVT::vector_valuetypes()) {
      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);

      // Vector instructions introduced in P8
      if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
        setOperationAction(ISD::CTPOP, VT, Legal);
        setOperationAction(ISD::CTLZ, VT, Legal);
      } else {
        setOperationAction(ISD::CTPOP, VT, Expand);
        setOperationAction(ISD::CTLZ, VT, Expand);
      }

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND   , VT, Promote);
      AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
      setOperationAction(ISD::OR    , VT, Promote);
      AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
      setOperationAction(ISD::XOR   , VT, Promote);
      AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
      setOperationAction(ISD::LOAD  , VT, Promote);
      AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::SELECT_CC, VT, Promote);
      AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType (ISD::STORE, VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL , VT, Expand);
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::FDIV, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FNEG, VT, Expand);
      setOperationAction(ISD::FSQRT, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FABS, VT, Expand);
      setOperationAction(ISD::FPOWI, VT, Expand);
      setOperationAction(ISD::FFLOOR, VT, Expand);
      setOperationAction(ISD::FCEIL,  VT, Expand);
      setOperationAction(ISD::FTRUNC, VT, Expand);
      setOperationAction(ISD::FRINT,  VT, Expand);
      setOperationAction(ISD::FNEARBYINT, VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UDIVREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::BSWAP, VT, Expand);
      setOperationAction(ISD::CTTZ, VT, Expand);
      setOperationAction(ISD::VSELECT, VT, Expand);
      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      for (MVT InnerVT : MVT::vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM; others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    setOperationAction(ISD::AND   , MVT::v4i32, Legal);
    setOperationAction(ISD::OR    , MVT::v4i32, Legal);
    setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32,
                       Subtarget.useCRBits() ? Legal : Expand);
    setOperationAction(ISD::STORE , MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);

    if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    }

    if (Subtarget.hasP8Altivec())
      setOperationAction(ISD::MUL, MVT::v4i32, Legal);
    else
      setOperationAction(ISD::MUL, MVT::v4i32, Custom);

    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    // Altivec does not contain unordered floating-point compare instructions
    setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETO,   MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);

    if (Subtarget.hasVSX()) {
      setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
      if (Subtarget.hasP8Vector()) {
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
      }
      if (Subtarget.hasDirectMove() && isPPC64) {
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
      }
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);

      setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
      setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
      setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
      setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
      setOperationAction(ISD::FROUND, MVT::v2f64, Legal);

      setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

      setOperationAction(ISD::MUL, MVT::v2f64, Legal);
      setOperationAction(ISD::FMA, MVT::v2f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);

      setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
      setOperationAction(ISD::VSELECT, MVT::v8i16, Legal);
      setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
      setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
      setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);

      // Share the Altivec comparison restrictions.
      setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETO,   MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);

      setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
      setOperationAction(ISD::STORE, MVT::v2f64, Legal);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);

      if (Subtarget.hasP8Vector())
        addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);

      addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);

      addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);

      if (Subtarget.hasP8Altivec()) {
        setOperationAction(ISD::SHL, MVT::v2i64, Legal);
        setOperationAction(ISD::SRA, MVT::v2i64, Legal);
        setOperationAction(ISD::SRL, MVT::v2i64, Legal);

        setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
      } else {
        setOperationAction(ISD::SHL, MVT::v2i64, Expand);
        setOperationAction(ISD::SRA, MVT::v2i64, Expand);
        setOperationAction(ISD::SRL, MVT::v2i64, Expand);

        setOperationAction(ISD::SETCC, MVT::v2i64, Custom);

        // VSX v2i64 only supports non-arithmetic operations.
        setOperationAction(ISD::ADD, MVT::v2i64, Expand);
        setOperationAction(ISD::SUB, MVT::v2i64, Expand);
      }

      setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
      AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
      setOperationAction(ISD::STORE, MVT::v2i64, Promote);
      AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);

      setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);

      // Vector operation legalization checks the result type of
      // SIGN_EXTEND_INREG, overall legalization checks the inner type.
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);

      setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
      setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
      setOperationAction(ISD::FABS, MVT::v4f32, Legal);
      setOperationAction(ISD::FABS, MVT::v2f64, Legal);

      addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
    }

    if (Subtarget.hasP8Altivec()) {
      addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
      addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
    }
    if (Subtarget.hasP9Vector()) {
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal);
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal);
    }
  }

  if (Subtarget.hasQPX()) {
    setOperationAction(ISD::FADD, MVT::v4f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
    setOperationAction(ISD::FREM, MVT::v4f64, Expand);

    setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
    setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);

    setOperationAction(ISD::LOAD  , MVT::v4f64, Custom);
    setOperationAction(ISD::STORE , MVT::v4f64, Custom);

    setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
    setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand);
    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);

    setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal);
    setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand);

    setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal);
    setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);

    setOperationAction(ISD::FNEG , MVT::v4f64, Legal);
    setOperationAction(ISD::FABS , MVT::v4f64, Legal);
    setOperationAction(ISD::FSIN , MVT::v4f64, Expand);
    setOperationAction(ISD::FCOS , MVT::v4f64, Expand);
    setOperationAction(ISD::FPOWI , MVT::v4f64, Expand);
    setOperationAction(ISD::FPOW , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand);
    setOperationAction(ISD::FEXP , MVT::v4f64, Expand);
    setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand);

    setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal);

    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal);

    addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);

    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FREM, MVT::v4f32, Expand);

    setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
    setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);

    setOperationAction(ISD::LOAD  , MVT::v4f32, Custom);
    setOperationAction(ISD::STORE , MVT::v4f32, Custom);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand);
    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal);
    setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand);

    setOperationAction(ISD::FNEG , MVT::v4f32, Legal);
    setOperationAction(ISD::FABS , MVT::v4f32, Legal);
    setOperationAction(ISD::FSIN , MVT::v4f32, Expand);
    setOperationAction(ISD::FCOS , MVT::v4f32, Expand);
    setOperationAction(ISD::FPOWI , MVT::v4f32, Expand);
    setOperationAction(ISD::FPOW , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP , MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand);

    setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);

    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);

    setOperationAction(ISD::AND , MVT::v4i1, Legal);
    setOperationAction(ISD::OR , MVT::v4i1, Legal);
    setOperationAction(ISD::XOR , MVT::v4i1, Legal);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);

    setOperationAction(ISD::LOAD  , MVT::v4i1, Custom);
    setOperationAction(ISD::STORE , MVT::v4i1, Custom);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand);
    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);

    setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);

    addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);

    setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
    setOperationAction(ISD::FCEIL,  MVT::v4f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL,  MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

    setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);

    // These need to set FE_INEXACT, and so cannot be vectorized here.
    setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v4f32, Expand);

    if (TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    } else {
      setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
      setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);

      setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
    }
  }

  if (Subtarget.has64BitSupport())
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);

  if (!isPPC64) {
    setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
    setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
  }

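  // Boolean results (e.g. from SETCC) are represented as 0 or 1 in the low
  // bit of a scalar register.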
  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasAltivec()) {
    // Altivec instructions set fields to all zeros or all ones.
    setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  }

  if (!isPPC64) {
    // These libcalls are not available in 32-bit mode.
    setLibcallName(RTLIB::SHL_I128, nullptr);
    setLibcallName(RTLIB::SRL_I128, nullptr);
    setLibcallName(RTLIB::SRA_I128, nullptr);
  }

  setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
  setTargetDAGCombine(ISD::BUILD_VECTOR);
  if (Subtarget.hasFPCVT())
    setTargetDAGCombine(ISD::UINT_TO_FP);
  setTargetDAGCombine(ISD::LOAD);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);
  if (Subtarget.useCRBits())
    setTargetDAGCombine(ISD::BRCOND);
  setTargetDAGCombine(ISD::BSWAP);
  setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_VOID);

  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  if (Subtarget.useCRBits()) {
    setTargetDAGCombine(ISD::TRUNCATE);
    setTargetDAGCombine(ISD::SETCC);
    setTargetDAGCombine(ISD::SELECT_CC);
  }

  // Use reciprocal estimates.
  if (TM.Options.UnsafeFPMath) {
    setTargetDAGCombine(ISD::FDIV);
    setTargetDAGCombine(ISD::FSQRT);
  }

  // Darwin long double math library functions have $LDBL128 appended.
  if (Subtarget.isDarwin()) {
    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
    setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
    setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
    setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
    setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
    setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
  }

  // With 32 condition bits, we don't need to sink (and duplicate) compares
  // aggressively in CodeGenPrep.
  if (Subtarget.useCRBits()) {
    setHasMultipleConditionRegisters();
    setJumpIsExpensive();
  }

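  // Alignment values below are log2 of the byte alignment: 2 means 4-byte,
  // 4 means 16-byte alignment.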
  setMinFunctionAlignment(2);
  if (Subtarget.isDarwin())
    setPrefFunctionAlignment(4);

  switch (Subtarget.getDarwinDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_A2:
  case PPC::DIR_E500mc:
  case PPC::DIR_E5500:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
  case PPC::DIR_PWR9:
    setPrefFunctionAlignment(4);
    setPrefLoopAlignment(4);
    break;
  }

  if (Subtarget.enableMachineScheduler())
    setSchedulingPreference(Sched::Source);
  else
    setSchedulingPreference(Sched::Hybrid);

  computeRegisterProperties(STI.getRegisterInfo());

  // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
  if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
      Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
    MaxStoresPerMemset = 32;
    MaxStoresPerMemsetOptSize = 16;
    MaxStoresPerMemcpy = 32;
    MaxStoresPerMemcpyOptSize = 8;
    MaxStoresPerMemmove = 32;
    MaxStoresPerMemmoveOptSize = 8;
  } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
    // The A2 also benefits from (very) aggressive inlining of memcpy and
    // friends. The overhead of the function call, even when warm, can be
    // over one hundred cycles.
    MaxStoresPerMemset = 128;
    MaxStoresPerMemcpy = 128;
    MaxStoresPerMemmove = 128;
  }
}

/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
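/// For example, with QPX available a type containing a 256-bit (or wider)
/// vector is aligned to 32 bytes, while a 128-bit vector caps the alignment
/// at 16 bytes.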
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
                             unsigned MaxMaxAlign) {
  if (MaxAlign == MaxMaxAlign)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
      MaxAlign = 32;
    else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
      MaxAlign = 16;
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    unsigned EltAlign = 0;
    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (auto *EltTy : STy->elements()) {
      unsigned EltAlign = 0;
      getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == MaxMaxAlign)
        break;
    }
  }
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
                                                  const DataLayout &DL) const {
  // Darwin passes everything on a 4-byte boundary.
  if (Subtarget.isDarwin())
    return 4;

  // 16-byte and wider vectors are passed on a 16-byte boundary. Everything
  // else is passed on an 8-byte boundary on PPC64 and a 4-byte boundary on
  // PPC32.
  unsigned Align = Subtarget.isPPC64() ? 8 : 4;
  if (Subtarget.hasAltivec() || Subtarget.hasQPX())
    getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
  return Align;
}

bool PPCTargetLowering::useSoftFloat() const {
  return Subtarget.useSoftFloat();
}

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((PPCISD::NodeType)Opcode) {
  case PPCISD::FIRST_NUMBER:    break;
  case PPCISD::FSEL:            return "PPCISD::FSEL";
  case PPCISD::FCFID:           return "PPCISD::FCFID";
  case PPCISD::FCFIDU:          return "PPCISD::FCFIDU";
  case PPCISD::FCFIDS:          return "PPCISD::FCFIDS";
  case PPCISD::FCFIDUS:         return "PPCISD::FCFIDUS";
  case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
  case PPCISD::FCTIDUZ:         return "PPCISD::FCTIDUZ";
  case PPCISD::FCTIWUZ:         return "PPCISD::FCTIWUZ";
  case PPCISD::FRE:             return "PPCISD::FRE";
  case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
  case PPCISD::STFIWX:          return "PPCISD::STFIWX";
  case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM:           return "PPCISD::VPERM";
  case PPCISD::XXSPLT:          return "PPCISD::XXSPLT";
  case PPCISD::XXINSERT:        return "PPCISD::XXINSERT";
  case PPCISD::VECSHL:          return "PPCISD::VECSHL";
  case PPCISD::CMPB:            return "PPCISD::CMPB";
  case PPCISD::Hi:              return "PPCISD::Hi";
  case PPCISD::Lo:              return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
  case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
  case PPCISD::DYNAREAOFFSET:   return "PPCISD::DYNAREAOFFSET";
  case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL:             return "PPCISD::SRL";
  case PPCISD::SRA:             return "PPCISD::SRA";
  case PPCISD::SHL:             return "PPCISD::SHL";
  case PPCISD::SRA_ADDZE:       return "PPCISD::SRA_ADDZE";
  case PPCISD::CALL:            return "PPCISD::CALL";
  case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
  case PPCISD::MTCTR:           return "PPCISD::MTCTR";
  case PPCISD::BCTRL:           return "PPCISD::BCTRL";
  case PPCISD::BCTRL_LOAD_TOC:  return "PPCISD::BCTRL_LOAD_TOC";
  case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
  case PPCISD::READ_TIME_BASE:  return "PPCISD::READ_TIME_BASE";
  case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
  case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
  case PPCISD::MFVSR:           return "PPCISD::MFVSR";
  case PPCISD::MTVSRA:          return "PPCISD::MTVSRA";
  case PPCISD::MTVSRZ:          return "PPCISD::MTVSRZ";
  case PPCISD::SINT_VEC_TO_FP:  return "PPCISD::SINT_VEC_TO_FP";
  case PPCISD::UINT_VEC_TO_FP:  return "PPCISD::UINT_VEC_TO_FP";
  case PPCISD::ANDIo_1_EQ_BIT:  return "PPCISD::ANDIo_1_EQ_BIT";
  case PPCISD::ANDIo_1_GT_BIT:  return "PPCISD::ANDIo_1_GT_BIT";
  case PPCISD::VCMP:            return "PPCISD::VCMP";
  case PPCISD::VCMPo:           return "PPCISD::VCMPo";
  case PPCISD::LBRX:            return "PPCISD::LBRX";
  case PPCISD::STBRX:           return "PPCISD::STBRX";
  case PPCISD::LFIWAX:          return "PPCISD::LFIWAX";
  case PPCISD::LFIWZX:          return "PPCISD::LFIWZX";
  case PPCISD::LXVD2X:          return "PPCISD::LXVD2X";
  case PPCISD::STXVD2X:         return "PPCISD::STXVD2X";
  case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
  case PPCISD::BDNZ:            return "PPCISD::BDNZ";
  case PPCISD::BDZ:             return "PPCISD::BDZ";
  case PPCISD::MFFS:            return "PPCISD::MFFS";
  case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
  case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
  case PPCISD::CR6SET:          return "PPCISD::CR6SET";
  case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
  case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
  case PPCISD::PPC32_PICGOT:    return "PPCISD::PPC32_PICGOT";
  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
  case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
  case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
  case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
  case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
  case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
  case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
  case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
  case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
  case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
  case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
  case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
  case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
  case PPCISD::SC:              return "PPCISD::SC";
  case PPCISD::CLRBHRB:         return "PPCISD::CLRBHRB";
  case PPCISD::MFBHRBE:         return "PPCISD::MFBHRBE";
  case PPCISD::RFEBB:           return "PPCISD::RFEBB";
  case PPCISD::XXSWAPD:         return "PPCISD::XXSWAPD";
  case PPCISD::SWAP_NO_CHAIN:   return "PPCISD::SWAP_NO_CHAIN";
  case PPCISD::QVFPERM:         return "PPCISD::QVFPERM";
  case PPCISD::QVGPCI:          return "PPCISD::QVGPCI";
  case PPCISD::QVALIGNI:        return "PPCISD::QVALIGNI";
  case PPCISD::QVESPLATI:       return "PPCISD::QVESPLATI";
  case PPCISD::QBFLT:           return "PPCISD::QBFLT";
  case PPCISD::QVLFSb:          return "PPCISD::QVLFSb";
  }
  return nullptr;
}

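// The result type of a SETCC: i1 when CR bits are tracked (i32 otherwise) for
// scalars; for vectors, a vector of i1 on QPX, and otherwise an integer
// vector of matching width.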
EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
                                          EVT VT) const {
  if (!VT.isVector())
    return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;

  if (Subtarget.hasQPX())
    return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());

  return VT.changeVectorElementTypeToInteger();
}

bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
  return true;
}

//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
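/// For example, the big-endian two-input form (ShuffleKind == 0) matches the
/// byte mask <1,3,5,...,31>, i.e. the odd bytes of the concatenated inputs.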
1148bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1149                               SelectionDAG &DAG) {
1150  bool IsLE = DAG.getDataLayout().isLittleEndian();
1151  if (ShuffleKind == 0) {
1152    if (IsLE)
1153      return false;
1154    for (unsigned i = 0; i != 16; ++i)
1155      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1156        return false;
1157  } else if (ShuffleKind == 2) {
1158    if (!IsLE)
1159      return false;
1160    for (unsigned i = 0; i != 16; ++i)
1161      if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1162        return false;
1163  } else if (ShuffleKind == 1) {
1164    unsigned j = IsLE ? 0 : 1;
1165    for (unsigned i = 0; i != 8; ++i)
1166      if (!isConstantOrUndef(N->getMaskElt(i),    i*2+j) ||
1167          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j))
1168        return false;
1169  }
1170  return true;
1171}
1172
1173/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1174/// VPKUWUM instruction.
1175/// The ShuffleKind distinguishes between big-endian operations with
1176/// two different inputs (0), either-endian operations with two identical
1177/// inputs (1), and little-endian operations with two different inputs (2).
1178/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1179bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1180                               SelectionDAG &DAG) {
1181  bool IsLE = DAG.getDataLayout().isLittleEndian();
1182  if (ShuffleKind == 0) {
1183    if (IsLE)
1184      return false;
1185    for (unsigned i = 0; i != 16; i += 2)
1186      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
1187          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+3))
1188        return false;
1189  } else if (ShuffleKind == 2) {
1190    if (!IsLE)
1191      return false;
1192    for (unsigned i = 0; i != 16; i += 2)
1193      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
1194          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1))
1195        return false;
1196  } else if (ShuffleKind == 1) {
1197    unsigned j = IsLE ? 0 : 2;
1198    for (unsigned i = 0; i != 8; i += 2)
1199      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
1200          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
1201          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
1202          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1))
1203        return false;
1204  }
1205  return true;
1206}
1207
1208/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1209/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1210/// current subtarget.
1211///
1212/// The ShuffleKind distinguishes between big-endian operations with
1213/// two different inputs (0), either-endian operations with two identical
1214/// inputs (1), and little-endian operations with two different inputs (2).
1215/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  const PPCSubtarget& Subtarget =
    static_cast<const PPCSubtarget&>(DAG.getSubtarget());
  if (!Subtarget.hasP8Vector())
    return false;

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+4) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+5) ||
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+6) ||
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+7))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+3))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 4;
    for (unsigned i = 0; i != 8; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+j+3) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
        return false;
  }
  return true;
}

/// isVMerge - Common function used to match vmrg* shuffles: the mask must
/// interleave UnitSize-byte units taken alternately from offsets LHSStart and
/// RHSStart within the concatenated inputs.
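/// Illustrative example (not in the original comment): isVMerge(N, 4, 8, 24)
/// matches the big-endian vmrglw pattern, i.e. the byte mask
///   <8,9,10,11, 24,25,26,27, 12,13,14,15, 28,29,30,31>,
/// which interleaves word-sized units from the two inputs.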
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2).  For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
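/// For example (illustrative only): a normal two-input VMRGL word merge on a
/// big-endian target is matched as isVMerge(N, 4, 8, 24), while the swapped
/// little-endian form is matched as isVMerge(N, 4, 0, 16).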
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  }
}

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2).  For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
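/// Illustratively, the dispatch mirrors isVMRGLShuffleMask: a normal
/// two-input VMRGH merge on a big-endian target is matched as
/// isVMerge(N, UnitSize, 0, 16), while the swapped little-endian form is
/// matched as isVMerge(N, UnitSize, 8, 24).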
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  }
}

/**
 * \brief Common function used to match vmrgew and vmrgow shuffles
 *
 * The indexOffset determines whether to look for even or odd words in
 * the shuffle mask. This is based on the endianness of the target machine.
 *   - Little Endian:
 *     - Use offset of 0 to check for odd elements
 *     - Use offset of 4 to check for even elements
 *   - Big Endian:
 *     - Use offset of 0 to check for even elements
 *     - Use offset of 4 to check for odd elements
 * A detailed description of the vector element ordering for little-endian and
 * big-endian targets can be found in "Targeting your applications - what
 * little endian and big endian IBM XL C/C++ compiler differences mean to you"
 * at http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
 *
 * The mask to the shuffle vector instruction specifies the indices of the
 * elements from the two input vectors to place in the result. The elements are
 * numbered in array-access order, starting with the first vector. These
 * vectors are always of type v16i8, so each vector contains 16 byte-sized
 * (8-bit) elements. More information on shufflevector can be found in the
 * LLVM Language Reference:
 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
 *
 * The RHSStartValue indicates whether the same input vectors are used (unary)
 * or two different input vectors are used, based on the following:
 *   - If the instruction uses the same vector for both inputs, the range of the
 *     indices will be 0 to 15. In this case, the RHSStart value passed should
 *     be 0.
 *   - If the instruction has two different vectors then the range of the
 *     indices will be 0 to 31. In this case, the RHSStart value passed should
 *     be 16 (indices 0-15 specify elements in the first vector while indices 16
 *     to 31 specify elements in the second vector).
 *
 * \param[in] N The shuffle vector SD Node to analyze
 * \param[in] IndexOffset Specifies whether to look for even or odd elements
 * \param[in] RHSStartValue Specifies the starting index for the righthand input
 * vector to the shuffle_vector instruction
 * \return true iff this shuffle vector represents an even or odd word merge
 */
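/* Illustrative example (not part of the original comment): with
 * IndexOffset == 0 and RHSStartValue == 16 on a big-endian target, this
 * matches the vmrgew byte mask
 *   <0,1,2,3, 16,17,18,19, 8,9,10,11, 24,25,26,27>,
 * i.e. the even words of the two inputs interleaved. */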
static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
                     unsigned RHSStartValue) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;

  for (unsigned i = 0; i < 2; ++i)
    for (unsigned j = 0; j < 4; ++j)
      if (!isConstantOrUndef(N->getMaskElt(i*4+j),
                             i*RHSStartValue+j+IndexOffset) ||
          !isConstantOrUndef(N->getMaskElt(i*4+j+8),
                             i*RHSStartValue+j+IndexOffset+8))
        return false;
  return true;
}

/**
 * \brief Determine if the specified shuffle mask is suitable for the vmrgew or
 * vmrgow instructions.
 *
 * \param[in] N The shuffle vector SD Node to analyze
 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
 * \param[in] ShuffleKind Identify the type of merge:
 *   - 0 = big-endian merge with two different inputs;
 *   - 1 = either-endian merge with two identical inputs;
 *   - 2 = little-endian merge with two different inputs (inputs are swapped for
 *     little-endian merges).
 * \param[in] DAG The current SelectionDAG
 * \return true iff this shuffle mask represents the requested even or odd
 * word merge
 */
bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
                              unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    unsigned indexOffset = CheckEven ? 4 : 0;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  } else {
    unsigned indexOffset = CheckEven ? 0 : 4;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 0) // Normal
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  }
}

/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
/// The ShuffleKind distinguishes between big-endian operations with two
/// different inputs (0), either-endian operations with two identical inputs
/// (1), and little-endian operations with two different inputs (2).  For the
/// latter, the input operands are swapped (see PPCInstrAltivec.td).
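/// As a worked example (illustrative only): on a big-endian target with two
/// distinct inputs (ShuffleKind 0), the byte mask <3,4,5,...,17,18> shifts
/// the concatenated inputs left by 3 bytes, so this returns 3; on a
/// little-endian target the same pattern is reported as 16 - 3 = 13 because
/// the operands are swapped.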
int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
                             SelectionDAG &DAG) {
  if (N->getValueType(0) != MVT::v16i8)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;

  ShiftAmt -= i;
  bool isLE = DAG.getDataLayout().isLittleEndian();

  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
        return -1;
  } else if (ShuffleKind == 1) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
        return -1;
  } else
    return -1;

  if (isLE)
    ShiftAmt = 16 - ShiftAmt;

  return ShiftAmt;
}

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.
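/// For illustration: with EltSize == 4, the byte mask
/// <4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7> splats word element 1 and is
/// accepted, whereas a mask starting at byte 5 is rejected because it does
/// not begin on a word-aligned element boundary.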
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
  assert(N->getValueType(0) == MVT::v16i8 &&
         (EltSize == 1 || EltSize == 2 || EltSize == 4) &&
         "Unexpected vector type or element size!");

  // The consecutive indices need to specify an element, not part of two
  // different elements.  So abandon ship early if this isn't the case.
  if (N->getMaskElt(0) % EltSize != 0)
    return false;

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = N->getMaskElt(0);

  // FIXME: Handle UNDEF elements too!
  if (ElementBase >= 16)
    return false;

  // Check that the indices are consecutive, in the case of a multi-byte
  // element splatted with a v16i8 mask.
  for (unsigned i = 1; i != EltSize; ++i)
    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
      return false;

  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getMaskElt(i) < 0) continue;
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getMaskElt(i+j) != N->getMaskElt(j))
        return false;
  }
  return true;
}

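/// isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be matched as a
/// word insertion (xxinsertw). On success the out-parameters describe how to
/// build it: ShiftElts is the number of word elements to rotate the source
/// vector by, InsertAtByte is the byte offset at which the word lands in the
/// target vector, and Swap indicates that the two input operands must be
/// exchanged first.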
bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                          unsigned &InsertAtByte, bool &Swap, bool IsLE) {
  // Check that the mask is shuffling words
  for (unsigned i = 0; i < 4; ++i) {
    unsigned B0 = N->getMaskElt(i*4);
    unsigned B1 = N->getMaskElt(i*4+1);
    unsigned B2 = N->getMaskElt(i*4+2);
    unsigned B3 = N->getMaskElt(i*4+3);
    if (B0 % 4)
      return false;
    if (B1 != B0+1 || B2 != B1+1 || B3 != B2+1)
      return false;
  }

  // Now we look at mask elements 0,4,8,12
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;
  unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
  unsigned BigEndianShifts[] = { 3, 0, 1, 2 };

  // Below, let H and L be arbitrary elements of the shuffle mask
  // where H is in the range [4,7] and L is in the range [0,3].
  // H, 1, 2, 3 or L, 5, 6, 7
  if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
      (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
    InsertAtByte = IsLE ? 12 : 0;
    Swap = M0 < 4;
    return true;
  }
  // 0, H, 2, 3 or 4, L, 6, 7
  if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
      (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
    InsertAtByte = IsLE ? 8 : 4;
    Swap = M1 < 4;
    return true;
  }
  // 0, 1, H, 3 or 4, 5, L, 7
  if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
      (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
    InsertAtByte = IsLE ? 4 : 8;
    Swap = M2 < 4;
    return true;
  }
  // 0, 1, 2, H or 4, 5, 6, L
  if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
      (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
    ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
    InsertAtByte = IsLE ? 0 : 12;
    Swap = M3 < 4;
    return true;
  }

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    ShiftElts = 0;
    Swap = true;
    unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
    if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 12 : 0;
      return true;
    }
    if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 8 : 4;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
      InsertAtByte = IsLE ? 4 : 8;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
      InsertAtByte = IsLE ? 0 : 12;
      return true;
    }
  }

  return false;
}

/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
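/// For example (illustrative): a splat of word element 1 (byte mask
/// <4,5,6,7, 4,5,6,7, ...>) yields immediate 1 on a big-endian target, and
/// (16/4) - 1 - 1 = 2 on a little-endian target.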
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
                                SelectionDAG &DAG) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  assert(isSplatShuffleMask(SVOp, EltSize));
  if (DAG.getDataLayout().isLittleEndian())
    return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
  else
    return SVOp->getMaskElt(0) / EltSize;
}

/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted.  The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
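/// As an illustrative example: a v8i16 build_vector with every element equal
/// to 3 matches ByteSize == 2 and returns the constant 3 (realizable as
/// "vspltish 3"), while a value such as 300 fails because it does not fit in
/// the 5-bit signed immediate field.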
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  SDValue OpVal(nullptr, 0);

  // If ByteSize of the splat is bigger than the element size of the
  // build_vector, then we have a case where we are checking for a splat where
  // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
    SDValue UniquedVals[4];
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    // See if all of the elements in the buildvector agree across.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).isUndef()) continue;
      // If the element isn't a constant, bail fully out.
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();

      if (!UniquedVals[i&(Multiple-1)].getNode())
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
        return SDValue();  // no match.
    }

    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    // either constant or undef values that are identical for each chunk.  See
    // if these chunks can form into a larger vspltis*.

    // Check to see if all of the leading entries are either 0 or -1.  If
    // neither, then this won't fit into the immediate field.
    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.

      LeadingZero &= isNullConstant(UniquedVals[i]);
      LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
    }
    // Finally, check the least significant entry.
    if (LeadingZero) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(0, SDLoc(N), MVT::i32);  // 0,0,0,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
      if (Val < 16)                                   // 0,0,0,4 -> vspltisw(4)
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
    }
    if (LeadingOnes) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
      if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
    }

    return SDValue();
  }

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).isUndef()) continue;
    if (!OpVal.getNode())
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return SDValue();
  }

  if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.

  unsigned ValSizeInBytes = EltSize;
  uint64_t Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
    Value = CN->getZExtValue();
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = FloatToBits(CN->getValueAPF().convertToFloat());
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat.  The only case that we could fit the replicated bits into our
  // immediate field for would be zero, and we prefer to use vxor for it.
  if (ValSizeInBytes < ByteSize) return SDValue();

  // If the element value is larger than the splat value, check if it consists
  // of a repeated bit pattern of size ByteSize.
  if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
    return SDValue();

  // Properly sign extend the value.
  int MaskVal = SignExtend32(Value, ByteSize * 8);

  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDValue();

  // Finally, if this value fits in a 5 bit sext field, return it
  if (SignExtend32<5>(MaskVal) == MaskVal)
    return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
  return SDValue();
}

/// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
/// amount, otherwise return -1.
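/// For illustration: on a v4f64 shuffle, the mask <2,3,4,5> selects elements
/// starting at index 2 of the concatenated inputs, so this returns 2.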
int PPC::isQVALIGNIShuffleMask(SDNode *N) {
  EVT VT = N->getValueType(0);
  if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 4) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;
  ShiftAmt -= i;

  // Check the rest of the elements to see if they are consecutive.
  for (++i; i != 4; ++i)
    if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
      return -1;

  return ShiftAmt;
}

//===----------------------------------------------------------------------===//
//  Addressing Mode Selection
//===----------------------------------------------------------------------===//

/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
/// or 64-bit immediate, and if the value can be accurately represented as a
/// sign extension from a 16-bit value.  If so, this returns true and the
/// immediate.
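/// For example (illustrative): the i32 constant -32768 (0xFFFF8000) is
/// representable, so Imm is set to -32768 and this returns true, whereas
/// 40000 does not survive the round-trip through a signed 16-bit value and
/// returns false.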
static bool isIntS16Immediate(SDNode *N, short &Imm) {
  if (!isa<ConstantSDNode>(N))
    return false;

  Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
  if (N->getValueType(0) == MVT::i32)
    return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
  else
    return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
}

static bool isIntS16Immediate(SDValue Op, short &Imm) {
  return isIntS16Immediate(Op.getNode(), Imm);
}

/// SelectAddressRegReg - Given the specified address, check to see if it
/// can be represented as an indexed [r+r] operation.  Returns false if it
/// can be more efficiently represented with [r+imm].
bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
                                            SDValue &Index,
                                            SelectionDAG &DAG) const {
  short imm = 0;
  if (N.getOpcode() == ISD::ADD) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i
    if (N.getOperand(1).getOpcode() == PPCISD::Lo)
      return false;    // r+i

    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  } else if (N.getOpcode() == ISD::OR) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i; [r+imm] can fold the immediate.

    // If this is an or of disjoint bitfields, we can codegen this as an add
    // (for better address arithmetic) if the LHS and RHS of the OR are provably
    // disjoint.
    APInt LHSKnownZero, LHSKnownOne;
    APInt RHSKnownZero, RHSKnownOne;
    DAG.computeKnownBits(N.getOperand(0),
                         LHSKnownZero, LHSKnownOne);

    if (LHSKnownZero.getBoolValue()) {
      DAG.computeKnownBits(N.getOperand(1),
                           RHSKnownZero, RHSKnownOne);
      // If all of the bits are known zero on the LHS or RHS, the add won't
      // carry.
      if (~(LHSKnownZero | RHSKnownZero) == 0) {
        Base = N.getOperand(0);
        Index = N.getOperand(1);
        return true;
      }
    }
  }

  return false;
}

// If we happen to be doing an i64 load or store into a stack slot that has
// less than a 4-byte alignment, then the frame-index elimination may need to
// use an indexed load or store instruction (because the offset may not be a
// multiple of 4). The extra register needed to hold the offset comes from the
// register scavenger, and it is possible that the scavenger will need to use
// an emergency spill slot. As a result, we need to make sure that a spill slot
// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
// stack slot.
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
  // FIXME: This does not handle the LWA case.
  if (VT != MVT::i64)
    return;

  // NOTE: We'll exclude negative FIs here, which come from argument
  // lowering, because there are no known test cases triggering this problem
  // using packed structures (or similar). We can remove this exclusion if
  // we find such a test case. The reason why this is so test-case driven is
  // because this entire 'fixup' is only to prevent crashes (from the
  // register scavenger) on not-really-valid inputs. For example, if we have:
  //   %a = alloca i1
  //   %b = bitcast i1* %a to i64*
  //   store i64 0, i64* %b
  // then the store should really be marked as 'align 1', but is not. If it
  // were marked as 'align 1' then the indexed form would have been
  // instruction-selected initially, and the problem this 'fixup' is preventing
  // won't happen regardless.
  if (FrameIdx < 0)
    return;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  unsigned Align = MFI->getObjectAlignment(FrameIdx);
  if (Align >= 4)
    return;

  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setHasNonRISpills();
}

/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
/// represented as reg+reg.  If Aligned is true, only accept displacements
/// suitable for STD and friends, i.e. multiples of 4.
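/// For illustration: (add %reg, 24) is selected as Disp = 24, Base = %reg
/// and becomes a D-form access; with Aligned == true a displacement such as
/// 26 is rejected here, since DS-form instructions like std require the
/// offset to be a multiple of 4, and the address falls back to other forms.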
bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
                                            SDValue &Base,
                                            SelectionDAG &DAG,
                                            bool Aligned) const {
  // FIXME dl should come from parent load or store, not from address
  SDLoc dl(N);
  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Disp, Base, DAG))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!Aligned || (imm & 3) == 0)) {
      Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
        fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      return true;  // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!Aligned || (imm & 3) == 0)) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      APInt LHSKnownZero, LHSKnownOne;
      DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);

      if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        if (FrameIndexSDNode *FI =
              dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
          Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
          fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
        } else {
          Base = N.getOperand(0);
        }
        Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.

    // If this address fits entirely in a 16-bit sext immediate field, codegen
    // this as "d, 0"
    short Imm;
    if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
      Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
      Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                             CN->getValueType(0));
      return true;
    }

    // Handle 32-bit sext immediates with LIS + addr mode.
    if ((CN->getValueType(0) == MVT::i32 ||
         (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
        (!Aligned || (CN->getZExtValue() & 3) == 0)) {
      int Addr = (int)CN->getZExtValue();

      // Otherwise, break this down into an LIS + disp.
      Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);

      Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
                                   MVT::i32);
      unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
      Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
      return true;
    }
  }

  Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
    fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
  } else
    Base = N;
  return true;      // [r+0]
}

/// SelectAddressRegRegOnly - Given the specified address, force it to be
/// represented as an indexed [r+r] operation.
bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
                                                SDValue &Index,
                                                SelectionDAG &DAG) const {
  // Check to see if we can easily represent this as an [r+r] address.  This
  // will fail if it thinks that the address is more profitably represented as
  // reg+imm, e.g. where imm = 0.
  if (SelectAddressRegReg(N, Base, Index, DAG))
    return true;

  // If the operand is an addition, always emit this as [r+r], since this is
  // better (for code size, and execution, as the memop does the add for free)
  // than emitting an explicit add.
  if (N.getOpcode() == ISD::ADD) {
    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  }

  // Otherwise, do it the hard way, using R0 as the base register.
  Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                         N.getValueType());
  Index = N;
  return true;
}

/// getPreIndexedAddressParts - Returns true if the node's address can be
/// legally represented as a pre-indexed load/store address, and if so sets
/// the base pointer, offset, and addressing mode by reference.
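/// For illustration: a load whose address is (add %reg, 16) can be selected
/// in the update form "lwzu", which loads from %reg + 16 and writes the
/// incremented address back into the base register as a side effect.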
bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                                  SDValue &Offset,
                                                  ISD::MemIndexedMode &AM,
                                                  SelectionDAG &DAG) const {
  if (DisablePPCPreinc) return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  unsigned Alignment;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    Ptr = LD->getBasePtr();
    VT = LD->getMemoryVT();
    Alignment = LD->getAlignment();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    Ptr = ST->getBasePtr();
    VT  = ST->getMemoryVT();
    Alignment = ST->getAlignment();
    isLoad = false;
  } else
    return false;

  // PowerPC doesn't have preinc load/store instructions for vectors (except
  // for QPX, which does have preinc r+r forms).
  if (VT.isVector()) {
    if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
      return false;
    } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
      AM = ISD::PRE_INC;
      return true;
    }
  }

  if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
    // Common code will reject creating a pre-inc form if the base pointer
    // is a frame index, or if N is a store and the base pointer is either
    // the same as or a predecessor of the value being stored.  Check for
    // those situations here, and try with swapped Base/Offset instead.
    bool Swap = false;

    if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
      Swap = true;
    else if (!isLoad) {
      SDValue Val = cast<StoreSDNode>(N)->getValue();
      if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
        Swap = true;
    }

    if (Swap)
      std::swap(Base, Offset);

    AM = ISD::PRE_INC;
    return true;
  }

  // LDU/STU can only handle immediates that are a multiple of 4.
  if (VT != MVT::i64) {
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false))
      return false;
  } else {
    // LDU/STU need an address with at least 4-byte alignment.
    if (Alignment < 4)
      return false;

    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true))
      return false;
  }

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
    // sext i32 to i64 when addr mode is r+i.
    if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
        LD->getExtensionType() == ISD::SEXTLOAD &&
        isa<ConstantSDNode>(Offset))
      return false;
  }

  AM = ISD::PRE_INC;
  return true;
}

//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Set HiOpFlags and LoOpFlags to the target MO flags to be used when
/// referencing labels, adding the PIC and non-lazy-pointer flags where
/// required.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
                               unsigned &HiOpFlags, unsigned &LoOpFlags,
                               const GlobalValue *GV = nullptr) {
  HiOpFlags = PPCII::MO_HA;
  LoOpFlags = PPCII::MO_LO;

  // Add the PIC flag only when generating position-independent code.
  if (IsPIC) {
    HiOpFlags |= PPCII::MO_PIC_FLAG;
    LoOpFlags |= PPCII::MO_PIC_FLAG;
  }

  // If this is a reference to a global value that requires a non-lazy-ptr, make
  // sure that instruction lowering adds it.
  if (GV && Subtarget.hasLazyResolverStub(GV)) {
    HiOpFlags |= PPCII::MO_NLP_FLAG;
    LoOpFlags |= PPCII::MO_NLP_FLAG;

    if (GV->hasHiddenVisibility()) {
      HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
      LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
    }
  }
}

static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
                             SelectionDAG &DAG) {
  SDLoc DL(HiPart);
  EVT PtrVT = HiPart.getValueType();
  SDValue Zero = DAG.getConstant(0, DL, PtrVT);

  SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
  SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);

  // With PIC, the first instruction is actually "GR+hi(&G)".
  if (isPIC)
    Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
                     DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);

  // Generate non-pic code that has direct accesses to the constant pool.
  // The address of the global is just (hi(&g)+lo(&g)).
  return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
}

static void setUsesTOCBasePtr(MachineFunction &MF) {
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setUsesTOCBasePtr();
}

static void setUsesTOCBasePtr(SelectionDAG &DAG) {
  setUsesTOCBasePtr(DAG.getMachineFunction());
}

static SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, bool Is64Bit,
                           SDValue GA) {
  EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
  SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) :
                DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);

  SDValue Ops[] = { GA, Reg };
  return DAG.getMemIntrinsicNode(
      PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
      MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0, false, true,
      false, 0);
}

SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  const Constant *C = CP->getConstVal();

  // 64-bit SVR4 ABI code is always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
    return getTOCEntry(DAG, SDLoc(CP), true, GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);

  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
                                           PPCII::MO_PIC_FLAG);
    return getTOCEntry(DAG, SDLoc(CP), false, GA);
  }

  SDValue CPIHi =
    DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
  SDValue CPILo =
    DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
  return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
}

SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);

  // 64-bit SVR4 ABI code is always position-independent.
  // The actual address of the jump table is stored in the TOC.
  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
    return getTOCEntry(DAG, SDLoc(JT), true, GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);

  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
                                        PPCII::MO_PIC_FLAG);
    return getTOCEntry(DAG, SDLoc(GA), false, GA);
  }

  SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
  SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
  return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
}

SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
  const BlockAddress *BA = BASDN->getBlockAddress();

  // 64-bit SVR4 ABI code is always position-independent.
  // The actual BlockAddress is stored in the TOC.
  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
    return getTOCEntry(DAG, SDLoc(BASDN), true, GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
  SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
  SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
  return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
}

SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  // FIXME: TLS addresses currently use medium model code sequences,
  // which is the most useful form.  Eventually support for small and
  // large models could be added if users need it, at the cost of
  // additional complexity.
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  if (DAG.getTarget().Options.EmulatedTLS)
    return LowerToTLSEmulatedModel(GA, DAG);

  SDLoc dl(GA);
  const GlobalValue *GV = GA->getGlobal();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  bool is64bit = Subtarget.isPPC64();
  const Module *M = DAG.getMachineFunction().getFunction()->getParent();
  PICLevel::Level picLevel = M->getPICLevel();

  TLSModel::Model Model = getTargetMachine().getTLSModel(GV);

  if (Model == TLSModel::LocalExec) {
    SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_TPREL_HA);
    SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_TPREL_LO);
    SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
                                     is64bit ? MVT::i64 : MVT::i32);
    SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
    return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
  }

  if (Model == TLSModel::InitialExec) {
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                                PPCII::MO_TLS);
    SDValue GOTPtr;
    if (is64bit) {
      setUsesTOCBasePtr(DAG);
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
                           PtrVT, GOTReg, TGA);
    } else
      GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
    SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
                                   PtrVT, TGA, GOTPtr);
    return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
  }

  if (Model == TLSModel::GeneralDynamic) {
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue GOTPtr;
    if (is64bit) {
      setUsesTOCBasePtr(DAG);
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
                                   GOTReg, TGA);
    } else {
      if (picLevel == PICLevel::SmallPIC)
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
      else
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
    }
    return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
                       GOTPtr, TGA, TGA);
  }

  if (Model == TLSModel::LocalDynamic) {
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue GOTPtr;
    if (is64bit) {
      setUsesTOCBasePtr(DAG);
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
                           GOTReg, TGA);
    } else {
      if (picLevel == PICLevel::SmallPIC)
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
      else
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
    }
    SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
                                  PtrVT, GOTPtr, TGA, TGA);
    SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
                                      PtrVT, TLSAddr, TGA);
    return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
  }

  llvm_unreachable("Unknown TLS model!");
}

SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  SDLoc DL(GSDN);
  const GlobalValue *GV = GSDN->getGlobal();

  // 64-bit SVR4 ABI code is always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
    return getTOCEntry(DAG, DL, true, GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);

  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
                                            GSDN->getOffset(),
                                            PPCII::MO_PIC_FLAG);
    return getTOCEntry(DAG, DL, false, GA);
  }

  SDValue GAHi =
    DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
  SDValue GALo =
    DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);

  SDValue Ptr = LowerLabelRef(GAHi, GALo, IsPIC, DAG);

  // If the global reference is actually to a non-lazy-pointer, we have to do an
  // extra load to get the address of the global.
  if (MOHiFlag & PPCII::MO_NLP_FLAG)
    Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(),
                      false, false, false, 0);
  return Ptr;
}

SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  SDLoc dl(Op);

  if (Op.getValueType() == MVT::v2i64) {
    // When the operands themselves are v2i64 values, we need to do something
    // special because VSX has no underlying comparison operations for these.
    if (Op.getOperand(0).getValueType() == MVT::v2i64) {
      // Equality can be handled by casting to the legal type for Altivec
      // comparisons, everything else needs to be expanded.
      if (CC == ISD::SETEQ || CC == ISD::SETNE) {
        return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
                 DAG.getSetCC(dl, MVT::v4i32,
                   DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
                   DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
                   CC));
      }

      return SDValue();
    }

    // We handle most of these in the usual way.
    return Op;
  }

  // If we're comparing for equality to zero, expose the fact that this is
  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
  // fold the new nodes.
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
    if (C->isNullValue() && CC == ISD::SETEQ) {
      EVT VT = Op.getOperand(0).getValueType();
      SDValue Zext = Op.getOperand(0);
      if (VT.bitsLT(MVT::i32)) {
        VT = MVT::i32;
        Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
      }
      unsigned Log2b = Log2_32(VT.getSizeInBits());
      SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
      SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
                                DAG.getConstant(Log2b, dl, MVT::i32));
      return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
    }
    // Leave comparisons against 0 and -1 alone for now, since they're usually
    // optimized.  FIXME: revisit this when we can custom lower all setcc
    // optimizations.
    if (C->isAllOnesValue() || C->isNullValue())
      return SDValue();
  }

  // If we have an integer seteq/setne, turn it into a compare against zero
  // by xor'ing the rhs with the lhs, which is faster than setting a
  // condition register, reading it back out, and masking the correct bit.  The
  // normal approach here uses sub to do this instead of xor.  Using xor exposes
  // the result to other bit-twiddling opportunities.
  EVT LHSVT = Op.getOperand(0).getValueType();
  if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    EVT VT = Op.getValueType();
    SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
                                Op.getOperand(1));
    return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
  }
  return SDValue();
}

SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  EVT VT = Node->getValueType(0);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue InChain = Node->getOperand(0);
  SDValue VAListPtr = Node->getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
  SDLoc dl(Node);

  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");

  // gpr_index
  SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    VAListPtr, MachinePointerInfo(SV), MVT::i8,
                                    false, false, false, 0);
  InChain = GprIndex.getValue(1);

  if (VT == MVT::i64) {
    // Check if GprIndex is even
    SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
                                 DAG.getConstant(1, dl, MVT::i32));
    SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
                                DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
    SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
                                          DAG.getConstant(1, dl, MVT::i32));
    // Align GprIndex to be even if it isn't
    GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
                           GprIndex);
  }

  // fpr index is 1 byte after gpr
  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                               DAG.getConstant(1, dl, MVT::i32));

  // fpr
  SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    FprPtr, MachinePointerInfo(SV), MVT::i8,
                                    false, false, false, 0);
  InChain = FprIndex.getValue(1);

  SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                       DAG.getConstant(8, dl, MVT::i32));

  SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                        DAG.getConstant(4, dl, MVT::i32));

  // areas
  SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr,
                                     MachinePointerInfo(), false, false,
                                     false, 0);
  InChain = OverflowArea.getValue(1);

  SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr,
                                    MachinePointerInfo(), false, false,
                                    false, 0);
  InChain = RegSaveArea.getValue(1);

  // select overflow_area if index >= 8
  SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
                            DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);

  // adjustment constant gpr_index * 4/8
  SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
                                    VT.isInteger() ? GprIndex : FprIndex,
                                    DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
                                                    MVT::i32));

  // OurReg = RegSaveArea + RegConstant
  SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
                               RegConstant);

  // Floating types are 32 bytes into RegSaveArea
  if (VT.isFloatingPoint())
    OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
                         DAG.getConstant(32, dl, MVT::i32));

  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
  SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
                                   VT.isInteger() ? GprIndex : FprIndex,
                                   DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
                                                   MVT::i32));

  InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
                              VT.isInteger() ? VAListPtr : FprPtr,
                              MachinePointerInfo(SV),
                              MVT::i8, false, false, 0);

  // determine if we should load from reg_save_area or overflow_area
  SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);

  // increase overflow_area by 4/8 if gpr/fpr index >= 8
  SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
                                          DAG.getConstant(VT.isInteger() ? 4 : 8,
                                          dl, MVT::i32));

  OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
                             OverflowAreaPlusN);

  InChain = DAG.getTruncStore(InChain, dl, OverflowArea,
                              OverflowAreaPtr,
                              MachinePointerInfo(),
                              MVT::i32, false, false, 0);

  return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(),
                     false, false, false, 0);
}

SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
  assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");

  // We have to copy the entire va_list struct:
  // 2*sizeof(char) + 2 bytes of padding + 2*sizeof(char*) = 12 bytes
  return DAG.getMemcpy(Op.getOperand(0), Op,
                       Op.getOperand(1), Op.getOperand(2),
                       DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true,
                       false, MachinePointerInfo(), MachinePointerInfo());
}

SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
                                                  SelectionDAG &DAG) const {
  return Op.getOperand(0);
}

SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Trmp = Op.getOperand(1); // trampoline
  SDValue FPtr = Op.getOperand(2); // nested function
  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
  SDLoc dl(Op);

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  bool isPPC64 = (PtrVT == MVT::i64);
  Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());

  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;

  Entry.Ty = IntPtrTy;
  Entry.Node = Trmp; Args.push_back(Entry);

  // TrampSize == (isPPC64 ? 48 : 40);
  Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
                               isPPC64 ? MVT::i64 : MVT::i32);
  Args.push_back(Entry);

  Entry.Node = FPtr; Args.push_back(Entry);
  Entry.Node = Nest; Args.push_back(Entry);

  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(Chain)
    .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
               DAG.getExternalSymbol("__trampoline_setup", PtrVT),
               std::move(Args));

  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
  return CallResult.second;
}
2561
2562SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
2563  MachineFunction &MF = DAG.getMachineFunction();
2564  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2565  EVT PtrVT = getPointerTy(MF.getDataLayout());
2566
2567  SDLoc dl(Op);
2568
2569  if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
2570    // vastart just stores the address of the VarArgsFrameIndex slot into the
2571    // memory location argument.
2572    SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
2573    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2574    return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
2575                        MachinePointerInfo(SV),
2576                        false, false, 0);
2577  }
2578
2579  // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
2580  // We suppose the given va_list is already allocated.
2581  //
2582  // typedef struct {
2583  //  char gpr;     /* index into the array of 8 GPRs
2584  //                 * stored in the register save area
2585  //                 * gpr=0 corresponds to r3,
2586  //                 * gpr=1 to r4, etc.
2587  //                 */
2588  //  char fpr;     /* index into the array of 8 FPRs
2589  //                 * stored in the register save area
2590  //                 * fpr=0 corresponds to f1,
2591  //                 * fpr=1 to f2, etc.
2592  //                 */
2593  //  char *overflow_arg_area;
2594  //                /* location on stack that holds
2595  //                 * the next overflow argument
2596  //                 */
2597  //  char *reg_save_area;
2598  //               /* where r3:r10 and f1:f8 (if saved)
2599  //                * are stored
2600  //                */
2601  // } va_list[1];
2602
2603  SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
2604  SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
2605  SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
2606                                            PtrVT);
2607  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
2608                                 PtrVT);
2609
2610  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
2611  SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
2612
2613  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
2614  SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
2615
2616  uint64_t FPROffset = 1;
2617  SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
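  // With 4-byte pointers this yields FrameOffset == 4, StackOffset == 3 and
  // FPROffset == 1, so the four stores below land at byte offsets 0 (gpr),
  // 1 (fpr), 4 (overflow_arg_area) and 8 (reg_save_area) of the va_list.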

  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();

  // Store first byte : number of int regs
  SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR,
                                         Op.getOperand(1),
                                         MachinePointerInfo(SV),
                                         MVT::i8, false, false, 0);
  uint64_t nextOffset = FPROffset;
  SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
                                ConstFPROffset);

  // Store second byte : number of float regs
  SDValue secondStore =
    DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
                      MachinePointerInfo(SV, nextOffset), MVT::i8,
                      false, false, 0);
  nextOffset += StackOffset;
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);

  // Store second word : arguments given on stack
  SDValue thirdStore =
    DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
                 MachinePointerInfo(SV, nextOffset),
                 false, false, 0);
  nextOffset += FrameOffset;
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);

  // Store third word : arguments given in registers
  return DAG.getStore(thirdStore, dl, FR, nextPtr,
                      MachinePointerInfo(SV, nextOffset),
                      false, false, 0);
}

#include "PPCGenCallingConv.inc"

// Function whose sole purpose is to kill compiler warnings
// stemming from unused functions included from PPCGenCallingConv.inc.
CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
  return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
}

bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                                      CCValAssign::LocInfo &LocInfo,
                                      ISD::ArgFlagsTy &ArgFlags,
                                      CCState &State) {
  return true;
}

bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
                                             MVT &LocVT,
                                             CCValAssign::LocInfo &LocInfo,
                                             ISD::ArgFlagsTy &ArgFlags,
                                             CCState &State) {
  static const MCPhysReg ArgRegs[] = {
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  const unsigned NumArgRegs = array_lengthof(ArgRegs);

  unsigned RegNum = State.getFirstUnallocated(ArgRegs);

  // Skip one register if the first unallocated register has an even register
  // number and there are still unallocated argument registers. RegNum is
  // actually an index into ArgRegs, which means we need to skip a register
  // if RegNum is odd.
  if (RegNum != NumArgRegs && RegNum % 2 == 1) {
    State.AllocateReg(ArgRegs[RegNum]);
  }
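  // For example, if r3 already holds an i32 argument, RegNum is 1 (r4);
  // r4 is skipped so that a following i64 is passed in the r5/r6 pair, as
  // the 32-bit SVR4 ABI requires.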

  // Always return false here, as this function only makes sure that the first
  // unallocated register has an odd register number and does not actually
  // allocate a register for the current argument.
  return false;
}

bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
                                               MVT &LocVT,
                                               CCValAssign::LocInfo &LocInfo,
                                               ISD::ArgFlagsTy &ArgFlags,
                                               CCState &State) {
  static const MCPhysReg ArgRegs[] = {
    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
    PPC::F8
  };

  const unsigned NumArgRegs = array_lengthof(ArgRegs);

  unsigned RegNum = State.getFirstUnallocated(ArgRegs);

  // If there is only one floating-point register left, we need to put both
  // f64 values of a split ppc_fp128 value on the stack.
  if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
    State.AllocateReg(ArgRegs[RegNum]);
  }
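  // For example, if f1 through f7 are already taken, RegNum is 7 (f8);
  // allocating f8 here forces both f64 halves of the ppc_fp128 into the
  // overflow area instead of splitting them between f8 and the stack.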

  // Always return false here, as this function only makes sure that the two
  // f64 values that a ppc_fp128 value is split into are both passed in
  // registers or both passed on the stack and does not actually allocate a
  // register for the current argument.
  return false;
}

/// FPR - The set of FP registers that should be allocated for arguments
/// on Darwin.
static const MCPhysReg FPR[] = {PPC::F1,  PPC::F2,  PPC::F3, PPC::F4, PPC::F5,
                                PPC::F6,  PPC::F7,  PPC::F8, PPC::F9, PPC::F10,
                                PPC::F11, PPC::F12, PPC::F13};

/// QFPR - The set of QPX registers that should be allocated for arguments.
static const MCPhysReg QFPR[] = {
    PPC::QF1, PPC::QF2, PPC::QF3,  PPC::QF4,  PPC::QF5,  PPC::QF6, PPC::QF7,
    PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};

/// CalculateStackSlotSize - Calculates the size reserved for this argument on
/// the stack.
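/// For example, with 8-byte pointers a 13-byte byval argument reserves
/// 16 bytes, while an f32 element of a float array passed in consecutive
/// registers stays packed at 4 bytes.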
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
                                       unsigned PtrByteSize) {
  unsigned ArgSize = ArgVT.getStoreSize();
  if (Flags.isByVal())
    ArgSize = Flags.getByValSize();

  // Round up to multiples of the pointer size, except for array members,
  // which are always packed.
  if (!Flags.isInConsecutiveRegs())
    ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;

  return ArgSize;
}

/// CalculateStackSlotAlignment - Calculates the alignment of this argument
/// on the stack.
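/// For example, a v4i32 Altivec argument gets 16-byte alignment even when
/// PtrByteSize is 8, and a byval argument with a requested 16-byte alignment
/// overrides the default pointer-size alignment.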
static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
                                            ISD::ArgFlagsTy Flags,
                                            unsigned PtrByteSize) {
  unsigned Align = PtrByteSize;

  // Altivec parameters are padded to a 16 byte boundary.
  if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
      ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
      ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
      ArgVT == MVT::v1i128)
    Align = 16;
  // QPX vector types stored in double-precision are padded to a 32 byte
  // boundary.
  else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
    Align = 32;

  // ByVal parameters are aligned as requested.
  if (Flags.isByVal()) {
    unsigned BVAlign = Flags.getByValAlign();
    if (BVAlign > PtrByteSize) {
      if (BVAlign % PtrByteSize != 0)
        llvm_unreachable(
            "ByVal alignment is not a multiple of the pointer size");

      Align = BVAlign;
    }
  }

  // Array members are always packed to their original alignment.
  if (Flags.isInConsecutiveRegs()) {
    // If the array member was split into multiple registers, the first
    // needs to be aligned to the size of the full type.  (Except for
    // ppcf128, which is only aligned as its f64 components.)
    if (Flags.isSplit() && OrigVT != MVT::ppcf128)
      Align = OrigVT.getStoreSize();
    else
      Align = ArgVT.getStoreSize();
  }

  return Align;
}

/// CalculateStackSlotUsed - Return whether this argument will use its
/// stack slot (instead of being passed in registers).  ArgOffset,
/// AvailableFPRs, and AvailableVRs must hold the current argument
/// position, and will be updated to account for this argument.
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
                                   ISD::ArgFlagsTy Flags,
                                   unsigned PtrByteSize,
                                   unsigned LinkageSize,
                                   unsigned ParamAreaSize,
                                   unsigned &ArgOffset,
                                   unsigned &AvailableFPRs,
                                   unsigned &AvailableVRs, bool HasQPX) {
  bool UseMemory = false;

  // Respect alignment of argument on the stack.
  unsigned Align =
    CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
  ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
  // If there's no space left in the argument save area, we must
  // use memory (this check also catches zero-sized arguments).
  if (ArgOffset >= LinkageSize + ParamAreaSize)
    UseMemory = true;

  // Allocate argument on the stack.
  ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
  if (Flags.isInConsecutiveRegsLast())
    ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
  // If we overran the argument save area, we must use memory
  // (this check catches arguments passed partially in memory)
  if (ArgOffset > LinkageSize + ParamAreaSize)
    UseMemory = true;

  // However, if the argument is actually passed in an FPR or a VR,
  // we don't use memory after all.
  if (!Flags.isByVal()) {
    if (ArgVT == MVT::f32 || ArgVT == MVT::f64 ||
        // QPX registers overlap with the scalar FP registers.
        (HasQPX && (ArgVT == MVT::v4f32 ||
                    ArgVT == MVT::v4f64 ||
                    ArgVT == MVT::v4i1)))
      if (AvailableFPRs > 0) {
        --AvailableFPRs;
        return false;
      }
    if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
        ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
        ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
        ArgVT == MVT::v1i128)
      if (AvailableVRs > 0) {
        --AvailableVRs;
        return false;
      }
  }

  return UseMemory;
}

/// EnsureStackAlignment - Round stack frame size up from NumBytes to
/// ensure minimum alignment required for target.
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
                                     unsigned NumBytes) {
  unsigned TargetAlign = Lowering->getStackAlignment();
  unsigned AlignMask = TargetAlign - 1;
  NumBytes = (NumBytes + AlignMask) & ~AlignMask;
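  // e.g. with a 16-byte stack alignment, NumBytes == 52 becomes
  // (52 + 15) & ~15 == 64.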
  return NumBytes;
}

SDValue PPCTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  if (Subtarget.isSVR4ABI()) {
    if (Subtarget.isPPC64())
      return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
                                         dl, DAG, InVals);
    else
      return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
                                         dl, DAG, InVals);
  } else {
    return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
                                       dl, DAG, InVals);
  }
}

SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  // 32-bit SVR4 ABI Stack Frame Layout:
  //              +-----------------------------------+
  //        +-->  |            Back chain             |
  //        |     +-----------------------------------+
  //        |     | Floating-point register save area |
  //        |     +-----------------------------------+
  //        |     |    General register save area     |
  //        |     +-----------------------------------+
  //        |     |          CR save word             |
  //        |     +-----------------------------------+
  //        |     |         VRSAVE save word          |
  //        |     +-----------------------------------+
  //        |     |         Alignment padding         |
  //        |     +-----------------------------------+
  //        |     |     Vector register save area     |
  //        |     +-----------------------------------+
  //        |     |       Local variable space        |
  //        |     +-----------------------------------+
  //        |     |        Parameter list area        |
  //        |     +-----------------------------------+
  //        |     |           LR save word            |
  //        |     +-----------------------------------+
  // SP-->  +---  |            Back chain             |
  //              +-----------------------------------+
  //
  // Specifications:
  //   System V Application Binary Interface PowerPC Processor Supplement
  //   AltiVec Technology Programming Interface Manual

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = 4;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                    *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  CCInfo.AllocateStack(LinkageSize, PtrByteSize);
  if (useSoftFloat())
    CCInfo.PreAnalyzeFormalArguments(Ins);

  CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
  CCInfo.clearWasPPCF128();

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];

    // Arguments stored in registers.
    if (VA.isRegLoc()) {
      const TargetRegisterClass *RC;
      EVT ValVT = VA.getValVT();

      switch (ValVT.getSimpleVT().SimpleTy) {
        default:
          llvm_unreachable("ValVT not supported by formal arguments Lowering");
        case MVT::i1:
        case MVT::i32:
          RC = &PPC::GPRCRegClass;
          break;
        case MVT::f32:
          if (Subtarget.hasP8Vector())
            RC = &PPC::VSSRCRegClass;
          else
            RC = &PPC::F4RCRegClass;
          break;
        case MVT::f64:
          if (Subtarget.hasVSX())
            RC = &PPC::VSFRCRegClass;
          else
            RC = &PPC::F8RCRegClass;
          break;
        case MVT::v16i8:
        case MVT::v8i16:
        case MVT::v4i32:
          RC = &PPC::VRRCRegClass;
          break;
        case MVT::v4f32:
          RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass;
          break;
        case MVT::v2f64:
        case MVT::v2i64:
          RC = &PPC::VSHRCRegClass;
          break;
        case MVT::v4f64:
          RC = &PPC::QFRCRegClass;
          break;
        case MVT::v4i1:
          RC = &PPC::QBRCRegClass;
          break;
      }

      // Transform the arguments stored in physical registers into virtual ones.
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
                                            ValVT == MVT::i1 ? MVT::i32 : ValVT);

      if (ValVT == MVT::i1)
        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);

      InVals.push_back(ArgValue);
    } else {
      // Argument stored in memory.
      assert(VA.isMemLoc());

      unsigned ArgSize = VA.getLocVT().getStoreSize();
      int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
                                      isImmutable);

      // Create load nodes to retrieve arguments from the stack.
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
                                   MachinePointerInfo(),
                                   false, false, false, 0));
    }
  }

  // Assign locations to all of the incoming aggregate by value arguments.
  // Aggregates passed by value are stored in the local variable space of the
  // caller's stack frame, right above the parameter list area.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                      ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);

  CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
  MinReservedArea = std::max(MinReservedArea, LinkageSize);

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized functions' reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  SmallVector<SDValue, 8> MemOps;

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    static const MCPhysReg GPArgRegs[] = {
      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
      PPC::R7, PPC::R8, PPC::R9, PPC::R10,
    };
    const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);

    static const MCPhysReg FPArgRegs[] = {
      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
      PPC::F8
    };
    unsigned NumFPArgRegs = array_lengthof(FPArgRegs);

    if (useSoftFloat())
      NumFPArgRegs = 0;

    FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
    FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));

    // Make room for NumGPArgRegs and NumFPArgRegs.
    int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
                NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
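    // e.g. on PPC32 with hard float this is 8 * 4 + 8 * 8 == 96 bytes.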

    FuncInfo->setVarArgsStackOffset(
      MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
                             CCInfo.getNextStackOffset(), true));

    FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // The fixed integer arguments of a variadic function are stored to the
    // VarArgsFrameIndex on the stack so that they may be loaded via va_arg.
    for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
      // Get an existing live-in vreg, or add a new one.
      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                   MachinePointerInfo(), false, false, 0);
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }

    // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
    // is set.
    // The double arguments are stored to the VarArgsFrameIndex
    // on the stack.
    for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
      // Get an existing live-in vreg, or add a new one.
      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                   MachinePointerInfo(), false, false, 0);
      MemOps.push_back(Store);
      // Increment the address by eight for the next argument to store
      SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
                                       PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}

// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
// value to MVT::i64 and then truncate to the correct register size.
SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
                                             EVT ObjectVT, SelectionDAG &DAG,
                                             SDValue ArgVal,
                                             const SDLoc &dl) const {
  if (Flags.isSExt())
    ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
                         DAG.getValueType(ObjectVT));
  else if (Flags.isZExt())
    ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
                         DAG.getValueType(ObjectVT));

  return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
}

SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  bool isELFv2ABI = Subtarget.isELFv2ABI();
  bool isLittleEndian = Subtarget.isLittleEndian();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
         "fastcc not supported on varargs functions");

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = 8;
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };
  static const MCPhysReg VSRH[] = {
    PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
    PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
  };

  const unsigned Num_GPR_Regs = array_lengthof(GPR);
  const unsigned Num_FPR_Regs = 13;
  const unsigned Num_VR_Regs  = array_lengthof(VR);
  const unsigned Num_QFPR_Regs = Num_FPR_Regs;

  // Do a first pass over the arguments to determine whether the ABI
  // guarantees that our caller has allocated the parameter save area
  // on its stack frame.  In the ELFv1 ABI, this is always the case;
  // in the ELFv2 ABI, it is true if this is a vararg function or if
  // any parameter is located in a stack slot.

  bool HasParameterArea = !isELFv2ABI || isVarArg;
  unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = Num_FPR_Regs;
  unsigned AvailableVRs = Num_VR_Regs;
  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
    if (Ins[i].Flags.isNest())
      continue;

    if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
                               PtrByteSize, LinkageSize, ParamAreaSize,
                               NumBytes, AvailableFPRs, AvailableVRs,
                               Subtarget.hasQPX()))
      HasParameterArea = true;
  }

  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.

  unsigned ArgOffset = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
  unsigned &QFPR_idx = FPR_idx;
  SmallVector<SDValue, 8> MemOps;
  Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    EVT OrigVT = Ins[ArgNo].ArgVT;
    unsigned ObjSize = ObjectVT.getStoreSize();
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
    if (Ins[ArgNo].isOrigArg()) {
      std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[ArgNo].getOrigArgIndex();
    }
    // We re-align the argument offset for each argument, except under the
    // fast calling convention, where we only do so once we know the argument
    // will actually use a stack slot.
    unsigned CurArgOffset, Align;
    auto ComputeArgOffset = [&]() {
      /* Respect alignment of argument on the stack.  */
      Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
      ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
      CurArgOffset = ArgOffset;
    };

    if (CallConv != CallingConv::Fast) {
      ComputeArgOffset();

      /* Compute GPR index associated with argument offset.  */
      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
      GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
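      // e.g. under the ELFv2 ABI (32-byte linkage area), an argument at
      // ArgOffset 48 sits in the third doubleword of the parameter save
      // area, so GPR_idx is 2 (the first two GPRs are already spoken for).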
    }

    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");

      if (CallConv == CallingConv::Fast)
        ComputeArgOffset();

      // ObjSize is the true size, ArgSize rounded up to multiple of registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Empty aggregate parameters do not take up registers.  Examples:
      //   struct { } a;
      //   union  { } b;
      //   int c[0];
      // etc.  However, we have to provide a place-holder in InVals, so
      // pretend we have an 8-byte item at the current address for that
      // purpose.
      if (!ObjSize) {
        int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
        SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
        InVals.push_back(FIN);
        continue;
      }

      // Create a stack object covering all stack doublewords occupied
      // by the argument.  If the argument is (fully or partially) on
      // the stack, or if the argument is fully in registers but the
      // caller has allocated the parameter save area anyway, we can refer
      // directly to the caller's stack frame.  Otherwise, create a
      // local copy in our own frame.
      int FI;
      if (HasParameterArea ||
          ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
        FI = MFI->CreateFixedObject(ArgSize, ArgOffset, false, true);
      else
        FI = MFI->CreateStackObject(ArgSize, Align, false);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

      // Handle aggregates smaller than 8 bytes.
      if (ObjSize < PtrByteSize) {
        // The value of the object is its address, which differs from the
        // address of the enclosing doubleword on big-endian systems.
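        // For example, a 3-byte aggregate on a big-endian target lives in
        // the last three bytes of its doubleword, so its address is the
        // doubleword address plus 5.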
        SDValue Arg = FIN;
        if (!isLittleEndian) {
          SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
          Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
        }
        InVals.push_back(Arg);

        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          SDValue Store;

          if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
            EVT ObjType = (ObjSize == 1 ? MVT::i8 :
                           (ObjSize == 2 ? MVT::i16 : MVT::i32));
            Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
                                      MachinePointerInfo(&*FuncArg), ObjType,
                                      false, false, 0);
          } else {
            // For sizes that don't fit a truncating store (3, 5, 6, 7),
            // store the whole register as-is to the parameter save area
            // slot.
            Store =
                DAG.getStore(Val.getValue(1), dl, Val, FIN,
                             MachinePointerInfo(&*FuncArg), false, false, 0);
          }

          MemOps.push_back(Store);
        }
        // Whether we copied from a register or not, advance the offset
        // into the parameter save area by a full doubleword.
        ArgOffset += PtrByteSize;
        continue;
      }

      // The value of the object is its address, which is the address of
      // its first stack doubleword.
      InVals.push_back(FIN);

      // Store whatever pieces of the object are in registers to memory.
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        if (GPR_idx == Num_GPR_Regs)
          break;

        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
        SDValue Addr = FIN;
        if (j) {
          SDValue Off = DAG.getConstant(j, dl, PtrVT);
          Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
        }
        SDValue Store =
            DAG.getStore(Val.getValue(1), dl, Val, Addr,
                         MachinePointerInfo(&*FuncArg, j), false, false, 0);
        MemOps.push_back(Store);
        ++GPR_idx;
      }
      ArgOffset += ArgSize;
      continue;
    }

    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (Flags.isNest()) {
        // The 'nest' parameter, if any, is passed in R11.
        unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

        break;
      }

      // These can be scalar arguments or elements of an integer array type
      // passed directly.  Clang may use those instead of "byval" aggregate
      // types to avoid forcing arguments to memory unnecessarily.
      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();

        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      if (CallConv != CallingConv::Fast || needsLoad)
        ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // These can be scalar arguments or elements of a float array type
      // passed directly.  The latter are used to implement ELFv2 homogenous
      // float aggregates.
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;

        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(FPR[FPR_idx],
                              Subtarget.hasP8Vector()
                                  ? &PPC::VSSRCRegClass
                                  : &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
                                                ? &PPC::VSFRCRegClass
                                                : &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++FPR_idx;
      } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
        // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
        // once we support fp <-> gpr moves.

        // This can only ever happen in the presence of f32 array types,
        // since otherwise we never run out of FPRs before running out
        // of GPRs.
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::f32) {
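          // Depending on endianness and the offset within the doubleword,
          // the f32 may sit in the most-significant half of the GPR; if so,
          // shift it down before truncating to i32.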
          if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
            ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
                                 DAG.getConstant(32, dl, MVT::i32));
          ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
        }

        ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();

        needsLoad = true;
      }

      // When passing an array of floats, the array occupies consecutive
      // space in the argument area; only round up to the next doubleword
      // at the end of the array.  Otherwise, each float takes 8 bytes.
      if (CallConv != CallingConv::Fast || needsLoad) {
        ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
        ArgOffset += ArgSize;
        if (Flags.isInConsecutiveRegsLast())
          ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
    case MVT::v2f64:
    case MVT::v2i64:
    case MVT::v1i128:
      if (!Subtarget.hasQPX()) {
      // These can be scalar arguments or elements of a vector array type
      // passed directly.  The latter are used to implement ELFv2 homogenous
      // vector aggregates.
      if (VR_idx != Num_VR_Regs) {
        unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ?
                        MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) :
                        MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++VR_idx;
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();

        needsLoad = true;
      }
      if (CallConv != CallingConv::Fast || needsLoad)
        ArgOffset += 16;
      break;
      } // not QPX

      assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
             "Invalid QPX parameter type");
      /* fall through */

    case MVT::v4f64:
    case MVT::v4i1:
      // QPX vectors are treated like their scalar floating-point subregisters
      // (except that they're larger).
      unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
      if (QFPR_idx != Num_QFPR_Regs) {
        const TargetRegisterClass *RC;
        switch (ObjectVT.getSimpleVT().SimpleTy) {
        case MVT::v4f64: RC = &PPC::QFRCRegClass; break;
        case MVT::v4f32: RC = &PPC::QSRCRegClass; break;
        default:         RC = &PPC::QBRCRegClass; break;
        }

        unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++QFPR_idx;
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();
        needsLoad = true;
      }
      if (CallConv != CallingConv::Fast || needsLoad)
        ArgOffset += Sz;
      break;
    }

    // We need to load the argument to a virtual register if we determined
    // above that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      if (ObjSize < ArgSize && !isLittleEndian)
        CurArgOffset += ArgSize - ObjSize;
      int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
                           false, false, false, 0);
    }

    InVals.push_back(ArgVal);
  }

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea;
  if (HasParameterArea)
    MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
  else
    MinReservedArea = LinkageSize;

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized functions' reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
      MFI->CreateFixedObject(PtrByteSize, Depth, true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded via va_arg.
    for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
         GPR_idx < Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                   MachinePointerInfo(), false, false, 0);
      MemOps.push_back(Store);
      // Increment the address by eight for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}

SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  bool isPPC64 = PtrVT == MVT::i64;
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = isPPC64 ? 8 : 4;
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  unsigned ArgOffset = LinkageSize;
  // Area that is at least reserved in caller of this function.
  unsigned MinReservedArea = ArgOffset;

  static const MCPhysReg GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const MCPhysReg GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
  const unsigned Num_FPR_Regs = 13;
  const unsigned Num_VR_Regs  = array_lengthof(VR);

  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;

  // In 32-bit non-varargs functions, the stack space for vectors is after the
  // stack space for non-vectors.  We do not use this space unless we have
  // too many vectors to fit in registers, something that only occurs in
  // constructed examples, but we have to walk the arglist to figure that
  // out.  For the pathological case, compute VecArgOffset as the start of
  // the vector parameter area.  Computing VecArgOffset is the entire point
  // of the following loop.
  unsigned VecArgOffset = ArgOffset;
  if (!isVarArg && !isPPC64) {
    for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
         ++ArgNo) {
      EVT ObjectVT = Ins[ArgNo].VT;
      ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;

      if (Flags.isByVal()) {
        // ObjSize is the true size, ArgSize rounded up to multiple of regs.
        unsigned ObjSize = Flags.getByValSize();
        unsigned ArgSize =
                ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
        VecArgOffset += ArgSize;
        continue;
      }

      switch(ObjectVT.getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unhandled argument type!");
      case MVT::i1:
      case MVT::i32:
      case MVT::f32:
        VecArgOffset += 4;
        break;
      case MVT::i64:  // PPC64
      case MVT::f64:
        // FIXME: We are guaranteed to be !isPPC64 at this point.
        // Does MVT::i64 apply?
        VecArgOffset += 8;
        break;
      case MVT::v4f32:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        // Nothing to do, we're only looking at non-vector args here.
        break;
      }
    }
  }
  // We've found where the vector parameter area in memory is.  Skip the
  // first 12 vector parameters; they are passed in registers (v2..v13) and
  // don't use that memory.
  VecArgOffset = ((VecArgOffset+15)/16)*16;
  VecArgOffset += 12*16;

  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.

  SmallVector<SDValue, 8> MemOps;
  unsigned nAltivecParamsAtEnd = 0;
  Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
    if (Ins[ArgNo].isOrigArg()) {
      std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[ArgNo].getOrigArgIndex();
    }
    unsigned CurArgOffset = ArgOffset;

    // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
    if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
        ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
      if (isVarArg || isPPC64) {
        MinReservedArea = ((MinReservedArea+15)/16)*16;
        MinReservedArea += CalculateStackSlotSize(ObjectVT,
                                                  Flags,
                                                  PtrByteSize);
      } else
        nAltivecParamsAtEnd++;
    } else
      // Calculate min reserved area.
      MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
                                                Flags,
                                                PtrByteSize);

    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");

      // ObjSize is the true size, ArgSize rounded up to multiple of registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Objects of size 1 and 2 are right justified, everything else is
      // left justified.  This means the memory address is adjusted forwards.
      if (ObjSize==1 || ObjSize==2) {
        CurArgOffset = CurArgOffset + (4 - ObjSize);
      }
      // The value of the object is its address.
      int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, false, true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(FIN);
      if (ObjSize==1 || ObjSize==2) {
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg;
          if (isPPC64)
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          else
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
          SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
                                            MachinePointerInfo(&*FuncArg),
                                            ObjType, false, false, 0);
          MemOps.push_back(Store);
          ++GPR_idx;
        }

        ArgOffset += PtrByteSize;

        continue;
      }
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        // Store whatever pieces of the object are in registers
        // to memory.  ArgOffset will be the address of the beginning
        // of the object.
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg;
          if (isPPC64)
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          else
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
          SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          SDValue Store =
              DAG.getStore(Val.getValue(1), dl, Val, FIN,
                           MachinePointerInfo(&*FuncArg, j), false, false, 0);
          MemOps.push_back(Store);
          ++GPR_idx;
          ArgOffset += PtrByteSize;
        } else {
          ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
          break;
        }
      }
      continue;
    }

    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i1:
    case MVT::i32:
      if (!isPPC64) {
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);

          if (ObjectVT == MVT::i1)
            ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);

          ++GPR_idx;
        } else {
          needsLoad = true;
          ArgSize = PtrByteSize;
        }
        // All int arguments reserve stack space in the Darwin ABI.
        ArgOffset += PtrByteSize;
        break;
      }
      // FALLTHROUGH
    case MVT::i64:  // PPC64
      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

        ++GPR_idx;
      } else {
        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      // All int arguments reserve stack space in the Darwin ABI.
      ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // Every 4 bytes of argument space consumes one of the GPRs available for
      // argument passing.
      if (GPR_idx != Num_GPR_Regs) {
        ++GPR_idx;
        if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
          ++GPR_idx;
      }
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;

        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++FPR_idx;
      } else {
        needsLoad = true;
      }

      // All FP arguments reserve stack space in the Darwin ABI.
      ArgOffset += isPPC64 ? 8 : ObjSize;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      // Note that vector arguments in registers don't reserve stack space,
      // except in varargs functions.
      if (VR_idx != Num_VR_Regs) {
        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        if (isVarArg) {
          while ((ArgOffset % 16) != 0) {
            ArgOffset += PtrByteSize;
            if (GPR_idx != Num_GPR_Regs)
              GPR_idx++;
          }
          ArgOffset += 16;
          GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
        }
        ++VR_idx;
      } else {
        if (!isVarArg && !isPPC64) {
          // Vectors go after all the nonvectors.
          CurArgOffset = VecArgOffset;
          VecArgOffset += 16;
        } else {
          // Vectors are aligned.
          ArgOffset = ((ArgOffset+15)/16)*16;
          CurArgOffset = ArgOffset;
          ArgOffset += 16;
        }
        needsLoad = true;
      }
      break;
    }
3837
3838    // We need to load the argument to a virtual register if we determined above
3839    // that we ran out of physical registers of the appropriate type.
3840    if (needsLoad) {
3841      int FI = MFI->CreateFixedObject(ObjSize,
3842                                      CurArgOffset + (ArgSize - ObjSize),
3843                                      isImmutable);
3844      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3845      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
3846                           false, false, false, 0);
3847    }
3848
3849    InVals.push_back(ArgVal);
3850  }
3851
3852  // Allow for Altivec parameters at the end, if needed.
3853  if (nAltivecParamsAtEnd) {
3854    MinReservedArea = ((MinReservedArea+15)/16)*16;
3855    MinReservedArea += 16*nAltivecParamsAtEnd;
3856  }
3857
3858  // Area that is at least reserved in the caller of this function.
3859  MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
3860
3861  // Set the size that is at least reserved in caller of this function.  Tail
3862  // call optimized functions' reserved stack space needs to be aligned so that
3863  // taking the difference between two stack areas will result in an aligned
3864  // stack.
3865  MinReservedArea =
3866      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3867  FuncInfo->setMinReservedArea(MinReservedArea);
3868
3869  // If the function takes variable number of arguments, make a frame index for
3870  // the start of the first vararg value... for expansion of llvm.va_start.
3871  if (isVarArg) {
3872    int Depth = ArgOffset;
3873
3874    FuncInfo->setVarArgsFrameIndex(
3875      MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
3876                             Depth, true));
3877    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3878
3879    // If this function is vararg, store any remaining integer argument regs
3880    // to their spots on the stack so that they may be loaded by dereferencing
3881    // the result of va_next.
3882    for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
3883      unsigned VReg;
3884
3885      if (isPPC64)
3886        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3887      else
3888        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
3889
3890      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3891      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3892                                   MachinePointerInfo(), false, false, 0);
3893      MemOps.push_back(Store);
3894      // Increment the address by four for the next argument to store
3895      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
3896      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3897    }
3898  }
3899
3900  if (!MemOps.empty())
3901    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3902
3903  return Chain;
3904}
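
// Note on the vararg handling above (illustrative, not ABI-normative): for a
// 32-bit function like f(int a, ...), r3 carries 'a' and the loop over the
// remaining GPRs spills r4..r10 to consecutive PtrByteSize slots starting at
// VarArgsFrameIndex, so va_arg can walk all arguments as if they had been
// passed in memory.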

/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
/// adjusted to accommodate the arguments for the tailcall.
static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
                                   unsigned ParamSize) {

  if (!isTailCall) return 0;

  PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
  unsigned CallerMinReservedArea = FI->getMinReservedArea();
  int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
  // Remember only if the new adjustment is bigger.
  if (SPDiff < FI->getTailCallSPDelta())
    FI->setTailCallSPDelta(SPDiff);

  return SPDiff;
}
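
// Worked example (hypothetical numbers): if the caller's MinReservedArea is
// 64 bytes and the tail call's ParamSize is 96 bytes, SPDiff is 64 - 96 =
// -32, meaning the stack must be grown by 32 bytes before branching to the
// callee; the most negative delta seen so far is kept in TailCallSPDelta.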

static bool isFunctionGlobalAddress(SDValue Callee);

static bool
resideInSameModule(SDValue Callee, Reloc::Model RelMod) {
  // If !G, Callee can be an external symbol.
  GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
  if (!G) return false;

  const GlobalValue *GV = G->getGlobal();

  if (GV->isDeclaration()) return false;

  switch(GV->getLinkage()) {
  default: llvm_unreachable("unknown linkage type");
  case GlobalValue::AvailableExternallyLinkage:
  case GlobalValue::ExternalWeakLinkage:
    return false;

  // A callee with weak linkage is allowed if it has hidden or protected
  // visibility.
  case GlobalValue::LinkOnceAnyLinkage:
  case GlobalValue::LinkOnceODRLinkage: // e.g. C++ inline functions
  case GlobalValue::WeakAnyLinkage:
  case GlobalValue::WeakODRLinkage:     // e.g. C++ template instantiations
    if (GV->hasDefaultVisibility())
      return false;

  case GlobalValue::ExternalLinkage:
  case GlobalValue::InternalLinkage:
  case GlobalValue::PrivateLinkage:
    break;
  }

  // With '-fPIC', a call to a default-visibility function needs a 'nop'
  // inserted after the call, regardless of whether the callee resides in the
  // same module, so we treat it as residing in a different module.
  if (RelMod == Reloc::PIC_ && GV->hasDefaultVisibility())
    return false;

  return true;
}
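
// Illustration of the PIC restriction above (standard ELF linker behavior,
// shown only as an example): with -fPIC, a call to a default-visibility
// function goes through the PLT and is followed by a TOC-restore slot,
//   bl foo        ; may be rebound to a definition in another DSO
//   nop           ; the linker may turn this into ld 2, <toc-save>(1)
// A tail call leaves no room for that restore, so such callees are treated
// as residing in a different module.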

static bool
needStackSlotPassParameters(const PPCSubtarget &Subtarget,
                            const SmallVectorImpl<ISD::OutputArg> &Outs) {
  assert(Subtarget.isSVR4ABI() && Subtarget.isPPC64());

  const unsigned PtrByteSize = 8;
  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned NumGPRs = array_lengthof(GPR);
  const unsigned NumFPRs = 13;
  const unsigned NumVRs = array_lengthof(VR);
  const unsigned ParamAreaSize = NumGPRs * PtrByteSize;

  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = NumFPRs;
  unsigned AvailableVRs = NumVRs;

  for (const ISD::OutputArg& Param : Outs) {
    if (Param.Flags.isNest()) continue;

    if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags,
                               PtrByteSize, LinkageSize, ParamAreaSize,
                               NumBytes, AvailableFPRs, AvailableVRs,
                               Subtarget.hasQPX()))
      return true;
  }
  return false;
}

static bool
hasSameArgumentList(const Function *CallerFn, ImmutableCallSite *CS) {
  if (CS->arg_size() != CallerFn->getArgumentList().size())
    return false;

  ImmutableCallSite::arg_iterator CalleeArgIter = CS->arg_begin();
  ImmutableCallSite::arg_iterator CalleeArgEnd = CS->arg_end();
  Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();

  for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
    const Value* CalleeArg = *CalleeArgIter;
    const Value* CallerArg = &(*CallerArgIter);
    if (CalleeArg == CallerArg)
      continue;

    // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
    //        tail call @callee([4 x i64] undef, [4 x i64] %b)
    //      }
    // 1st argument of callee is undef and has the same type as caller.
    if (CalleeArg->getType() == CallerArg->getType() &&
        isa<UndefValue>(CalleeArg))
      continue;

    return false;
  }

  return true;
}
bool
PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
                                    SDValue Callee,
                                    CallingConv::ID CalleeCC,
                                    ImmutableCallSite *CS,
                                    bool isVarArg,
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                    SelectionDAG& DAG) const {
  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;

  if (DisableSCO && !TailCallOpt) return false;

  // Variadic argument functions are not supported.
  if (isVarArg) return false;

  MachineFunction &MF = DAG.getMachineFunction();
  CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();

  // Tail or sibling call optimization (TCO/SCO) requires the callee and
  // caller to have the same calling convention.
  if (CallerCC != CalleeCC) return false;

  // SCO only supports the C and Fast calling conventions.
  if (CalleeCC != CallingConv::Fast && CalleeCC != CallingConv::C)
    return false;

  // Functions containing by val parameters are not supported.
  if (std::any_of(Ins.begin(), Ins.end(),
                  [](const ISD::InputArg& IA) { return IA.Flags.isByVal(); }))
    return false;

  // No TCO/SCO on indirect calls, because the caller would have to restore
  // its TOC.
  if (!isFunctionGlobalAddress(Callee) &&
      !isa<ExternalSymbolSDNode>(Callee))
    return false;

  // Check if Callee resides in the same module, because for now the PPC64
  // SVR4 ABI (ELFv1/ELFv2) doesn't allow tail calls to a symbol that resides
  // in another module.
  // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
  if (!resideInSameModule(Callee, getTargetMachine().getRelocationModel()))
    return false;

  // TCO allows altering callee ABI, so we don't have to check further.
  if (CalleeCC == CallingConv::Fast && TailCallOpt)
    return true;

  if (DisableSCO) return false;

  // If the callee uses the same argument list as the caller, we can apply
  // SCO. Otherwise, check whether the callee needs stack slots for passing
  // arguments.
  if (!hasSameArgumentList(MF.getFunction(), CS) &&
      needStackSlotPassParameters(Subtarget, Outs)) {
    return false;
  }

  return true;
}
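
// A minimal IR sketch of a call that passes the checks above (hypothetical
// names; same module, same C calling convention, hidden visibility, no
// stack-passed or byval arguments):
//   define hidden i64 @callee(i64 %x) { ... }
//   define i64 @caller(i64 %x) {
//     %r = tail call i64 @callee(i64 %x)
//     ret i64 %r
//   }
// With GuaranteedTailCallOpt disabled, this is emitted as a sibling call.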

/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
bool
PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
                                                     CallingConv::ID CalleeCC,
                                                     bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                                     SelectionDAG& DAG) const {
  if (!getTargetMachine().Options.GuaranteedTailCallOpt)
    return false;

  // Variable argument functions are not supported.
  if (isVarArg)
    return false;

  MachineFunction &MF = DAG.getMachineFunction();
  CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
  if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
    // Functions containing by val parameters are not supported.
    for (unsigned i = 0; i != Ins.size(); i++) {
      ISD::ArgFlagsTy Flags = Ins[i].Flags;
      if (Flags.isByVal()) return false;
    }

    // Non-PIC/GOT tail calls are supported.
    if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
      return true;

    // At the moment we can only do local tail calls (in same module, hidden
    // or protected) if we are generating PIC.
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
      return G->getGlobal()->hasHiddenVisibility()
          || G->getGlobal()->hasProtectedVisibility();
  }

  return false;
}
/// isBLACompatibleAddress - Return the immediate to use if the specified
/// 32-bit value is representable in the immediate field of a BxA instruction.
static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return nullptr;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      SignExtend32<26>(Addr) != Addr)
    return nullptr;  // Top 6 bits have to be sext of immediate.

  return DAG
      .getConstant(
          (int)C->getZExtValue() >> 2, SDLoc(Op),
          DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
      .getNode();
}
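
// Worked example (hypothetical address): 0x0154 is 4-byte aligned and fits
// the signed 26-bit range, so it is encoded as 0x0154 >> 2 = 0x55. A value
// such as 0x0155 (misaligned) or 0x4000000 (needs more than 26 bits) yields
// nullptr, so the absolute-address fast path is not used.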

namespace {

struct TailCallArgumentInfo {
  SDValue Arg;
  SDValue FrameIdxOp;
  int FrameIdx;

  TailCallArgumentInfo() : FrameIdx(0) {}
};

} // end anonymous namespace

/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static void StoreTailCallArgumentsToStackSlot(
    SelectionDAG &DAG, SDValue Chain,
    const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
    SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
  for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
    SDValue Arg = TailCallArgs[i].Arg;
    SDValue FIN = TailCallArgs[i].FrameIdxOp;
    int FI = TailCallArgs[i].FrameIdx;
    // Store relative to framepointer.
    MemOpChains.push_back(DAG.getStore(
        Chain, dl, Arg, FIN,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false,
        false, 0));
  }
}

/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
/// the appropriate stack slot for the tail call optimized function call.
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
                                             SDValue OldRetAddr, SDValue OldFP,
                                             int SPDiff, const SDLoc &dl) {
  if (SPDiff) {
    // Calculate the new stack slot for the return address.
    MachineFunction &MF = DAG.getMachineFunction();
    const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
    const PPCFrameLowering *FL = Subtarget.getFrameLowering();
    bool isPPC64 = Subtarget.isPPC64();
    int SlotSize = isPPC64 ? 8 : 4;
    int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
    int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
                                                          NewRetAddrLoc, true);
    EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
    SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
    Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
                         MachinePointerInfo::getFixedStack(MF, NewRetAddr),
                         false, false, 0);

    // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
    // slot as the FP is never overwritten.
    if (Subtarget.isDarwinABI()) {
      int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset();
      int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc,
                                                          true);
      SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
      Chain = DAG.getStore(
          Chain, dl, OldFP, NewFramePtrIdx,
          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), NewFPIdx),
          false, false, 0);
    }
  }
  return Chain;
}

/// CalculateTailCallArgDest - Remember the argument for later processing, and
/// calculate the position of the argument.
static void
CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
                         SDValue Arg, int SPDiff, unsigned ArgOffset,
                     SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
  int Offset = ArgOffset + SPDiff;
  uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
  int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
  EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
  SDValue FIN = DAG.getFrameIndex(FI, VT);
  TailCallArgumentInfo Info;
  Info.Arg = Arg;
  Info.FrameIdxOp = FIN;
  Info.FrameIdx = FI;
  TailCallArguments.push_back(Info);
}

/// EmitTailCallLoadFPAndRetAddr - Emit loads of the frame pointer and return
/// address from their stack slots. Returns the chain as result and the loaded
/// frame pointers in LROpOut/FPOpOut. Used when tail calling.
SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
    SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
    SDValue &FPOpOut, const SDLoc &dl) const {
  if (SPDiff) {
    // Load the LR and FP stack slot for later adjusting.
    EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
    LROpOut = getReturnAddrFrameIndex(DAG);
    LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(),
                          false, false, false, 0);
    Chain = SDValue(LROpOut.getNode(), 1);

    // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
    // slot as the FP is never overwritten.
    if (Subtarget.isDarwinABI()) {
      FPOpOut = getFramePointerFrameIndex(DAG);
      FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(),
                            false, false, false, 0);
      Chain = SDValue(FPOpOut.getNode(), 1);
    }
  }
  return Chain;
}

/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" of size "Size".  Alignment information is
/// specified by the specific parameter attribute. The copy will be passed as
/// a byval function parameter.
/// Sometimes what we are copying is the end of a larger object, the part that
/// does not fit in registers.
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
                                         SDValue Chain, ISD::ArgFlagsTy Flags,
                                         SelectionDAG &DAG, const SDLoc &dl) {
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
                       false, false, false, MachinePointerInfo(),
                       MachinePointerInfo());
}

/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
/// tail calls.
static void LowerMemOpCallTo(
    SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
    SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
    bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
    SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
  if (!isTailCall) {
    if (isVector) {
      SDValue StackPtr;
      if (isPPC64)
        StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
      else
        StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
                           DAG.getConstant(ArgOffset, dl, PtrVT));
    }
    MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
                                       MachinePointerInfo(), false, false, 0));
  // Calculate and remember argument location.
  } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
                                  TailCallArguments);
}

static void
PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
                const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
                SDValue FPOp,
                SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
  // might overwrite each other in case of tail call optimization.
  SmallVector<SDValue, 8> MemOpChains2;
  // Do not flag preceding copytoreg stuff together with the following stuff.
  InFlag = SDValue();
  StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
                                    MemOpChains2, dl);
  if (!MemOpChains2.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);

  // Store the return address to the appropriate stack slot.
  Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);

  // Emit callseq_end just before tailcall node.
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
                             DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
  InFlag = Chain.getValue(1);
}

// Is this global address that of a function that can be called by name? (as
// opposed to something that must hold a descriptor for an indirect call).
static bool isFunctionGlobalAddress(SDValue Callee) {
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
        Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
      return false;

    return G->getGlobal()->getValueType()->isFunctionTy();
  }

  return false;
}

static unsigned
PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
            SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall,
            bool isPatchPoint, bool hasNest,
            SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
            SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
            ImmutableCallSite *CS, const PPCSubtarget &Subtarget) {

  bool isPPC64 = Subtarget.isPPC64();
  bool isSVR4ABI = Subtarget.isSVR4ABI();
  bool isELFv2ABI = Subtarget.isELFv2ABI();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Glue);    // Returns a flag for retval copy to use.

  unsigned CallOpc = PPCISD::CALL;

  bool needIndirectCall = true;
  if (!isSVR4ABI || !isPPC64)
    if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
      // If this is an absolute destination address, use the munged value.
      Callee = SDValue(Dest, 0);
      needIndirectCall = false;
    }

  // PC-relative references to external symbols should go through $stub,
  // unless we're building with the leopard linker or later, which
  // automatically synthesizes these stubs.
  const TargetMachine &TM = DAG.getTarget();
  const Module *Mod = DAG.getMachineFunction().getFunction()->getParent();
  const GlobalValue *GV = nullptr;
  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
    GV = G->getGlobal();
  bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
  bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64;

  if (isFunctionGlobalAddress(Callee)) {
    GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
    // A call to a TLS address is actually an indirect call to a
    // thread-specific pointer.
    unsigned OpFlags = 0;
    if (UsePlt)
      OpFlags = PPCII::MO_PLT;

    // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
    // every direct call is) turn it into a TargetGlobalAddress /
    // TargetExternalSymbol node so that legalize doesn't hack it.
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
                                        Callee.getValueType(), 0, OpFlags);
    needIndirectCall = false;
  }

  if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    unsigned char OpFlags = 0;

    if (UsePlt)
      OpFlags = PPCII::MO_PLT;

    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
                                         OpFlags);
    needIndirectCall = false;
  }

  if (isPatchPoint) {
    // We'll form an invalid direct call when lowering a patchpoint; the full
    // sequence for an indirect call is complicated, and many of the
    // instructions introduced might have side effects (and, thus, can't be
    // removed later). The call itself will be removed as soon as the
    // argument/return lowering is complete, so the fact that it has the wrong
    // kind of operands should not really matter.
    needIndirectCall = false;
  }

  if (needIndirectCall) {
    // Otherwise, this is an indirect call.  We have to use an MTCTR/BCTRL
    // pair to do the call, so we can't use PPCISD::CALL.
    SDValue MTCTROps[] = {Chain, Callee, InFlag};

    if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
      // Function pointers in the 64-bit SVR4 ABI do not point to the function
      // entry point, but to the function descriptor (the function entry point
      // address is part of the function descriptor though).
      // The function descriptor is a three doubleword structure with the
      // following fields: function entry point, TOC base address and
      // environment pointer.
      // Thus for a call through a function pointer, the following actions need
      // to be performed:
      //   1. Save the TOC of the caller in the TOC save area of its stack
      //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
      //   2. Load the address of the function entry point from the function
      //      descriptor.
      //   3. Load the TOC of the callee from the function descriptor into r2.
      //   4. Load the environment pointer from the function descriptor into
      //      r11.
      //   5. Branch to the function entry point address.
      //   6. On return of the callee, the TOC of the caller needs to be
      //      restored (this is done in FinishCall()).
      //
      // The loads are scheduled at the beginning of the call sequence, and the
      // register copies are flagged together to ensure that no other
      // operations can be scheduled in between. E.g. without flagging the
      // copies together, a TOC access in the caller could be scheduled between
      // the assignment of the callee TOC and the branch to the callee, which
      // results in the TOC access going through the TOC of the callee instead
      // of going through the TOC of the caller, which leads to incorrect code.
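      //
      // As a rough sketch (illustration only; the actual instructions are
      // chosen later by instruction selection), the sequence produced for
      // such a call is:
      //   ld    r12, 0(fnptr)      ; entry point from the descriptor
      //   ld    r2,  8(fnptr)      ; callee TOC
      //   ld    r11, 16(fnptr)     ; environment pointer
      //   mtctr r12
      //   bctrl
      //   ld    r2, <toc-save>(r1) ; caller TOC restored (see FinishCall())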

      // Load the address of the function entry point from the function
      // descriptor.
      SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1);
      if (LDChain.getValueType() == MVT::Glue)
        LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2);

      bool LoadsInv = Subtarget.hasInvariantFunctionDescriptors();

      MachinePointerInfo MPI(CS ? CS->getCalledValue() : nullptr);
      SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
                                        false, false, LoadsInv, 8);

      // Load environment pointer into r11.
      SDValue PtrOff = DAG.getIntPtrConstant(16, dl);
      SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
      SDValue LoadEnvPtr = DAG.getLoad(MVT::i64, dl, LDChain, AddPtr,
                                       MPI.getWithOffset(16), false, false,
                                       LoadsInv, 8);

      SDValue TOCOff = DAG.getIntPtrConstant(8, dl);
      SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
      SDValue TOCPtr = DAG.getLoad(MVT::i64, dl, LDChain, AddTOC,
                                   MPI.getWithOffset(8), false, false,
                                   LoadsInv, 8);

      setUsesTOCBasePtr(DAG);
      SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr,
                                        InFlag);
      Chain = TOCVal.getValue(0);
      InFlag = TOCVal.getValue(1);

      // If the function call has an explicit 'nest' parameter, it takes the
      // place of the environment pointer.
      if (!hasNest) {
        SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
                                          InFlag);

        Chain = EnvVal.getValue(0);
        InFlag = EnvVal.getValue(1);
      }

      MTCTROps[0] = Chain;
      MTCTROps[1] = LoadFuncPtr;
      MTCTROps[2] = InFlag;
    }

    Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
                        makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
    InFlag = Chain.getValue(1);

    NodeTys.clear();
    NodeTys.push_back(MVT::Other);
    NodeTys.push_back(MVT::Glue);
    Ops.push_back(Chain);
    CallOpc = PPCISD::BCTRL;
    Callee.setNode(nullptr);
    // Add use of X11 (holding environment pointer).
    if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest)
      Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
    // Add CTR register as callee so a bctr can be emitted later.
    if (isTailCall)
      Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
  }

  // If this is a direct call, pass the chain and the callee.
  if (Callee.getNode()) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  }
  // If this is a tail call add stack pointer delta.
  if (isTailCall)
    Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live
  // into the call.
  if (isSVR4ABI && isPPC64 && !isPatchPoint) {
    setUsesTOCBasePtr(DAG);
    Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
  }

  return CallOpc;
}

static bool isLocalCall(const SDValue &Callee) {
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    return G->getGlobal()->isStrongDefinitionForLinker();
  return false;
}

SDValue PPCTargetLowering::LowerCallResult(
    SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                    *DAG.getContext());
  CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Val = DAG.getCopyFromReg(Chain, dl,
                                     VA.getLocReg(), VA.getLocVT(), InFlag);
    Chain = Val.getValue(1);
    InFlag = Val.getValue(2);

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    case CCValAssign::ZExt:
      Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
                        DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    case CCValAssign::SExt:
      Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
                        DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}

SDValue PPCTargetLowering::FinishCall(
    CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg,
    bool isPatchPoint, bool hasNest, SelectionDAG &DAG,
    SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue InFlag,
    SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
    unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
    SmallVectorImpl<SDValue> &InVals, ImmutableCallSite *CS) const {

  std::vector<EVT> NodeTys;
  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
                                 SPDiff, isTailCall, isPatchPoint, hasNest,
                                 RegsToPass, Ops, NodeTys, CS, Subtarget);

  // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls.
  if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
    Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));

  // When performing tail call optimization the callee pops its arguments off
  // the stack. Account for this here so these bytes can be pushed back on in
  // PPCFrameLowering::eliminateCallFramePseudoInstr.
  int BytesCalleePops =
    (CallConv == CallingConv::Fast &&
     getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask =
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  if (InFlag.getNode())
    Ops.push_back(InFlag);

  // Emit tail call.
  if (isTailCall) {
    assert(((Callee.getOpcode() == ISD::Register &&
             cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
            Callee.getOpcode() == ISD::TargetExternalSymbol ||
            Callee.getOpcode() == ISD::TargetGlobalAddress ||
            isa<ConstantSDNode>(Callee)) &&
           "Expecting a global address, external symbol, absolute value or "
           "register");

    DAG.getMachineFunction().getFrameInfo()->setHasTailCall();
    return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
  }

  // Add a NOP immediately after the branch instruction when using the 64-bit
  // SVR4 ABI. At link time, if caller and callee are in a different module and
  // thus have a different TOC, the call will be replaced with a call to a stub
  // function which saves the current TOC, loads the TOC of the callee and
  // branches to the callee. The NOP will be replaced with a load instruction
  // which restores the TOC of the caller from the TOC save slot of the current
  // stack frame. If caller and callee belong to the same module (and have the
  // same TOC), the NOP will remain unchanged.
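  //
  // For illustration (typical 64-bit ELF linker behavior; the exact offset
  // comes from getTOCSaveOffset()):
  //   bl callee                     bl callee's stub
  //   nop                 ---->     ld r2, 40(r1)   ; 24(r1) under ELFv2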

  if (!isTailCall && Subtarget.isSVR4ABI() && Subtarget.isPPC64() &&
      !isPatchPoint) {
    if (CallOpc == PPCISD::BCTRL) {
      // This is a call through a function pointer.
      // Restore the caller TOC from the save area into R2.
      // See PrepareCall() for more information about calls through function
      // pointers in the 64-bit SVR4 ABI.
      // We are using a target-specific load with r2 hard coded, because the
      // result of a target-independent load would never go directly into r2,
      // since r2 is a reserved register (which prevents the register allocator
      // from allocating it), resulting in an additional register being
      // allocated and an unnecessary move instruction being generated.
      CallOpc = PPCISD::BCTRL_LOAD_TOC;

      EVT PtrVT = getPointerTy(DAG.getDataLayout());
      SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
      unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
      SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
      SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);

      // The address needs to go after the chain input but before the flag (or
      // any other variadic arguments).
      Ops.insert(std::next(Ops.begin()), AddTOC);
    } else if ((CallOpc == PPCISD::CALL) &&
               (!isLocalCall(Callee) ||
                DAG.getTarget().getRelocationModel() == Reloc::PIC_))
      // Otherwise insert NOP for non-local calls.
      CallOpc = PPCISD::CALL_NOP;
  }

  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
                             DAG.getIntPtrConstant(BytesCalleePops, dl, true),
                             InFlag, dl);
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
                         Ins, dl, DAG, InVals);
}

SDValue
PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                             SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG                     = CLI.DAG;
  SDLoc &dl                             = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
  SDValue Chain                         = CLI.Chain;
  SDValue Callee                        = CLI.Callee;
  bool &isTailCall                      = CLI.IsTailCall;
  CallingConv::ID CallConv              = CLI.CallConv;
  bool isVarArg                         = CLI.IsVarArg;
  bool isPatchPoint                     = CLI.IsPatchPoint;
  ImmutableCallSite *CS                 = CLI.CS;

  if (isTailCall) {
    if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
      isTailCall =
        IsEligibleForTailCallOptimization_64SVR4(Callee, CallConv, CS,
                                                 isVarArg, Outs, Ins, DAG);
    else
      isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
                                                     Ins, DAG);
    if (isTailCall) {
      ++NumTailCalls;
      if (!getTargetMachine().Options.GuaranteedTailCallOpt)
        ++NumSiblingCalls;

      assert(isa<GlobalAddressSDNode>(Callee) &&
             "Callee should be an llvm::Function object.");
      DEBUG(
        const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
        const unsigned Width = 80 - strlen("TCO caller: ")
                                  - strlen(", callee linkage: 0, 0");
        dbgs() << "TCO caller: "
               << left_justify(DAG.getMachineFunction().getName(), Width)
               << ", callee linkage: "
               << GV->getVisibility() << ", " << GV->getLinkage() << "\n"
      );
    }
  }

  if (!isTailCall && CS && CS->isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  if (Subtarget.isSVR4ABI()) {
    if (Subtarget.isPPC64())
      return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
                              isTailCall, isPatchPoint, Outs, OutVals, Ins,
                              dl, DAG, InVals, CS);
    else
      return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
                              isTailCall, isPatchPoint, Outs, OutVals, Ins,
                              dl, DAG, InVals, CS);
  }

  return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
                          isTailCall, isPatchPoint, Outs, OutVals, Ins,
                          dl, DAG, InVals, CS);
}

SDValue PPCTargetLowering::LowerCall_32SVR4(
    SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
    bool isTailCall, bool isPatchPoint,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    ImmutableCallSite *CS) const {
  // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
  // of the 32-bit SVR4 ABI stack frame layout.

  assert((CallConv == CallingConv::C ||
          CallConv == CallingConv::Fast) && "Unknown calling convention!");

  unsigned PtrByteSize = 4;

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence, the frame pointer will be used for dynamic
  // stack allocation and for restoring the caller's stack pointer in this
  // function's epilogue. This is done because by tail calling, the called
  // function might overwrite the value in this function's (MF) stack pointer
  // stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, parameter list area and the part of the local variable space which
  // contains copies of aggregates which are passed by value.

  // Assign locations to all of the outgoing arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  PPCCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
                       PtrByteSize);
  if (useSoftFloat())
    CCInfo.PreAnalyzeCallOperands(Outs);

  if (isVarArg) {
    // Handle fixed and variable vector arguments differently.
    // Fixed vector arguments go into registers as long as registers are
    // available. Variable vector arguments always go into memory.
    unsigned NumArgs = Outs.size();

    for (unsigned i = 0; i != NumArgs; ++i) {
      MVT ArgVT = Outs[i].VT;
      ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
      bool Result;

      if (Outs[i].IsFixed) {
        Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
                               CCInfo);
      } else {
        Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
                                      ArgFlags, CCInfo);
      }

      if (Result) {
#ifndef NDEBUG
        errs() << "Call operand #" << i << " has unhandled type "
               << EVT(ArgVT).getEVTString() << "\n";
#endif
        llvm_unreachable(nullptr);
      }
    }
  } else {
    // All arguments are treated the same.
    CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
  }
  CCInfo.clearWasPPCF128();

  // Assign locations to all of the outgoing aggregate by value arguments.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, isVarArg, MF, ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);

  CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);

  // Size of the linkage area, parameter list area and the part of the local
  // variable space where copies of aggregates which are passed by value are
  // stored.
  unsigned NumBytes = CCByValInfo.getNextStackOffset();

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass.
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
                               dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so they can be moved somewhere
  // else later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
  SmallVector<SDValue, 8> MemOpChains;

  bool seenFloatArg = false;
  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, j = 0, e = ArgLocs.size();
       i != e;
       ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    if (Flags.isByVal()) {
      // Argument is an aggregate which is passed by value, thus we need to
      // create a copy of it in the local variable space of the current stack
      // frame (which is the stack frame of the caller) and pass the address of
      // this copy to the callee.
      assert((j < ByValArgLocs.size()) && "Index out of bounds!");
      CCValAssign &ByValVA = ByValArgLocs[j++];
      assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");

      // Memory reserved in the local variable space of the caller's stack
      // frame.
      unsigned LocMemOffset = ByValVA.getLocMemOffset();

      SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
      PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
                           StackPtr, PtrOff);

      // Create a copy of the argument in the local area of the current
      // stack frame.
      SDValue MemcpyCall =
        CreateCopyOfByValArgument(Arg, PtrOff,
                                  CallSeqStart.getNode()->getOperand(0),
                                  Flags, DAG, dl);

      // This must go outside the CALLSEQ_START..END.
      SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
                           CallSeqStart.getNode()->getOperand(1),
                           SDLoc(MemcpyCall));
      DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
                             NewCallSeqStart.getNode());
      Chain = CallSeqStart = NewCallSeqStart;

      // Pass the address of the aggregate copy on the stack either in a
      // physical register or in the parameter list area of the current stack
      // frame to the callee.
      Arg = PtrOff;
    }

    if (VA.isRegLoc()) {
      if (Arg.getValueType() == MVT::i1)
        Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg);

      seenFloatArg |= VA.getLocVT().isFloatingPoint();
      // Put argument in a physical register.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      // Put argument in the parameter list area of the current stack frame.
      assert(VA.isMemLoc());
      unsigned LocMemOffset = VA.getLocMemOffset();

      if (!isTailCall) {
        SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
        PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
                             StackPtr, PtrOff);

        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
                                           MachinePointerInfo(),
                                           false, false, 0));
      } else {
        // Calculate and remember argument location.
        CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
                                 TailCallArguments);
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  // Set CR bit 6 to true if this is a vararg call with floating args passed in
  // registers.
  if (isVarArg) {
    SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue Ops[] = { Chain, InFlag };

    Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
                        dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));

    InFlag = Chain.getValue(1);
  }
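
  // Example of the effect (32-bit SVR4 vararg convention, illustration only):
  // for a call like printf("%f", x), 'x' is passed in an FPR, so CR bit 6 is
  // set via PPCISD::CR6SET to tell the callee's va_start that FP argument
  // registers were used; a vararg call with no FP register arguments emits
  // PPCISD::CR6UNSET instead.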

  if (isTailCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
                    TailCallArguments);

  return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
                    /* unused except on PPC64 ELFv1 */ false, DAG,
                    RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
                    NumBytes, Ins, InVals, CS);
}

// Copy an argument into memory, being careful to do this outside the
// call sequence for the call to which the argument belongs.
SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
    SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
    SelectionDAG &DAG, const SDLoc &dl) const {
  SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
                        CallSeqStart.getNode()->getOperand(0),
                        Flags, DAG, dl);
  // The MEMCPY must go outside the CALLSEQ_START..END.
  SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
                             CallSeqStart.getNode()->getOperand(1),
                             SDLoc(MemcpyCall));
  DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
                         NewCallSeqStart.getNode());
  return NewCallSeqStart;
}

SDValue PPCTargetLowering::LowerCall_64SVR4(
    SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
    bool isTailCall, bool isPatchPoint,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    ImmutableCallSite *CS) const {

  bool isELFv2ABI = Subtarget.isELFv2ABI();
  bool isLittleEndian = Subtarget.isLittleEndian();
  unsigned NumOps = Outs.size();
  bool hasNest = false;
  bool IsSibCall = false;

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  unsigned PtrByteSize = 8;

  MachineFunction &MF = DAG.getMachineFunction();

  if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
    IsSibCall = true;

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence, the frame pointer will be used for dynamic
  // stack allocation and for restoring the caller's stack pointer in this
  // function's epilogue. This is done because by tail calling, the called
  // function might overwrite the value in this function's (MF) stack pointer
  // stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
         "fastcc not supported on varargs functions");

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, and parameter passing area.  On ELFv1, the linkage area is 48 bytes
  // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
  // area is 32 bytes reserved space for [SP][CR][LR][TOC].
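  //
  // Spelled out for ELFv1 (this just restates the layout above): back chain
  // at 0(r1), CR save at 8(r1), LR save at 16(r1), two reserved doublewords
  // at 24(r1) and 32(r1), TOC save at 40(r1); the parameter save area then
  // starts at 48(r1).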
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  unsigned NumBytes = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
  unsigned &QFPR_idx = FPR_idx;

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };
  static const MCPhysReg VSRH[] = {
    PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
    PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
  };

  const unsigned NumGPRs = array_lengthof(GPR);
  const unsigned NumFPRs = 13;
  const unsigned NumVRs  = array_lengthof(VR);
  const unsigned NumQFPRs = NumFPRs;

  // When using the fast calling convention, we don't provide backing for
  // arguments that will be in registers.
  unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;

  // Add up all the space actually used.
  for (unsigned i = 0; i != NumOps; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    EVT ArgVT = Outs[i].VT;
    EVT OrigVT = Outs[i].ArgVT;

    if (Flags.isNest())
      continue;

    if (CallConv == CallingConv::Fast) {
      if (Flags.isByVal())
        NumGPRsUsed += (Flags.getByValSize()+7)/8;
      else
        switch (ArgVT.getSimpleVT().SimpleTy) {
        default: llvm_unreachable("Unexpected ValueType for argument!");
        case MVT::i1:
        case MVT::i32:
        case MVT::i64:
          if (++NumGPRsUsed <= NumGPRs)
            continue;
          break;
        case MVT::v4i32:
        case MVT::v8i16:
        case MVT::v16i8:
        case MVT::v2f64:
        case MVT::v2i64:
        case MVT::v1i128:
          if (++NumVRsUsed <= NumVRs)
            continue;
          break;
        case MVT::v4f32:
          // When using QPX, this is handled like an FP register; otherwise,
          // it is an Altivec register.
          if (Subtarget.hasQPX()) {
            if (++NumFPRsUsed <= NumFPRs)
              continue;
          } else {
            if (++NumVRsUsed <= NumVRs)
              continue;
          }
          break;
        case MVT::f32:
        case MVT::f64:
        case MVT::v4f64: // QPX
        case MVT::v4i1:  // QPX
          if (++NumFPRsUsed <= NumFPRs)
            continue;
          break;
        }
    }

    /* Respect alignment of argument on the stack.  */
    unsigned Align =
      CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
    NumBytes = ((NumBytes + Align - 1) / Align) * Align;

    NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
    if (Flags.isInConsecutiveRegsLast())
      NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
  }
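  // Example of the accounting above (assumed signature): for f(i64, double,
  // <4 x i32>) under ELFv2 (LinkageSize = 32, PtrByteSize = 8), the scalars
  // add 8 bytes each (NumBytes = 48), and the vector is aligned to 16 bytes
  // (already satisfied) and adds 16 more, giving NumBytes = 64.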

  unsigned NumBytesActuallyUsed = NumBytes;

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if it is a
  // varargs function.  Because we cannot tell if this is needed on the caller
  // side, we have to conservatively assume that it is needed.  As such, make
  // sure we have at least enough stack space for the callee to store the 8
  // GPRs.
  // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
  NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
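  // E.g., on PPC64 ELFv2 (LinkageSize = 32), this reserves at least
  // 32 + 8 * 8 = 96 bytes, even for calls that pass no stack arguments.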

  // Tail call needs the stack to be aligned.
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);

  int SPDiff = 0;

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  if (!IsSibCall)
    SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);

  // To protect arguments on the stack from being clobbered in a tail call,
  // force all the loads to happen before doing any other lowering.
  if (isTailCall)
    Chain = DAG.getStackArgumentTokenFactor(Chain);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass.
  if (!IsSibCall)
    Chain = DAG.getCALLSEQ_START(Chain,
                                 DAG.getIntPtrConstant(NumBytes, dl, true), dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so they can be moved somewhere
  // else later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);

  // Figure out which arguments are going to go in registers, and which in
  // memory.  Also, if this is a vararg function, floating point operations
  // must be stored to our stack, and loaded into integer regs as well, if
  // any integer regs are available for argument passing.
  unsigned ArgOffset = LinkageSize;

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;

  SmallVector<SDValue, 8> MemOpChains;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    EVT ArgVT = Outs[i].VT;
    EVT OrigVT = Outs[i].ArgVT;

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff;

    // We re-align the argument offset for each argument, except when using the
    // fast calling convention, where we re-align only when the argument will
    // actually use a stack slot.
    auto ComputePtrOff = [&]() {
      /* Respect alignment of argument on the stack.  */
      unsigned Align =
        CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
      ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;

      PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());

      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
    };

    if (CallConv != CallingConv::Fast) {
      ComputePtrOff();

      /* Compute GPR index associated with argument offset.  */
      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
      GPR_idx = std::min(GPR_idx, NumGPRs);
    }

    // Promote integers to 64-bit values.
    if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
    }

    // FIXME: memcpy is used way more than necessary.  Correctness first.
    // Note: "by value" is code for passing a structure by value, not
    // basic types.
    if (Flags.isByVal()) {
      // Note: Size includes alignment padding, so
      //   struct x { short a; char b; }
      // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
      // These are the proper values we need for right-justifying the
      // aggregate in a parameter register.
      unsigned Size = Flags.getByValSize();

      // An empty aggregate parameter takes up no storage and no
      // registers.
      if (Size == 0)
        continue;

      if (CallConv == CallingConv::Fast)
        ComputePtrOff();

      // All aggregates smaller than 8 bytes must be passed right-justified.
      if (Size==1 || Size==2 || Size==4) {
        EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
        if (GPR_idx != NumGPRs) {
          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
                                        MachinePointerInfo(), VT,
                                        false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

          ArgOffset += PtrByteSize;
          continue;
        }
      }

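      // Illustration (assumed 3-byte aggregate, big-endian): with no GPR
      // left, PtrByteSize - Size = 5, so the memcpy below lands in the last
      // three bytes of the 8-byte slot, keeping the value right-justified.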
      if (GPR_idx == NumGPRs && Size < 8) {
        SDValue AddPtr = PtrOff;
        if (!isLittleEndian) {
          SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
                                          PtrOff.getValueType());
          AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
        }
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);
        ArgOffset += PtrByteSize;
        continue;
      }
      // Copy entire object into memory.  There are cases where gcc-generated
      // code assumes it is there, even if it could be put entirely into
      // registers.  (This is not what the doc says.)

      // FIXME: The above statement is likely due to a misunderstanding of the
      // documents.  All arguments must be copied into the parameter area BY
      // THE CALLEE in the event that the callee takes the address of any
      // formal argument.  That has not yet been implemented.  However, it is
      // reasonable to use the stack area as a staging area for the register
      // load.

      // Skip this for small aggregates, as we will use the same slot for a
      // right-justified copy, below.
      if (Size >= 8)
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);

      // When a register is available, pass a small aggregate right-justified.
      if (Size < 8 && GPR_idx != NumGPRs) {
        // The easiest way to get this right-justified in a register
        // is to copy the structure into the rightmost portion of a
        // local variable slot, then load the whole slot into the
        // register.
        // FIXME: The memcpy seems to produce pretty awful code for
        // small aggregates, particularly for packed ones.
        // FIXME: It would be preferable to use the slot in the
        // parameter save area instead of a new local variable.
        SDValue AddPtr = PtrOff;
        if (!isLittleEndian) {
          SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
          AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
        }
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);

        // Load the slot into the register.
        SDValue Load = DAG.getLoad(PtrVT, dl, Chain, PtrOff,
                                   MachinePointerInfo(),
                                   false, false, false, 0);
        MemOpChains.push_back(Load.getValue(1));
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

        // Done with this argument.
        ArgOffset += PtrByteSize;
        continue;
      }

      // For aggregates larger than PtrByteSize, copy the pieces of the
      // object that fit into registers from the parameter save area.
      for (unsigned j=0; j<Size; j+=PtrByteSize) {
        SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
        if (GPR_idx != NumGPRs) {
          SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
                                     MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          ArgOffset += PtrByteSize;
        } else {
          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
          break;
        }
      }
      continue;
    }

    switch (Arg.getSimpleValueType().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (Flags.isNest()) {
        // The 'nest' parameter, if any, is passed in R11.
        RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
        hasNest = true;
        break;
      }

      // These can be scalar arguments or elements of an integer array type
      // passed directly.  Clang may use those instead of "byval" aggregate
      // types to avoid forcing arguments to memory unnecessarily.
      if (GPR_idx != NumGPRs) {
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
      } else {
        if (CallConv == CallingConv::Fast)
          ComputePtrOff();

        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);
        if (CallConv == CallingConv::Fast)
          ArgOffset += PtrByteSize;
      }
      if (CallConv != CallingConv::Fast)
        ArgOffset += PtrByteSize;
      break;
    case MVT::f32:
    case MVT::f64: {
      // These can be scalar arguments or elements of a float array type
      // passed directly.  The latter are used to implement ELFv2 homogeneous
      // float aggregates.

      // Named arguments go into FPRs first, and once they overflow, the
      // remaining arguments go into GPRs and then the parameter save area.
      // Unnamed arguments for vararg functions always go to GPRs and
      // then the parameter save area.  For now, put all arguments to vararg
      // routines always in both locations (FPR *and* GPR or stack slot).
      bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
      bool NeededLoad = false;

      // First load the argument into the next available FPR.
      if (FPR_idx != NumFPRs)
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

      // Next, load the argument into GPR or stack slot if needed.
      if (!NeedGPROrStack)
        ;
      else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) {
        // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
        // once we support fp <-> gpr moves.

        // In the non-vararg case, this can only ever happen in the
        // presence of f32 array types, since otherwise we never run
        // out of FPRs before running out of GPRs.
        SDValue ArgVal;

        // Double values are always passed in a single GPR.
        if (Arg.getValueType() != MVT::f32) {
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);

        // Non-array float values are extended and passed in a GPR.
        } else if (!Flags.isInConsecutiveRegs()) {
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
          ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);

        // If we have an array of floats, we collect every odd element
        // together with its predecessor into one GPR.
        } else if (ArgOffset % PtrByteSize != 0) {
          SDValue Lo, Hi;
          Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
          Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
          if (!isLittleEndian)
            std::swap(Lo, Hi);
          ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
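          // E.g., for consecutive f32 elements {a, b}, a (the earlier one)
          // ends up in the low word on little-endian; on big-endian the swap
          // above puts it in the high word, matching the in-memory order.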

        // The final element, if even, goes into the first half of a GPR.
        } else if (Flags.isInConsecutiveRegsLast()) {
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
          ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
          if (!isLittleEndian)
            ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
                                 DAG.getConstant(32, dl, MVT::i32));

        // Non-final even elements are skipped; they will be handled
        // together with the subsequent argument on the next go-around.
        } else
          ArgVal = SDValue();

        if (ArgVal.getNode())
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
      } else {
        if (CallConv == CallingConv::Fast)
          ComputePtrOff();

        // Single-precision floating-point values are mapped to the
        // second (rightmost) word of the stack doubleword.
        if (Arg.getValueType() == MVT::f32 &&
            !isLittleEndian && !Flags.isInConsecutiveRegs()) {
          SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
          PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
        }

        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);

        NeededLoad = true;
      }
      // When passing an array of floats, the array occupies consecutive
      // space in the argument area; only round up to the next doubleword
      // at the end of the array.  Otherwise, each float takes 8 bytes.
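      // E.g., a homogeneous aggregate of three floats advances ArgOffset by
      // 4 + 4 + 4 = 12 bytes and is rounded up to 16 at its last element; a
      // lone f32 still consumes a full 8-byte doubleword.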
      if (CallConv != CallingConv::Fast || NeededLoad) {
        ArgOffset += (Arg.getValueType() == MVT::f32 &&
                      Flags.isInConsecutiveRegs()) ? 4 : 8;
        if (Flags.isInConsecutiveRegsLast())
          ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      }
      break;
    }
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
    case MVT::v2f64:
    case MVT::v2i64:
    case MVT::v1i128:
      if (!Subtarget.hasQPX()) {
      // These can be scalar arguments or elements of a vector array type
      // passed directly.  The latter are used to implement ELFv2 homogeneous
      // vector aggregates.

      // For a varargs call, named arguments go into VRs or on the stack as
      // usual; unnamed arguments always go to the stack or the corresponding
      // GPRs when within range.  For now, we always put the value in both
      // locations (or even all three).
      if (isVarArg) {
        // We could elide this store in the case where the object fits
        // entirely in R registers.  Maybe later.
        SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
                                     MachinePointerInfo(), false, false, 0);
        MemOpChains.push_back(Store);
        if (VR_idx != NumVRs) {
          SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
                                     MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));

          unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
                           Arg.getSimpleValueType() == MVT::v2i64) ?
                          VSRH[VR_idx] : VR[VR_idx];
          ++VR_idx;

          RegsToPass.push_back(std::make_pair(VReg, Load));
        }
        ArgOffset += 16;
        for (unsigned i=0; i<16; i+=PtrByteSize) {
          if (GPR_idx == NumGPRs)
            break;
          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                   DAG.getConstant(i, dl, PtrVT));
          SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
        }
        break;
      }

      // Non-varargs Altivec params go into VRs or on the stack.
      if (VR_idx != NumVRs) {
        unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
                         Arg.getSimpleValueType() == MVT::v2i64) ?
                        VSRH[VR_idx] : VR[VR_idx];
        ++VR_idx;

        RegsToPass.push_back(std::make_pair(VReg, Arg));
      } else {
        if (CallConv == CallingConv::Fast)
          ComputePtrOff();

        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, isTailCall, true, MemOpChains,
                         TailCallArguments, dl);
        if (CallConv == CallingConv::Fast)
          ArgOffset += 16;
      }

      if (CallConv != CallingConv::Fast)
        ArgOffset += 16;
      break;
      } // not QPX

      assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 &&
             "Invalid QPX parameter type");

      /* fall through */
    case MVT::v4f64:
    case MVT::v4i1: {
      bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
      if (isVarArg) {
        // We could elide this store in the case where the object fits
        // entirely in R registers.  Maybe later.
        SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
                                     MachinePointerInfo(), false, false, 0);
        MemOpChains.push_back(Store);
        if (QFPR_idx != NumQFPRs) {
          SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl,
                                     Store, PtrOff, MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load));
        }
        ArgOffset += (IsF32 ? 16 : 32);
        for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) {
          if (GPR_idx == NumGPRs)
            break;
          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                   DAG.getConstant(i, dl, PtrVT));
          SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
        }
        break;
      }

      // Non-varargs QPX params go into registers or on the stack.
      if (QFPR_idx != NumQFPRs) {
        RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg));
      } else {
        if (CallConv == CallingConv::Fast)
          ComputePtrOff();

        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, isTailCall, true, MemOpChains,
                         TailCallArguments, dl);
        if (CallConv == CallingConv::Fast)
          ArgOffset += (IsF32 ? 16 : 32);
      }

      if (CallConv != CallingConv::Fast)
        ArgOffset += (IsF32 ? 16 : 32);
      break;
      }
    }
  }

  assert(NumBytesActuallyUsed == ArgOffset);
  (void)NumBytesActuallyUsed;

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // Check if this is an indirect call (MTCTR/BCTRL).
  // See PrepareCall() for more information about calls through function
  // pointers in the 64-bit SVR4 ABI.
  if (!isTailCall && !isPatchPoint &&
      !isFunctionGlobalAddress(Callee) &&
      !isa<ExternalSymbolSDNode>(Callee)) {
    // Load r2 into a virtual register and store it to the TOC save area.
    setUsesTOCBasePtr(DAG);
    SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
    // TOC save area offset.
    unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
    SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
    SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
    Chain = DAG.getStore(
        Val.getValue(1), dl, Val, AddPtr,
        MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset),
        false, false, 0);
    // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
    // This does not mean the MTCTR instruction must use R12; it's easier
    // to model this as an extra parameter, so do that.
    if (isELFv2ABI && !isPatchPoint)
      RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  if (isTailCall && !IsSibCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
                    TailCallArguments);

  return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, hasNest,
                    DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee,
                    SPDiff, NumBytes, Ins, InVals, CS);
}

SDValue PPCTargetLowering::LowerCall_Darwin(
    SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
    bool isTailCall, bool isPatchPoint,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    ImmutableCallSite *CS) const {

  unsigned NumOps = Outs.size();

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  bool isPPC64 = PtrVT == MVT::i64;
  unsigned PtrByteSize = isPPC64 ? 8 : 4;

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function (MF) as potentially containing a call that may be
  // tail-call optimized.  As a consequence, the frame pointer will be used
  // for dynamic stack allocation and for restoring the caller's stack
  // pointer in this function's epilog.  This is done because the tail-called
  // function might overwrite the value in this function's (MF) stack pointer
  // save slot at 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area and the parameter passing area.  We start with 24/48 bytes, which is
  // prereserved space for [SP][CR][LR][3 x unused].
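  // For illustration, assuming the layout above: in 32-bit mode the slots
  // are SP@0, CR@4, LR@8, reserved@12..23, so parameters start at offset 24;
  // in 64-bit mode the slots double in size and parameters start at 48.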
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  unsigned NumBytes = LinkageSize;

  // Add up all the space actually used.
  // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
  // they all go in registers, but we must reserve stack space for them for
  // possible use by the callee.  In varargs or 64-bit calls, parameters are
  // assigned stack space in order, with padding so Altivec parameters are
  // 16-byte aligned.
  unsigned nAltivecParamsAtEnd = 0;
  for (unsigned i = 0; i != NumOps; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    EVT ArgVT = Outs[i].VT;
    // Varargs Altivec parameters are padded to a 16-byte boundary.
    if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
        ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
        ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
      if (!isVarArg && !isPPC64) {
        // Non-varargs Altivec parameters go after all the non-Altivec
        // parameters; handle those later so we know how much padding we need.
        nAltivecParamsAtEnd++;
        continue;
      }
      // Varargs and 64-bit Altivec parameters are padded to a 16-byte
      // boundary.
      NumBytes = ((NumBytes+15)/16)*16;
    }
    NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
  }

  // Allow for Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    NumBytes = ((NumBytes+15)/16)*16;
    NumBytes += 16*nAltivecParamsAtEnd;
  }

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if it is a
  // varargs function.  Because we cannot tell if this is needed on the caller
  // side, we have to conservatively assume that it is needed.  As such, make
  // sure we have at least enough stack space for the callee to store the 8
  // GPRs.
  NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);

  // Tail call needs the stack to be aligned.
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);

  // To protect arguments on the stack from being clobbered in a tail call,
  // force all the loads to happen before doing any other lowering.
  if (isTailCall)
    Chain = DAG.getStackArgumentTokenFactor(Chain);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass.
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
                               dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so they can be moved somewhere
  // else later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr;
  if (isPPC64)
    StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
  else
    StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.  Also, if this is a vararg function, floating point operations
  // must be stored to our stack, and loaded into integer regs as well, if
  // any integer regs are available for argument passing.
  unsigned ArgOffset = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  static const MCPhysReg GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const MCPhysReg GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };
  const unsigned NumGPRs = array_lengthof(GPR_32);
  const unsigned NumFPRs = 13;
  const unsigned NumVRs  = array_lengthof(VR);

  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;

  SmallVector<SDValue, 8> MemOpChains;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff;

    PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());

    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    // On PPC64, promote integers to 64-bit values.
    if (isPPC64 && Arg.getValueType() == MVT::i32) {
      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
    }

    // FIXME: memcpy is used way more than necessary.  Correctness first.
    // Note: "by value" is code for passing a structure by value, not
    // basic types.
    if (Flags.isByVal()) {
      unsigned Size = Flags.getByValSize();
      // Very small objects are passed right-justified.  Everything else is
      // passed left-justified.
      if (Size==1 || Size==2) {
        EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
        if (GPR_idx != NumGPRs) {
          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
                                        MachinePointerInfo(), VT,
                                        false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

          ArgOffset += PtrByteSize;
        } else {
          SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
                                          PtrOff.getValueType());
          SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
          Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                            CallSeqStart,
                                                            Flags, DAG, dl);
          ArgOffset += PtrByteSize;
        }
        continue;
      }
      // Copy entire object into memory.  There are cases where gcc-generated
      // code assumes it is there, even if it could be put entirely into
      // registers.  (This is not what the doc says.)
      Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
                                                        CallSeqStart,
                                                        Flags, DAG, dl);

      // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
      // copy the pieces of the object that fit into registers from the
      // parameter save area.
      for (unsigned j=0; j<Size; j+=PtrByteSize) {
        SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
        if (GPR_idx != NumGPRs) {
          SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
                                     MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          ArgOffset += PtrByteSize;
        } else {
          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
          break;
        }
      }
      continue;
    }

    switch (Arg.getSimpleValueType().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (GPR_idx != NumGPRs) {
        if (Arg.getValueType() == MVT::i1)
          Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);

        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
      } else {
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);
      }
      ArgOffset += PtrByteSize;
      break;
    case MVT::f32:
    case MVT::f64:
      if (FPR_idx != NumFPRs) {
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

        if (isVarArg) {
          SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
                                       MachinePointerInfo(), false, false, 0);
          MemOpChains.push_back(Store);

          // Float varargs are always shadowed in available integer registers.
          if (GPR_idx != NumGPRs) {
            SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
                                       MachinePointerInfo(), false, false,
                                       false, 0);
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          }
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
              !isPPC64) {
            SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
            PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
            SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
                                       MachinePointerInfo(),
                                       false, false, false, 0);
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          }
        } else {
          // If we have any FPRs remaining, we may also have GPRs remaining.
          // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
          // GPRs.
          if (GPR_idx != NumGPRs)
            ++GPR_idx;
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
              !isPPC64)  // PPC64 has 64-bit GPRs, obviously :)
            ++GPR_idx;
        }
      } else
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);
      if (isPPC64)
        ArgOffset += 8;
      else
        ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (isVarArg) {
        // These go aligned on the stack, or in the corresponding R registers
        // when within range.  The Darwin PPC ABI doc claims they also go in
        // V registers; in fact gcc does this only for arguments that are
        // prototyped, not for those that match the "...".  We do it for all
        // arguments; it seems to work.
        while (ArgOffset % 16 != 0) {
          ArgOffset += PtrByteSize;
          if (GPR_idx != NumGPRs)
            GPR_idx++;
        }
        // We could elide this store in the case where the object fits
        // entirely in R registers.  Maybe later.
        PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
                             DAG.getConstant(ArgOffset, dl, PtrVT));
        SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
                                     MachinePointerInfo(), false, false, 0);
        MemOpChains.push_back(Store);
        if (VR_idx != NumVRs) {
          SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
                                     MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
        }
        ArgOffset += 16;
        for (unsigned i=0; i<16; i+=PtrByteSize) {
          if (GPR_idx == NumGPRs)
            break;
          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                   DAG.getConstant(i, dl, PtrVT));
          SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
        }
        break;
      }

      // Non-varargs Altivec params generally go in registers, but have
      // stack space allocated at the end.
      if (VR_idx != NumVRs) {
        // Doesn't have GPR space allocated.
        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
      } else if (nAltivecParamsAtEnd==0) {
        // We are emitting Altivec params in order.
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, true, MemOpChains,
                         TailCallArguments, dl);
        ArgOffset += 16;
      }
      break;
    }
  }
  // If all Altivec parameters fit in registers, as they usually do,
  // they get stack space following the non-Altivec parameters.  We
  // don't track this here because nobody below needs it.
  // If there are more Altivec parameters than fit in registers, emit
  // the stores here.
  if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
    unsigned j = 0;
    // Offset is aligned; skip the first 12 params, which go in V registers.
    ArgOffset = ((ArgOffset+15)/16)*16;
    ArgOffset += 12*16;
    for (unsigned i = 0; i != NumOps; ++i) {
      SDValue Arg = OutVals[i];
      EVT ArgType = Outs[i].VT;
      if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
          ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
        if (++j > NumVRs) {
          SDValue PtrOff;
          // We are emitting Altivec params in order.
          LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                           isPPC64, isTailCall, true, MemOpChains,
                           TailCallArguments, dl);
          ArgOffset += 16;
        }
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // On Darwin, R12 must contain the address of an indirect callee.  This does
  // not mean the MTCTR instruction must use R12; it's easier to model this as
  // an extra parameter, so do that.
  if (!isTailCall &&
      !isFunctionGlobalAddress(Callee) &&
      !isa<ExternalSymbolSDNode>(Callee) &&
      !isBLACompatibleAddress(Callee, DAG))
    RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
                                                   PPC::R12), Callee));

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  if (isTailCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
                    TailCallArguments);

  return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
                    /* unused except on PPC64 ELFv1 */ false, DAG,
                    RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
                    NumBytes, Ins, InVals, CS);
}

bool
PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
                                  MachineFunction &MF, bool isVarArg,
                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
                                  LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
  return CCInfo.CheckReturn(Outs, RetCC_PPC);
}

SDValue
PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &dl, SelectionDAG &DAG) const {

  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeReturn(Outs, RetCC_PPC);

  SDValue Flag;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Arg = OutVals[i];

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    }

    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
    Flag = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const MCPhysReg *I =
    TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
  if (I) {
    for (; *I; ++I) {

      if (PPC::G8RCRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::i64));
      else if (PPC::F8RCRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
      else if (PPC::CRRCRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::i1));
      else if (PPC::VRRCRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::Other));
      else
        llvm_unreachable("Unexpected register class in CSRsViaCopy!");
    }
  }

  RetOps[0] = Chain;  // Update chain.

  // Add the flag if we have it.
  if (Flag.getNode())
    RetOps.push_back(Flag);

  return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
}

SDValue
PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc dl(Op);

  // Get the correct type for integers.
  EVT IntVT = Op.getValueType();

  // Get the inputs.
  SDValue Chain = Op.getOperand(0);
  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
  // Build a DYNAREAOFFSET node.
  SDValue Ops[2] = {Chain, FPSIdx};
  SDVTList VTs = DAG.getVTList(IntVT);
  return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
}

SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
                                             SelectionDAG &DAG) const {
  // When we pop the dynamic allocation we need to restore the SP link.
  SDLoc dl(Op);

  // Get the correct type for pointers.
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // Construct the stack pointer operand.
  bool isPPC64 = Subtarget.isPPC64();
  unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
  SDValue StackPtr = DAG.getRegister(SP, PtrVT);

  // Get the operands for the STACKRESTORE.
  SDValue Chain = Op.getOperand(0);
  SDValue SaveSP = Op.getOperand(1);

  // Load the old link SP.
  SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr,
                                   MachinePointerInfo(),
                                   false, false, false, 0);

  // Restore the stack pointer.
  Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);

  // Store the old link SP.
  return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo(),
                      false, false, 0);
}

SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool isPPC64 = Subtarget.isPPC64();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Get the current return address save index.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int RASI = FI->getReturnAddrSaveIndex();

  // If the return address save index hasn't been defined yet.
  if (!RASI) {
    // Find out the fixed offset of the return address save area.
    int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
    // Allocate the frame index for the return address save area.
    RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64 ? 8 : 4, LROffset,
                                                false);
    // Save the result.
    FI->setReturnAddrSaveIndex(RASI);
  }
  return DAG.getFrameIndex(RASI, PtrVT);
}

SDValue
PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool isPPC64 = Subtarget.isPPC64();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Get the current frame pointer save index.  The users of this index will be
  // primarily DYNALLOC instructions.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int FPSI = FI->getFramePointerSaveIndex();

  // If the frame pointer save index hasn't been defined yet.
  if (!FPSI) {
    // Find out the fixed offset of the frame pointer save area.
    int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
    // Allocate the frame index for the frame pointer save area.
    FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64 ? 8 : 4, FPOffset,
                                                true);
    // Save the result.
    FI->setFramePointerSaveIndex(FPSI);
  }
  return DAG.getFrameIndex(FPSI, PtrVT);
}

SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                                   SelectionDAG &DAG) const {
  // Get the inputs.
  SDValue Chain = Op.getOperand(0);
  SDValue Size  = Op.getOperand(1);
  SDLoc dl(Op);

  // Get the correct type for pointers.
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  // Negate the size.
  SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
                                DAG.getConstant(0, dl, PtrVT), Size);
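  // The size is negated because the stack grows downward; the DYNALLOC node
  // built below adjusts SP by adding this negative amount.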
  // Construct a node for the frame pointer save index.
  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
  // Build a DYNALLOC node.
  SDValue Ops[3] = { Chain, NegSize, FPSIdx };
  SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
  return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
}

SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
                     DAG.getVTList(MVT::i32, MVT::Other),
                     Op.getOperand(0), Op.getOperand(1));
}

SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
                     Op.getOperand(0), Op.getOperand(1));
}

SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  if (Op.getValueType().isVector())
    return LowerVectorLoad(Op, DAG);

  assert(Op.getValueType() == MVT::i1 &&
         "Custom lowering only for i1 loads");

  // First, extending-load the 8-bit value into a pointer-sized integer, then
  // truncate to 1 bit.

  SDLoc dl(Op);
  LoadSDNode *LD = cast<LoadSDNode>(Op);

  SDValue Chain = LD->getChain();
  SDValue BasePtr = LD->getBasePtr();
  MachineMemOperand *MMO = LD->getMemOperand();

  SDValue NewLD =
      DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
                     BasePtr, MVT::i8, MMO);
  SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);

  SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
  return DAG.getMergeValues(Ops, dl);
}

SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  if (Op.getOperand(1).getValueType().isVector())
    return LowerVectorStore(Op, DAG);

  assert(Op.getOperand(1).getValueType() == MVT::i1 &&
         "Custom lowering only for i1 stores");

  // First, zero-extend to a pointer-sized integer, then use a truncating
  // store to 8 bits.

  SDLoc dl(Op);
  StoreSDNode *ST = cast<StoreSDNode>(Op);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  MachineMemOperand *MMO = ST->getMemOperand();

  Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
                      Value);
  return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
}

// FIXME: Remove this once the ANDI glue bug is fixed:
SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getValueType() == MVT::i1 &&
         "Custom lowering only for i1 results");

  SDLoc DL(Op);
  return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
                     Op.getOperand(0));
}

/// LowerSELECT_CC - Lower floating-point select_cc's into the fsel
/// instruction when possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  // Not FP? Not an fsel.
  if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
      !Op.getOperand(2).getValueType().isFloatingPoint())
    return Op;

  // We might be able to do better than this under some circumstances, but in
  // general, fsel-based lowering of select is a finite-math-only optimization.
  // For more information, see section F.3 of the 2.06 ISA specification.
  if (!DAG.getTarget().Options.NoInfsFPMath ||
      !DAG.getTarget().Options.NoNaNsFPMath)
    return Op;
  // TODO: Propagate flags from the select rather than global settings.
  SDNodeFlags Flags;
  Flags.setNoInfs(true);
  Flags.setNoNaNs(true);
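  // As a sketch of the mapping used below: fsel computes
  //   dst = (a >= 0.0) ? tv : fv
  // so, e.g., (select_cc lhs, rhs, tv, fv, setge) becomes
  //   (fsel (fsub lhs, rhs), tv, fv)
  // and the remaining orderings are covered by swapping tv/fv and/or
  // negating the comparison value.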
6290
6291  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
6292
6293  EVT ResVT = Op.getValueType();
6294  EVT CmpVT = Op.getOperand(0).getValueType();
6295  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
6296  SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
6297  SDLoc dl(Op);
6298
6299  // If the RHS of the comparison is a 0.0, we don't need to do the
6300  // subtraction at all.
6301  SDValue Sel1;
6302  if (isFloatingPointZero(RHS))
6303    switch (CC) {
6304    default: break;       // SETUO etc aren't handled by fsel.
6305    case ISD::SETNE:
6306      std::swap(TV, FV);
6307    case ISD::SETEQ:
6308      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
6309        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
6310      Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
6311      if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
6312        Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
6313      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
6314                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
6315    case ISD::SETULT:
6316    case ISD::SETLT:
6317      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
6318    case ISD::SETOGE:
6319    case ISD::SETGE:
6320      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
6321        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
6322      return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
6323    case ISD::SETUGT:
6324    case ISD::SETGT:
6325      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
6326    case ISD::SETOLE:
6327    case ISD::SETLE:
6328      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
6329        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
6330      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
6331                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
6332    }
6333
6334  SDValue Cmp;
6335  switch (CC) {
6336  default: break;       // SETUO etc aren't handled by fsel.
6337  case ISD::SETNE:
6338    std::swap(TV, FV);
  case ISD::SETEQ:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
    if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                       DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
  case ISD::SETULT:
  case ISD::SETLT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOGE:
  case ISD::SETGE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  case ISD::SETUGT:
  case ISD::SETGT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOLE:
  case ISD::SETLE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  }
  return Op;
}

void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
                                               SelectionDAG &DAG,
                                               const SDLoc &dl) const {
  assert(Op.getOperand(0).getValueType().isFloatingPoint());
  SDValue Src = Op.getOperand(0);
  if (Src.getValueType() == MVT::f32)
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);

  SDValue Tmp;
  switch (Op.getSimpleValueType().SimpleTy) {
  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
  case MVT::i32:
    Tmp = DAG.getNode(
        Op.getOpcode() == ISD::FP_TO_SINT
            ? PPCISD::FCTIWZ
            : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
        dl, MVT::f64, Src);
    break;
  case MVT::i64:
    assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
           "i64 FP_TO_UINT is supported only with FPCVT");
    Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                        PPCISD::FCTIDUZ,
                      dl, MVT::f64, Src);
    break;
  }

  // Convert the FP value to an int value through memory.
  bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
    (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
  SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
  int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
  MachinePointerInfo MPI =
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);

  // Emit a store to the stack slot.
  SDValue Chain;
  if (i32Stack) {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MMO =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
    SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
    Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
              DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
  } else
    Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
                         MPI, false, false, 0);

  // Result is a load from the stack slot.  If loading 4 bytes, make sure to
  // add in a bias on big endian.
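  // (The fcti* instructions leave the 32-bit result in the low word of the
  // f64 register, so after the 8-byte store that word sits at offset 4 on
  // big-endian systems and at offset 0 on little-endian ones.)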
  if (Op.getValueType() == MVT::i32 && !i32Stack) {
    FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
                        DAG.getConstant(4, dl, FIPtr.getValueType()));
    MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
  }

  RLI.Chain = Chain;
  RLI.Ptr = FIPtr;
  RLI.MPI = MPI;
}

/// \brief Custom lowers floating point to integer conversions to use
/// the direct move instructions available in ISA 2.07 to avoid the
/// need for load/store combinations.
SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
                                                    SelectionDAG &DAG,
                                                    const SDLoc &dl) const {
  assert(Op.getOperand(0).getValueType().isFloatingPoint());
  SDValue Src = Op.getOperand(0);

  if (Src.getValueType() == MVT::f32)
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);

  SDValue Tmp;
  switch (Op.getSimpleValueType().SimpleTy) {
  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
  case MVT::i32:
    Tmp = DAG.getNode(
        Op.getOpcode() == ISD::FP_TO_SINT
            ? PPCISD::FCTIWZ
            : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
        dl, MVT::f64, Src);
    Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp);
    break;
  case MVT::i64:
    assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
           "i64 FP_TO_UINT is supported only with FPCVT");
    Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                        PPCISD::FCTIDUZ,
                      dl, MVT::f64, Src);
    Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp);
    break;
  }
  return Tmp;
}

SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                                          const SDLoc &dl) const {
  if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
    return LowerFP_TO_INTDirectMove(Op, DAG, dl);

  ReuseLoadInfo RLI;
  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);

  return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI, false,
                     false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo,
                     RLI.Ranges);
}

// We're trying to insert a regular store, S, and then a load, L. If the
// incoming value, O, is a load, we might just be able to have our load use the
// address used by O. However, we don't know if anything else will store to
// that address before we can load from it. To prevent this situation, we need
// to insert our load, L, into the chain as a peer of O. To do this, we give L
// the same chain operand as O, we create a token factor from the chain results
// of O and L, and we replace all uses of O's chain result with that token
// factor (see spliceIntoChain below for this last part).
bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
                                            ReuseLoadInfo &RLI,
                                            SelectionDAG &DAG,
                                            ISD::LoadExtType ET) const {
  SDLoc dl(Op);
  if (ET == ISD::NON_EXTLOAD &&
      (Op.getOpcode() == ISD::FP_TO_UINT ||
       Op.getOpcode() == ISD::FP_TO_SINT) &&
      isOperationLegalOrCustom(Op.getOpcode(),
                               Op.getOperand(0).getValueType())) {

    LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
    return true;
  }

  LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
  if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
      LD->isNonTemporal())
    return false;
  if (LD->getMemoryVT() != MemVT)
    return false;

  RLI.Ptr = LD->getBasePtr();
  if (LD->isIndexed() && !LD->getOffset().isUndef()) {
    assert(LD->getAddressingMode() == ISD::PRE_INC &&
           "Non-pre-inc AM on PPC?");
    RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
                          LD->getOffset());
  }

  RLI.Chain = LD->getChain();
  RLI.MPI = LD->getPointerInfo();
  RLI.IsInvariant = LD->isInvariant();
  RLI.Alignment = LD->getAlignment();
  RLI.AAInfo = LD->getAAInfo();
  RLI.Ranges = LD->getRanges();

  RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
  return true;
}

// Given the head of the old chain, ResChain, insert a token factor containing
// it and NewResChain, and make users of ResChain now be users of that token
// factor.
void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
                                        SDValue NewResChain,
                                        SelectionDAG &DAG) const {
  if (!ResChain)
    return;

  SDLoc dl(NewResChain);

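  // Build the token factor with an undef placeholder first: if ResChain were
  // already an operand, ReplaceAllUsesOfValueWith below would also rewrite
  // the token factor's own use of ResChain and create a cycle, so the real
  // operands are patched in afterwards with UpdateNodeOperands.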
  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                           NewResChain, DAG.getUNDEF(MVT::Other));
  assert(TF.getNode() != NewResChain.getNode() &&
         "A new TF really is required here");

  DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
  DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
}

/// \brief Analyze the profitability of a direct move: prefer a plain float
/// load over an int load plus a direct move when the loaded integer value
/// has no uses other than the conversion itself.
static bool directMoveIsProfitable(const SDValue &Op) {
  SDNode *Origin = Op.getOperand(0).getNode();
  if (Origin->getOpcode() != ISD::LOAD)
    return true;

  for (SDNode::use_iterator UI = Origin->use_begin(),
                            UE = Origin->use_end();
       UI != UE; ++UI) {

    // Only look at the users of the loaded value.
    if (UI.getUse().get().getResNo() != 0)
      continue;

    if (UI->getOpcode() != ISD::SINT_TO_FP &&
        UI->getOpcode() != ISD::UINT_TO_FP)
      return true;
  }

  return false;
}

/// \brief Custom lowers integer to floating point conversions to use
/// the direct move instructions available in ISA 2.07 to avoid the
/// need for load/store combinations.
SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
                                                    SelectionDAG &DAG,
                                                    const SDLoc &dl) const {
  assert((Op.getValueType() == MVT::f32 ||
          Op.getValueType() == MVT::f64) &&
         "Invalid floating point type as target of conversion");
  assert(Subtarget.hasFPCVT() &&
         "Int to FP conversions with direct moves require FPCVT");
  SDValue FP;
  SDValue Src = Op.getOperand(0);
  bool SinglePrec = Op.getValueType() == MVT::f32;
  bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
  bool Signed = Op.getOpcode() == ISD::SINT_TO_FP;
  unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) :
                             (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU);

  if (WordInt)
    FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ,
                     dl, MVT::f64, Src);
  else
    FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src);
  FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);

  return FP;
}

SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);

  if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) {
    if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64)
      return SDValue();

    SDValue Value = Op.getOperand(0);
    // The values are now known to be -1 (false) or 1 (true). To convert this
    // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by
    // 0.5). This can be done with an fma and the 0.5 constant:
    //   (V+1.0)*0.5 = 0.5*V+0.5
    Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);

    SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);

    Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);

    if (Op.getValueType() != MVT::v4f64)
      Value = DAG.getNode(ISD::FP_ROUND, dl,
                          Op.getValueType(), Value,
                          DAG.getIntPtrConstant(1, dl));
    return Value;
  }

  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();

  if (Op.getOperand(0).getValueType() == MVT::i1)
    return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
                       DAG.getConstantFP(1.0, dl, Op.getValueType()),
                       DAG.getConstantFP(0.0, dl, Op.getValueType()));

  // If we have direct moves, we can do the entire conversion and skip the
  // store/load; without FPCVT, however, we can't do most conversions.
  if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
      Subtarget.isPPC64() && Subtarget.hasFPCVT())
    return LowerINT_TO_FPDirectMove(Op, DAG, dl);

  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                       ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
                                                            : PPCISD::FCFIDS)
                       : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
                                                            : PPCISD::FCFID);
  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                  ? MVT::f32
                  : MVT::f64;

  if (Op.getOperand(0).getValueType() == MVT::i64) {
    SDValue SINT = Op.getOperand(0);
    // When converting to single-precision, we actually need to convert
    // to double-precision first and then round to single-precision.
    // To avoid double-rounding effects during that operation, we have
    // to prepare the input operand.  Bits that might be truncated when
    // converting to double-precision are replaced by a bit that won't
    // be lost at this stage, but is below the single-precision rounding
    // position.
    //
    // However, if -enable-unsafe-fp-math is in effect, accept double
    // rounding to avoid the extra overhead.
    if (Op.getValueType() == MVT::f32 &&
        !Subtarget.hasFPCVT() &&
        !DAG.getTarget().Options.UnsafeFPMath) {

      // Twiddle input to make sure the low 11 bits are zero.  (If this
      // is the case, we are guaranteed the value will fit into the 53 bit
      // mantissa of an IEEE double-precision value without rounding.)
      // If any of those low 11 bits were not zero originally, make sure
      // bit 12 (value 2048) is set instead, so that the final rounding
      // to single-precision gets the correct result.
      SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
                                  SINT, DAG.getConstant(2047, dl, MVT::i64));
      Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
                          Round, DAG.getConstant(2047, dl, MVT::i64));
      Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
      Round = DAG.getNode(ISD::AND, dl, MVT::i64,
                          Round, DAG.getConstant(-2048, dl, MVT::i64));
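
      // For example, an input of 0x1001 has nonzero low bits, so the
      // sequence above produces 0x1800: everything above bit 11 is kept
      // and the sticky bit (value 2048) records that low bits were lost.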

      // However, we cannot use that value unconditionally: if the magnitude
      // of the input value is small, the bit-twiddling we did above might
      // end up visibly changing the output.  Fortunately, in that case, we
      // don't need to twiddle bits since the original input will convert
      // exactly to double-precision floating-point already.  Therefore,
      // construct a conditional to use the original value if the top 11
      // bits are all sign-bit copies, and use the rounded value computed
      // above otherwise.
      SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
                                 SINT, DAG.getConstant(53, dl, MVT::i32));
      Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
                         Cond, DAG.getConstant(1, dl, MVT::i64));
      Cond = DAG.getSetCC(dl, MVT::i32,
                          Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
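
      // The shift by 53 leaves only the top 11 bits; they are all sign-bit
      // copies exactly when the SRA result is 0 or -1, which the add maps
      // to 1 or 0, so the unsigned '> 1' test is true only for inputs that
      // actually need the rounding fixup.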

      SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
    }

    ReuseLoadInfo RLI;
    SDValue Bits;

    MachineFunction &MF = DAG.getMachineFunction();
    if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
      Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI, false,
                         false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo,
                         RLI.Ranges);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (Subtarget.hasLFIWAX() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (Subtarget.hasFPCVT() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (((Subtarget.hasLFIWAX() &&
                 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
                (Subtarget.hasFPCVT() &&
                 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
               SINT.getOperand(0).getValueType() == MVT::i32) {
      MachineFrameInfo *FrameInfo = MF.getFrameInfo();
      EVT PtrVT = getPointerTy(DAG.getDataLayout());

      int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store = DAG.getStore(
          DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx),
          false, false, 0);

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Store;
      RLI.MPI =
          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
      RLI.Alignment = 4;

      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
                                     PPCISD::LFIWZX : PPCISD::LFIWAX,
                                     dl, DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
    } else
      Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);

    SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);

    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
      FP = DAG.getNode(ISD::FP_ROUND, dl,
                       MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
    return FP;
  }

  assert(Op.getOperand(0).getValueType() == MVT::i32 &&
         "Unhandled INT_TO_FP type in custom expander!");
  // Since we only generate this in 64-bit mode, we can take advantage of
  // 64-bit registers.  In particular, sign extend the input value into a
  // 64-bit register with extsw, store the whole 64-bit value to the stack,
  // then lfd it and fcfid it.
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *FrameInfo = MF.getFrameInfo();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  SDValue Ld;
  if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
    ReuseLoadInfo RLI;
    bool ReusingLoad;
    if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI,
                                            DAG))) {
      int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store = DAG.getStore(
          DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx),
          false, false, 0);

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Store;
      RLI.MPI =
          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
      RLI.Alignment = 4;
    }

    MachineMemOperand *MMO =
      MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                              RLI.Alignment, RLI.AAInfo, RLI.Ranges);
    SDValue Ops[] = { RLI.Chain, RLI.Ptr };
    Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
                                   PPCISD::LFIWZX : PPCISD::LFIWAX,
                                 dl, DAG.getVTList(MVT::f64, MVT::Other),
                                 Ops, MVT::i32, MMO);
    if (ReusingLoad)
      spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
  } else {
    assert(Subtarget.isPPC64() &&
           "i32->FP without LFIWAX supported only on PPC64");

    int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

    SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
                                Op.getOperand(0));

    // STD the extended value into the stack slot.
    SDValue Store = DAG.getStore(
        DAG.getEntryNode(), dl, Ext64, FIdx,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx),
        false, false, 0);

    // Load the value as a double.
    Ld = DAG.getLoad(
        MVT::f64, dl, Store, FIdx,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx),
        false, false, false, 0);
  }

  // FCFID it and return it.
  SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
    FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
                     DAG.getIntPtrConstant(0, dl));
  return FP;
}

SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  /*
   The rounding mode is in bits 30:31 of the FPSCR, and has the following
   settings:
     00 Round to nearest
     01 Round to 0
     10 Round to +inf
     11 Round to -inf

  FLT_ROUNDS, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

  To perform the conversion, we do:
    ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
  */
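
  // Checking each rounding-mode value: 0 -> 0^1 = 1, 1 -> 1^1 = 0,
  // 2 -> 2^0 = 2, 3 -> 3^0 = 3, matching the FLT_ROUNDS encoding above.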

  MachineFunction &MF = DAG.getMachineFunction();
  EVT VT = Op.getValueType();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Save FP Control Word to register
  EVT NodeTys[] = {
    MVT::f64,    // return register
    MVT::Glue    // unused in this context
  };
  SDValue MFFSVal = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);

  // Save FP register to stack slot
  int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
  SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, MFFSVal, StackSlot,
                               MachinePointerInfo(), false, false, 0);

  // Load FP Control Word from low 32 bits of stack slot.
  SDValue Four = DAG.getConstant(4, dl, PtrVT);
  SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
  SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(),
                            false, false, false, 0);

  // Transform as necessary
  SDValue CWD1 =
    DAG.getNode(ISD::AND, dl, MVT::i32,
                CWD, DAG.getConstant(3, dl, MVT::i32));
  SDValue CWD2 =
    DAG.getNode(ISD::SRL, dl, MVT::i32,
                DAG.getNode(ISD::AND, dl, MVT::i32,
                            DAG.getNode(ISD::XOR, dl, MVT::i32,
                                        CWD, DAG.getConstant(3, dl, MVT::i32)),
                            DAG.getConstant(3, dl, MVT::i32)),
                DAG.getConstant(1, dl, MVT::i32));

  SDValue RetVal =
    DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);

  return DAG.getNode((VT.getSizeInBits() < 16 ?
                      ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
}

SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();
  SDLoc dl(Op);
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SHL!");

  // Expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
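  // (A PPC shift whose amount is in [BitWidth, 2*BitWidth) yields zero rather
  // than being undefined, so the Tmp6 term below contributes nothing when
  // Amt < BitWidth and supplies the Lo << (Amt - BitWidth) part of OutHi when
  // Amt >= BitWidth.)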
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT AmtVT = Amt.getValueType();

  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
                             DAG.getConstant(BitWidth, dl, AmtVT), Amt);
  SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
  SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                             DAG.getConstant(-BitWidth, dl, AmtVT));
  SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
  SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
  SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, dl);
}

SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  unsigned BitWidth = VT.getSizeInBits();
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SRL!");

  // Expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT AmtVT = Amt.getValueType();

  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
                             DAG.getConstant(BitWidth, dl, AmtVT), Amt);
  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                             DAG.getConstant(-BitWidth, dl, AmtVT));
  SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
  SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
  SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, dl);
}

SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  EVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SRA!");

  // Expand into a bunch of logical ops, followed by a select_cc.
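  // An oversized arithmetic shift must fill OutLo with sign bits rather than
  // zeros, so the two candidate values cannot simply be ORed together as in
  // SHL/SRL; the select_cc below picks Hi >> (Amt - BitWidth) arithmetically
  // whenever Amt - BitWidth is positive.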
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT AmtVT = Amt.getValueType();

  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
                             DAG.getConstant(BitWidth, dl, AmtVT), Amt);
  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                             DAG.getConstant(-BitWidth, dl, AmtVT));
  SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
  SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
  SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
                                  Tmp4, Tmp6, ISD::SETLE);
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, dl);
}

//===----------------------------------------------------------------------===//
// Vector related lowering.
//

/// BuildSplatI - Build a canonical splati of Val with an element size of
/// SplatSize.  Cast the result to VT.
static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
                           SelectionDAG &DAG, const SDLoc &dl) {
  assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");

  static const MVT VTys[] = { // canonical VT to use for each size.
    MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
  };

  EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];

  // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
  if (Val == -1)
    SplatSize = 1;

  EVT CanonicalVT = VTys[SplatSize-1];

  // Build a canonical splat for this value.
  return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
}

/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
                                const SDLoc &dl, EVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = Op.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, dl, MVT::i32), Op);
}

/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
                                SelectionDAG &DAG, const SDLoc &dl,
                                EVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = LHS.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
}

/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
                                SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
                                EVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = Op0.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
}

/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
/// amount.  The result has the specified value type.
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
                           SelectionDAG &DAG, const SDLoc &dl) {
  // Force LHS/RHS to be the right type.
  LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
  RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);

  int Ops[16];
  for (unsigned i = 0; i != 16; ++i)
    Ops[i] = i + Amt;
  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
  return DAG.getNode(ISD::BITCAST, dl, VT, T);
}

// If this is a case we can't handle, return null and let the default
// expansion code take care of it.  If we CAN select this case, and if it
// selects to a single instruction, return Op.  Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc dl(Op);
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");

  if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) {
    // We first build an i32 vector, load it into a QPX register,
    // then convert it to a floating-point vector and compare it
    // to a zero vector to get the boolean result.
    MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
    int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
    MachinePointerInfo PtrInfo =
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

    assert(BVN->getNumOperands() == 4 &&
      "BUILD_VECTOR for v4i1 does not have 4 operands");

    bool IsConst = true;
    for (unsigned i = 0; i < 4; ++i) {
      if (BVN->getOperand(i).isUndef()) continue;
      if (!isa<ConstantSDNode>(BVN->getOperand(i))) {
        IsConst = false;
        break;
      }
    }

    if (IsConst) {
      Constant *One =
        ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0);
      Constant *NegOne =
        ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0);

      Constant *CV[4];
      for (unsigned i = 0; i < 4; ++i) {
        if (BVN->getOperand(i).isUndef())
          CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext()));
        else if (isNullConstant(BVN->getOperand(i)))
          CV[i] = NegOne;
        else
          CV[i] = One;
      }

      Constant *CP = ConstantVector::get(CV);
      SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()),
                                          16 /* alignment */);

      SDValue Ops[] = {DAG.getEntryNode(), CPIdx};
      SDVTList VTs = DAG.getVTList({MVT::v4i1, /*chain*/ MVT::Other});
      return DAG.getMemIntrinsicNode(
          PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32,
          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
    }

    SmallVector<SDValue, 4> Stores;
    for (unsigned i = 0; i < 4; ++i) {
      if (BVN->getOperand(i).isUndef()) continue;

      unsigned Offset = 4*i;
      SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
      Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);

      unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize();
      if (StoreSize > 4) {
        Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
                                           BVN->getOperand(i), Idx,
                                           PtrInfo.getWithOffset(Offset),
                                           MVT::i32, false, false, 0));
      } else {
        SDValue StoreValue = BVN->getOperand(i);
        if (StoreSize < 4)
          StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue);

        Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl,
                                      StoreValue, Idx,
                                      PtrInfo.getWithOffset(Offset),
                                      false, false, 0));
      }
    }

    SDValue StoreChain;
    if (!Stores.empty())
      StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
    else
      StoreChain = DAG.getEntryNode();

    // Now load from v4i32 into the QPX register; this will extend it to
    // v4i64 but not yet convert it to floating point. Nevertheless, this
    // is typed as v4f64 because the QPX register's integer states are not
    // explicitly represented.

    SDValue Ops[] = {StoreChain,
                     DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32),
                     FIdx};
    SDVTList VTs = DAG.getVTList({MVT::v4f64, /*chain*/ MVT::Other});

    SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN,
      dl, VTs, Ops, MVT::v4i32, PtrInfo);
    LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
      DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32),
      LoadedVect);

    SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::v4f64);

    return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ);
  }

  // All other QPX vectors are handled by generic code.
  if (Subtarget.hasQPX())
    return SDValue();

  // Check if this is a splat of a constant value.
  APInt APSplatBits, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (!BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                            HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
      SplatBitSize > 32)
    return SDValue();

  unsigned SplatBits = APSplatBits.getZExtValue();
  unsigned SplatUndef = APSplatUndef.getZExtValue();
  unsigned SplatSize = SplatBitSize / 8;

  // First, handle single instruction cases.

  // All zeros?
  if (SplatBits == 0) {
    // Canonicalize all zero vectors to be v4i32.
    if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
      SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
      Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
    }
    return Op;
  }

  // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
  int32_t SextVal = (int32_t(SplatBits << (32-SplatBitSize)) >>
                     (32-SplatBitSize));
  if (SextVal >= -16 && SextVal <= 15)
    return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);

  // Two instruction sequences.

  // If this value is in the range [-32,30] and is even, use:
  //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
  // If this value is in the range [17,31] and is odd, use:
  //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
  // If this value is in the range [-31,-17] and is odd, use:
  //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
  // Note the last two are three-instruction sequences.
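  // For example, a splat of 30 becomes vsplti(15) + vsplti(15), and a splat
  // of 17 becomes vsplti(1) - vsplti(-16).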
  if (SextVal >= -32 && SextVal <= 31) {
    // To avoid having these optimizations undone by constant folding,
    // we convert to a pseudo that will be expanded later into one of
    // the above forms.
    SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
    EVT VT = (SplatSize == 1 ? MVT::v16i8 :
              (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
    SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
    SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
    if (VT == Op.getValueType())
      return RetVal;
    else
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
  }

  // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
  // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
  // for fneg/fabs.
  if (SplatSize == 4 && SplatBits == (0x7FFFFFFF & ~SplatUndef)) {
    // Make -1 and vspltisw -1:
    SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);

    // Make the VSLW intrinsic, computing 0x8000_0000.
    SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
                                   OnesV, DAG, dl);

    // xor by OnesV to invert it.
    Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
  }

  // Check to see if this is a wide variety of vsplti*, binop self cases.
  static const signed char SplatCsts[] = {
    -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
    -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
  };

  for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
    // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
    // cases which are ambiguous (e.g. formation of 0x8000_0000); the negative
    // constants come first in the table, so they are matched first.
    int i = SplatCsts[idx];

    // Figure out what shift amount will be used by altivec if shifted by i in
    // this splat size.
    unsigned TypeShiftAmt = i & (SplatBitSize-1);

    // vsplti + shl self.
    if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
        Intrinsic::ppc_altivec_vslw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + srl self.
    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
        Intrinsic::ppc_altivec_vsrw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + sra self.  Use an arithmetic shift here so that negative
    // splat values can match as well.
    if (SextVal == ((int)i >> TypeShiftAmt)) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
        Intrinsic::ppc_altivec_vsraw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + rol self.
    if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
                         ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
        Intrinsic::ppc_altivec_vrlw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // t = vsplti c, result = vsldoi t, t, 1
    if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
    // t = vsplti c, result = vsldoi t, t, 2
    if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
    // t = vsplti c, result = vsldoi t, t, 3
    if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
  }

  return SDValue();
}

/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle.
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
                                      SDValue RHS, SelectionDAG &DAG,
                                      const SDLoc &dl) {
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
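
  // Each perfect-shuffle table entry packs a cost in bits 31:30, an opcode
  // in bits 29:26, and two 13-bit sub-shuffle IDs. The IDs are base-9
  // encodings of the four 4-byte element selectors (digit 8 meaning undef),
  // matching the PFIndexes computation in LowerVECTOR_SHUFFLE below.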
7361
7362  enum {
7363    OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
7364    OP_VMRGHW,
7365    OP_VMRGLW,
7366    OP_VSPLTISW0,
7367    OP_VSPLTISW1,
7368    OP_VSPLTISW2,
7369    OP_VSPLTISW3,
7370    OP_VSLDOI4,
7371    OP_VSLDOI8,
7372    OP_VSLDOI12
7373  };
7374
7375  if (OpNum == OP_COPY) {
7376    if (LHSID == (1*9+2)*9+3) return LHS;
7377    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
7378    return RHS;
7379  }
7380
7381  SDValue OpLHS, OpRHS;
7382  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
7383  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
7384
7385  int ShufIdxs[16];
7386  switch (OpNum) {
7387  default: llvm_unreachable("Unknown i32 permute!");
7388  case OP_VMRGHW:
7389    ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
7390    ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
7391    ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
7392    ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
7393    break;
7394  case OP_VMRGLW:
7395    ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
7396    ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
7397    ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
7398    ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
7399    break;
7400  case OP_VSPLTISW0:
7401    for (unsigned i = 0; i != 16; ++i)
7402      ShufIdxs[i] = (i&3)+0;
7403    break;
7404  case OP_VSPLTISW1:
7405    for (unsigned i = 0; i != 16; ++i)
7406      ShufIdxs[i] = (i&3)+4;
7407    break;
7408  case OP_VSPLTISW2:
7409    for (unsigned i = 0; i != 16; ++i)
7410      ShufIdxs[i] = (i&3)+8;
7411    break;
7412  case OP_VSPLTISW3:
7413    for (unsigned i = 0; i != 16; ++i)
7414      ShufIdxs[i] = (i&3)+12;
7415    break;
7416  case OP_VSLDOI4:
7417    return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
7418  case OP_VSLDOI8:
7419    return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
7420  case OP_VSLDOI12:
7421    return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
7422  }
7423  EVT VT = OpLHS.getValueType();
7424  OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
7425  OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
7426  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
7427  return DAG.getNode(ISD::BITCAST, dl, VT, T);
7428}
7429
7430/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
7431/// is a shuffle we can handle in a single instruction, return it.  Otherwise,
7432/// return the code it can be lowered into.  Worst case, it can always be
7433/// lowered into a vperm.
7434SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
7435                                               SelectionDAG &DAG) const {
7436  SDLoc dl(Op);
7437  SDValue V1 = Op.getOperand(0);
7438  SDValue V2 = Op.getOperand(1);
7439  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
7440  EVT VT = Op.getValueType();
7441  bool isLittleEndian = Subtarget.isLittleEndian();
7442
7443  unsigned ShiftElts, InsertAtByte;
7444  bool Swap;
7445  if (Subtarget.hasP9Vector() &&
7446      PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
7447                           isLittleEndian)) {
7448    if (Swap)
7449      std::swap(V1, V2);
7450    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
7451    SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
7452    if (ShiftElts) {
7453      SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
7454                                DAG.getConstant(ShiftElts, dl, MVT::i32));
7455      SDValue Ins = DAG.getNode(PPCISD::XXINSERT, dl, MVT::v4i32, Conv1, Shl,
7456                                DAG.getConstant(InsertAtByte, dl, MVT::i32));
7457      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
7458    }
7459    SDValue Ins = DAG.getNode(PPCISD::XXINSERT, dl, MVT::v4i32, Conv1, Conv2,
7460                              DAG.getConstant(InsertAtByte, dl, MVT::i32));
7461    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
7462  }
7463
7464  if (Subtarget.hasVSX()) {
7465    if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
7466      int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);
7467      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
7468      SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
7469                                  DAG.getConstant(SplatIdx, dl, MVT::i32));
7470      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
7471    }
7472
7473    // Left shifts of 8 bytes are actually swaps. Convert accordingly.
7474    if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
7475      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
7476      SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
7477      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
7478    }
7479
7480  }
7481
7482  if (Subtarget.hasQPX()) {
7483    if (VT.getVectorNumElements() != 4)
7484      return SDValue();
7485
7486    if (V2.isUndef()) V2 = V1;
7487
7488    int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp);
7489    if (AlignIdx != -1) {
7490      return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2,
7491                         DAG.getConstant(AlignIdx, dl, MVT::i32));
7492    } else if (SVOp->isSplat()) {
7493      int SplatIdx = SVOp->getSplatIndex();
7494      if (SplatIdx >= 4) {
7495        std::swap(V1, V2);
7496        SplatIdx -= 4;
7497      }
7498
7499      return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1,
7500                         DAG.getConstant(SplatIdx, dl, MVT::i32));
7501    }
7502
7503    // Lower this into a qvgpci/qvfperm pair.
7504
7505    // Compute the qvgpci literal
7506    unsigned idx = 0;
7507    for (unsigned i = 0; i < 4; ++i) {
7508      int m = SVOp->getMaskElt(i);
7509      unsigned mm = m >= 0 ? (unsigned) m : i;
7510      idx |= mm << (3-i)*3;
7511    }
7512
7513    SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64,
7514                             DAG.getConstant(idx, dl, MVT::i32));
7515    return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3);
7516  }
7517
7518  // Cases that are handled by instructions that take permute immediates
7519  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
7520  // selected by the instruction selector.
7521  if (V2.isUndef()) {
7522    if (PPC::isSplatShuffleMask(SVOp, 1) ||
7523        PPC::isSplatShuffleMask(SVOp, 2) ||
7524        PPC::isSplatShuffleMask(SVOp, 4) ||
7525        PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
7526        PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
7527        PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
7528        PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
7529        PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
7530        PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
7531        PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
7532        PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
7533        PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
7534        (Subtarget.hasP8Altivec() && (
7535         PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
7536         PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
7537         PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
7538      return Op;
7539    }
7540  }
7541
7542  // Altivec has a variety of "shuffle immediates" that take two vector inputs
7543  // and produce a fixed permutation.  If any of these match, do not lower to
7544  // VPERM.
7545  unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
7546  if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
7547      PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
7548      PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
7549      PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
7550      PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
7551      PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
7552      PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
7553      PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
7554      PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
7555      (Subtarget.hasP8Altivec() && (
7556       PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
7557       PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
7558       PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
7559    return Op;
7560
7561  // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
7562  // perfect shuffle table to emit an optimal matching sequence.
7563  ArrayRef<int> PermMask = SVOp->getMask();
7564
7565  unsigned PFIndexes[4];
7566  bool isFourElementShuffle = true;
7567  for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
7568    unsigned EltNo = 8;   // Start out undef.
7569    for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
7570      if (PermMask[i*4+j] < 0)
7571        continue;   // Undef, ignore it.
7572
7573      unsigned ByteSource = PermMask[i*4+j];
7574      if ((ByteSource & 3) != j) {
7575        isFourElementShuffle = false;
7576        break;
7577      }
7578
7579      if (EltNo == 8) {
7580        EltNo = ByteSource/4;
7581      } else if (EltNo != ByteSource/4) {
7582        isFourElementShuffle = false;
7583        break;
7584      }
7585    }
7586    PFIndexes[i] = EltNo;
7587  }
7588
7589  // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
7590  // perfect shuffle vector to determine if it is cost effective to do this as
7591  // discrete instructions, or whether we should use a vperm.
7592  // For now, we skip this for little endian until such time as we have a
7593  // little-endian perfect shuffle table.
7594  if (isFourElementShuffle && !isLittleEndian) {
7595    // Compute the index in the perfect shuffle table.
7596    unsigned PFTableIndex =
7597      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
7598
7599    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
7600    unsigned Cost  = (PFEntry >> 30);

    // Determining when to avoid vperm is tricky.  Many things affect the cost
    // of vperm, particularly how many times the perm mask needs to be computed.
    // For example, if the perm mask can be hoisted out of a loop or is already
    // used (perhaps because there are multiple permutes with the same shuffle
    // mask?) the vperm has a cost of 1.  OTOH, hoisting the permute mask out of
    // the loop requires an extra register.
    //
    // As a compromise, we only emit discrete instructions if the shuffle can be
    // generated in 3 or fewer operations.  When we have loop information
    // available, if this block is within a loop, we should avoid using vperm
    // for 3-operation perms and use a constant pool load instead.
    if (Cost < 3)
      return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
  }

  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
  // vector that will get spilled to the constant pool.
  if (V2.isUndef()) V2 = V1;

  // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
  // that it is in input element units, not in bytes.  Convert now.

  // For little endian, the order of the input vectors is reversed, and
  // the permutation mask is complemented with respect to 31.  This is
  // necessary to produce proper semantics with the big-endian-biased vperm
  // instruction.
  EVT EltVT = V1.getValueType().getVectorElementType();
  unsigned BytesPerElement = EltVT.getSizeInBits()/8;

  SmallVector<SDValue, 16> ResultMask;
  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
    unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];

    for (unsigned j = 0; j != BytesPerElement; ++j)
      if (isLittleEndian)
        ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
                                             dl, MVT::i32));
      else
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
                                             MVT::i32));
  }
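  // For example, with byte-sized elements on little endian, mask entry 0
  // becomes 31 - 0 == 31: vperm numbers its 32 input bytes big-endian-first,
  // so complementing the index and swapping the two source operands (below)
  // together select the same byte the original mask named.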

  SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
  if (isLittleEndian)
    return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
                       V2, V1, VPermMask);
  else
    return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
                       V1, V2, VPermMask);
}

/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
/// vector comparison.  If it is, return true and fill in CompareOpc/isDot
/// with information about the intrinsic.
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
                                 bool &isDot, const PPCSubtarget &Subtarget) {
  unsigned IntrinsicID =
    cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
  CompareOpc = -1;
  isDot = false;
  switch (IntrinsicID) {
  default: return false;
    // Comparison predicates.
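    // The "_p" (predicate) intrinsics correspond to the record ("dot") forms
    // of the vector compares, which also set CR6; CompareOpc holds the
    // instruction's extended opcode from the ISA (e.g. 966 for vcmpbfp).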
  case Intrinsic::ppc_altivec_vcmpbfp_p:  CompareOpc = 966; isDot = true; break;
  case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = true; break;
  case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc =   6; isDot = true; break;
  case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc =  70; isDot = true; break;
  case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = true; break;
  case Intrinsic::ppc_altivec_vcmpequd_p:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 199;
      isDot = true;
    } else
      return false;

    break;
  case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = true; break;
  case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = true; break;
  case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = true; break;
  case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = true; break;
  case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = true; break;
  case Intrinsic::ppc_altivec_vcmpgtsd_p:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 967;
      isDot = true;
    } else
      return false;

    break;
  case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = true; break;
  case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = true; break;
  case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = true; break;
  case Intrinsic::ppc_altivec_vcmpgtud_p:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 711;
      isDot = true;
    } else
      return false;

    break;
    // VSX predicate comparisons use the same infrastructure.
  case Intrinsic::ppc_vsx_xvcmpeqdp_p:
  case Intrinsic::ppc_vsx_xvcmpgedp_p:
  case Intrinsic::ppc_vsx_xvcmpgtdp_p:
  case Intrinsic::ppc_vsx_xvcmpeqsp_p:
  case Intrinsic::ppc_vsx_xvcmpgesp_p:
  case Intrinsic::ppc_vsx_xvcmpgtsp_p:
    if (Subtarget.hasVSX()) {
      switch (IntrinsicID) {
      case Intrinsic::ppc_vsx_xvcmpeqdp_p: CompareOpc = 99; break;
      case Intrinsic::ppc_vsx_xvcmpgedp_p: CompareOpc = 115; break;
      case Intrinsic::ppc_vsx_xvcmpgtdp_p: CompareOpc = 107; break;
      case Intrinsic::ppc_vsx_xvcmpeqsp_p: CompareOpc = 67; break;
      case Intrinsic::ppc_vsx_xvcmpgesp_p: CompareOpc = 83; break;
      case Intrinsic::ppc_vsx_xvcmpgtsp_p: CompareOpc = 75; break;
      }
      isDot = true;
    } else
      return false;

    break;

    // Normal Comparisons.
  case Intrinsic::ppc_altivec_vcmpbfp:    CompareOpc = 966; break;
  case Intrinsic::ppc_altivec_vcmpeqfp:   CompareOpc = 198; break;
  case Intrinsic::ppc_altivec_vcmpequb:   CompareOpc =   6; break;
  case Intrinsic::ppc_altivec_vcmpequh:   CompareOpc =  70; break;
  case Intrinsic::ppc_altivec_vcmpequw:   CompareOpc = 134; break;
  case Intrinsic::ppc_altivec_vcmpequd:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 199;
    else
      return false;

    break;
  case Intrinsic::ppc_altivec_vcmpgefp:   CompareOpc = 454; break;
  case Intrinsic::ppc_altivec_vcmpgtfp:   CompareOpc = 710; break;
  case Intrinsic::ppc_altivec_vcmpgtsb:   CompareOpc = 774; break;
  case Intrinsic::ppc_altivec_vcmpgtsh:   CompareOpc = 838; break;
  case Intrinsic::ppc_altivec_vcmpgtsw:   CompareOpc = 902; break;
  case Intrinsic::ppc_altivec_vcmpgtsd:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 967;
    else
      return false;

    break;
  case Intrinsic::ppc_altivec_vcmpgtub:   CompareOpc = 518; break;
  case Intrinsic::ppc_altivec_vcmpgtuh:   CompareOpc = 582; break;
  case Intrinsic::ppc_altivec_vcmpgtuw:   CompareOpc = 646; break;
  case Intrinsic::ppc_altivec_vcmpgtud:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 711;
    else
      return false;

    break;
  }
  return true;
}

/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
/// lower, do it, otherwise return null.
SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                   SelectionDAG &DAG) const {
  unsigned IntrinsicID =
    cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  if (IntrinsicID == Intrinsic::thread_pointer) {
    // Reads the thread pointer register, used for __builtin_thread_pointer.
    bool is64bit = Subtarget.isPPC64();
    return DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
                           is64bit ? MVT::i64 : MVT::i32);
  }

  // If this is a lowered altivec predicate compare, CompareOpc is set to the
  // opcode number of the comparison.
  SDLoc dl(Op);
  int CompareOpc;
  bool isDot;
  if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
    return SDValue();    // Don't custom lower most intrinsics.

  // If this is a non-dot comparison, make the VCMP node and we are done.
  if (!isDot) {
    SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
                              Op.getOperand(1), Op.getOperand(2),
                              DAG.getConstant(CompareOpc, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
  }

  // Create the PPCISD altivec 'dot' comparison node.
  SDValue Ops[] = {
    Op.getOperand(2),  // LHS
    Op.getOperand(3),  // RHS
    DAG.getConstant(CompareOpc, dl, MVT::i32)
  };
  EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
  SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);

  // Now that we have the comparison, emit a copy from the CR to a GPR.
  // This is flagged to the above dot comparison.
  SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
                              DAG.getRegister(PPC::CR6, MVT::i32),
                              CompNode.getValue(1));

  // Unpack the result based on how the target uses it.
  unsigned BitNo;   // Bit # of CR6.
  bool InvertBit;   // Invert result?
  switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
  default:  // Can't happen, don't crash on invalid number though.
  case 0:   // Return the value of the EQ bit of CR6.
    BitNo = 0; InvertBit = false;
    break;
  case 1:   // Return the inverted value of the EQ bit of CR6.
    BitNo = 0; InvertBit = true;
    break;
  case 2:   // Return the value of the LT bit of CR6.
    BitNo = 2; InvertBit = false;
    break;
  case 3:   // Return the inverted value of the LT bit of CR6.
    BitNo = 2; InvertBit = true;
    break;
  }

  // Shift the bit into the low position.
  Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
                      DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
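  // (MFOCRF leaves the CR6 field in bits 24-27 of the result in IBM bit
  // numbering, i.e. bits 7 down to 4 counting from the LSB, so the EQ bit
  // (BitNo 0) sits at LSB position 5 == 8 - (3 - 0), and the LT bit
  // (BitNo 2) at position 7.)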
  // Isolate the bit.
  Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
                      DAG.getConstant(1, dl, MVT::i32));

  // If we are supposed to, toggle the bit.
  if (InvertBit)
    Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
                        DAG.getConstant(1, dl, MVT::i32));
  return Flags;
}

SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc dl(Op);
  // For v2i64 (VSX), we can pattern match the v2i32 case (using fp <-> int
  // instructions), but for smaller types we need to extend up to v2i32
  // before going any further.
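  // For example, a v2i64 sign_extend_inreg from v2i16 is emitted as a v4i32
  // sign_extend_inreg from v4i16 (sign-extending each halfword within its
  // word), followed by a v2i64 sign_extend_inreg from v2i32.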
  if (Op.getValueType() == MVT::v2i64) {
    EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    if (ExtVT != MVT::v2i32) {
      Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0));
      Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op,
                       DAG.getValueType(EVT::getVectorVT(*DAG.getContext(),
                                        ExtVT.getVectorElementType(), 4)));
      Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op);
      Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op,
                       DAG.getValueType(MVT::v2i32));
    }

    return Op;
  }

  return SDValue();
}

SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc dl(Op);
  // Create a stack slot that is 16-byte aligned.
  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  // Store the input value into Value#0 of the stack slot.
  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
                               Op.getOperand(0), FIdx, MachinePointerInfo(),
                               false, false, 0);
  // Load it out.
  return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(),
                     false, false, false, 0);
}

SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDNode *N = Op.getNode();

  assert(N->getOperand(0).getValueType() == MVT::v4i1 &&
         "Unknown extract_vector_elt type");

  SDValue Value = N->getOperand(0);

  // The first part of this is like the store lowering except that we don't
  // need to track the chain.

  // The values are now known to be -1 (false) or 1 (true). To convert this
  // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
  // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
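  // Concretely, for the only two values a lane can hold here:
  // fma(-1.0, 0.5, 0.5) == 0.0 and fma(1.0, 0.5, 0.5) == 1.0.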
  Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);

  // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
  // understand how to form the extending load.
  SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);

  Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);

  // Now convert to an integer and store.
  Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
    DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
    Value);

  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  SDValue StoreChain = DAG.getEntryNode();
  SDValue Ops[] = {StoreChain,
                   DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
                   Value, FIdx};
  SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);

  StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
    dl, VTs, Ops, MVT::v4i32, PtrInfo);

  // Extract the value requested. qvstfiw stored the four 32-bit words
  // back-to-back, so lane N lives at byte offset 4*N.
  unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
  SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
  Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);

  SDValue IntVal = DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
                               PtrInfo.getWithOffset(Offset),
                               false, false, false, 0);

  if (!Subtarget.useCRBits())
    return IntVal;

  return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal);
}

/// Lowering for QPX v4i1 loads
SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc dl(Op);
  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
  SDValue LoadChain = LN->getChain();
  SDValue BasePtr = LN->getBasePtr();

  if (Op.getValueType() == MVT::v4f64 ||
      Op.getValueType() == MVT::v4f32) {
    EVT MemVT = LN->getMemoryVT();
    unsigned Alignment = LN->getAlignment();

    // If this load is properly aligned, then it is legal.
    if (Alignment >= MemVT.getStoreSize())
      return Op;

    EVT ScalarVT = Op.getValueType().getScalarType(),
        ScalarMemVT = MemVT.getScalarType();
    unsigned Stride = ScalarMemVT.getStoreSize();
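    // For example, an underaligned v4f32 load (Stride == 4) is split into
    // four scalar loads at byte offsets 0, 4, 8 and 12, whose chains are
    // gathered by the TokenFactor below.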

    SDValue Vals[4], LoadChains[4];
    for (unsigned Idx = 0; Idx < 4; ++Idx) {
      SDValue Load;
      if (ScalarVT != ScalarMemVT)
        Load =
          DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain,
                         BasePtr,
                         LN->getPointerInfo().getWithOffset(Idx*Stride),
                         ScalarMemVT, LN->isVolatile(), LN->isNonTemporal(),
                         LN->isInvariant(), MinAlign(Alignment, Idx*Stride),
                         LN->getAAInfo());
      else
        Load =
          DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr,
                      LN->getPointerInfo().getWithOffset(Idx*Stride),
                      LN->isVolatile(), LN->isNonTemporal(),
                      LN->isInvariant(), MinAlign(Alignment, Idx*Stride),
                      LN->getAAInfo());

      if (Idx == 0 && LN->isIndexed()) {
        assert(LN->getAddressingMode() == ISD::PRE_INC &&
               "Unknown addressing mode on vector load");
        Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(),
                                  LN->getAddressingMode());
      }

      Vals[Idx] = Load;
      LoadChains[Idx] = Load.getValue(1);

      BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                            DAG.getConstant(Stride, dl,
                                            BasePtr.getValueType()));
    }

    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
    SDValue Value = DAG.getBuildVector(Op.getValueType(), dl, Vals);

    if (LN->isIndexed()) {
      SDValue RetOps[] = { Value, Vals[0].getValue(1), TF };
      return DAG.getMergeValues(RetOps, dl);
    }

    SDValue RetOps[] = { Value, TF };
    return DAG.getMergeValues(RetOps, dl);
  }

  assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower");
  assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported");

  // To lower v4i1 from a byte array, we load the byte elements of the
  // vector and then reuse the BUILD_VECTOR logic.

  SDValue VectElmts[4], VectElmtChains[4];
  for (unsigned i = 0; i < 4; ++i) {
    SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
    Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);

    VectElmts[i] = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::i32, LoadChain, Idx,
                                  LN->getPointerInfo().getWithOffset(i),
                                  MVT::i8 /* memory type */, LN->isVolatile(),
                                  LN->isNonTemporal(), LN->isInvariant(),
                                  1 /* alignment */, LN->getAAInfo());
    VectElmtChains[i] = VectElmts[i].getValue(1);
  }

  LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains);
  SDValue Value = DAG.getBuildVector(MVT::v4i1, dl, VectElmts);

  SDValue RVals[] = { Value, LoadChain };
  return DAG.getMergeValues(RVals, dl);
}

/// Lowering for QPX v4i1 stores
SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
  SDValue StoreChain = SN->getChain();
  SDValue BasePtr = SN->getBasePtr();
  SDValue Value = SN->getValue();

  if (Value.getValueType() == MVT::v4f64 ||
      Value.getValueType() == MVT::v4f32) {
    EVT MemVT = SN->getMemoryVT();
    unsigned Alignment = SN->getAlignment();

    // If this store is properly aligned, then it is legal.
    if (Alignment >= MemVT.getStoreSize())
      return Op;

    EVT ScalarVT = Value.getValueType().getScalarType(),
        ScalarMemVT = MemVT.getScalarType();
    unsigned Stride = ScalarMemVT.getStoreSize();

    SDValue Stores[4];
    for (unsigned Idx = 0; Idx < 4; ++Idx) {
      SDValue Ex = DAG.getNode(
          ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
          DAG.getConstant(Idx, dl, getVectorIdxTy(DAG.getDataLayout())));
      SDValue Store;
      if (ScalarVT != ScalarMemVT)
        Store =
          DAG.getTruncStore(StoreChain, dl, Ex, BasePtr,
                            SN->getPointerInfo().getWithOffset(Idx*Stride),
                            ScalarMemVT, SN->isVolatile(), SN->isNonTemporal(),
                            MinAlign(Alignment, Idx*Stride), SN->getAAInfo());
      else
        Store =
          DAG.getStore(StoreChain, dl, Ex, BasePtr,
                       SN->getPointerInfo().getWithOffset(Idx*Stride),
                       SN->isVolatile(), SN->isNonTemporal(),
                       MinAlign(Alignment, Idx*Stride), SN->getAAInfo());

      if (Idx == 0 && SN->isIndexed()) {
        assert(SN->getAddressingMode() == ISD::PRE_INC &&
               "Unknown addressing mode on vector store");
        Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(),
                                    SN->getAddressingMode());
      }

      BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                            DAG.getConstant(Stride, dl,
                                            BasePtr.getValueType()));
      Stores[Idx] = Store;
    }

    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

    if (SN->isIndexed()) {
      SDValue RetOps[] = { TF, Stores[0].getValue(1) };
      return DAG.getMergeValues(RetOps, dl);
    }

    return TF;
  }

  assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported");
  assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower");

  // The values are now known to be -1 (false) or 1 (true). To convert this
  // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
  // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
  Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);

  // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
  // understand how to form the extending load.
  SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);

  Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);

  // Now convert to an integer and store.
  Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
    DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
    Value);

  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  SDValue Ops[] = {StoreChain,
                   DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
                   Value, FIdx};
  SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);

  StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
    dl, VTs, Ops, MVT::v4i32, PtrInfo);

  // Move data into the byte array.
  SDValue Loads[4], LoadChains[4];
  for (unsigned i = 0; i < 4; ++i) {
    unsigned Offset = 4*i;
    SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
    Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);

    Loads[i] =
        DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
                    PtrInfo.getWithOffset(Offset), false, false, false, 0);
    LoadChains[i] = Loads[i].getValue(1);
  }

  StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);

  SDValue Stores[4];
  for (unsigned i = 0; i < 4; ++i) {
    SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
    Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);

    Stores[i] = DAG.getTruncStore(
        StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i),
        MVT::i8 /* memory type */, SN->isVolatile(), SN->isNonTemporal(),
        1 /* alignment */, SN->getAAInfo());
  }

  StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

  return StoreChain;
}

SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  if (Op.getValueType() == MVT::v4i32) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
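    // Writing each 32-bit lane as a = a_hi*2^16 + a_lo (and b likewise), the
    // low 32 bits of a*b are a_lo*b_lo + ((a_hi*b_lo + a_lo*b_hi) << 16).
    // vmulouh produces the a_lo*b_lo terms, vmsumuhm against the rotated RHS
    // accumulates both cross products, and vslw supplies the shift by 16.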

    SDValue Zero  = BuildSplatI(  0, 1, MVT::v4i32, DAG, dl);
    // Only the low five bits of each shift amount are used, so the -16
    // splat (which vspltisw can materialize) behaves as +16 below.
    SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);

    SDValue RHSSwap =   // = vrlw RHS, 16
      BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);

    // Shrinkify inputs to v8i16.
    LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
    RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
    RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);

    // Low parts multiplied together, generating 32-bit results (we ignore the
    // top parts).
    SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
                                        LHS, RHS, DAG, dl, MVT::v4i32);

    SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
                                      LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
    // Shift the high parts up 16 bits.
    HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
                              Neg16, DAG, dl);
    return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
  } else if (Op.getValueType() == MVT::v8i16) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);

    SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);

    return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
                            LHS, RHS, Zero, DAG, dl);
  } else if (Op.getValueType() == MVT::v16i8) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
    bool isLittleEndian = Subtarget.isLittleEndian();

    // Multiply the even 8-bit parts, producing 16-bit products.
    SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
                                           LHS, RHS, DAG, dl, MVT::v8i16);
    EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);

    // Multiply the odd 8-bit parts, producing 16-bit products.
    SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
                                          LHS, RHS, DAG, dl, MVT::v8i16);
    OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);

    // Merge the results together.  Because vmuleub and vmuloub are
    // instructions with a big-endian bias, we must reverse the
    // element numbering and reverse the meaning of "odd" and "even"
    // when generating little endian code.
    int Ops[16];
    for (unsigned i = 0; i != 8; ++i) {
      if (isLittleEndian) {
        Ops[i*2  ] = 2*i;
        Ops[i*2+1] = 2*i+16;
      } else {
        Ops[i*2  ] = 2*i+1;
        Ops[i*2+1] = 2*i+1+16;
      }
    }
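    // On big endian, for instance, this builds the mask <1,17, 3,19, ...,
    // 15,31>, taking the low (odd-numbered) byte of each 16-bit product and
    // interleaving the even- and odd-indexed products back into 16 bytes.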
    if (isLittleEndian)
      return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
    else
      return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
  } else {
    llvm_unreachable("Unknown mul to lower!");
  }
}

/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default: llvm_unreachable("Wasn't expecting to be able to lower this!");
  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
  case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
  case ISD::SETCC:              return LowerSETCC(Op, DAG);
  case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
  case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);
  case ISD::VASTART:
    return LowerVASTART(Op, DAG);

  case ISD::VAARG:
    return LowerVAARG(Op, DAG);

  case ISD::VACOPY:
    return LowerVACOPY(Op, DAG);

  case ISD::STACKRESTORE:
    return LowerSTACKRESTORE(Op, DAG);

  case ISD::DYNAMIC_STACKALLOC:
    return LowerDYNAMIC_STACKALLOC(Op, DAG);

  case ISD::GET_DYNAMIC_AREA_OFFSET:
    return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);

  case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
  case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);

  case ISD::LOAD:               return LowerLOAD(Op, DAG);
  case ISD::STORE:              return LowerSTORE(Op, DAG);
  case ISD::TRUNCATE:           return LowerTRUNCATE(Op, DAG);
  case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
  case ISD::FP_TO_UINT:
  case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG,
                                                      SDLoc(Op));
  case ISD::UINT_TO_FP:
  case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
  case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);

  // Lower 64-bit shifts.
  case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
  case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
  case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::SIGN_EXTEND_INREG:  return LowerSIGN_EXTEND_INREG(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::MUL:                return LowerMUL(Op, DAG);

  // For counter-based loop handling.
  case ISD::INTRINSIC_W_CHAIN:  return SDValue();

  // Frame & Return address.
  case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
  }
}

void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue> &Results,
                                           SelectionDAG &DAG) const {
  SDLoc dl(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Do not know how to custom type legalize this operation!");
  case ISD::READCYCLECOUNTER: {
    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
    SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs,
                              N->getOperand(0));

    Results.push_back(RTB);
    Results.push_back(RTB.getValue(1));
    Results.push_back(RTB.getValue(2));
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
        Intrinsic::ppc_is_decremented_ctr_nonzero)
      break;

    assert(N->getValueType(0) == MVT::i1 &&
           "Unexpected result type for CTR decrement intrinsic");
    EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                 N->getValueType(0));
    SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
    SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
                                 N->getOperand(1));

    Results.push_back(NewInt);
    Results.push_back(NewInt.getValue(1));
    break;
  }
  case ISD::VAARG: {
    if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
      return;

    EVT VT = N->getValueType(0);

    if (VT == MVT::i64) {
      SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);

      Results.push_back(NewNode);
      Results.push_back(NewNode.getValue(1));
    }
    return;
  }
  case ISD::FP_ROUND_INREG: {
    assert(N->getValueType(0) == MVT::ppcf128);
    assert(N->getOperand(0).getValueType() == MVT::ppcf128);
    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
                             MVT::f64, N->getOperand(0),
                             DAG.getIntPtrConstant(0, dl));
    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
                             MVT::f64, N->getOperand(0),
                             DAG.getIntPtrConstant(1, dl));

    // Add the two halves of the long double in round-to-zero mode.
    SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);

    // We know the low half is about to be thrown away, so just use something
    // convenient.
    Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
                                  FPreg, FPreg));
    return;
  }
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    // LowerFP_TO_INT() can only handle f32 and f64.
    if (N->getOperand(0).getValueType() == MVT::ppcf128)
      return;
    Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
    return;
  }
}

//===----------------------------------------------------------------------===//
//  Other Lowering Code
//===----------------------------------------------------------------------===//

static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  Function *Func = Intrinsic::getDeclaration(M, Id);
  return Builder.CreateCall(Func, {});
}
// The mappings for emitLeading/TrailingFence are taken from
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                         AtomicOrdering Ord, bool IsStore,
                                         bool IsLoad) const {
  if (Ord == AtomicOrdering::SequentiallyConsistent)
    return callIntrinsic(Builder, Intrinsic::ppc_sync);
  if (isReleaseOrStronger(Ord))
    return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
  return nullptr;
}

Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                          AtomicOrdering Ord, bool IsStore,
                                          bool IsLoad) const {
  if (IsLoad && isAcquireOrStronger(Ord))
    return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
  // FIXME: this is too conservative, a dependent branch + isync is enough.
  // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
  // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
  // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
  return nullptr;
}

MachineBasicBlock *
PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
                                    unsigned AtomicSize,
                                    unsigned BinOpcode) const {
  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  auto LoadMnemonic = PPC::LDARX;
  auto StoreMnemonic = PPC::STDCX;
  switch (AtomicSize) {
  default:
    llvm_unreachable("Unexpected size of atomic entity");
  case 1:
    LoadMnemonic = PPC::LBARX;
    StoreMnemonic = PPC::STBCX;
    assert(Subtarget.hasPartwordAtomics() &&
           "8-bit atomics require partword atomic support");
    break;
  case 2:
    LoadMnemonic = PPC::LHARX;
    StoreMnemonic = PPC::STHCX;
    assert(Subtarget.hasPartwordAtomics() &&
           "16-bit atomics require partword atomic support");
    break;
  case 4:
    LoadMnemonic = PPC::LWARX;
    StoreMnemonic = PPC::STWCX;
    break;
  case 8:
    LoadMnemonic = PPC::LDARX;
    StoreMnemonic = PPC::STDCX;
    break;
  }

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineFunction::iterator It = ++BB->getIterator();

  unsigned dest = MI.getOperand(0).getReg();
  unsigned ptrA = MI.getOperand(1).getReg();
  unsigned ptrB = MI.getOperand(2).getReg();
  unsigned incr = MI.getOperand(3).getReg();
  DebugLoc dl = MI.getDebugLoc();

  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  F->insert(It, exitMBB);
  exitMBB->splice(exitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  MachineRegisterInfo &RegInfo = F->getRegInfo();
  unsigned TmpReg = (!BinOpcode) ? incr :
    RegInfo.createVirtualRegister(AtomicSize == 8 ? &PPC::G8RCRegClass
                                                  : &PPC::GPRCRegClass);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  //  loopMBB:
  //   l[wd]arx dest, ptr
  //   add r0, dest, incr
  //   st[wd]cx. r0, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB
  BB = loopMBB;
  BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
    .addReg(ptrA).addReg(ptrB);
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
  BuildMI(BB, dl, TII->get(StoreMnemonic))
    .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
  BuildMI(BB, dl, TII->get(PPC::BCC))
    .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;
  return BB;
}

MachineBasicBlock *
PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
                                            MachineBasicBlock *BB,
                                            bool is8bit, // operation
                                            unsigned BinOpcode) const {
  // If we support part-word atomic mnemonics, just use them.
  if (Subtarget.hasPartwordAtomics())
    return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode);

  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  // In 64-bit mode we have to use 64-bit registers for addresses, even though
  // the lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
  // registers without caring whether they're 32 or 64, but here we're
  // doing actual arithmetic on the addresses.
  bool is64bit = Subtarget.isPPC64();
  unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineFunction::iterator It = ++BB->getIterator();

  unsigned dest = MI.getOperand(0).getReg();
  unsigned ptrA = MI.getOperand(1).getReg();
  unsigned ptrB = MI.getOperand(2).getReg();
  unsigned incr = MI.getOperand(3).getReg();
  DebugLoc dl = MI.getDebugLoc();

  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  F->insert(It, exitMBB);
  exitMBB->splice(exitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  MachineRegisterInfo &RegInfo = F->getRegInfo();
  const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
                                          : &PPC::GPRCRegClass;
  unsigned PtrReg = RegInfo.createVirtualRegister(RC);
  unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
  unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
  unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
  unsigned MaskReg = RegInfo.createVirtualRegister(RC);
  unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
  unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
  unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
  unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
  unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
  unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
  unsigned Ptr1Reg;
  unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  // The 4-byte load must be aligned, while a char or short may be
  // anywhere in the word.  Hence all this nasty bookkeeping code.
  //   add ptr1, ptrA, ptrB [copy if ptrA==0]
  //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
  //   xori shift, shift1, 24 [16]
  //   rlwinm ptr, ptr1, 0, 0, 29
  //   slw incr2, incr, shift
  //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
  //   slw mask, mask2, shift
  //  loopMBB:
  //   lwarx tmpDest, ptr
  //   add tmp, tmpDest, incr2
  //   andc tmp2, tmpDest, mask
  //   and tmp3, tmp, mask
  //   or tmp4, tmp3, tmp2
  //   stwcx. tmp4, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB
  //   srw dest, tmpDest, shift
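  // For example, a byte at big-endian byte-offset 1 within its word yields
  // shift1 = 8 and shift = 8 ^ 24 = 16, so the operation is carried out on
  // bits 23:16 of the word loaded by lwarx.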
  if (ptrA != ZeroReg) {
    Ptr1Reg = RegInfo.createVirtualRegister(RC);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
      .addReg(ptrA).addReg(ptrB);
  } else {
    Ptr1Reg = ptrB;
  }
  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
      .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
  BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
      .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
  if (is64bit)
    BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
      .addReg(Ptr1Reg).addImm(0).addImm(61);
  else
    BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
      .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
  BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
      .addReg(incr).addReg(ShiftReg);
  if (is8bit)
    BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
  else {
    BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
    BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
        .addReg(Mask3Reg).addImm(65535);
  }
  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
      .addReg(Mask2Reg).addReg(ShiftReg);

  BB = loopMBB;
  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
    .addReg(ZeroReg).addReg(PtrReg);
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
      .addReg(Incr2Reg).addReg(TmpDestReg);
  BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
    .addReg(TmpDestReg).addReg(MaskReg);
  BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
    .addReg(TmpReg).addReg(MaskReg);
  BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
    .addReg(Tmp3Reg).addReg(Tmp2Reg);
  BuildMI(BB, dl, TII->get(PPC::STWCX))
    .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
  BuildMI(BB, dl, TII->get(PPC::BCC))
    .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;
  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg)
    .addReg(ShiftReg);
  return BB;
}

llvm::MachineBasicBlock *
PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
                                    MachineBasicBlock *MBB) const {
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const BasicBlock *BB = MBB->getBasicBlock();
  MachineFunction::iterator I = ++MBB->getIterator();

  // Memory Reference
  MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
  MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();

  unsigned DstReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
  assert(RC->hasType(MVT::i32) && "Invalid destination!");
  unsigned mainDstReg = MRI.createVirtualRegister(RC);
  unsigned restoreDstReg = MRI.createVirtualRegister(RC);

  MVT PVT = getPointerTy(MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");
  // For v = setjmp(buf), we generate
  //
  // thisMBB:
  //  SjLjSetup mainMBB
  //  bl mainMBB
  //  v_restore = 1
  //  b sinkMBB
  //
  // mainMBB:
  //  buf[LabelOffset] = LR
  //  v_main = 0
  //
  // sinkMBB:
  //  v = phi(main, restore)
  //

  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
  MF->insert(I, mainMBB);
  MF->insert(I, sinkMBB);

  MachineInstrBuilder MIB;

  // Transfer the remainder of BB and its successor edges to sinkMBB.
  sinkMBB->splice(sinkMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // Note that the structure of the jmp_buf used here is not compatible
  // with that used by libc, and is not designed to be. Specifically, it
  // stores only those 'reserved' registers that LLVM does not otherwise
  // understand how to spill. Also, by convention, by the time this
  // intrinsic is called, Clang has already stored the frame address in the
  // first slot of the buffer and stack address in the third. Following the
  // X86 target code, we'll store the jump address in the second slot. We also
  // need to save the TOC pointer (R2) to handle jumps between shared
  // libraries, and that will be stored in the fourth slot. The thread
  // identifier (R13) is not affected.

  // thisMBB:
  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t TOCOffset   = 3 * PVT.getStoreSize();
  const int64_t BPOffset    = 4 * PVT.getStoreSize();
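  // In pointer-size slots, the buffer layout used here is therefore:
  //   slot 0: frame address (stored by Clang)
  //   slot 1: IP (LR, stored below)
  //   slot 2: SP (stored by Clang, reloaded in emitEHSjLjLongJmp)
  //   slot 3: TOC (R2)
  //   slot 4: BP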

  // Prepare the IP in a virtual register.
  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
  unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
  unsigned BufReg = MI.getOperand(1).getReg();

  if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
    setUsesTOCBasePtr(*MBB->getParent());
    MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
            .addReg(PPC::X2)
            .addImm(TOCOffset)
            .addReg(BufReg);
    MIB.setMemRefs(MMOBegin, MMOEnd);
  }

  // Naked functions never have a base pointer, and so we use r1. For all
  // other functions, this decision must be deferred until PEI.
  unsigned BaseReg;
  if (MF->getFunction()->hasFnAttribute(Attribute::Naked))
    BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
  else
    BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;

  MIB = BuildMI(*thisMBB, MI, DL,
                TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
            .addReg(BaseReg)
            .addImm(BPOffset)
            .addReg(BufReg);
  MIB.setMemRefs(MMOBegin, MMOEnd);

  // Setup
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
  MIB.addRegMask(TRI->getNoPreservedMask());

  BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);

  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
          .addMBB(mainMBB);
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);

  thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
  thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());

  // mainMBB:
  //  mainDstReg = 0
  MIB =
      BuildMI(mainMBB, DL,
              TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);

  // Store IP
  if (Subtarget.isPPC64()) {
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
            .addReg(LabelReg)
            .addImm(LabelOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
            .addReg(LabelReg)
            .addImm(LabelOffset)
            .addReg(BufReg);
  }

  MIB.setMemRefs(MMOBegin, MMOEnd);

  BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
  mainMBB->addSuccessor(sinkMBB);

  // sinkMBB:
  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
          TII->get(PPC::PHI), DstReg)
    .addReg(mainDstReg).addMBB(mainMBB)
    .addReg(restoreDstReg).addMBB(thisMBB);

  MI.eraseFromParent();
  return sinkMBB;
}

MachineBasicBlock *
PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
                                     MachineBasicBlock *MBB) const {
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  // Memory Reference
  MachineInstr::mmo_iterator MMOBegin = MI.memoperands_begin();
  MachineInstr::mmo_iterator MMOEnd = MI.memoperands_end();

  MVT PVT = getPointerTy(MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");

  const TargetRegisterClass *RC =
    (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
  unsigned Tmp = MRI.createVirtualRegister(RC);
  // Since FP is only updated here but NOT referenced, it's treated as GPR.
  unsigned FP  = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
  unsigned SP  = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
  unsigned BP =
      (PVT == MVT::i64)
          ? PPC::X30
          : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
                                                              : PPC::R30);

  MachineInstrBuilder MIB;

  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t SPOffset    = 2 * PVT.getStoreSize();
  const int64_t TOCOffset   = 3 * PVT.getStoreSize();
  const int64_t BPOffset    = 4 * PVT.getStoreSize();

  unsigned BufReg = MI.getOperand(0).getReg();

  // Reload FP (the jumped-to function may not have had a
  // frame pointer, and if so, then its r31 will be restored
  // as necessary).
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
            .addImm(0)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
            .addImm(0)
            .addReg(BufReg);
  }
  MIB.setMemRefs(MMOBegin, MMOEnd);

  // Reload IP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
            .addImm(LabelOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
            .addImm(LabelOffset)
            .addReg(BufReg);
  }
  MIB.setMemRefs(MMOBegin, MMOEnd);

  // Reload SP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
            .addImm(SPOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
            .addImm(SPOffset)
            .addReg(BufReg);
  }
  MIB.setMemRefs(MMOBegin, MMOEnd);

  // Reload BP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
            .addImm(BPOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
            .addImm(BPOffset)
            .addReg(BufReg);
  }
  MIB.setMemRefs(MMOBegin, MMOEnd);

  // Reload TOC
  if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
    setUsesTOCBasePtr(*MBB->getParent());
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
            .addImm(TOCOffset)
            .addReg(BufReg);

    MIB.setMemRefs(MMOBegin, MMOEnd);
  }

  // Jump
  BuildMI(*MBB, MI, DL,
          TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
  BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));

  MI.eraseFromParent();
  return MBB;
}

MachineBasicBlock *
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                               MachineBasicBlock *BB) const {
  if (MI.getOpcode() == TargetOpcode::STACKMAP ||
      MI.getOpcode() == TargetOpcode::PATCHPOINT) {
    if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() &&
        MI.getOpcode() == TargetOpcode::PATCHPOINT) {
      // Call lowering should have added an r2 operand to indicate a dependence
      // on the TOC base pointer value. It can't however, because there is no
      // way to mark the dependence as implicit there, and so the stackmap code
      // will confuse it with a regular operand. Instead, add the dependence
      // here.
      setUsesTOCBasePtr(*BB->getParent());
      MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
    }

    return emitPatchPoint(MI, BB);
  }

  if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
      MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
    return emitEHSjLjSetJmp(MI, BB);
  } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
             MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
    return emitEHSjLjLongJmp(MI, BB);
  }

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  // To "insert" these instructions we actually have to insert their
  // control-flow patterns.
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = ++BB->getIterator();

  MachineFunction *F = BB->getParent();

  if (Subtarget.hasISEL() &&
      (MI.getOpcode() == PPC::SELECT_CC_I4 ||
       MI.getOpcode() == PPC::SELECT_CC_I8 ||
       MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
    SmallVector<MachineOperand, 2> Cond;
    if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
        MI.getOpcode() == PPC::SELECT_CC_I8)
      Cond.push_back(MI.getOperand(4));
    else
      Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
    Cond.push_back(MI.getOperand(1));

    DebugLoc dl = MI.getDebugLoc();
    TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
                      MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
  } else if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
             MI.getOpcode() == PPC::SELECT_CC_I8 ||
             MI.getOpcode() == PPC::SELECT_CC_F4 ||
             MI.getOpcode() == PPC::SELECT_CC_F8 ||
             MI.getOpcode() == PPC::SELECT_CC_QFRC ||
             MI.getOpcode() == PPC::SELECT_CC_QSRC ||
             MI.getOpcode() == PPC::SELECT_CC_QBRC ||
             MI.getOpcode() == PPC::SELECT_CC_VRRC ||
             MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
             MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
             MI.getOpcode() == PPC::SELECT_CC_VSRC ||
             MI.getOpcode() == PPC::SELECT_I4 ||
             MI.getOpcode() == PPC::SELECT_I8 ||
             MI.getOpcode() == PPC::SELECT_F4 ||
             MI.getOpcode() == PPC::SELECT_F8 ||
             MI.getOpcode() == PPC::SELECT_QFRC ||
             MI.getOpcode() == PPC::SELECT_QSRC ||
             MI.getOpcode() == PPC::SELECT_QBRC ||
             MI.getOpcode() == PPC::SELECT_VRRC ||
             MI.getOpcode() == PPC::SELECT_VSFRC ||
             MI.getOpcode() == PPC::SELECT_VSSRC ||
             MI.getOpcode() == PPC::SELECT_VSRC) {
    // The incoming instruction knows the destination vreg to set, the
    // condition code register to branch on, the true/false values to
    // select between, and a branch opcode to use.

    //  thisMBB:
    //  ...
    //   TrueVal = ...
    //   cmpTY ccX, r1, r2
    //   bCC copy1MBB
    //   fallthrough --> copy0MBB
    MachineBasicBlock *thisMBB = BB;
    MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
    DebugLoc dl = MI.getDebugLoc();
    F->insert(It, copy0MBB);
    F->insert(It, sinkMBB);

    // Transfer the remainder of BB and its successor edges to sinkMBB.
    sinkMBB->splice(sinkMBB->begin(), BB,
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);

    // Next, add the true and fallthrough blocks as its successors.
    BB->addSuccessor(copy0MBB);
    BB->addSuccessor(sinkMBB);

    if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
        MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
        MI.getOpcode() == PPC::SELECT_QFRC ||
        MI.getOpcode() == PPC::SELECT_QSRC ||
        MI.getOpcode() == PPC::SELECT_QBRC ||
        MI.getOpcode() == PPC::SELECT_VRRC ||
        MI.getOpcode() == PPC::SELECT_VSFRC ||
        MI.getOpcode() == PPC::SELECT_VSSRC ||
        MI.getOpcode() == PPC::SELECT_VSRC) {
      BuildMI(BB, dl, TII->get(PPC::BC))
          .addReg(MI.getOperand(1).getReg())
          .addMBB(sinkMBB);
    } else {
      unsigned SelectPred = MI.getOperand(4).getImm();
      BuildMI(BB, dl, TII->get(PPC::BCC))
          .addImm(SelectPred)
          .addReg(MI.getOperand(1).getReg())
          .addMBB(sinkMBB);
    }

    //  copy0MBB:
    //   %FalseValue = ...
    //   # fallthrough to sinkMBB
    BB = copy0MBB;

    // Update machine-CFG edges
    BB->addSuccessor(sinkMBB);

    //  sinkMBB:
    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
    //  ...
    BB = sinkMBB;
    BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
        .addReg(MI.getOperand(3).getReg())
        .addMBB(copy0MBB)
        .addReg(MI.getOperand(2).getReg())
        .addMBB(thisMBB);
  } else if (MI.getOpcode() == PPC::ReadTB) {
    // To read the 64-bit time-base register on a 32-bit target, we read the
    // two halves. Should the counter have wrapped while it was being read, we
    // need to try again.
    // ...
    // readLoop:
    // mfspr Rx,TBU # load from TBU
9031    // mfspr Ry,TB  # load from TB
9032    // mfspr Rz,TBU # load from TBU
9033    // cmpw crX,Rx,Rz # check if 'old'='new'
9034    // bne readLoop   # branch if they're not equal
9035    // ...
9036
9037    MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
9038    MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
9039    DebugLoc dl = MI.getDebugLoc();
9040    F->insert(It, readMBB);
9041    F->insert(It, sinkMBB);
9042
9043    // Transfer the remainder of BB and its successor edges to sinkMBB.
9044    sinkMBB->splice(sinkMBB->begin(), BB,
9045                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
9046    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
9047
9048    BB->addSuccessor(readMBB);
9049    BB = readMBB;
9050
9051    MachineRegisterInfo &RegInfo = F->getRegInfo();
9052    unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
9053    unsigned LoReg = MI.getOperand(0).getReg();
9054    unsigned HiReg = MI.getOperand(1).getReg();
9055
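    // On a 32-bit target the 64-bit time base must be read as two halves:
    // SPR 269 is TBU (the upper word) and SPR 268 is TBL (the lower word).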
9056    BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
9057    BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
9058    BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
9059
9060    unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
9061
9062    BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
9063      .addReg(HiReg).addReg(ReadAgainReg);
9064    BuildMI(BB, dl, TII->get(PPC::BCC))
9065      .addImm(PPC::PRED_NE).addReg(CmpReg).addMBB(readMBB);
9066
9067    BB->addSuccessor(readMBB);
9068    BB->addSuccessor(sinkMBB);
9069  } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
9070    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
9071  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
9072    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
9073  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
9074    BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
9075  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
9076    BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
9077
9078  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
9079    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
9080  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
9081    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
9082  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
9083    BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
9084  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
9085    BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
9086
9087  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
9088    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
9089  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
9090    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
9091  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
9092    BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
9093  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
9094    BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
9095
9096  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
9097    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
9098  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
9099    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
9100  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
9101    BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
9102  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
9103    BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
9104
9105  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
9106    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
9107  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
9108    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
9109  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
9110    BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
9111  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
9112    BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
9113
9114  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
9115    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
9116  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
9117    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
9118  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
9119    BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
9120  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
9121    BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
9122
9123  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
9124    BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
9125  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
9126    BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
9127  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
9128    BB = EmitAtomicBinary(MI, BB, 4, 0);
9129  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
9130    BB = EmitAtomicBinary(MI, BB, 8, 0);
9131
9132  else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
9133           MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
9134           (Subtarget.hasPartwordAtomics() &&
9135            MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
9136           (Subtarget.hasPartwordAtomics() &&
9137            MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
9138    bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
9139
9140    auto LoadMnemonic = PPC::LDARX;
9141    auto StoreMnemonic = PPC::STDCX;
9142    switch (MI.getOpcode()) {
9143    default:
9144      llvm_unreachable("Compare and swap of unknown size");
9145    case PPC::ATOMIC_CMP_SWAP_I8:
9146      LoadMnemonic = PPC::LBARX;
9147      StoreMnemonic = PPC::STBCX;
9148      assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
9149      break;
9150    case PPC::ATOMIC_CMP_SWAP_I16:
9151      LoadMnemonic = PPC::LHARX;
9152      StoreMnemonic = PPC::STHCX;
9153      assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
9154      break;
9155    case PPC::ATOMIC_CMP_SWAP_I32:
9156      LoadMnemonic = PPC::LWARX;
9157      StoreMnemonic = PPC::STWCX;
9158      break;
9159    case PPC::ATOMIC_CMP_SWAP_I64:
9160      LoadMnemonic = PPC::LDARX;
9161      StoreMnemonic = PPC::STDCX;
9162      break;
9163    }
9164    unsigned dest = MI.getOperand(0).getReg();
9165    unsigned ptrA = MI.getOperand(1).getReg();
9166    unsigned ptrB = MI.getOperand(2).getReg();
9167    unsigned oldval = MI.getOperand(3).getReg();
9168    unsigned newval = MI.getOperand(4).getReg();
9169    DebugLoc dl = MI.getDebugLoc();
9170
9171    MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
9172    MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
9173    MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
9174    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
9175    F->insert(It, loop1MBB);
9176    F->insert(It, loop2MBB);
9177    F->insert(It, midMBB);
9178    F->insert(It, exitMBB);
9179    exitMBB->splice(exitMBB->begin(), BB,
9180                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
9181    exitMBB->transferSuccessorsAndUpdatePHIs(BB);
9182
9183    //  thisMBB:
9184    //   ...
9185    //   fallthrough --> loop1MBB
9186    BB->addSuccessor(loop1MBB);
9187
9188    // loop1MBB:
9189    //   l[bhwd]arx dest, ptr
9190    //   cmp[wd] dest, oldval
9191    //   bne- midMBB
9192    // loop2MBB:
9193    //   st[bhwd]cx. newval, ptr
9194    //   bne- loop1MBB
9195    //   b exitBB
9196    // midMBB:
9197    //   st[bhwd]cx. dest, ptr
9198    // exitBB:
9199    BB = loop1MBB;
9200    BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
9201      .addReg(ptrA).addReg(ptrB);
9202    BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
9203      .addReg(oldval).addReg(dest);
9204    BuildMI(BB, dl, TII->get(PPC::BCC))
9205      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
9206    BB->addSuccessor(loop2MBB);
9207    BB->addSuccessor(midMBB);
9208
9209    BB = loop2MBB;
9210    BuildMI(BB, dl, TII->get(StoreMnemonic))
9211      .addReg(newval).addReg(ptrA).addReg(ptrB);
9212    BuildMI(BB, dl, TII->get(PPC::BCC))
9213      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
9214    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
9215    BB->addSuccessor(loop1MBB);
9216    BB->addSuccessor(exitMBB);
9217
9218    BB = midMBB;
9219    BuildMI(BB, dl, TII->get(StoreMnemonic))
9220      .addReg(dest).addReg(ptrA).addReg(ptrB);
9221    BB->addSuccessor(exitMBB);
9222
9223    //  exitMBB:
9224    //   ...
9225    BB = exitMBB;
9226  } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
9227             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
9228    // We must use 64-bit registers for addresses when targeting 64-bit,
9229    // since we're actually doing arithmetic on them.  Other registers
9230    // can be 32-bit.
9231    bool is64bit = Subtarget.isPPC64();
9232    bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
9233
9234    unsigned dest = MI.getOperand(0).getReg();
9235    unsigned ptrA = MI.getOperand(1).getReg();
9236    unsigned ptrB = MI.getOperand(2).getReg();
9237    unsigned oldval = MI.getOperand(3).getReg();
9238    unsigned newval = MI.getOperand(4).getReg();
9239    DebugLoc dl = MI.getDebugLoc();
9240
9241    MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
9242    MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
9243    MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
9244    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
9245    F->insert(It, loop1MBB);
9246    F->insert(It, loop2MBB);
9247    F->insert(It, midMBB);
9248    F->insert(It, exitMBB);
9249    exitMBB->splice(exitMBB->begin(), BB,
9250                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
9251    exitMBB->transferSuccessorsAndUpdatePHIs(BB);
9252
9253    MachineRegisterInfo &RegInfo = F->getRegInfo();
9254    const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
9255                                            : &PPC::GPRCRegClass;
9256    unsigned PtrReg = RegInfo.createVirtualRegister(RC);
9257    unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
9258    unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
9259    unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
9260    unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
9261    unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
9262    unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
9263    unsigned MaskReg = RegInfo.createVirtualRegister(RC);
9264    unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
9265    unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
9266    unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
9267    unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
9268    unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
9269    unsigned Ptr1Reg;
9270    unsigned TmpReg = RegInfo.createVirtualRegister(RC);
9271    unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
9272    //  thisMBB:
9273    //   ...
9274    //   fallthrough --> loop1MBB
9275    BB->addSuccessor(loop1MBB);
9276
9277    // The 4-byte load must be aligned, while a char or short may be
9278    // anywhere in the word.  Hence all this nasty bookkeeping code.
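    // For example, with is8bit, a byte at byte offset 1 within its aligned
    // word yields shift1 = 8 and shift = 8 ^ 24 = 16, so that byte is built,
    // masked, and compared in bits 16-23 of the 32-bit word register.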
9279    //   add ptr1, ptrA, ptrB [copy if ptrA==0]
9280    //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
9281    //   xori shift, shift1, 24 [16]
9282    //   rlwinm ptr, ptr1, 0, 0, 29
9283    //   slw newval2, newval, shift
9284    //   slw oldval2, oldval,shift
9285    //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
9286    //   slw mask, mask2, shift
9287    //   and newval3, newval2, mask
9288    //   and oldval3, oldval2, mask
9289    // loop1MBB:
9290    //   lwarx tmpDest, ptr
9291    //   and tmp, tmpDest, mask
9292    //   cmpw tmp, oldval3
9293    //   bne- midMBB
9294    // loop2MBB:
9295    //   andc tmp2, tmpDest, mask
9296    //   or tmp4, tmp2, newval3
9297    //   stwcx. tmp4, ptr
9298    //   bne- loop1MBB
9299    //   b exitBB
9300    // midMBB:
9301    //   stwcx. tmpDest, ptr
9302    // exitBB:
9303    //   srw dest, tmpDest, shift
9304    if (ptrA != ZeroReg) {
9305      Ptr1Reg = RegInfo.createVirtualRegister(RC);
9306      BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
9307        .addReg(ptrA).addReg(ptrB);
9308    } else {
9309      Ptr1Reg = ptrB;
9310    }
9311    BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
9312        .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
9313    BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
9314        .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
9315    if (is64bit)
9316      BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
9317        .addReg(Ptr1Reg).addImm(0).addImm(61);
9318    else
9319      BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
9320        .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
9321    BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
9322        .addReg(newval).addReg(ShiftReg);
9323    BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
9324        .addReg(oldval).addReg(ShiftReg);
9325    if (is8bit)
9326      BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
9327    else {
9328      BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
9329      BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
9330        .addReg(Mask3Reg).addImm(65535);
9331    }
9332    BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
9333        .addReg(Mask2Reg).addReg(ShiftReg);
9334    BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
9335        .addReg(NewVal2Reg).addReg(MaskReg);
9336    BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
9337        .addReg(OldVal2Reg).addReg(MaskReg);
9338
9339    BB = loop1MBB;
9340    BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
9341        .addReg(ZeroReg).addReg(PtrReg);
9342    BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
9343        .addReg(TmpDestReg).addReg(MaskReg);
9344    BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
9345        .addReg(TmpReg).addReg(OldVal3Reg);
9346    BuildMI(BB, dl, TII->get(PPC::BCC))
9347        .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
9348    BB->addSuccessor(loop2MBB);
9349    BB->addSuccessor(midMBB);
9350
9351    BB = loop2MBB;
9352    BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
9353        .addReg(TmpDestReg).addReg(MaskReg);
9354    BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
9355        .addReg(Tmp2Reg).addReg(NewVal3Reg);
9356    BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
9357        .addReg(ZeroReg).addReg(PtrReg);
9358    BuildMI(BB, dl, TII->get(PPC::BCC))
9359      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
9360    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
9361    BB->addSuccessor(loop1MBB);
9362    BB->addSuccessor(exitMBB);
9363
9364    BB = midMBB;
9365    BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
9366      .addReg(ZeroReg).addReg(PtrReg);
9367    BB->addSuccessor(exitMBB);
9368
9369    //  exitMBB:
9370    //   ...
9371    BB = exitMBB;
9372    BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
9373        .addReg(TmpReg).addReg(ShiftReg);
9374  } else if (MI.getOpcode() == PPC::FADDrtz) {
9375    // This pseudo performs an FADD with rounding mode temporarily forced
9376    // to round-to-zero.  We emit this via custom inserter since the FPSCR
9377    // is not modeled at the SelectionDAG level.
9378    unsigned Dest = MI.getOperand(0).getReg();
9379    unsigned Src1 = MI.getOperand(1).getReg();
9380    unsigned Src2 = MI.getOperand(2).getReg();
9381    DebugLoc dl = MI.getDebugLoc();
9382
9383    MachineRegisterInfo &RegInfo = F->getRegInfo();
9384    unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
9385
9386    // Save FPSCR value.
9387    BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
9388
9389    // Set rounding mode to round-to-zero.
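    // FPSCR bits 30-31 form the RN (rounding control) field; RN = 0b01 means
    // round toward zero, so set bit 31 and clear bit 30.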
9390    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
9391    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);
9392
9393    // Perform addition.
9394    BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
9395
9396    // Restore FPSCR value.
9397    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
9398  } else if (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
9399             MI.getOpcode() == PPC::ANDIo_1_GT_BIT ||
9400             MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
9401             MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) {
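    // These pseudos expand to a record-form "andi. Dest, Src, 1", which sets
    // CR0: EQ when (Src & 1) == 0 and GT when (Src & 1) == 1. The requested
    // CR0 bit is then copied into the i1 result.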
9402    unsigned Opcode = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
9403                       MI.getOpcode() == PPC::ANDIo_1_GT_BIT8)
9404                          ? PPC::ANDIo8
9405                          : PPC::ANDIo;
9406    bool isEQ = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
9407                 MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8);
9408
9409    MachineRegisterInfo &RegInfo = F->getRegInfo();
9410    unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ?
9411                                                  &PPC::GPRCRegClass :
9412                                                  &PPC::G8RCRegClass);
9413
9414    DebugLoc dl = MI.getDebugLoc();
9415    BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
9416        .addReg(MI.getOperand(1).getReg())
9417        .addImm(1);
9418    BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
9419            MI.getOperand(0).getReg())
9420        .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
9421  } else if (MI.getOpcode() == PPC::TCHECK_RET) {
9422    DebugLoc Dl = MI.getDebugLoc();
9423    MachineRegisterInfo &RegInfo = F->getRegInfo();
9424    unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
9425    BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
    // Define the pseudo's CR result, then fall through so the TCHECK_RET
    // pseudo is erased below; returning here would leave the pseudo (and its
    // undefined result) in place.
9426    BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
            MI.getOperand(0).getReg())
        .addReg(CRReg);
9427  } else {
9428    llvm_unreachable("Unexpected instr type to insert");
9429  }
9430
9431  MI.eraseFromParent(); // The pseudo instruction is gone now.
9432  return BB;
9433}
9434
9435//===----------------------------------------------------------------------===//
9436// Target Optimization Hooks
9437//===----------------------------------------------------------------------===//
9438
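// Build the key used to look up reciprocal-estimate settings, e.g.
// getRecipOp("sqrt", MVT::f64) yields "sqrtd" and getRecipOp("div",
// MVT::v4f32) yields "vec-divf".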
9439static std::string getRecipOp(const char *Base, EVT VT) {
9440  std::string RecipOp(Base);
9441  if (VT.getScalarType() == MVT::f64)
9442    RecipOp += "d";
9443  else
9444    RecipOp += "f";
9445
9446  if (VT.isVector())
9447    RecipOp = "vec-" + RecipOp;
9448
9449  return RecipOp;
9450}
9451
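// Return an estimate node for 1/sqrt(Operand) when one is available. The
// estimate is expected to be refined by the combiner (typically with
// Newton-Raphson iterations); RefinementSteps reports how many to use.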
9452SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
9453                                            DAGCombinerInfo &DCI,
9454                                            unsigned &RefinementSteps,
9455                                            bool &UseOneConstNR) const {
9456  EVT VT = Operand.getValueType();
9457  if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
9458      (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
9459      (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
9460      (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
9461      (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
9462      (VT == MVT::v4f64 && Subtarget.hasQPX())) {
9463    TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
9464    std::string RecipOp = getRecipOp("sqrt", VT);
9465    if (!Recips.isEnabled(RecipOp))
9466      return SDValue();
9467
9468    RefinementSteps = Recips.getRefinementSteps(RecipOp);
9469    UseOneConstNR = true;
9470    return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
9471  }
9472  return SDValue();
9473}
9474
9475SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
9476                                            DAGCombinerInfo &DCI,
9477                                            unsigned &RefinementSteps) const {
9478  EVT VT = Operand.getValueType();
9479  if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
9480      (VT == MVT::f64 && Subtarget.hasFRE()) ||
9481      (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
9482      (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
9483      (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
9484      (VT == MVT::v4f64 && Subtarget.hasQPX())) {
9485    TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
9486    std::string RecipOp = getRecipOp("div", VT);
9487    if (!Recips.isEnabled(RecipOp))
9488      return SDValue();
9489
9490    RefinementSteps = Recips.getRefinementSteps(RecipOp);
9491    return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
9492  }
9493  return SDValue();
9494}
9495
9496unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
9497  // Note: This functionality is used only when unsafe-fp-math is enabled. On
9498  // cores with reciprocal estimates (which are used when unsafe-fp-math is
9499  // enabled for division), this functionality is redundant with the default
9500  // combiner logic (once the division -> reciprocal/multiply transformation
9501  // has taken place). As a result, this matters more for older cores than for
9502  // newer ones.
9503
9504  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
9505  // reciprocal if there are two or more FDIVs (for embedded cores with only
9506  // one FP pipeline) or three or more FDIVs (for generic OOO cores).
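  // For example, once the threshold is met, (x / d) and (y / d) in the same
  // DAG become r = 1.0 / d, x * r, and y * r.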
9507  switch (Subtarget.getDarwinDirective()) {
9508  default:
9509    return 3;
9510  case PPC::DIR_440:
9511  case PPC::DIR_A2:
9512  case PPC::DIR_E500mc:
9513  case PPC::DIR_E5500:
9514    return 2;
9515  }
9516}
9517
9518// isConsecutiveLSLoc needs to work even if all adds have not yet been
9519// collapsed, and so we need to look through chains of them.
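// For example, (add (add X, 8), 16) accumulates Base = X and Offset += 24.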
9520static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
9521                                     int64_t& Offset, SelectionDAG &DAG) {
9522  if (DAG.isBaseWithConstantOffset(Loc)) {
9523    Base = Loc.getOperand(0);
9524    Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
9525
9526    // The base might itself be a base plus an offset, and if so, accumulate
9527    // that as well.
9528    getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
9529  }
9530}
9531
9532static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
9533                            unsigned Bytes, int Dist,
9534                            SelectionDAG &DAG) {
9535  if (VT.getSizeInBits() / 8 != Bytes)
9536    return false;
9537
9538  SDValue BaseLoc = Base->getBasePtr();
9539  if (Loc.getOpcode() == ISD::FrameIndex) {
9540    if (BaseLoc.getOpcode() != ISD::FrameIndex)
9541      return false;
9542    const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
9543    int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
9544    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
9545    int FS  = MFI->getObjectSize(FI);
9546    int BFS = MFI->getObjectSize(BFI);
9547    if (FS != BFS || FS != (int)Bytes) return false;
9548    return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
9549  }
9550
9551  SDValue Base1 = Loc, Base2 = BaseLoc;
9552  int64_t Offset1 = 0, Offset2 = 0;
9553  getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
9554  getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
9555  if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
9556    return true;
9557
9558  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9559  const GlobalValue *GV1 = nullptr;
9560  const GlobalValue *GV2 = nullptr;
9561  Offset1 = 0;
9562  Offset2 = 0;
9563  bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
9564  bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
9565  if (isGA1 && isGA2 && GV1 == GV2)
9566    return Offset1 == (Offset2 + Dist*Bytes);
9567  return false;
9568}
9569
9570// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
9571// not enforce equality of the chain operands.
9572static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
9573                            unsigned Bytes, int Dist,
9574                            SelectionDAG &DAG) {
9575  if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
9576    EVT VT = LS->getMemoryVT();
9577    SDValue Loc = LS->getBasePtr();
9578    return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
9579  }
9580
9581  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
9582    EVT VT;
9583    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
9584    default: return false;
9585    case Intrinsic::ppc_qpx_qvlfd:
9586    case Intrinsic::ppc_qpx_qvlfda:
9587      VT = MVT::v4f64;
9588      break;
9589    case Intrinsic::ppc_qpx_qvlfs:
9590    case Intrinsic::ppc_qpx_qvlfsa:
9591      VT = MVT::v4f32;
9592      break;
9593    case Intrinsic::ppc_qpx_qvlfcd:
9594    case Intrinsic::ppc_qpx_qvlfcda:
9595      VT = MVT::v2f64;
9596      break;
9597    case Intrinsic::ppc_qpx_qvlfcs:
9598    case Intrinsic::ppc_qpx_qvlfcsa:
9599      VT = MVT::v2f32;
9600      break;
9601    case Intrinsic::ppc_qpx_qvlfiwa:
9602    case Intrinsic::ppc_qpx_qvlfiwz:
9603    case Intrinsic::ppc_altivec_lvx:
9604    case Intrinsic::ppc_altivec_lvxl:
9605    case Intrinsic::ppc_vsx_lxvw4x:
9606      VT = MVT::v4i32;
9607      break;
9608    case Intrinsic::ppc_vsx_lxvd2x:
9609      VT = MVT::v2f64;
9610      break;
9611    case Intrinsic::ppc_altivec_lvebx:
9612      VT = MVT::i8;
9613      break;
9614    case Intrinsic::ppc_altivec_lvehx:
9615      VT = MVT::i16;
9616      break;
9617    case Intrinsic::ppc_altivec_lvewx:
9618      VT = MVT::i32;
9619      break;
9620    }
9621
9622    return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
9623  }
9624
9625  if (N->getOpcode() == ISD::INTRINSIC_VOID) {
9626    EVT VT;
9627    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
9628    default: return false;
9629    case Intrinsic::ppc_qpx_qvstfd:
9630    case Intrinsic::ppc_qpx_qvstfda:
9631      VT = MVT::v4f64;
9632      break;
9633    case Intrinsic::ppc_qpx_qvstfs:
9634    case Intrinsic::ppc_qpx_qvstfsa:
9635      VT = MVT::v4f32;
9636      break;
9637    case Intrinsic::ppc_qpx_qvstfcd:
9638    case Intrinsic::ppc_qpx_qvstfcda:
9639      VT = MVT::v2f64;
9640      break;
9641    case Intrinsic::ppc_qpx_qvstfcs:
9642    case Intrinsic::ppc_qpx_qvstfcsa:
9643      VT = MVT::v2f32;
9644      break;
9645    case Intrinsic::ppc_qpx_qvstfiw:
9646    case Intrinsic::ppc_qpx_qvstfiwa:
9647    case Intrinsic::ppc_altivec_stvx:
9648    case Intrinsic::ppc_altivec_stvxl:
9649    case Intrinsic::ppc_vsx_stxvw4x:
9650      VT = MVT::v4i32;
9651      break;
9652    case Intrinsic::ppc_vsx_stxvd2x:
9653      VT = MVT::v2f64;
9654      break;
9655    case Intrinsic::ppc_altivec_stvebx:
9656      VT = MVT::i8;
9657      break;
9658    case Intrinsic::ppc_altivec_stvehx:
9659      VT = MVT::i16;
9660      break;
9661    case Intrinsic::ppc_altivec_stvewx:
9662      VT = MVT::i32;
9663      break;
9664    }
9665
9666    return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
9667  }
9668
9669  return false;
9670}
9671
9672// Return true if there is a nearby consecutive load to the one provided
9673// (regardless of alignment). We search up and down the chain, looking through
9674// token factors and other loads (but nothing else). As a result, a true result
9675// indicates that it is safe to create a new consecutive load adjacent to the
9676// load provided.
9677static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
9678  SDValue Chain = LD->getChain();
9679  EVT VT = LD->getMemoryVT();
9680
9681  SmallSet<SDNode *, 16> LoadRoots;
9682  SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
9683  SmallSet<SDNode *, 16> Visited;
9684
9685  // First, search up the chain, branching to follow all token-factor operands.
9686  // If we find a consecutive load, then we're done, otherwise, record all
9687  // nodes just above the top-level loads and token factors.
9688  while (!Queue.empty()) {
9689    SDNode *ChainNext = Queue.pop_back_val();
9690    if (!Visited.insert(ChainNext).second)
9691      continue;
9692
9693    if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
9694      if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
9695        return true;
9696
9697      if (!Visited.count(ChainLD->getChain().getNode()))
9698        Queue.push_back(ChainLD->getChain().getNode());
9699    } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
9700      for (const SDUse &O : ChainNext->ops())
9701        if (!Visited.count(O.getNode()))
9702          Queue.push_back(O.getNode());
9703    } else
9704      LoadRoots.insert(ChainNext);
9705  }
9706
9707  // Second, search down the chain, starting from the top-level nodes recorded
9708  // in the first phase. These top-level nodes are the nodes just above all
9709// loads and token factors. Starting with their uses, recursively look through
9710  // all loads (just the chain uses) and token factors to find a consecutive
9711  // load.
9712  Visited.clear();
9713  Queue.clear();
9714
9715  for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
9716       IE = LoadRoots.end(); I != IE; ++I) {
9717    Queue.push_back(*I);
9718
9719    while (!Queue.empty()) {
9720      SDNode *LoadRoot = Queue.pop_back_val();
9721      if (!Visited.insert(LoadRoot).second)
9722        continue;
9723
9724      if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
9725        if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
9726          return true;
9727
9728      for (SDNode::use_iterator UI = LoadRoot->use_begin(),
9729           UE = LoadRoot->use_end(); UI != UE; ++UI)
9730        if (((isa<MemSDNode>(*UI) &&
9731            cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
9732            UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
9733          Queue.push_back(*UI);
9734    }
9735  }
9736
9737  return false;
9738}
9739
9740SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
9741                                                  DAGCombinerInfo &DCI) const {
9742  SelectionDAG &DAG = DCI.DAG;
9743  SDLoc dl(N);
9744
9745  assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
9746  // If we're tracking CR bits, we need to be careful that we don't have:
9747  //   trunc(binary-ops(zext(x), zext(y)))
9748  // or
9749  //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...))
9750  // such that we're unnecessarily moving things into GPRs when it would be
9751  // better to keep them in CR bits.
9752
9753  // Note that trunc here can be an actual i1 trunc, or can be the effective
9754  // truncation that comes from a setcc or select_cc.
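  // For example, (trunc (and (zext i1 a), (zext i1 b))) can have the AND
  // performed directly on the i1 values in CR bits.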
9755  if (N->getOpcode() == ISD::TRUNCATE &&
9756      N->getValueType(0) != MVT::i1)
9757    return SDValue();
9758
9759  if (N->getOperand(0).getValueType() != MVT::i32 &&
9760      N->getOperand(0).getValueType() != MVT::i64)
9761    return SDValue();
9762
9763  if (N->getOpcode() == ISD::SETCC ||
9764      N->getOpcode() == ISD::SELECT_CC) {
9765    // If we're looking at a comparison, then we need to make sure that the
9766    // high bits (all except for the first) don't affect the result.
9767    ISD::CondCode CC =
9768      cast<CondCodeSDNode>(N->getOperand(
9769        N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
9770    unsigned OpBits = N->getOperand(0).getValueSizeInBits();
9771
9772    if (ISD::isSignedIntSetCC(CC)) {
9773      if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
9774          DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
9775        return SDValue();
9776    } else if (ISD::isUnsignedIntSetCC(CC)) {
9777      if (!DAG.MaskedValueIsZero(N->getOperand(0),
9778                                 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
9779          !DAG.MaskedValueIsZero(N->getOperand(1),
9780                                 APInt::getHighBitsSet(OpBits, OpBits-1)))
9781        return SDValue();
9782    } else {
9783      // This is neither a signed nor an unsigned comparison; just make sure
9784      // that the high bits are equal.
9785      APInt Op1Zero, Op1One;
9786      APInt Op2Zero, Op2One;
9787      DAG.computeKnownBits(N->getOperand(0), Op1Zero, Op1One);
9788      DAG.computeKnownBits(N->getOperand(1), Op2Zero, Op2One);
9789
9790      // We don't really care about what is known about the first bit (if
9791      // anything), so clear it in all masks prior to comparing them.
9792      Op1Zero.clearBit(0); Op1One.clearBit(0);
9793      Op2Zero.clearBit(0); Op2One.clearBit(0);
9794
9795      if (Op1Zero != Op2Zero || Op1One != Op2One)
9796        return SDValue();
9797    }
9798  }
9799
9800  // We now know that the higher-order bits are irrelevant; we just need to
9801  // make sure that all of the intermediate operations are bit operations, and
9802  // all inputs are extensions.
9803  if (N->getOperand(0).getOpcode() != ISD::AND &&
9804      N->getOperand(0).getOpcode() != ISD::OR  &&
9805      N->getOperand(0).getOpcode() != ISD::XOR &&
9806      N->getOperand(0).getOpcode() != ISD::SELECT &&
9807      N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
9808      N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
9809      N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
9810      N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
9811      N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
9812    return SDValue();
9813
9814  if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
9815      N->getOperand(1).getOpcode() != ISD::AND &&
9816      N->getOperand(1).getOpcode() != ISD::OR  &&
9817      N->getOperand(1).getOpcode() != ISD::XOR &&
9818      N->getOperand(1).getOpcode() != ISD::SELECT &&
9819      N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
9820      N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
9821      N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
9822      N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
9823      N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
9824    return SDValue();
9825
9826  SmallVector<SDValue, 4> Inputs;
9827  SmallVector<SDValue, 8> BinOps, PromOps;
9828  SmallPtrSet<SDNode *, 16> Visited;
9829
9830  for (unsigned i = 0; i < 2; ++i) {
9831    if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
9832          N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
9833          N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
9834          N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
9835        isa<ConstantSDNode>(N->getOperand(i)))
9836      Inputs.push_back(N->getOperand(i));
9837    else
9838      BinOps.push_back(N->getOperand(i));
9839
9840    if (N->getOpcode() == ISD::TRUNCATE)
9841      break;
9842  }
9843
9844  // Visit all inputs, collect all binary operations (and, or, xor and
9845  // select) that are all fed by extensions.
9846  while (!BinOps.empty()) {
9847    SDValue BinOp = BinOps.back();
9848    BinOps.pop_back();
9849
9850    if (!Visited.insert(BinOp.getNode()).second)
9851      continue;
9852
9853    PromOps.push_back(BinOp);
9854
9855    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
9856      // The condition of the select is not promoted.
9857      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
9858        continue;
9859      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
9860        continue;
9861
9862      if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
9863            BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
9864            BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
9865           BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
9866          isa<ConstantSDNode>(BinOp.getOperand(i))) {
9867        Inputs.push_back(BinOp.getOperand(i));
9868      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
9869                 BinOp.getOperand(i).getOpcode() == ISD::OR  ||
9870                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
9871                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
9872                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
9873                 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
9874                 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
9875                 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
9876                 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
9877        BinOps.push_back(BinOp.getOperand(i));
9878      } else {
9879        // We have an input that is not an extension or another binary
9880        // operation; we'll abort this transformation.
9881        return SDValue();
9882      }
9883    }
9884  }
9885
9886  // Make sure that this is a self-contained cluster of operations (which
9887  // is not quite the same thing as saying that everything has only one
9888  // use).
9889  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
9890    if (isa<ConstantSDNode>(Inputs[i]))
9891      continue;
9892
9893    for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
9894                              UE = Inputs[i].getNode()->use_end();
9895         UI != UE; ++UI) {
9896      SDNode *User = *UI;
9897      if (User != N && !Visited.count(User))
9898        return SDValue();
9899
9900      // Make sure that we're not going to promote the non-output-value
9901      // operand(s) of SELECT or SELECT_CC.
9902      // FIXME: Although we could sometimes handle this, and it does occur in
9903      // practice that one of the condition inputs to the select is also one of
9904      // the outputs, we currently can't deal with this.
9905      if (User->getOpcode() == ISD::SELECT) {
9906        if (User->getOperand(0) == Inputs[i])
9907          return SDValue();
9908      } else if (User->getOpcode() == ISD::SELECT_CC) {
9909        if (User->getOperand(0) == Inputs[i] ||
9910            User->getOperand(1) == Inputs[i])
9911          return SDValue();
9912      }
9913    }
9914  }
9915
9916  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
9917    for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
9918                              UE = PromOps[i].getNode()->use_end();
9919         UI != UE; ++UI) {
9920      SDNode *User = *UI;
9921      if (User != N && !Visited.count(User))
9922        return SDValue();
9923
9924      // Make sure that we're not going to promote the non-output-value
9925      // operand(s) of SELECT or SELECT_CC.
9926      // FIXME: Although we could sometimes handle this, and it does occur in
9927      // practice that one of the condition inputs to the select is also one of
9928      // the outputs, we currently can't deal with this.
9929      if (User->getOpcode() == ISD::SELECT) {
9930        if (User->getOperand(0) == PromOps[i])
9931          return SDValue();
9932      } else if (User->getOpcode() == ISD::SELECT_CC) {
9933        if (User->getOperand(0) == PromOps[i] ||
9934            User->getOperand(1) == PromOps[i])
9935          return SDValue();
9936      }
9937    }
9938  }
9939
9940  // Replace all inputs with the extension operand.
9941  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
9942    // Constants may have users outside the cluster of to-be-promoted nodes,
9943    // so they are replaced as each operation is promoted rather than here.
9944    if (isa<ConstantSDNode>(Inputs[i]))
9945      continue;
9946
9947    DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
9948  }
9949
9950  std::list<HandleSDNode> PromOpHandles;
9951  for (auto &PromOp : PromOps)
9952    PromOpHandles.emplace_back(PromOp);
9953
9954  // Replace all operations (these are all the same, but have a different
9955  // (i1) return type). DAG.getNode will validate that the types of
9956  // a binary operator match, so go through the list in reverse so that
9957  // we've likely promoted both operands first. Any intermediate truncations or
9958  // extensions disappear.
9959  while (!PromOpHandles.empty()) {
9960    SDValue PromOp = PromOpHandles.back().getValue();
9961    PromOpHandles.pop_back();
9962
9963    if (PromOp.getOpcode() == ISD::TRUNCATE ||
9964        PromOp.getOpcode() == ISD::SIGN_EXTEND ||
9965        PromOp.getOpcode() == ISD::ZERO_EXTEND ||
9966        PromOp.getOpcode() == ISD::ANY_EXTEND) {
9967      if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
9968          PromOp.getOperand(0).getValueType() != MVT::i1) {
9969        // The operand is not yet ready (see comment below).
9970        PromOpHandles.emplace_front(PromOp);
9971        continue;
9972      }
9973
9974      SDValue RepValue = PromOp.getOperand(0);
9975      if (isa<ConstantSDNode>(RepValue))
9976        RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
9977
9978      DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
9979      continue;
9980    }
9981
9982    unsigned C;
9983    switch (PromOp.getOpcode()) {
9984    default:             C = 0; break;
9985    case ISD::SELECT:    C = 1; break;
9986    case ISD::SELECT_CC: C = 2; break;
9987    }
9988
9989    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
9990         PromOp.getOperand(C).getValueType() != MVT::i1) ||
9991        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
9992         PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
9993      // The to-be-promoted operands of this node have not yet been
9994      // promoted (this should be rare because we're going through the
9995      // list backward, but if one of the operands has several users in
9996      // this cluster of to-be-promoted nodes, it is possible).
9997      PromOpHandles.emplace_front(PromOp);
9998      continue;
9999    }
10000
10001    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
10002                                PromOp.getNode()->op_end());
10003
10004    // If there are any constant inputs, make sure they're replaced now.
10005    for (unsigned i = 0; i < 2; ++i)
10006      if (isa<ConstantSDNode>(Ops[C+i]))
10007        Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
10008
10009    DAG.ReplaceAllUsesOfValueWith(PromOp,
10010      DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
10011  }
10012
10013  // Now we're left with the initial truncation itself.
10014  if (N->getOpcode() == ISD::TRUNCATE)
10015    return N->getOperand(0);
10016
10017  // Otherwise, this is a comparison. The operands to be compared have just
10018  // changed type (to i1), but everything else is the same.
10019  return SDValue(N, 0);
10020}
10021
10022SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
10023                                                  DAGCombinerInfo &DCI) const {
10024  SelectionDAG &DAG = DCI.DAG;
10025  SDLoc dl(N);
10026
10027  // If we're tracking CR bits, we need to be careful that we don't have:
10028  //   zext(binary-ops(trunc(x), trunc(y)))
10029  // or
10030  //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...))
10031  // such that we're unnecessarily moving things into CR bits that can more
10032  // efficiently stay in GPRs. Note that if we're not certain that the high
10033  // bits are set as required by the final extension, we still may need to do
10034  // some masking to get the proper behavior.
10035
10036  // This same functionality is important on PPC64 when dealing with
10037  // 32-to-64-bit extensions; these occur often when 32-bit values are used as
10038  // the return values of functions. Because it is so similar, it is handled
10039  // here as well.
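  // For example, (zext (and (trunc x), (trunc y))) can instead perform the
  // AND on the wider values of x and y, masking the result if needed to
  // preserve the zero-extension semantics.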
10040
10041  if (N->getValueType(0) != MVT::i32 &&
10042      N->getValueType(0) != MVT::i64)
10043    return SDValue();
10044
10045  if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
10046        (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
10047    return SDValue();
10048
10049  if (N->getOperand(0).getOpcode() != ISD::AND &&
10050      N->getOperand(0).getOpcode() != ISD::OR  &&
10051      N->getOperand(0).getOpcode() != ISD::XOR &&
10052      N->getOperand(0).getOpcode() != ISD::SELECT &&
10053      N->getOperand(0).getOpcode() != ISD::SELECT_CC)
10054    return SDValue();
10055
10056  SmallVector<SDValue, 4> Inputs;
10057  SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
10058  SmallPtrSet<SDNode *, 16> Visited;
10059
10060  // Visit all inputs, collect all binary operations (and, or, xor and
10061  // select) that are all fed by truncations.
10062  while (!BinOps.empty()) {
10063    SDValue BinOp = BinOps.back();
10064    BinOps.pop_back();
10065
10066    if (!Visited.insert(BinOp.getNode()).second)
10067      continue;
10068
10069    PromOps.push_back(BinOp);
10070
10071    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
10072      // The condition of the select is not promoted.
10073      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
10074        continue;
10075      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
10076        continue;
10077
10078      if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
10079          isa<ConstantSDNode>(BinOp.getOperand(i))) {
10080        Inputs.push_back(BinOp.getOperand(i));
10081      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
10082                 BinOp.getOperand(i).getOpcode() == ISD::OR  ||
10083                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
10084                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
10085                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
10086        BinOps.push_back(BinOp.getOperand(i));
10087      } else {
10088        // We have an input that is not a truncation or another binary
10089        // operation; we'll abort this transformation.
10090        return SDValue();
10091      }
10092    }
10093  }
10094
10095  // The operands of a select that must be truncated when the select is
10096  // promoted because the operand is actually part of the to-be-promoted set.
10097  DenseMap<SDNode *, EVT> SelectTruncOp[2];
10098
10099  // Make sure that this is a self-contained cluster of operations (which
10100  // is not quite the same thing as saying that everything has only one
10101  // use).
10102  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
10103    if (isa<ConstantSDNode>(Inputs[i]))
10104      continue;
10105
10106    for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
10107                              UE = Inputs[i].getNode()->use_end();
10108         UI != UE; ++UI) {
10109      SDNode *User = *UI;
10110      if (User != N && !Visited.count(User))
10111        return SDValue();
10112
10113      // If we're going to promote the non-output-value operand(s) of SELECT or
10114      // SELECT_CC, record them for truncation.
10115      if (User->getOpcode() == ISD::SELECT) {
10116        if (User->getOperand(0) == Inputs[i])
10117          SelectTruncOp[0].insert(std::make_pair(User,
10118                                    User->getOperand(0).getValueType()));
10119      } else if (User->getOpcode() == ISD::SELECT_CC) {
10120        if (User->getOperand(0) == Inputs[i])
10121          SelectTruncOp[0].insert(std::make_pair(User,
10122                                    User->getOperand(0).getValueType()));
10123        if (User->getOperand(1) == Inputs[i])
10124          SelectTruncOp[1].insert(std::make_pair(User,
10125                                    User->getOperand(1).getValueType()));
10126      }
10127    }
10128  }
10129
10130  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
10131    for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
10132                              UE = PromOps[i].getNode()->use_end();
10133         UI != UE; ++UI) {
10134      SDNode *User = *UI;
10135      if (User != N && !Visited.count(User))
10136        return SDValue();
10137
10138      // If we're going to promote the non-output-value operand(s) of SELECT or
10139      // SELECT_CC, record them for truncation.
10140      if (User->getOpcode() == ISD::SELECT) {
10141        if (User->getOperand(0) == PromOps[i])
10142          SelectTruncOp[0].insert(std::make_pair(User,
10143                                    User->getOperand(0).getValueType()));
10144      } else if (User->getOpcode() == ISD::SELECT_CC) {
10145        if (User->getOperand(0) == PromOps[i])
10146          SelectTruncOp[0].insert(std::make_pair(User,
10147                                    User->getOperand(0).getValueType()));
10148        if (User->getOperand(1) == PromOps[i])
10149          SelectTruncOp[1].insert(std::make_pair(User,
10150                                    User->getOperand(1).getValueType()));
10151      }
10152    }
10153  }
10154
10155  unsigned PromBits = N->getOperand(0).getValueSizeInBits();
10156  bool ReallyNeedsExt = false;
10157  if (N->getOpcode() != ISD::ANY_EXTEND) {
10158    // If not all of the inputs are already sign/zero-extended, then we'll
10159    // still need to do that at the end.
10160    for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
10161      if (isa<ConstantSDNode>(Inputs[i]))
10162        continue;
10163
10164      unsigned OpBits =
10165        Inputs[i].getOperand(0).getValueSizeInBits();
10166      assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
10167
10168      if ((N->getOpcode() == ISD::ZERO_EXTEND &&
10169           !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
10170                                  APInt::getHighBitsSet(OpBits,
10171                                                        OpBits-PromBits))) ||
10172          (N->getOpcode() == ISD::SIGN_EXTEND &&
10173           DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
10174             (OpBits-(PromBits-1)))) {
10175        ReallyNeedsExt = true;
10176        break;
10177      }
10178    }
10179  }
10180
10181  // Replace all inputs, either with the truncation operand, or a
10182  // truncation or extension to the final output type.
10183  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
10184    // Constant inputs need to be replaced with the to-be-promoted nodes that
10185    // use them because they might have users outside of the cluster of
10186    // promoted nodes.
10187    if (isa<ConstantSDNode>(Inputs[i]))
10188      continue;
10189
10190    SDValue InSrc = Inputs[i].getOperand(0);
10191    if (Inputs[i].getValueType() == N->getValueType(0))
10192      DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
10193    else if (N->getOpcode() == ISD::SIGN_EXTEND)
10194      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
10195        DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
10196    else if (N->getOpcode() == ISD::ZERO_EXTEND)
10197      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
10198        DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
10199    else
10200      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
10201        DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
10202  }
10203
10204  std::list<HandleSDNode> PromOpHandles;
10205  for (auto &PromOp : PromOps)
10206    PromOpHandles.emplace_back(PromOp);
10207
10208  // Replace all operations (these are all the same, but have a different
10209  // (promoted) return type). DAG.getNode will validate that the types of
10210  // a binary operator match, so go through the list in reverse so that
10211  // we've likely promoted both operands first.
10212  while (!PromOpHandles.empty()) {
10213    SDValue PromOp = PromOpHandles.back().getValue();
10214    PromOpHandles.pop_back();
10215
10216    unsigned C;
10217    switch (PromOp.getOpcode()) {
10218    default:             C = 0; break;
10219    case ISD::SELECT:    C = 1; break;
10220    case ISD::SELECT_CC: C = 2; break;
10221    }
10222
10223    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
10224         PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
10225        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
10226         PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
10227      // The to-be-promoted operands of this node have not yet been
10228      // promoted (this should be rare because we're going through the
10229      // list backward, but if one of the operands has several users in
10230      // this cluster of to-be-promoted nodes, it is possible).
10231      PromOpHandles.emplace_front(PromOp);
10232      continue;
10233    }
10234
10235    // For SELECT and SELECT_CC nodes, we do a similar check for any
10236    // to-be-promoted comparison inputs.
10237    if (PromOp.getOpcode() == ISD::SELECT ||
10238        PromOp.getOpcode() == ISD::SELECT_CC) {
10239      if ((SelectTruncOp[0].count(PromOp.getNode()) &&
10240           PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
10241          (SelectTruncOp[1].count(PromOp.getNode()) &&
10242           PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
10243        PromOpHandles.emplace_front(PromOp);
10244        continue;
10245      }
10246    }
10247
10248    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
10249                                PromOp.getNode()->op_end());
10250
10251    // If this node has constant inputs, then they'll need to be promoted here.
10252    for (unsigned i = 0; i < 2; ++i) {
10253      if (!isa<ConstantSDNode>(Ops[C+i]))
10254        continue;
10255      if (Ops[C+i].getValueType() == N->getValueType(0))
10256        continue;
10257
10258      if (N->getOpcode() == ISD::SIGN_EXTEND)
10259        Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
10260      else if (N->getOpcode() == ISD::ZERO_EXTEND)
10261        Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
10262      else
10263        Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
10264    }
10265
10266    // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
10267    // truncate them again to the original value type.
10268    if (PromOp.getOpcode() == ISD::SELECT ||
10269        PromOp.getOpcode() == ISD::SELECT_CC) {
10270      auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
10271      if (SI0 != SelectTruncOp[0].end())
10272        Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
10273      auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
10274      if (SI1 != SelectTruncOp[1].end())
10275        Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
10276    }
10277
10278    DAG.ReplaceAllUsesOfValueWith(PromOp,
10279      DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
10280  }
10281
10282  // Now we're left with the initial extension itself.
10283  if (!ReallyNeedsExt)
10284    return N->getOperand(0);
10285
  // To zero extend, just mask off everything except for the low PromBits
  // bits (just the first bit in the i1 case).
10288  if (N->getOpcode() == ISD::ZERO_EXTEND)
10289    return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
10290                       DAG.getConstant(APInt::getLowBitsSet(
10291                                         N->getValueSizeInBits(0), PromBits),
10292                                       dl, N->getValueType(0)));
10293
10294  assert(N->getOpcode() == ISD::SIGN_EXTEND &&
10295         "Invalid extension type");
10296  EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
10297  SDValue ShiftCst =
10298      DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
10299  return DAG.getNode(
10300      ISD::SRA, dl, N->getValueType(0),
10301      DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
10302      ShiftCst);
10303}
10304
10305SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
10306                                                 DAGCombinerInfo &DCI) const {
10307  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
10308         "Should be called with a BUILD_VECTOR node");
10309
10310  SelectionDAG &DAG = DCI.DAG;
10311  SDLoc dl(N);
10312  if (N->getValueType(0) != MVT::v2f64 || !Subtarget.hasVSX())
10313    return SDValue();
10314
  // Looking for:
  // (build_vector ([su]int_to_fp (extractelt 0)),
  //               ([su]int_to_fp (extractelt 1)))
10317  if (N->getOperand(0).getOpcode() != ISD::SINT_TO_FP &&
10318      N->getOperand(0).getOpcode() != ISD::UINT_TO_FP)
10319    return SDValue();
10320  if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
10321      N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
10322    return SDValue();
10323  if (N->getOperand(0).getOpcode() != N->getOperand(1).getOpcode())
10324    return SDValue();
10325
10326  SDValue Ext1 = N->getOperand(0).getOperand(0);
10327  SDValue Ext2 = N->getOperand(1).getOperand(0);
  if (Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
10329     Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
10330    return SDValue();
10331
10332  ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
10333  ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
10334  if (!Ext1Op || !Ext2Op)
10335    return SDValue();
  if (Ext1.getValueType() != MVT::i32 ||
      Ext2.getValueType() != MVT::i32)
    return SDValue();
  if (Ext1.getOperand(0) != Ext2.getOperand(0))
    return SDValue();
10340
10341  int FirstElem = Ext1Op->getZExtValue();
10342  int SecondElem = Ext2Op->getZExtValue();
10343  int SubvecIdx;
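  // Accept only element pairs that cover one half of the source vector. On
  // little-endian subtargets the two halves of a VSX register are numbered
  // in the opposite order, so the subvector index is flipped relative to
  // big endian.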
10344  if (FirstElem == 0 && SecondElem == 1)
10345    SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
10346  else if (FirstElem == 2 && SecondElem == 3)
10347    SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
10348  else
10349    return SDValue();
10350
10351  SDValue SrcVec = Ext1.getOperand(0);
10352  auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
10353    PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
10354  return DAG.getNode(NodeType, dl, MVT::v2f64,
10355                     SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
10356}
10357
10358SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
10359                                              DAGCombinerInfo &DCI) const {
10360  assert((N->getOpcode() == ISD::SINT_TO_FP ||
10361          N->getOpcode() == ISD::UINT_TO_FP) &&
10362         "Need an int -> FP conversion node here");
10363
10364  if (!Subtarget.has64BitSupport())
10365    return SDValue();
10366
10367  SelectionDAG &DAG = DCI.DAG;
10368  SDLoc dl(N);
10369  SDValue Op(N, 0);
10370
10371  // Don't handle ppc_fp128 here or i1 conversions.
10372  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
10373    return SDValue();
10374  if (Op.getOperand(0).getValueType() == MVT::i1)
10375    return SDValue();
10376
  // For i32 intermediate values, unfortunately, the conversion functions
  // leave the upper 32 bits of the value undefined. Within the set of
  // scalar instructions, we have no method for zero- or sign-extending the
  // value. Thus, we cannot handle i32 intermediate values here.
10381  if (Op.getOperand(0).getValueType() == MVT::i32)
10382    return SDValue();
10383
10384  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
10385         "UINT_TO_FP is supported only with FPCVT");
10386
10387  // If we have FCFIDS, then use it when converting to single-precision.
10388  // Otherwise, convert to double-precision and then round.
10389  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
10390                       ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
10391                                                            : PPCISD::FCFIDS)
10392                       : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
10393                                                            : PPCISD::FCFID);
10394  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
10395                  ? MVT::f32
10396                  : MVT::f64;
10397
  // If we're converting from a float to an int and back to a float again,
  // then we don't need the store/load pair at all.
10400  if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
10401       Subtarget.hasFPCVT()) ||
10402      (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
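    // The pattern here is (sint_to_fp (fp_to_sint X)) or its unsigned
    // analogue; both conversions can stay in the FP register file: the
    // fctid[u]z below produces the i64 intermediate in an FPR, and
    // fcfid[u][s] converts it back.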
10403    SDValue Src = Op.getOperand(0).getOperand(0);
10404    if (Src.getValueType() == MVT::f32) {
10405      Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
10406      DCI.AddToWorklist(Src.getNode());
10407    } else if (Src.getValueType() != MVT::f64) {
10408      // Make sure that we don't pick up a ppc_fp128 source value.
10409      return SDValue();
10410    }
10411
10412    unsigned FCTOp =
10413      Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
10414                                                        PPCISD::FCTIDUZ;
10415
10416    SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
10417    SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
10418
10419    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
10420      FP = DAG.getNode(ISD::FP_ROUND, dl,
10421                       MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
10422      DCI.AddToWorklist(FP.getNode());
10423    }
10424
10425    return FP;
10426  }
10427
10428  return SDValue();
10429}
10430
10431// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
10432// builtins) into loads with swaps.
10433SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
10434                                              DAGCombinerInfo &DCI) const {
10435  SelectionDAG &DAG = DCI.DAG;
10436  SDLoc dl(N);
10437  SDValue Chain;
10438  SDValue Base;
10439  MachineMemOperand *MMO;
10440
10441  switch (N->getOpcode()) {
10442  default:
10443    llvm_unreachable("Unexpected opcode for little endian VSX load");
10444  case ISD::LOAD: {
10445    LoadSDNode *LD = cast<LoadSDNode>(N);
10446    Chain = LD->getChain();
10447    Base = LD->getBasePtr();
10448    MMO = LD->getMemOperand();
    // If the MMO suggests this isn't a load of a full vector, leave
    // things alone.  For a built-in, we have to make the change for
    // correctness, so any size problem there would be a bug.
10452    if (MMO->getSize() < 16)
10453      return SDValue();
10454    break;
10455  }
10456  case ISD::INTRINSIC_W_CHAIN: {
10457    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
10458    Chain = Intrin->getChain();
10459    // Similarly to the store case below, Intrin->getBasePtr() doesn't get
10460    // us what we want. Get operand 2 instead.
10461    Base = Intrin->getOperand(2);
10462    MMO = Intrin->getMemOperand();
10463    break;
10464  }
10465  }
10466
10467  MVT VecTy = N->getValueType(0).getSimpleVT();
10468  SDValue LoadOps[] = { Chain, Base };
10469  SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
10470                                         DAG.getVTList(MVT::v2f64, MVT::Other),
10471                                         LoadOps, MVT::v2f64, MMO);
10472
10473  DCI.AddToWorklist(Load.getNode());
10474  Chain = Load.getValue(1);
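  // lxvd2x loads the two doublewords in big-endian element order regardless
  // of target endianness, so swap them back into the order that
  // little-endian code expects.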
10475  SDValue Swap = DAG.getNode(
10476      PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
10477  DCI.AddToWorklist(Swap.getNode());
10478
10479  // Add a bitcast if the resulting load type doesn't match v2f64.
10480  if (VecTy != MVT::v2f64) {
10481    SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
10482    DCI.AddToWorklist(N.getNode());
10483    // Package {bitcast value, swap's chain} to match Load's shape.
10484    return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
10485                       N, Swap.getValue(1));
10486  }
10487
10488  return Swap;
10489}
10490
10491// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
10492// builtins) into stores with swaps.
10493SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
10494                                               DAGCombinerInfo &DCI) const {
10495  SelectionDAG &DAG = DCI.DAG;
10496  SDLoc dl(N);
10497  SDValue Chain;
10498  SDValue Base;
10499  unsigned SrcOpnd;
10500  MachineMemOperand *MMO;
10501
10502  switch (N->getOpcode()) {
10503  default:
10504    llvm_unreachable("Unexpected opcode for little endian VSX store");
10505  case ISD::STORE: {
10506    StoreSDNode *ST = cast<StoreSDNode>(N);
10507    Chain = ST->getChain();
10508    Base = ST->getBasePtr();
10509    MMO = ST->getMemOperand();
10510    SrcOpnd = 1;
    // If the MMO suggests this isn't a store of a full vector, leave
    // things alone.  For a built-in, we have to make the change for
    // correctness, so any size problem there would be a bug.
10514    if (MMO->getSize() < 16)
10515      return SDValue();
10516    break;
10517  }
10518  case ISD::INTRINSIC_VOID: {
10519    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
10520    Chain = Intrin->getChain();
10521    // Intrin->getBasePtr() oddly does not get what we want.
10522    Base = Intrin->getOperand(3);
10523    MMO = Intrin->getMemOperand();
10524    SrcOpnd = 2;
10525    break;
10526  }
10527  }
10528
10529  SDValue Src = N->getOperand(SrcOpnd);
10530  MVT VecTy = Src.getValueType().getSimpleVT();
10531
  // All stores are done as v2f64, with a bitcast beforehand if needed.
10533  if (VecTy != MVT::v2f64) {
10534    Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
10535    DCI.AddToWorklist(Src.getNode());
10536  }
10537
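  // Swap the doublewords first so that the stxvd2x below writes the same
  // memory image a normal little-endian store would produce.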
10538  SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
10539                             DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
10540  DCI.AddToWorklist(Swap.getNode());
10541  Chain = Swap.getValue(1);
10542  SDValue StoreOps[] = { Chain, Swap, Base };
10543  SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
10544                                          DAG.getVTList(MVT::Other),
10545                                          StoreOps, VecTy, MMO);
10546  DCI.AddToWorklist(Store.getNode());
10547  return Store;
10548}
10549
10550SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
10551                                             DAGCombinerInfo &DCI) const {
10552  SelectionDAG &DAG = DCI.DAG;
10553  SDLoc dl(N);
10554  switch (N->getOpcode()) {
10555  default: break;
  case PPCISD::SHL:
    if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
      return N->getOperand(0);
    break;
  case PPCISD::SRL:
    if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
      return N->getOperand(0);
    break;
10564  case PPCISD::SRA:
10565    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
10566      if (C->isNullValue() ||   //  0 >>s V -> 0.
10567          C->isAllOnesValue())    // -1 >>s V -> -1.
10568        return N->getOperand(0);
10569    }
10570    break;
10571  case ISD::SIGN_EXTEND:
10572  case ISD::ZERO_EXTEND:
10573  case ISD::ANY_EXTEND:
10574    return DAGCombineExtBoolTrunc(N, DCI);
10575  case ISD::TRUNCATE:
10576  case ISD::SETCC:
10577  case ISD::SELECT_CC:
10578    return DAGCombineTruncBoolExt(N, DCI);
10579  case ISD::SINT_TO_FP:
10580  case ISD::UINT_TO_FP:
10581    return combineFPToIntToFP(N, DCI);
10582  case ISD::STORE: {
10583    // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
10584    if (Subtarget.hasSTFIWX() && !cast<StoreSDNode>(N)->isTruncatingStore() &&
10585        N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
10586        N->getOperand(1).getValueType() == MVT::i32 &&
10587        N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
10588      SDValue Val = N->getOperand(1).getOperand(0);
10589      if (Val.getValueType() == MVT::f32) {
10590        Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
10591        DCI.AddToWorklist(Val.getNode());
10592      }
10593      Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
10594      DCI.AddToWorklist(Val.getNode());
10595
10596      SDValue Ops[] = {
10597        N->getOperand(0), Val, N->getOperand(2),
10598        DAG.getValueType(N->getOperand(1).getValueType())
10599      };
10600
10601      Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
10602              DAG.getVTList(MVT::Other), Ops,
10603              cast<StoreSDNode>(N)->getMemoryVT(),
10604              cast<StoreSDNode>(N)->getMemOperand());
10605      DCI.AddToWorklist(Val.getNode());
10606      return Val;
10607    }
10608
10609    // Turn STORE (BSWAP) -> sthbrx/stwbrx.
10610    if (cast<StoreSDNode>(N)->isUnindexed() &&
10611        N->getOperand(1).getOpcode() == ISD::BSWAP &&
10612        N->getOperand(1).getNode()->hasOneUse() &&
10613        (N->getOperand(1).getValueType() == MVT::i32 ||
10614         N->getOperand(1).getValueType() == MVT::i16 ||
10615         (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
10616          N->getOperand(1).getValueType() == MVT::i64))) {
10617      SDValue BSwapOp = N->getOperand(1).getOperand(0);
10618      // Do an any-extend to 32-bits if this is a half-word input.
10619      if (BSwapOp.getValueType() == MVT::i16)
10620        BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
10621
10622      SDValue Ops[] = {
10623        N->getOperand(0), BSwapOp, N->getOperand(2),
10624        DAG.getValueType(N->getOperand(1).getValueType())
10625      };
10626      return
10627        DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
10628                                Ops, cast<StoreSDNode>(N)->getMemoryVT(),
10629                                cast<StoreSDNode>(N)->getMemOperand());
10630    }
10631
    // For little endian, VSX stores require generating xxswapd/stxvd2x.
10633    EVT VT = N->getOperand(1).getValueType();
10634    if (VT.isSimple()) {
10635      MVT StoreVT = VT.getSimpleVT();
10636      if (Subtarget.hasVSX() && Subtarget.isLittleEndian() &&
10637          (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
10638           StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
10639        return expandVSXStoreForLE(N, DCI);
10640    }
10641    break;
10642  }
10643  case ISD::LOAD: {
10644    LoadSDNode *LD = cast<LoadSDNode>(N);
10645    EVT VT = LD->getValueType(0);
10646
10647    // For little endian, VSX loads require generating lxvd2x/xxswapd.
10648    if (VT.isSimple()) {
10649      MVT LoadVT = VT.getSimpleVT();
10650      if (Subtarget.hasVSX() && Subtarget.isLittleEndian() &&
10651          (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
10652           LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
10653        return expandVSXLoadForLE(N, DCI);
10654    }
10655
10656    // We sometimes end up with a 64-bit integer load, from which we extract
10657    // two single-precision floating-point numbers. This happens with
10658    // std::complex<float>, and other similar structures, because of the way we
10659    // canonicalize structure copies. However, if we lack direct moves,
10660    // then the final bitcasts from the extracted integer values to the
10661    // floating-point numbers turn into store/load pairs. Even with direct moves,
10662    // just loading the two floating-point numbers is likely better.
10663    auto ReplaceTwoFloatLoad = [&]() {
10664      if (VT != MVT::i64)
10665        return false;
10666
10667      if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
10668          LD->isVolatile())
10669        return false;
10670
10671      //  We're looking for a sequence like this:
10672      //  t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
10673      //      t16: i64 = srl t13, Constant:i32<32>
10674      //    t17: i32 = truncate t16
10675      //  t18: f32 = bitcast t17
10676      //    t19: i32 = truncate t13
10677      //  t20: f32 = bitcast t19
10678
10679      if (!LD->hasNUsesOfValue(2, 0))
10680        return false;
10681
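      // Find the two users of the load's value result (result 0), skipping
      // over any uses of the chain result. One user should be the truncate
      // and the other the right shift; the swap below sorts out which is
      // which.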
10682      auto UI = LD->use_begin();
10683      while (UI.getUse().getResNo() != 0) ++UI;
10684      SDNode *Trunc = *UI++;
10685      while (UI.getUse().getResNo() != 0) ++UI;
10686      SDNode *RightShift = *UI;
10687      if (Trunc->getOpcode() != ISD::TRUNCATE)
10688        std::swap(Trunc, RightShift);
10689
10690      if (Trunc->getOpcode() != ISD::TRUNCATE ||
10691          Trunc->getValueType(0) != MVT::i32 ||
10692          !Trunc->hasOneUse())
10693        return false;
10694      if (RightShift->getOpcode() != ISD::SRL ||
10695          !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
10696          RightShift->getConstantOperandVal(1) != 32 ||
10697          !RightShift->hasOneUse())
10698        return false;
10699
10700      SDNode *Trunc2 = *RightShift->use_begin();
10701      if (Trunc2->getOpcode() != ISD::TRUNCATE ||
10702          Trunc2->getValueType(0) != MVT::i32 ||
10703          !Trunc2->hasOneUse())
10704        return false;
10705
10706      SDNode *Bitcast = *Trunc->use_begin();
10707      SDNode *Bitcast2 = *Trunc2->use_begin();
10708
10709      if (Bitcast->getOpcode() != ISD::BITCAST ||
10710          Bitcast->getValueType(0) != MVT::f32)
10711        return false;
10712      if (Bitcast2->getOpcode() != ISD::BITCAST ||
10713          Bitcast2->getValueType(0) != MVT::f32)
10714        return false;
10715
10716      if (Subtarget.isLittleEndian())
10717        std::swap(Bitcast, Bitcast2);
10718
10719      // Bitcast has the second float (in memory-layout order) and Bitcast2
10720      // has the first one.
10721
10722      SDValue BasePtr = LD->getBasePtr();
10723      if (LD->isIndexed()) {
10724        assert(LD->getAddressingMode() == ISD::PRE_INC &&
10725               "Non-pre-inc AM on PPC?");
10726        BasePtr =
10727          DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
10728                      LD->getOffset());
10729      }
10730
10731      SDValue FloatLoad =
10732        DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
10733                    LD->getPointerInfo(), false, LD->isNonTemporal(),
10734                    LD->isInvariant(), LD->getAlignment(), LD->getAAInfo());
10735      SDValue AddPtr =
10736        DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
10737                    BasePtr, DAG.getIntPtrConstant(4, dl));
10738      SDValue FloatLoad2 =
10739        DAG.getLoad(MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
10740                    LD->getPointerInfo().getWithOffset(4), false,
10741                    LD->isNonTemporal(), LD->isInvariant(),
10742                    MinAlign(LD->getAlignment(), 4), LD->getAAInfo());
10743
10744      if (LD->isIndexed()) {
10745        // Note that DAGCombine should re-form any pre-increment load(s) from
10746        // what is produced here if that makes sense.
10747        DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
10748      }
10749
10750      DCI.CombineTo(Bitcast2, FloatLoad);
10751      DCI.CombineTo(Bitcast, FloatLoad2);
10752
10753      DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
10754                                    SDValue(FloatLoad2.getNode(), 1));
10755      return true;
10756    };
10757
10758    if (ReplaceTwoFloatLoad())
10759      return SDValue(N, 0);
10760
10761    EVT MemVT = LD->getMemoryVT();
10762    Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
10763    unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty);
10764    Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext());
10765    unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy);
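    // The QPX expansion below requires at least element (scalar) alignment;
    // Altivec loads are expanded whenever they fall below full vector ABI
    // alignment.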
10766    if (LD->isUnindexed() && VT.isVector() &&
10767        ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
10768          // P8 and later hardware should just use LOAD.
10769          !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 ||
10770                                       VT == MVT::v4i32 || VT == MVT::v4f32)) ||
10771         (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) &&
10772          LD->getAlignment() >= ScalarABIAlignment)) &&
10773        LD->getAlignment() < ABIAlignment) {
10774      // This is a type-legal unaligned Altivec or QPX load.
10775      SDValue Chain = LD->getChain();
10776      SDValue Ptr = LD->getBasePtr();
10777      bool isLittleEndian = Subtarget.isLittleEndian();
10778
10779      // This implements the loading of unaligned vectors as described in
10780      // the venerable Apple Velocity Engine overview. Specifically:
10781      // https://developer.apple.com/hardwaredrivers/ve/alignment.html
10782      // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
10783      //
10784      // The general idea is to expand a sequence of one or more unaligned
10785      // loads into an alignment-based permutation-control instruction (lvsl
10786      // or lvsr), a series of regular vector loads (which always truncate
10787      // their input address to an aligned address), and a series of
10788      // permutations.  The results of these permutations are the requested
10789      // loaded values.  The trick is that the last "extra" load is not taken
10790      // from the address you might suspect (sizeof(vector) bytes after the
10791      // last requested load), but rather sizeof(vector) - 1 bytes after the
10792      // last requested vector. The point of this is to avoid a page fault if
10793      // the base address happened to be aligned. This works because if the
10794      // base address is aligned, then adding less than a full vector length
10795      // will cause the last vector in the sequence to be (re)loaded.
      // Otherwise, the next vector is fetched from the address one would
      // expect.
10798
10799      // We might be able to reuse the permutation generation from
10800      // a different base address offset from this one by an aligned amount.
10801      // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
10802      // optimization later.
10803      Intrinsic::ID Intr, IntrLD, IntrPerm;
10804      MVT PermCntlTy, PermTy, LDTy;
10805      if (Subtarget.hasAltivec()) {
10806        Intr = isLittleEndian ?  Intrinsic::ppc_altivec_lvsr :
10807                                 Intrinsic::ppc_altivec_lvsl;
10808        IntrLD = Intrinsic::ppc_altivec_lvx;
10809        IntrPerm = Intrinsic::ppc_altivec_vperm;
10810        PermCntlTy = MVT::v16i8;
10811        PermTy = MVT::v4i32;
10812        LDTy = MVT::v4i32;
10813      } else {
10814        Intr =   MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld :
10815                                       Intrinsic::ppc_qpx_qvlpcls;
10816        IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd :
10817                                       Intrinsic::ppc_qpx_qvlfs;
10818        IntrPerm = Intrinsic::ppc_qpx_qvfperm;
10819        PermCntlTy = MVT::v4f64;
10820        PermTy = MVT::v4f64;
10821        LDTy = MemVT.getSimpleVT();
10822      }
10823
10824      SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
10825
10826      // Create the new MMO for the new base load. It is like the original MMO,
10827      // but represents an area in memory almost twice the vector size centered
10828      // on the original address. If the address is unaligned, we might start
10829      // reading up to (sizeof(vector)-1) bytes below the address of the
10830      // original unaligned load.
10831      MachineFunction &MF = DAG.getMachineFunction();
10832      MachineMemOperand *BaseMMO =
10833        MF.getMachineMemOperand(LD->getMemOperand(),
10834                                -(long)MemVT.getStoreSize()+1,
10835                                2*MemVT.getStoreSize()-1);
10836
10837      // Create the new base load.
10838      SDValue LDXIntID =
10839          DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
10840      SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
10841      SDValue BaseLoad =
10842        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
10843                                DAG.getVTList(PermTy, MVT::Other),
10844                                BaseLoadOps, LDTy, BaseMMO);
10845
10846      // Note that the value of IncOffset (which is provided to the next
10847      // load's pointer info offset value, and thus used to calculate the
10848      // alignment), and the value of IncValue (which is actually used to
10849      // increment the pointer value) are different! This is because we
10850      // require the next load to appear to be aligned, even though it
10851      // is actually offset from the base pointer by a lesser amount.
10852      int IncOffset = VT.getSizeInBits() / 8;
10853      int IncValue = IncOffset;
10854
10855      // Walk (both up and down) the chain looking for another load at the real
10856      // (aligned) offset (the alignment of the other load does not matter in
10857      // this case). If found, then do not use the offset reduction trick, as
10858      // that will prevent the loads from being later combined (as they would
10859      // otherwise be duplicates).
10860      if (!findConsecutiveLoad(LD, DAG))
10861        --IncValue;
10862
10863      SDValue Increment =
10864          DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
10865      Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
10866
10867      MachineMemOperand *ExtraMMO =
10868        MF.getMachineMemOperand(LD->getMemOperand(),
10869                                1, 2*MemVT.getStoreSize()-1);
10870      SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
10871      SDValue ExtraLoad =
10872        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
10873                                DAG.getVTList(PermTy, MVT::Other),
10874                                ExtraLoadOps, LDTy, ExtraMMO);
10875
10876      SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
10877        BaseLoad.getValue(1), ExtraLoad.getValue(1));
10878
10879      // Because vperm has a big-endian bias, we must reverse the order
10880      // of the input vectors and complement the permute control vector
10881      // when generating little endian code.  We have already handled the
10882      // latter by using lvsr instead of lvsl, so just reverse BaseLoad
10883      // and ExtraLoad here.
10884      SDValue Perm;
10885      if (isLittleEndian)
10886        Perm = BuildIntrinsicOp(IntrPerm,
10887                                ExtraLoad, BaseLoad, PermCntl, DAG, dl);
10888      else
10889        Perm = BuildIntrinsicOp(IntrPerm,
10890                                BaseLoad, ExtraLoad, PermCntl, DAG, dl);
10891
10892      if (VT != PermTy)
10893        Perm = Subtarget.hasAltivec() ?
10894                 DAG.getNode(ISD::BITCAST, dl, VT, Perm) :
                 DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX
                             DAG.getTargetConstant(1, dl, MVT::i64));
      // The FP_ROUND's second argument is 1 because this rounding is
      // always exact.
10899
10900      // The output of the permutation is our loaded result, the TokenFactor is
10901      // our new chain.
10902      DCI.CombineTo(N, Perm, TF);
10903      return SDValue(N, 0);
10904    }
10905    }
10906    break;
10907    case ISD::INTRINSIC_WO_CHAIN: {
10908      bool isLittleEndian = Subtarget.isLittleEndian();
10909      unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
10910      Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
10911                                           : Intrinsic::ppc_altivec_lvsl);
10912      if ((IID == Intr ||
10913           IID == Intrinsic::ppc_qpx_qvlpcld  ||
10914           IID == Intrinsic::ppc_qpx_qvlpcls) &&
10915        N->getOperand(1)->getOpcode() == ISD::ADD) {
10916        SDValue Add = N->getOperand(1);
10917
10918        int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ?
10919                   5 /* 32 byte alignment */ : 4 /* 16 byte alignment */;
10920
10921        if (DAG.MaskedValueIsZero(
10922                Add->getOperand(1),
10923                APInt::getAllOnesValue(Bits /* alignment */)
10924                    .zext(
10925                        Add.getValueType().getScalarType().getSizeInBits()))) {
10926          SDNode *BasePtr = Add->getOperand(0).getNode();
10927          for (SDNode::use_iterator UI = BasePtr->use_begin(),
10928                                    UE = BasePtr->use_end();
10929               UI != UE; ++UI) {
10930            if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
10931                cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == IID) {
10932              // We've found another LVSL/LVSR, and this address is an aligned
10933              // multiple of that one. The results will be the same, so use the
10934              // one we've just found instead.
10935
10936              return SDValue(*UI, 0);
10937            }
10938          }
10939        }
10940
10941        if (isa<ConstantSDNode>(Add->getOperand(1))) {
10942          SDNode *BasePtr = Add->getOperand(0).getNode();
10943          for (SDNode::use_iterator UI = BasePtr->use_begin(),
10944               UE = BasePtr->use_end(); UI != UE; ++UI) {
10945            if (UI->getOpcode() == ISD::ADD &&
10946                isa<ConstantSDNode>(UI->getOperand(1)) &&
10947                (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
10948                 cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
10949                (1ULL << Bits) == 0) {
10950              SDNode *OtherAdd = *UI;
10951              for (SDNode::use_iterator VI = OtherAdd->use_begin(),
10952                   VE = OtherAdd->use_end(); VI != VE; ++VI) {
10953                if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
10954                    cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
10955                  return SDValue(*VI, 0);
10956                }
10957              }
10958            }
10959          }
10960        }
10961      }
10962    }
10963
10964    break;
10965  case ISD::INTRINSIC_W_CHAIN: {
10966    // For little endian, VSX loads require generating lxvd2x/xxswapd.
10967    if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) {
10968      switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
10969      default:
10970        break;
10971      case Intrinsic::ppc_vsx_lxvw4x:
10972      case Intrinsic::ppc_vsx_lxvd2x:
10973        return expandVSXLoadForLE(N, DCI);
10974      }
10975    }
10976    break;
10977  }
10978  case ISD::INTRINSIC_VOID: {
10979    // For little endian, VSX stores require generating xxswapd/stxvd2x.
10980    if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) {
10981      switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
10982      default:
10983        break;
10984      case Intrinsic::ppc_vsx_stxvw4x:
10985      case Intrinsic::ppc_vsx_stxvd2x:
10986        return expandVSXStoreForLE(N, DCI);
10987      }
10988    }
10989    break;
10990  }
10991  case ISD::BSWAP:
10992    // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
10993    if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
10994        N->getOperand(0).hasOneUse() &&
10995        (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
10996         (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
10997          N->getValueType(0) == MVT::i64))) {
10998      SDValue Load = N->getOperand(0);
10999      LoadSDNode *LD = cast<LoadSDNode>(Load);
11000      // Create the byte-swapping load.
11001      SDValue Ops[] = {
11002        LD->getChain(),    // Chain
11003        LD->getBasePtr(),  // Ptr
11004        DAG.getValueType(N->getValueType(0)) // VT
11005      };
11006      SDValue BSLoad =
11007        DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
11008                                DAG.getVTList(N->getValueType(0) == MVT::i64 ?
11009                                              MVT::i64 : MVT::i32, MVT::Other),
11010                                Ops, LD->getMemoryVT(), LD->getMemOperand());
11011
11012      // If this is an i16 load, insert the truncate.
11013      SDValue ResVal = BSLoad;
11014      if (N->getValueType(0) == MVT::i16)
11015        ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
11016
11017      // First, combine the bswap away.  This makes the value produced by the
11018      // load dead.
11019      DCI.CombineTo(N, ResVal);
11020
      // Next, combine the load away; we give it a bogus result value but a
      // real chain result. The result value is dead because the bswap is
      // dead.
11023      DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
11024
11025      // Return N so it doesn't get rechecked!
11026      return SDValue(N, 0);
11027    }
11028
11029    break;
11030  case PPCISD::VCMP: {
    // If a VCMPo node already exists with exactly the same operands as this
    // node, use its result instead of this node (VCMPo both sets CR6 and
    // produces the normal vector output).
11035    if (!N->getOperand(0).hasOneUse() &&
11036        !N->getOperand(1).hasOneUse() &&
11037        !N->getOperand(2).hasOneUse()) {
11038
11039      // Scan all of the users of the LHS, looking for VCMPo's that match.
11040      SDNode *VCMPoNode = nullptr;
11041
11042      SDNode *LHSN = N->getOperand(0).getNode();
11043      for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
11044           UI != E; ++UI)
11045        if (UI->getOpcode() == PPCISD::VCMPo &&
11046            UI->getOperand(1) == N->getOperand(1) &&
11047            UI->getOperand(2) == N->getOperand(2) &&
11048            UI->getOperand(0) == N->getOperand(0)) {
11049          VCMPoNode = *UI;
11050          break;
11051        }
11052
      // If there is no VCMPo node, or if the flag value is unused, don't
      // transform this.
11055      if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
11056        break;
11057
11058      // Look at the (necessarily single) use of the flag value.  If it has a
11059      // chain, this transformation is more complex.  Note that multiple things
11060      // could use the value result, which we should ignore.
11061      SDNode *FlagUser = nullptr;
11062      for (SDNode::use_iterator UI = VCMPoNode->use_begin();
11063           FlagUser == nullptr; ++UI) {
11064        assert(UI != VCMPoNode->use_end() && "Didn't find user!");
11065        SDNode *User = *UI;
11066        for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
11067          if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
11068            FlagUser = User;
11069            break;
11070          }
11071        }
11072      }
11073
11074      // If the user is a MFOCRF instruction, we know this is safe.
11075      // Otherwise we give up for right now.
11076      if (FlagUser->getOpcode() == PPCISD::MFOCRF)
11077        return SDValue(VCMPoNode, 0);
11078    }
11079    break;
11080  }
11081  case ISD::BRCOND: {
11082    SDValue Cond = N->getOperand(1);
11083    SDValue Target = N->getOperand(2);
11084
11085    if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
11086        cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
11087          Intrinsic::ppc_is_decremented_ctr_nonzero) {
11088
11089      // We now need to make the intrinsic dead (it cannot be instruction
11090      // selected).
11091      DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
11092      assert(Cond.getNode()->hasOneUse() &&
11093             "Counter decrement has more than one use");
11094
11095      return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
11096                         N->getOperand(0), Target);
11097    }
11098  }
11099  break;
11100  case ISD::BR_CC: {
11101    // If this is a branch on an altivec predicate comparison, lower this so
11102    // that we don't have to do a MFOCRF: instead, branch directly on CR6.  This
11103    // lowering is done pre-legalize, because the legalizer lowers the predicate
11104    // compare down to code that is difficult to reassemble.
11105    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
11106    SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
11107
    // Sometimes the promoted value of the intrinsic is ANDed with some
    // non-zero value. If so, look through the AND to get to the intrinsic.
11110    if (LHS.getOpcode() == ISD::AND &&
11111        LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
11112        cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
11113          Intrinsic::ppc_is_decremented_ctr_nonzero &&
11114        isa<ConstantSDNode>(LHS.getOperand(1)) &&
11115        !isNullConstant(LHS.getOperand(1)))
11116      LHS = LHS.getOperand(0);
11117
11118    if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
11119        cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
11120          Intrinsic::ppc_is_decremented_ctr_nonzero &&
11121        isa<ConstantSDNode>(RHS)) {
11122      assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
11123             "Counter decrement comparison is not EQ or NE");
11124
11125      unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
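      // The intrinsic returns 1 exactly when the decremented counter is
      // nonzero, so branch with bdnz when the comparison asks for that case
      // and with bdz otherwise.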
11126      bool isBDNZ = (CC == ISD::SETEQ && Val) ||
11127                    (CC == ISD::SETNE && !Val);
11128
11129      // We now need to make the intrinsic dead (it cannot be instruction
11130      // selected).
11131      DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
11132      assert(LHS.getNode()->hasOneUse() &&
11133             "Counter decrement has more than one use");
11134
11135      return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
11136                         N->getOperand(0), N->getOperand(4));
11137    }
11138
11139    int CompareOpc;
11140    bool isDot;
11141
11142    if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
11143        isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
11144        getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
11145      assert(isDot && "Can't compare against a vector result!");
11146
11147      // If this is a comparison against something other than 0/1, then we know
11148      // that the condition is never/always true.
11149      unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
11150      if (Val != 0 && Val != 1) {
11151        if (CC == ISD::SETEQ)      // Cond never true, remove branch.
11152          return N->getOperand(0);
11153        // Always !=, turn it into an unconditional branch.
11154        return DAG.getNode(ISD::BR, dl, MVT::Other,
11155                           N->getOperand(0), N->getOperand(4));
11156      }
11157
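      // SETEQ with Val == 1 and SETNE with Val == 0 both mean "branch when
      // the predicate is true"; the XOR selects exactly those two cases.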
11158      bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
11159
11160      // Create the PPCISD altivec 'dot' comparison node.
11161      SDValue Ops[] = {
11162        LHS.getOperand(2),  // LHS of compare
11163        LHS.getOperand(3),  // RHS of compare
11164        DAG.getConstant(CompareOpc, dl, MVT::i32)
11165      };
11166      EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
11167      SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
11168
11169      // Unpack the result based on how the target uses it.
11170      PPC::Predicate CompOpc;
11171      switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
11172      default:  // Can't happen, don't crash on invalid number though.
11173      case 0:   // Branch on the value of the EQ bit of CR6.
11174        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
11175        break;
11176      case 1:   // Branch on the inverted value of the EQ bit of CR6.
11177        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
11178        break;
11179      case 2:   // Branch on the value of the LT bit of CR6.
11180        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
11181        break;
11182      case 3:   // Branch on the inverted value of the LT bit of CR6.
11183        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
11184        break;
11185      }
11186
11187      return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
11188                         DAG.getConstant(CompOpc, dl, MVT::i32),
11189                         DAG.getRegister(PPC::CR6, MVT::i32),
11190                         N->getOperand(4), CompNode.getValue(1));
11191    }
11192    break;
11193  }
11194  case ISD::BUILD_VECTOR:
11195    return DAGCombineBuildVector(N, DCI);
11196  }
11197
11198  return SDValue();
11199}
11200
11201SDValue
11202PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
11203                                  SelectionDAG &DAG,
11204                                  std::vector<SDNode *> *Created) const {
11205  // fold (sdiv X, pow2)
11206  EVT VT = N->getValueType(0);
11207  if (VT == MVT::i64 && !Subtarget.isPPC64())
11208    return SDValue();
11209  if ((VT != MVT::i32 && VT != MVT::i64) ||
11210      !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
11211    return SDValue();
11212
11213  SDLoc DL(N);
11214  SDValue N0 = N->getOperand(0);
11215
11216  bool IsNegPow2 = (-Divisor).isPowerOf2();
11217  unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
11218  SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
11219
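  // srawi/sradi set the carry bit when a negative input has one-bits
  // shifted out; the addze folded into SRA_ADDZE adds that carry back so
  // the quotient rounds toward zero, as C signed division requires.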
11220  SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
11221  if (Created)
11222    Created->push_back(Op.getNode());
11223
11224  if (IsNegPow2) {
11225    Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
11226    if (Created)
11227      Created->push_back(Op.getNode());
11228  }
11229
11230  return Op;
11231}
11232
11233//===----------------------------------------------------------------------===//
11234// Inline Assembly Support
11235//===----------------------------------------------------------------------===//
11236
11237void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
11238                                                      APInt &KnownZero,
11239                                                      APInt &KnownOne,
11240                                                      const SelectionDAG &DAG,
11241                                                      unsigned Depth) const {
11242  KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
11243  switch (Op.getOpcode()) {
11244  default: break;
11245  case PPCISD::LBRX: {
11246    // lhbrx is known to have the top bits cleared out.
11247    if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
11248      KnownZero = 0xFFFF0000;
11249    break;
11250  }
11251  case ISD::INTRINSIC_WO_CHAIN: {
11252    switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
11253    default: break;
11254    case Intrinsic::ppc_altivec_vcmpbfp_p:
11255    case Intrinsic::ppc_altivec_vcmpeqfp_p:
11256    case Intrinsic::ppc_altivec_vcmpequb_p:
11257    case Intrinsic::ppc_altivec_vcmpequh_p:
11258    case Intrinsic::ppc_altivec_vcmpequw_p:
11259    case Intrinsic::ppc_altivec_vcmpequd_p:
11260    case Intrinsic::ppc_altivec_vcmpgefp_p:
11261    case Intrinsic::ppc_altivec_vcmpgtfp_p:
11262    case Intrinsic::ppc_altivec_vcmpgtsb_p:
11263    case Intrinsic::ppc_altivec_vcmpgtsh_p:
11264    case Intrinsic::ppc_altivec_vcmpgtsw_p:
11265    case Intrinsic::ppc_altivec_vcmpgtsd_p:
11266    case Intrinsic::ppc_altivec_vcmpgtub_p:
11267    case Intrinsic::ppc_altivec_vcmpgtuh_p:
11268    case Intrinsic::ppc_altivec_vcmpgtuw_p:
11269    case Intrinsic::ppc_altivec_vcmpgtud_p:
11270      KnownZero = ~1U;  // All bits but the low one are known to be zero.
11271      break;
11272    }
11273  }
11274  }
11275}
11276
11277unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
11278  switch (Subtarget.getDarwinDirective()) {
11279  default: break;
11280  case PPC::DIR_970:
11281  case PPC::DIR_PWR4:
11282  case PPC::DIR_PWR5:
11283  case PPC::DIR_PWR5X:
11284  case PPC::DIR_PWR6:
11285  case PPC::DIR_PWR6X:
11286  case PPC::DIR_PWR7:
11287  case PPC::DIR_PWR8:
11288  case PPC::DIR_PWR9: {
11289    if (!ML)
11290      break;
11291
11292    const PPCInstrInfo *TII = Subtarget.getInstrInfo();
11293
11294    // For small loops (between 5 and 8 instructions), align to a 32-byte
11295    // boundary so that the entire loop fits in one instruction-cache line.
11296    uint64_t LoopSize = 0;
11297    for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
11298      for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
11299        LoopSize += TII->GetInstSizeInBytes(*J);
11300        if (LoopSize > 32)
11301          break;
11302      }
11303
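    // Returning 5 requests 2^5 = 32-byte alignment, i.e. a full
    // instruction-cache line.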
11304    if (LoopSize > 16 && LoopSize <= 32)
11305      return 5;
11306
11307    break;
11308  }
11309  }
11310
11311  return TargetLowering::getPrefLoopAlignment(ML);
11312}
11313
11314/// getConstraintType - Given a constraint, return the type of
11315/// constraint it is for this target.
11316PPCTargetLowering::ConstraintType
11317PPCTargetLowering::getConstraintType(StringRef Constraint) const {
11318  if (Constraint.size() == 1) {
11319    switch (Constraint[0]) {
11320    default: break;
11321    case 'b':
11322    case 'r':
11323    case 'f':
11324    case 'd':
11325    case 'v':
11326    case 'y':
11327      return C_RegisterClass;
11328    case 'Z':
11329      // FIXME: While Z does indicate a memory constraint, it specifically
11330      // indicates an r+r address (used in conjunction with the 'y' modifier
11331      // in the replacement string). Currently, we're forcing the base
11332      // register to be r0 in the asm printer (which is interpreted as zero)
11333      // and forming the complete address in the second register. This is
11334      // suboptimal.
11335      return C_Memory;
11336    }
11337  } else if (Constraint == "wc") { // individual CR bits.
11338    return C_RegisterClass;
11339  } else if (Constraint == "wa" || Constraint == "wd" ||
11340             Constraint == "wf" || Constraint == "ws") {
11341    return C_RegisterClass; // VSX registers.
11342  }
11343  return TargetLowering::getConstraintType(Constraint);
11344}
11345
11346/// Examine constraint type and operand type and determine a weight value.
11347/// This object must already have been set up with the operand type
11348/// and the current alternative constraint selected.
11349TargetLowering::ConstraintWeight
11350PPCTargetLowering::getSingleConstraintMatchWeight(
11351    AsmOperandInfo &info, const char *constraint) const {
11352  ConstraintWeight weight = CW_Invalid;
11353  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
11356  if (!CallOperandVal)
11357    return CW_Default;
11358  Type *type = CallOperandVal->getType();
11359
11360  // Look at the constraint type.
11361  if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
11362    return CW_Register; // an individual CR bit.
11363  else if ((StringRef(constraint) == "wa" ||
11364            StringRef(constraint) == "wd" ||
11365            StringRef(constraint) == "wf") &&
11366           type->isVectorTy())
11367    return CW_Register;
11368  else if (StringRef(constraint) == "ws" && type->isDoubleTy())
11369    return CW_Register;
11370
11371  switch (*constraint) {
11372  default:
11373    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
11374    break;
11375  case 'b':
11376    if (type->isIntegerTy())
11377      weight = CW_Register;
11378    break;
11379  case 'f':
11380    if (type->isFloatTy())
11381      weight = CW_Register;
11382    break;
11383  case 'd':
11384    if (type->isDoubleTy())
11385      weight = CW_Register;
11386    break;
11387  case 'v':
11388    if (type->isVectorTy())
11389      weight = CW_Register;
11390    break;
11391  case 'y':
11392    weight = CW_Register;
11393    break;
11394  case 'Z':
11395    weight = CW_Memory;
11396    break;
11397  }
11398  return weight;
11399}
11400
11401std::pair<unsigned, const TargetRegisterClass *>
11402PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
11403                                                StringRef Constraint,
11404                                                MVT VT) const {
11405  if (Constraint.size() == 1) {
11406    // GCC RS6000 Constraint Letters
11407    switch (Constraint[0]) {
11408    case 'b':   // R1-R31
11409      if (VT == MVT::i64 && Subtarget.isPPC64())
11410        return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
11411      return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
11412    case 'r':   // R0-R31
11413      if (VT == MVT::i64 && Subtarget.isPPC64())
11414        return std::make_pair(0U, &PPC::G8RCRegClass);
11415      return std::make_pair(0U, &PPC::GPRCRegClass);
    // 'd' and 'f' constraints are both defined to be "the floating point
    // registers", where one is for 32-bit and the other for 64-bit. We don't
    // distinguish between them here, so give them all the same reg classes.
11419    case 'd':
11420    case 'f':
11421      if (VT == MVT::f32 || VT == MVT::i32)
11422        return std::make_pair(0U, &PPC::F4RCRegClass);
11423      if (VT == MVT::f64 || VT == MVT::i64)
11424        return std::make_pair(0U, &PPC::F8RCRegClass);
11425      if (VT == MVT::v4f64 && Subtarget.hasQPX())
11426        return std::make_pair(0U, &PPC::QFRCRegClass);
11427      if (VT == MVT::v4f32 && Subtarget.hasQPX())
11428        return std::make_pair(0U, &PPC::QSRCRegClass);
11429      break;
11430    case 'v':
11431      if (VT == MVT::v4f64 && Subtarget.hasQPX())
11432        return std::make_pair(0U, &PPC::QFRCRegClass);
11433      if (VT == MVT::v4f32 && Subtarget.hasQPX())
11434        return std::make_pair(0U, &PPC::QSRCRegClass);
      if (Subtarget.hasAltivec())
        return std::make_pair(0U, &PPC::VRRCRegClass);
      break;
11437    case 'y':   // crrc
11438      return std::make_pair(0U, &PPC::CRRCRegClass);
11439    }
11440  } else if (Constraint == "wc" && Subtarget.useCRBits()) {
11441    // An individual CR bit.
11442    return std::make_pair(0U, &PPC::CRBITRCRegClass);
11443  } else if ((Constraint == "wa" || Constraint == "wd" ||
11444             Constraint == "wf") && Subtarget.hasVSX()) {
11445    return std::make_pair(0U, &PPC::VSRCRegClass);
11446  } else if (Constraint == "ws" && Subtarget.hasVSX()) {
11447    if (VT == MVT::f32 && Subtarget.hasP8Vector())
11448      return std::make_pair(0U, &PPC::VSSRCRegClass);
11449    else
11450      return std::make_pair(0U, &PPC::VSFRCRegClass);
11451  }
11452
11453  std::pair<unsigned, const TargetRegisterClass *> R =
11454      TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
11455
11456  // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
11457  // (which we call X[0-9]+). If a 64-bit value has been requested, and a
11458  // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
11459  // register.
11460  // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
11461  // the AsmName field from *RegisterInfo.td, then this would not be necessary.
11462  if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
11463      PPC::GPRCRegClass.contains(R.first))
11464    return std::make_pair(TRI->getMatchingSuperReg(R.first,
11465                            PPC::sub_32, &PPC::G8RCRegClass),
11466                          &PPC::G8RCRegClass);
11467
11468  // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
11469  if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
11470    R.first = PPC::CR0;
11471    R.second = &PPC::CRRCRegClass;
11472  }
11473
11474  return R;
11475}

/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector.  If it is invalid, don't add anything to Ops.
void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                     std::string &Constraint,
                                                     std::vector<SDValue> &Ops,
                                                     SelectionDAG &DAG) const {
  SDValue Result;

  // Only support length 1 constraints.
  if (Constraint.length() > 1) return;

  char Letter = Constraint[0];
  switch (Letter) {
  default: break;
  case 'I':
  case 'J':
  case 'K':
  case 'L':
  case 'M':
  case 'N':
  case 'O':
  case 'P': {
    ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
    if (!CST) return; // Must be an immediate to match.
    SDLoc dl(Op);
    int64_t Value = CST->getSExtValue();
    EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
                         // numbers are printed as such.
    switch (Letter) {
    default: llvm_unreachable("Unknown constraint letter!");
    case 'I':  // "I" is a signed 16-bit constant.
      if (isInt<16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
      if (isShiftedUInt<16, 16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
      if (isShiftedInt<16, 16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
      if (isUInt<16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'M':  // "M" is a constant that is greater than 31.
      if (Value > 31)
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'N':  // "N" is a positive constant that is an exact power of two.
      if (Value > 0 && isPowerOf2_64(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'O':  // "O" is the constant zero.
      if (Value == 0)
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
      if (isInt<16>(-Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    }
    break;
  }
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }

  // Handle standard constraint letters.
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
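
// Illustrative example (user-level code, not part of this file): for
//   asm volatile("addi %0,%1,%2" : "=r"(d) : "r"(s), "I"(17));
// the 'I' case above lowers 17 into a target constant via isInt<16>, while
// an out-of-range value such as 100000 leaves Result empty and falls
// through to the generic constraint handling.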

// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                              const AddrMode &AM, Type *Ty,
                                              unsigned AS) const {
  // PPC does not allow r+i addressing modes for vectors!
  if (Ty->isVectorTy() && AM.BaseOffs != 0)
    return false;

  // PPC allows a sign-extended 16-bit immediate field.
  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
    return false;

  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // PPC supports only plain r+r indexed forms (no scaling); check the scale.
  switch (AM.Scale) {
  case 0:  // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
      return false;
    // Otherwise we have r+r or r+i.
    break;
  case 2:
    if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r  or  2*r+i is not allowed.
      return false;
    // Allow 2*r as r+r.
    break;
  default:
    // No other scales are supported.
    return false;
  }

  return true;
}
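
// Illustrative examples (not from this file): "lwz r4, 8(r3)" is the
// Scale == 0 r+i form accepted above, "lwzx r4, r3, r5" is the Scale == 1
// r+r form, and a scaled mode such as base + 2*index + offset is rejected.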

SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
                                           SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MFI->setReturnAddressIsTaken(true);

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  // Make sure the function does not optimize away the store of the RA to
  // the stack.
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setLRStoreRequired();
  bool isPPC64 = Subtarget.isPPC64();
  auto PtrVT = getPointerTy(MF.getDataLayout());

  if (Depth > 0) {
    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
    SDValue Offset =
        DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
                        isPPC64 ? MVT::i64 : MVT::i32);
    return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
                       MachinePointerInfo(), false, false, false, 0);
  }

  // Just load the return address off the stack.
  SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
                     MachinePointerInfo(), false, false, false, 0);
}
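
// Illustrative note: this lowers @llvm.returnaddress, e.g. Clang's
// __builtin_return_address(N). Depth 0 reads LR's own save slot; a larger
// depth first walks to the parent frame via LowerFRAMEADDR and then loads
// the return-save slot from there.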

SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MFI->setFrameAddressIsTaken(true);

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  bool isPPC64 = PtrVT == MVT::i64;

  // Naked functions never have a frame pointer, and so we use r1. For all
  // other functions, this decision must be delayed until during PEI.
  unsigned FrameReg;
  if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
    FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
  else
    FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;

  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
                                         PtrVT);
  while (Depth--)
    FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
                            FrameAddr, MachinePointerInfo(), false, false,
                            false, 0);
  return FrameAddr;
}
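
// Illustrative note: for __builtin_frame_address(N) with N > 0, each loop
// iteration above loads the back-chain word stored at the base of the
// current frame, following the standard PowerPC stack-frame chain.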

// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
                                              SelectionDAG &DAG) const {
  bool isPPC64 = Subtarget.isPPC64();
  bool isDarwinABI = Subtarget.isDarwinABI();

  if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
      (!isPPC64 && VT != MVT::i32))
    report_fatal_error("Invalid register global variable type");

  bool is64Bit = isPPC64 && VT == MVT::i64;
  unsigned Reg = StringSwitch<unsigned>(RegName)
                   .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
                   .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2)
                   .Case("r13", (!isPPC64 && isDarwinABI) ? 0 :
                                  (is64Bit ? PPC::X13 : PPC::R13))
                   .Default(0);

  if (Reg)
    return Reg;
  report_fatal_error("Invalid register name for global variable");
}
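
// Illustrative example (hypothetical source): a global register variable
// such as
//   register uintptr_t stack_ptr asm("r1");
// is read through @llvm.read_register, whose name operand is resolved by
// the StringSwitch above; unsupported names report a fatal error.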

bool
PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The PowerPC target isn't yet aware of offsets.
  return false;
}

bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                           const CallInst &I,
                                           unsigned Intrinsic) const {

  switch (Intrinsic) {
  case Intrinsic::ppc_qpx_qvlfd:
  case Intrinsic::ppc_qpx_qvlfs:
  case Intrinsic::ppc_qpx_qvlfcd:
  case Intrinsic::ppc_qpx_qvlfcs:
  case Intrinsic::ppc_qpx_qvlfiwa:
  case Intrinsic::ppc_qpx_qvlfiwz:
  case Intrinsic::ppc_altivec_lvx:
  case Intrinsic::ppc_altivec_lvxl:
  case Intrinsic::ppc_altivec_lvebx:
  case Intrinsic::ppc_altivec_lvehx:
  case Intrinsic::ppc_altivec_lvewx:
  case Intrinsic::ppc_vsx_lxvd2x:
  case Intrinsic::ppc_vsx_lxvw4x: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_altivec_lvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_lvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_lvewx:
      VT = MVT::i32;
      break;
    case Intrinsic::ppc_vsx_lxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvlfd:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvlfs:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvlfcd:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvlfcs:
      VT = MVT::v2f32;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(0);
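    // lvx-style loads ignore the low-order address bits, so the access may
    // touch any byte of the naturally-aligned block containing the pointer;
    // model it conservatively as [ptr - (size-1), ptr + (size-1)].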
    Info.offset = -VT.getStoreSize()+1;
    Info.size = 2*VT.getStoreSize()-1;
    Info.align = 1;
    Info.vol = false;
    Info.readMem = true;
    Info.writeMem = false;
    return true;
  }
  case Intrinsic::ppc_qpx_qvlfda:
  case Intrinsic::ppc_qpx_qvlfsa:
  case Intrinsic::ppc_qpx_qvlfcda:
  case Intrinsic::ppc_qpx_qvlfcsa:
  case Intrinsic::ppc_qpx_qvlfiwaa:
  case Intrinsic::ppc_qpx_qvlfiwza: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_qpx_qvlfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvlfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvlfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvlfcsa:
      VT = MVT::v2f32;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.size = VT.getStoreSize();
    Info.align = 1;
    Info.vol = false;
    Info.readMem = true;
    Info.writeMem = false;
    return true;
  }
  case Intrinsic::ppc_qpx_qvstfd:
  case Intrinsic::ppc_qpx_qvstfs:
  case Intrinsic::ppc_qpx_qvstfcd:
  case Intrinsic::ppc_qpx_qvstfcs:
  case Intrinsic::ppc_qpx_qvstfiw:
  case Intrinsic::ppc_altivec_stvx:
  case Intrinsic::ppc_altivec_stvxl:
  case Intrinsic::ppc_altivec_stvebx:
  case Intrinsic::ppc_altivec_stvehx:
  case Intrinsic::ppc_altivec_stvewx:
  case Intrinsic::ppc_vsx_stxvd2x:
  case Intrinsic::ppc_vsx_stxvw4x: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_altivec_stvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_stvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_stvewx:
      VT = MVT::i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfd:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvstfs:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvstfcd:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfcs:
      VT = MVT::v2f32;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_VOID;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(1);
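    // As with the loads above, stvx-style stores ignore the low-order
    // address bits, so model the access window conservatively.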
    Info.offset = -VT.getStoreSize()+1;
    Info.size = 2*VT.getStoreSize()-1;
    Info.align = 1;
    Info.vol = false;
    Info.readMem = false;
    Info.writeMem = true;
    return true;
  }
  case Intrinsic::ppc_qpx_qvstfda:
  case Intrinsic::ppc_qpx_qvstfsa:
  case Intrinsic::ppc_qpx_qvstfcda:
  case Intrinsic::ppc_qpx_qvstfcsa:
  case Intrinsic::ppc_qpx_qvstfiwa: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_qpx_qvstfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvstfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvstfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfcsa:
      VT = MVT::v2f32;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_VOID;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(1);
    Info.offset = 0;
    Info.size = VT.getStoreSize();
    Info.align = 1;
    Info.vol = false;
    Info.readMem = false;
    Info.writeMem = true;
    return true;
  }
  default:
    break;
  }

  return false;
}

/// getOptimalMemOpType - Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
/// lowering. If DstAlign is zero, the destination alignment can satisfy any
/// constraint. Similarly, if SrcAlign is zero there is no need to check it
/// against the alignment requirement, probably because the source does not
/// need to be loaded. If 'IsMemset' is true, this is expanding a memset; if
/// 'ZeroMemset' is also true, it is a memset of zero. 'MemcpyStrSrc'
/// indicates whether the memcpy source is constant so it does not need to
/// be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
                                           unsigned DstAlign, unsigned SrcAlign,
                                           bool IsMemset, bool ZeroMemset,
                                           bool MemcpyStrSrc,
                                           MachineFunction &MF) const {
  if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
    const Function *F = MF.getFunction();
    // When expanding a memset, require at least two QPX instructions to cover
    // the cost of loading the value to be stored from the constant pool.
    if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
        (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
        !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
      return MVT::v4f64;
    }

    // We should use Altivec/VSX loads and stores when available. For unaligned
    // addresses, unaligned VSX loads are only fast starting with the P8.
    if (Subtarget.hasAltivec() && Size >= 16 &&
        (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
         ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
      return MVT::v4i32;
  }

  if (Subtarget.isPPC64()) {
    return MVT::i64;
  }

  return MVT::i32;
}
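
// Illustrative example: at -O2 on an Altivec-capable subtarget, a 32-byte
// memcpy between 16-byte-aligned buffers is expanded with the v4i32 type
// chosen above, i.e. two 16-byte load/store pairs rather than four i64 ones.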

/// \brief Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                                          Type *Ty) const {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  return !(BitSize == 0 || BitSize > 64);
}

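// On PPC64 both i32 and i64 values live in 64-bit GPRs, so truncating i64
// to i32 just uses the low 32 bits of the register and needs no instruction.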
bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  return NumBits1 == 64 && NumBits2 == 32;
}

bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
  if (!VT1.isInteger() || !VT2.isInteger())
    return false;
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  return NumBits1 == 64 && NumBits2 == 32;
}

bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Generally speaking, zexts are not free, but they are free when they can be
  // folded with other operations.
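  // For example, a zero-extending i8 load becomes a plain lbz/lbzx, which
  // already clears the upper bits of the destination register.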
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
         (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  // FIXME: Add other cases...
  //  - 32-bit shifts with a zext to i64
  //  - zext after ctlz, bswap, etc.
  //  - zext after and by a constant mask

  return TargetLowering::isZExtFree(Val, VT2);
}

bool PPCTargetLowering::isFPExtFree(EVT VT) const {
  assert(VT.isFloatingPoint());
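  // PPC's FPRs hold single-precision values in double-precision form, so
  // extending f32 to f64 is a register-level no-op.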
  return true;
}

bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
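  // Signed compares (cmpwi/cmpdi) take a signed 16-bit immediate and
  // unsigned compares (cmplwi/cmpldi) take an unsigned 16-bit immediate.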
  return isInt<16>(Imm) || isUInt<16>(Imm);
}

bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
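  // addi takes a signed 16-bit immediate; unsigned 16-bit constants are
  // accepted here as well, presumably because they remain cheap to
  // materialize.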
  return isInt<16>(Imm) || isUInt<16>(Imm);
}

bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                       unsigned,
                                                       unsigned,
                                                       bool *Fast) const {
  if (DisablePPCUnaligned)
    return false;

  // PowerPC supports unaligned memory access for simple non-vector types.
  // Although accessing unaligned addresses is not as efficient as accessing
  // aligned addresses, it is generally more efficient than manual expansion,
  // and generally only traps for software emulation when crossing page
  // boundaries.

  if (!VT.isSimple())
    return false;

  if (VT.getSimpleVT().isVector()) {
    if (Subtarget.hasVSX()) {
      if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
          VT != MVT::v4f32 && VT != MVT::v4i32)
        return false;
    } else {
      return false;
    }
  }

  if (VT == MVT::ppcf128)
    return false;

  if (Fast)
    *Fast = true;

  return true;
}

bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
    return true;
  default:
    break;
  }

  return false;
}

const MCPhysReg *
PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
  // LR is a callee-save register, but we must treat it as clobbered by any call
  // site. Hence we include LR in the scratch registers, which are in turn added
  // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
  // to CTR, which is used by any indirect call.
  static const MCPhysReg ScratchRegs[] = {
    PPC::X12, PPC::LR8, PPC::CTR8, 0
  };

  return ScratchRegs;
}

unsigned PPCTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
}

unsigned PPCTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
}

bool
PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
                     EVT VT, unsigned DefinedValues) const {
  if (VT == MVT::v2i64)
    return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves

  if (Subtarget.hasVSX() || Subtarget.hasQPX())
    return true;

  return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
}

Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
  if (DisableILPPref || Subtarget.enableMachineScheduler())
    return TargetLowering::getSchedulingPreference(N);

  return Sched::ILP;
}

// Create a fast isel object.
FastISel *
PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
                                  const TargetLibraryInfo *LibInfo) const {
  return PPC::createFastISel(FuncInfo, LibInfo);
}

void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
  if (Subtarget.isDarwinABI()) return;
  if (!Subtarget.isPPC64()) return;

  // Update IsSplitCSR in PPCFunctionInfo.
  PPCFunctionInfo *PFI = Entry->getParent()->getInfo<PPCFunctionInfo>();
  PFI->setIsSplitCSR(true);
}

void PPCTargetLowering::insertCopiesSplitCSR(
  MachineBasicBlock *Entry,
  const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
  if (!IStart)
    return;

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
  MachineBasicBlock::iterator MBBI = Entry->begin();
  for (const MCPhysReg *I = IStart; *I; ++I) {
    const TargetRegisterClass *RC = nullptr;
    if (PPC::G8RCRegClass.contains(*I))
      RC = &PPC::G8RCRegClass;
    else if (PPC::F8RCRegClass.contains(*I))
      RC = &PPC::F8RCRegClass;
    else if (PPC::CRRCRegClass.contains(*I))
      RC = &PPC::CRRCRegClass;
    else if (PPC::VRRCRegClass.contains(*I))
      RC = &PPC::VRRCRegClass;
    else
      llvm_unreachable("Unexpected register class in CSRsViaCopy!");

    unsigned NewVR = MRI->createVirtualRegister(RC);
    // Create copy from CSR to a virtual register.
    // FIXME: this currently does not emit CFI pseudo-instructions, it works
    // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
    // nounwind. If we want to generalize this later, we may need to emit
    // CFI pseudo-instructions.
    assert(Entry->getParent()->getFunction()->hasFnAttribute(
             Attribute::NoUnwind) &&
           "Function should be nounwind in insertCopiesSplitCSR!");
    Entry->addLiveIn(*I);
    BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
      .addReg(*I);

    // Insert the copy-back instructions right before the terminator.
    for (auto *Exit : Exits)
      BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
              TII->get(TargetOpcode::COPY), *I)
        .addReg(NewVR);
  }
}

// Override to enable LOAD_STACK_GUARD lowering on Linux.
bool PPCTargetLowering::useLoadStackGuardNode() const {
  if (!Subtarget.isTargetLinux())
    return TargetLowering::useLoadStackGuardNode();
  return true;
}

// Override to disable global variable loading on Linux.
void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
  if (!Subtarget.isTargetLinux())
    return TargetLowering::insertSSPDeclarations(M);
}
