PPCISelLowering.cpp revision cd81d94322a39503e4a3e87b6ee03d4fcb3465fb
//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCTargetMachine.h"
#include "PPCTargetObjectFile.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);

static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);

// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;

static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
  // If it isn't a Mach-O file then it's going to be a Linux ELF
  // object file.
  if (TT.isOSDarwin())
    return new TargetLoweringObjectFileMachO();

  return new PPC64LinuxTargetObjectFile();
}

PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
    : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))),
      Subtarget(*TM.getSubtargetImpl()) {
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  bool isPPC64 = Subtarget.isPPC64();
  setMinStackArgumentAlignment(isPPC64 ? 8 : 4);

  // Set up the register classes.
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
  addRegisterClass(MVT::f64, &PPC::F8RCRegClass);

  // PowerPC has an i16 SEXTLOAD, but no i8 (or i1) SEXTLOAD.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-increment loads and stores.
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
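  // These map onto the PowerPC "update" memory instructions (lbzu, lwzu,
  // ldu, stbu, stwu, stdu, and their lbzux/stwux-style indexed variants),
  // which write the computed effective address back into the base register.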

  if (Subtarget.useCRBits()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

    if (isPPC64 || Subtarget.hasFPCVT()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
                         isPPC64 ? MVT::i64 : MVT::i32);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType (ISD::UINT_TO_FP, MVT::i1,
                         isPPC64 ? MVT::i64 : MVT::i32);
    } else {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
    }

    // PowerPC does not support direct load/store of condition registers.
    setOperationAction(ISD::LOAD, MVT::i1, Custom);
    setOperationAction(ISD::STORE, MVT::i1, Custom);

    // FIXME: Remove this once the ANDI glue bug is fixed:
    if (ANDIGlueBug)
      setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);

    setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
    setTruncStoreAction(MVT::i64, MVT::i1, Expand);
    setTruncStoreAction(MVT::i32, MVT::i1, Expand);
    setTruncStoreAction(MVT::i16, MVT::i1, Expand);
    setTruncStoreAction(MVT::i8, MVT::i1, Expand);

    addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
  }

  // This is used in the ppcf128->int sequence.  Note it has different semantics
  // from FP_ROUND:  that rounds to nearest, this rounds to zero.
  setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);

  // We do not currently implement these libm ops for PowerPC.
  setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
  setOperationAction(ISD::FCEIL,  MVT::ppcf128, Expand);
  setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
  setOperationAction(ISD::FRINT,  MVT::ppcf128, Expand);
  setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FREM, MVT::ppcf128, Expand);

  // PowerPC has no SREM/UREM instructions.
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod/pow.
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FPOW , MVT::f64, Expand);
  setOperationAction(ISD::FMA  , MVT::f64, Legal);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);
  setOperationAction(ISD::FPOW , MVT::f32, Expand);
  setOperationAction(ISD::FMA  , MVT::f32, Legal);

  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

  // If we're enabling GP optimizations, use hardware square root.
  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath &&
        Subtarget.hasFRSQRTE() && Subtarget.hasFRE()))
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);

  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath &&
        Subtarget.hasFRSQRTES() && Subtarget.hasFRES()))
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  if (Subtarget.hasFCPSGN()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
  } else {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  }

  if (Subtarget.hasFPRND()) {
    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
    setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
    setOperationAction(ISD::FROUND, MVT::f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
    setOperationAction(ISD::FROUND, MVT::f32, Legal);
  }

  // PowerPC does not have BSWAP, CTPOP or CTTZ.
  setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
  setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);

  if (Subtarget.hasPOPCNTD()) {
    setOperationAction(ISD::CTPOP, MVT::i32  , Legal);
    setOperationAction(ISD::CTPOP, MVT::i64  , Legal);
  } else {
    setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
    setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
  }

  // PowerPC does not have ROTR.
  setOperationAction(ISD::ROTR, MVT::i32   , Expand);
  setOperationAction(ISD::ROTR, MVT::i64   , Expand);

  if (!Subtarget.useCRBits()) {
    // PowerPC does not have Select.
    setOperationAction(ISD::SELECT, MVT::i32, Expand);
    setOperationAction(ISD::SELECT, MVT::i64, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Expand);
  }

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit.
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND, which requires a SetCC.
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT,  MVT::Other, Expand);

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

  // PowerPC does not have [U|S]INT_TO_FP.
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

  setOperationAction(ISD::BITCAST, MVT::f32, Expand);
  setOperationAction(ISD::BITCAST, MVT::i32, Expand);
  setOperationAction(ISD::BITCAST, MVT::i64, Expand);
  setOperationAction(ISD::BITCAST, MVT::f64, Expand);

  // We cannot sextinreg(i1).  Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // NOTE: EH_SJLJ_SETJMP/_LONGJMP are NOT intended to support SjLj exception
  // handling; they are a light-weight setjmp/longjmp replacement used for
  // continuations, user-level threading, and the like. As a result, no other
  // SjLj exception interfaces are implemented, so please don't build your own
  // exception handling on top of them.
  // LLVM/Clang supports zero-cost DWARF exception handling.
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::BlockAddress,  MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i64, Custom);

  // TRAP is legal.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.
  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  if (Subtarget.isSVR4ABI()) {
    if (isPPC64) {
      // VAARG always uses double-word chunks, so promote anything smaller.
      setOperationAction(ISD::VAARG, MVT::i1, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i8, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i16, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i32, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::Other, Expand);
    } else {
      // VAARG is custom lowered with the 32-bit SVR4 ABI.
      setOperationAction(ISD::VAARG, MVT::Other, Custom);
      setOperationAction(ISD::VAARG, MVT::i64, Custom);
    }
  } else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  if (Subtarget.isSVR4ABI() && !isPPC64)
    // VACOPY is custom lowered with the 32-bit SVR4 ABI.
    setOperationAction(ISD::VACOPY            , MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY            , MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // To handle counter-based loop conditions.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);

  // Comparisons that require checking two conditions.
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
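  // A single fcmpu sets exactly one of the LT/GT/EQ/UN bits in a CR field,
  // and a conditional branch tests a single bit, so predicates that are true
  // for two of those outcomes (e.g. SETUEQ == EQ || UN, SETONE == LT || GT)
  // must be expanded into two checks.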

  if (Subtarget.has64BitSupport()) {
    // 64-bit-capable implementations also have instructions for converting
    // between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

    if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  // With the instructions enabled under FPCVT, we can do everything.
  if (Subtarget.hasFPCVT()) {
    if (Subtarget.has64BitSupport()) {
      setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
    }

    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  }

  if (Subtarget.use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly.
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or.
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }

  if (Subtarget.hasAltivec()) {
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
      MVT::SimpleValueType VT = (MVT::SimpleValueType)i;

      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD , VT, Legal);
      setOperationAction(ISD::SUB , VT, Legal);

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND   , VT, Promote);
      AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
      setOperationAction(ISD::OR    , VT, Promote);
      AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
      setOperationAction(ISD::XOR   , VT, Promote);
      AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
      setOperationAction(ISD::LOAD  , VT, Promote);
      AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType (ISD::STORE, VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL , VT, Expand);
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::FDIV, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FNEG, VT, Expand);
      setOperationAction(ISD::FSQRT, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FABS, VT, Expand);
      setOperationAction(ISD::FPOWI, VT, Expand);
      setOperationAction(ISD::FFLOOR, VT, Expand);
      setOperationAction(ISD::FCEIL,  VT, Expand);
      setOperationAction(ISD::FTRUNC, VT, Expand);
      setOperationAction(ISD::FRINT,  VT, Expand);
      setOperationAction(ISD::FNEARBYINT, VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UDIVREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::BSWAP, VT, Expand);
      setOperationAction(ISD::CTPOP, VT, Expand);
      setOperationAction(ISD::CTLZ, VT, Expand);
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::CTTZ, VT, Expand);
      setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::VSELECT, VT, Expand);
      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);

      for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
           j <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++j) {
        MVT::SimpleValueType InnerVT = (MVT::SimpleValueType)j;
        setTruncStoreAction(VT, InnerVT, Expand);
      }
      setLoadExtAction(ISD::SEXTLOAD, VT, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, VT, Expand);
      setLoadExtAction(ISD::EXTLOAD, VT, Expand);
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM; others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    setOperationAction(ISD::AND   , MVT::v4i32, Legal);
    setOperationAction(ISD::OR    , MVT::v4i32, Legal);
    setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32,
                       Subtarget.useCRBits() ? Legal : Expand);
    setOperationAction(ISD::STORE , MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);

    if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    }

    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    // Altivec does not have unordered floating-point compare instructions.
    setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUGT, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUGE, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETULT, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETULE, MVT::v4f32, Expand);

    setCondCodeAction(ISD::SETO,   MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);

    if (Subtarget.hasVSX()) {
      setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);

      setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
      setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
      setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
      setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
      setOperationAction(ISD::FROUND, MVT::v2f64, Legal);

      setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

      setOperationAction(ISD::MUL, MVT::v2f64, Legal);
      setOperationAction(ISD::FMA, MVT::v2f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);

      setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
      setOperationAction(ISD::VSELECT, MVT::v8i16, Legal);
      setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
      setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
      setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);

      // Share the Altivec comparison restrictions.
      setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETUGT, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETUGE, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETULT, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETULE, MVT::v2f64, Expand);

      setCondCodeAction(ISD::SETO,   MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);

      setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
      setOperationAction(ISD::STORE, MVT::v2f64, Legal);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);

      addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);

      addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);

      // VSX v2i64 only supports non-arithmetic operations.
      setOperationAction(ISD::ADD, MVT::v2i64, Expand);
      setOperationAction(ISD::SUB, MVT::v2i64, Expand);

      setOperationAction(ISD::SHL, MVT::v2i64, Expand);
      setOperationAction(ISD::SRA, MVT::v2i64, Expand);
      setOperationAction(ISD::SRL, MVT::v2i64, Expand);

      setOperationAction(ISD::SETCC, MVT::v2i64, Custom);

      setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
      AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
      setOperationAction(ISD::STORE, MVT::v2i64, Promote);
      AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);

      setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);

      // Vector operation legalization checks the result type of
      // SIGN_EXTEND_INREG, overall legalization checks the inner type.
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);

      addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
    }
  }

  if (Subtarget.has64BitSupport()) {
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
    setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
  }

  setOperationAction(ISD::ATOMIC_LOAD,  MVT::i32, Expand);
  setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
  setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);

  setBooleanContents(ZeroOrOneBooleanContent);
  // Altivec instructions set fields to all zeros or all ones.
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  if (isPPC64) {
    setStackPointerRegisterToSaveRestore(PPC::X1);
    setExceptionPointerRegister(PPC::X3);
    setExceptionSelectorRegister(PPC::X4);
  } else {
    setStackPointerRegisterToSaveRestore(PPC::R1);
    setExceptionPointerRegister(PPC::R3);
    setExceptionSelectorRegister(PPC::R4);
  }

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
  setTargetDAGCombine(ISD::LOAD);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);
  if (Subtarget.useCRBits())
    setTargetDAGCombine(ISD::BRCOND);
  setTargetDAGCombine(ISD::BSWAP);
  setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);

  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  if (Subtarget.useCRBits()) {
    setTargetDAGCombine(ISD::TRUNCATE);
    setTargetDAGCombine(ISD::SETCC);
    setTargetDAGCombine(ISD::SELECT_CC);
  }

  // Use reciprocal estimates.
  if (TM.Options.UnsafeFPMath) {
    setTargetDAGCombine(ISD::FDIV);
    setTargetDAGCombine(ISD::FSQRT);
  }

  // Darwin long double math library functions have $LDBL128 appended.
  if (Subtarget.isDarwin()) {
    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
    setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
    setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
    setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
    setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
    setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
  }

  // With 32 condition bits, we don't need to sink (and duplicate) compares
  // aggressively in CodeGenPrep.
  if (Subtarget.useCRBits())
    setHasMultipleConditionRegisters();

  setMinFunctionAlignment(2);
  if (Subtarget.isDarwin())
    setPrefFunctionAlignment(4);

  if (isPPC64 && Subtarget.isJITCodeModel())
    // Temporary workaround for the inability of PPC64 JIT to handle jump
    // tables.
    setSupportJumpTables(false);

  setInsertFencesForAtomic(true);

  if (Subtarget.enableMachineScheduler())
    setSchedulingPreference(Sched::Source);
  else
    setSchedulingPreference(Sched::Hybrid);

  computeRegisterProperties();

  // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
  if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
      Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
    MaxStoresPerMemset = 32;
    MaxStoresPerMemsetOptSize = 16;
    MaxStoresPerMemcpy = 32;
    MaxStoresPerMemcpyOptSize = 8;
    MaxStoresPerMemmove = 32;
    MaxStoresPerMemmoveOptSize = 8;

    setPrefFunctionAlignment(4);
  }
}

/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
                             unsigned MaxMaxAlign) {
  if (MaxAlign == MaxMaxAlign)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
      MaxAlign = 32;
    else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
      MaxAlign = 16;
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    unsigned EltAlign = 0;
    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      unsigned EltAlign = 0;
      getMaxByValAlign(STy->getElementType(i), EltAlign, MaxMaxAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == MaxMaxAlign)
        break;
    }
  }
}
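// For example, with Altivec (MaxMaxAlign == 16) a struct containing a
// <4 x i32> member (128 bits) yields MaxAlign == 16, while a struct of plain
// scalars leaves MaxAlign untouched.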

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
  // Darwin passes everything on a 4-byte boundary.
  if (Subtarget.isDarwin())
    return 4;

  // 16-byte and wider vectors are passed on a 16-byte boundary.
  // The rest are 8-byte aligned on PPC64 and 4-byte aligned on PPC32.
  unsigned Align = Subtarget.isPPC64() ? 8 : 4;
  if (Subtarget.hasAltivec() || Subtarget.hasQPX())
    getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
  return Align;
}

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return nullptr;
  case PPCISD::FSEL:            return "PPCISD::FSEL";
  case PPCISD::FCFID:           return "PPCISD::FCFID";
  case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
  case PPCISD::FRE:             return "PPCISD::FRE";
  case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
  case PPCISD::STFIWX:          return "PPCISD::STFIWX";
  case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM:           return "PPCISD::VPERM";
  case PPCISD::Hi:              return "PPCISD::Hi";
  case PPCISD::Lo:              return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
  case PPCISD::LOAD:            return "PPCISD::LOAD";
  case PPCISD::LOAD_TOC:        return "PPCISD::LOAD_TOC";
  case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
  case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL:             return "PPCISD::SRL";
  case PPCISD::SRA:             return "PPCISD::SRA";
  case PPCISD::SHL:             return "PPCISD::SHL";
  case PPCISD::CALL:            return "PPCISD::CALL";
  case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
  case PPCISD::MTCTR:           return "PPCISD::MTCTR";
  case PPCISD::BCTRL:           return "PPCISD::BCTRL";
  case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
  case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
  case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
  case PPCISD::VCMP:            return "PPCISD::VCMP";
  case PPCISD::VCMPo:           return "PPCISD::VCMPo";
  case PPCISD::LBRX:            return "PPCISD::LBRX";
  case PPCISD::STBRX:           return "PPCISD::STBRX";
  case PPCISD::LARX:            return "PPCISD::LARX";
  case PPCISD::STCX:            return "PPCISD::STCX";
  case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
  case PPCISD::BDNZ:            return "PPCISD::BDNZ";
  case PPCISD::BDZ:             return "PPCISD::BDZ";
  case PPCISD::MFFS:            return "PPCISD::MFFS";
  case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
  case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
  case PPCISD::CR6SET:          return "PPCISD::CR6SET";
  case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
  case PPCISD::ADDIS_TOC_HA:    return "PPCISD::ADDIS_TOC_HA";
  case PPCISD::LD_TOC_L:        return "PPCISD::LD_TOC_L";
  case PPCISD::ADDI_TOC_L:      return "PPCISD::ADDI_TOC_L";
  case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
  case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
  case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
  case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
  case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
  case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
  case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
  case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
  case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
  case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
  case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
  case PPCISD::SC:              return "PPCISD::SC";
  }
}

EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
  if (!VT.isVector())
    return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary,
                               SelectionDAG &DAG) {
  unsigned j = DAG.getTarget().getDataLayout()->isLittleEndian() ? 0 : 1;
  if (!isUnary) {
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i),  i*2+j))
        return false;
  } else {
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i),    i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j))
        return false;
  }
  return true;
}
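// For example, in the big-endian, two-input form the VPKUHUM mask is
// <1,3,5,...,29,31>: the low-order (odd-numbered) byte of each halfword
// drawn from both input vectors.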

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary,
                               SelectionDAG &DAG) {
  unsigned j, k;
  if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
    j = 0;
    k = 1;
  } else {
    j = 2;
    k = 3;
  }
  if (!isUnary) {
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+k))
        return false;
  } else {
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+k) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+k))
        return false;
  }
  return true;
}
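// For example, in the big-endian, two-input form the VPKUWUM mask is
// <2,3, 6,7, 10,11, ..., 30,31>: the low-order halfword of each word drawn
// from both input vectors.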

/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             bool isUnary, SelectionDAG &DAG) {
  if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
    if (!isUnary)
      return isVMerge(N, UnitSize, 0, 16);
    return isVMerge(N, UnitSize, 0, 0);
  } else {
    if (!isUnary)
      return isVMerge(N, UnitSize, 8, 24);
    return isVMerge(N, UnitSize, 8, 8);
  }
}
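// For example, a big-endian, two-input VMRGLB (UnitSize == 1) corresponds to
// the mask <8,24, 9,25, 10,26, ..., 15,31>, interleaving the low halves of
// the two inputs.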

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             bool isUnary, SelectionDAG &DAG) {
  if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
    if (!isUnary)
      return isVMerge(N, UnitSize, 8, 24);
    return isVMerge(N, UnitSize, 8, 8);
  } else {
    if (!isUnary)
      return isVMerge(N, UnitSize, 0, 16);
    return isVMerge(N, UnitSize, 0, 0);
  }
}

/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary, SelectionDAG &DAG) {
  if (N->getValueType(0) != MVT::v16i8)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;

  if (DAG.getTarget().getDataLayout()->isLittleEndian()) {

    ShiftAmt += i;

    if (!isUnary) {
      // Check the rest of the elements to see if they are consecutive.
      for (++i; i != 16; ++i)
        if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt - i))
          return -1;
    } else {
      // Check the rest of the elements to see if they are consecutive.
      for (++i; i != 16; ++i)
        if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt - i) & 15))
          return -1;
    }

  } else {  // Big Endian

    ShiftAmt -= i;

    if (!isUnary) {
      // Check the rest of the elements to see if they are consecutive.
      for (++i; i != 16; ++i)
        if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
          return -1;
    } else {
      // Check the rest of the elements to see if they are consecutive.
      for (++i; i != 16; ++i)
        if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
          return -1;
    }
  }
  return ShiftAmt;
}
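// For example, on a big-endian target the two-input mask <3,4,5,...,18>
// matches a vsldoi with a shift amount of 3 bytes.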

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
  assert(N->getValueType(0) == MVT::v16i8 &&
         (EltSize == 1 || EltSize == 2 || EltSize == 4));

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = N->getMaskElt(0);

  // FIXME: Handle UNDEF elements too!
  if (ElementBase >= 16)
    return false;

  // Check that the indices are consecutive, in the case of a multi-byte element
  // splatted with a v16i8 mask.
  for (unsigned i = 1; i != EltSize; ++i)
    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
      return false;

  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getMaskElt(i) < 0) continue;
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getMaskElt(i+j) != N->getMaskElt(j))
        return false;
  }
  return true;
}
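// For example, with EltSize == 4 the mask <4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7>
// is a splat of word element 1.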

/// isAllNegativeZeroVector - Returns true if all elements of build_vector
/// are -0.0.
bool PPC::isAllNegativeZeroVector(SDNode *N) {
  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);

  APInt APVal, APUndef;
  unsigned BitSize;
  bool HasAnyUndefs;

  if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true))
    if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
      return CFP->getValueAPF().isNegZero();

  return false;
}

/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
                                SelectionDAG &DAG) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  assert(isSplatShuffleMask(SVOp, EltSize));
  if (DAG.getTarget().getDataLayout()->isLittleEndian())
    return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
  else
    return SVOp->getMaskElt(0) / EltSize;
}
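// For the word-splat example above (EltSize == 4, mask starting at byte 4),
// this returns 1 on big-endian targets and (16/4) - 1 - 1 == 2 on
// little-endian targets, where element numbering is reversed.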

/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted.  The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  SDValue OpVal(nullptr, 0);

  // If ByteSize of the splat is bigger than the element size of the
  // build_vector, then we have a case where we are checking for a splat where
  // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
    SDValue UniquedVals[4];
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    // See if all of the elements in the buildvector agree across each chunk.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
      // If the element isn't a constant, bail fully out.
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();

      if (!UniquedVals[i&(Multiple-1)].getNode())
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
        return SDValue();  // no match.
    }

    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    // either constant or undef values that are identical for each chunk.  See
    // if these chunks can form into a larger vspltis*.

    // Check to see if all of the leading entries are either 0 or -1.  If
    // neither, then this won't fit into the immediate field.
    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.

      LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
      LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
    }
    // Finally, check the least significant entry.
    if (LeadingZero) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(0, MVT::i32);  // 0,0,0,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
      if (Val < 16)
        return DAG.getTargetConstant(Val, MVT::i32);  // 0,0,0,4 -> vspltisw(4)
    }
    if (LeadingOnes) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(~0U, MVT::i32);  // -1,-1,-1,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
      if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
        return DAG.getTargetConstant(Val, MVT::i32);
    }

    return SDValue();
  }

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (!OpVal.getNode())
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return SDValue();
  }

  if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.

  unsigned ValSizeInBytes = EltSize;
  uint64_t Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
    Value = CN->getZExtValue();
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = FloatToBits(CN->getValueAPF().convertToFloat());
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat.  The only case that we could fit the replicated bits into our
  // immediate field for would be zero, and we prefer to use vxor for it.
  if (ValSizeInBytes < ByteSize) return SDValue();

  // If the element value is larger than the splat value, cut it in half and
  // check to see if the two halves are equal.  Continue doing this until we
  // get to ByteSize.  This allows us to handle 0x01010101 as 0x01.
  while (ValSizeInBytes > ByteSize) {
    ValSizeInBytes >>= 1;

    // If the top half equals the bottom half, we're still ok.  (Use a 64-bit
    // mask constant so the shift is well-defined for 4-byte halves.)
    if (((Value >> (ValSizeInBytes*8)) & ((1ULL << (8*ValSizeInBytes))-1)) !=
         (Value                        & ((1ULL << (8*ValSizeInBytes))-1)))
      return SDValue();
  }

  // Properly sign extend the value.
  int MaskVal = SignExtend32(Value, ByteSize * 8);

  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDValue();

  // Finally, if this value fits in a 5-bit sext field, return it.
  if (SignExtend32<5>(MaskVal) == MaskVal)
    return DAG.getTargetConstant(MaskVal, MVT::i32);
  return SDValue();
}
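// For example, a v4i32 build_vector splatting the constant 5 matches with
// ByteSize == 4 (vspltisw 5), and a v4i32 splat of 0x01010101 matches with
// ByteSize == 1 (vspltisb 1) after the halving loop above reduces it to 0x01.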

//===----------------------------------------------------------------------===//
//  Addressing Mode Selection
//===----------------------------------------------------------------------===//

/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
/// or 64-bit immediate, and if the value can be accurately represented as a
/// sign extension from a 16-bit value.  If so, this returns true and the
/// immediate.
static bool isIntS16Immediate(SDNode *N, short &Imm) {
  if (!isa<ConstantSDNode>(N))
    return false;

  Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
  if (N->getValueType(0) == MVT::i32)
    return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
  else
    return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
}
static bool isIntS16Immediate(SDValue Op, short &Imm) {
  return isIntS16Immediate(Op.getNode(), Imm);
}
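// For example, an i32 constant of -32768 (0xFFFF8000) is accepted, while
// 32768 (0x00008000) is not, since the latter does not survive the
// sign-extend-from-16-bits round trip.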
1197
1198
1199/// SelectAddressRegReg - Given the specified addressed, check to see if it
1200/// can be represented as an indexed [r+r] operation.  Returns false if it
1201/// can be more efficiently represented with [r+imm].
1202bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
1203                                            SDValue &Index,
1204                                            SelectionDAG &DAG) const {
1205  short imm = 0;
1206  if (N.getOpcode() == ISD::ADD) {
1207    if (isIntS16Immediate(N.getOperand(1), imm))
1208      return false;    // r+i
1209    if (N.getOperand(1).getOpcode() == PPCISD::Lo)
1210      return false;    // r+i
1211
1212    Base = N.getOperand(0);
1213    Index = N.getOperand(1);
1214    return true;
1215  } else if (N.getOpcode() == ISD::OR) {
1216    if (isIntS16Immediate(N.getOperand(1), imm))
1217      return false;    // r+i can fold it if we can.
1218
1219    // If this is an or of disjoint bitfields, we can codegen this as an add
1220    // (for better address arithmetic) if the LHS and RHS of the OR are provably
1221    // disjoint.
1222    APInt LHSKnownZero, LHSKnownOne;
1223    APInt RHSKnownZero, RHSKnownOne;
1224    DAG.computeKnownBits(N.getOperand(0),
1225                         LHSKnownZero, LHSKnownOne);
1226
1227    if (LHSKnownZero.getBoolValue()) {
1228      DAG.computeKnownBits(N.getOperand(1),
1229                           RHSKnownZero, RHSKnownOne);
1230      // If all of the bits are known zero on the LHS or RHS, the add won't
1231      // carry.
1232      if (~(LHSKnownZero | RHSKnownZero) == 0) {
1233        Base = N.getOperand(0);
1234        Index = N.getOperand(1);
1235        return true;
1236      }
1237    }
1238  }
1239
1240  return false;
1241}
1242
1243// If we happen to be doing an i64 load or store into a stack slot that has
1244// less than a 4-byte alignment, then the frame-index elimination may need to
1245// use an indexed load or store instruction (because the offset may not be a
1246// multiple of 4). The extra register needed to hold the offset comes from the
1247// register scavenger, and it is possible that the scavenger will need to use
1248// an emergency spill slot. As a result, we need to make sure that a spill slot
1249// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
1250// stack slot.
1251static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
1252  // FIXME: This does not handle the LWA case.
1253  if (VT != MVT::i64)
1254    return;
1255
1256  // NOTE: We'll exclude negative FIs here, which come from argument
1257  // lowering, because there are no known test cases triggering this problem
1258  // using packed structures (or similar). We can remove this exclusion if
1259  // we find such a test case. The reason why this is so test-case driven is
1260  // because this entire 'fixup' is only to prevent crashes (from the
1261  // register scavenger) on not-really-valid inputs. For example, if we have:
1262  //   %a = alloca i1
1263  //   %b = bitcast i1* %a to i64*
1264  //   store i64* a, i64 b
1265  // then the store should really be marked as 'align 1', but is not. If it
1266  // were marked as 'align 1' then the indexed form would have been
1267  // instruction-selected initially, and the problem this 'fixup' is preventing
1268  // won't happen regardless.
1269  if (FrameIdx < 0)
1270    return;
1271
1272  MachineFunction &MF = DAG.getMachineFunction();
1273  MachineFrameInfo *MFI = MF.getFrameInfo();
1274
1275  unsigned Align = MFI->getObjectAlignment(FrameIdx);
1276  if (Align >= 4)
1277    return;
1278
1279  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
1280  FuncInfo->setHasNonRISpills();
1281}
1282
1283/// Returns true if the address N can be represented by a base register plus
1284/// a signed 16-bit displacement [r+imm], and if it is not better
1285/// represented as reg+reg.  If Aligned is true, only accept displacements
1286/// suitable for STD and friends, i.e. multiples of 4.
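/// For example, a D-form access such as "lwz r3, 16(r31)" accepts any signed
/// 16-bit displacement, while DS-form instructions such as LD and STD encode
/// only displacements that are multiples of 4 (the Aligned case).
/// (Mnemonics and registers here are illustrative.)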
1287bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
1288                                            SDValue &Base,
1289                                            SelectionDAG &DAG,
1290                                            bool Aligned) const {
1291  // FIXME dl should come from parent load or store, not from address
1292  SDLoc dl(N);
1293  // If this can be more profitably realized as r+r, fail.
1294  if (SelectAddressRegReg(N, Disp, Base, DAG))
1295    return false;
1296
1297  if (N.getOpcode() == ISD::ADD) {
1298    short imm = 0;
1299    if (isIntS16Immediate(N.getOperand(1), imm) &&
1300        (!Aligned || (imm & 3) == 0)) {
1301      Disp = DAG.getTargetConstant(imm, N.getValueType());
1302      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
1303        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
1304        fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
1305      } else {
1306        Base = N.getOperand(0);
1307      }
1308      return true; // [r+i]
1309    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
1310      // Match LOAD (ADD (X, Lo(G))).
1311      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
1312             && "Cannot handle constant offsets yet!");
1313      Disp = N.getOperand(1).getOperand(0);  // The global address.
1314      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
1315             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
1316             Disp.getOpcode() == ISD::TargetConstantPool ||
1317             Disp.getOpcode() == ISD::TargetJumpTable);
1318      Base = N.getOperand(0);
1319      return true;  // [&g+r]
1320    }
1321  } else if (N.getOpcode() == ISD::OR) {
1322    short imm = 0;
1323    if (isIntS16Immediate(N.getOperand(1), imm) &&
1324        (!Aligned || (imm & 3) == 0)) {
1325      // If this is an or of disjoint bitfields, we can codegen this as an add
1326      // (for better address arithmetic) if the LHS and RHS of the OR are
1327      // provably disjoint.
1328      APInt LHSKnownZero, LHSKnownOne;
1329      DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
1330
1331      if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
1332        // If all of the bits are known zero on the LHS or RHS, the add won't
1333        // carry.
1334        Base = N.getOperand(0);
1335        Disp = DAG.getTargetConstant(imm, N.getValueType());
1336        return true;
1337      }
1338    }
1339  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
1340    // Loading from a constant address.
1341
1342    // If this address fits entirely in a 16-bit sext immediate field, codegen
1343    // this as "d, 0"
1344    short Imm;
1345    if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
1346      Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
1347      Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
1348                             CN->getValueType(0));
1349      return true;
1350    }
1351
1352    // Handle 32-bit sext immediates with LIS + addr mode.
1353    if ((CN->getValueType(0) == MVT::i32 ||
1354         (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
1355        (!Aligned || (CN->getZExtValue() & 3) == 0)) {
1356      int Addr = (int)CN->getZExtValue();
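      // Worked example (illustrative values): for Addr = 0x1234F000 the low
      // half sign-extends to -4096, so Disp = -4096, and the adjustment below
      // gives Base = LIS 0x1235; 0x12350000 + (-4096) == 0x1234F000.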
1357
1358      // Break this down into an LIS + disp.
1359      Disp = DAG.getTargetConstant((short)Addr, MVT::i32);
1360
1361      Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32);
1362      unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
1363      Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
1364      return true;
1365    }
1366  }
1367
1368  Disp = DAG.getTargetConstant(0, getPointerTy());
1369  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
1370    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
1371    fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
1372  } else
1373    Base = N;
1374  return true;      // [r+0]
1375}
1376
1377/// SelectAddressRegRegOnly - Given the specified address, force it to be
1378/// represented as an indexed [r+r] operation.
1379bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
1380                                                SDValue &Index,
1381                                                SelectionDAG &DAG) const {
1382  // Check to see if we can easily represent this as an [r+r] address.  This
1383  // will fail if it thinks that the address is more profitably represented as
1384  // reg+imm, e.g. where imm = 0.
1385  if (SelectAddressRegReg(N, Base, Index, DAG))
1386    return true;
1387
1388  // If the operand is an addition, always emit this as [r+r], since this is
1389  // better (for code size, and execution, as the memop does the add for free)
1390  // than emitting an explicit add.
1391  if (N.getOpcode() == ISD::ADD) {
1392    Base = N.getOperand(0);
1393    Index = N.getOperand(1);
1394    return true;
1395  }
1396
1397  // Otherwise, do it the hard way, using R0 as the base register.
1398  Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
1399                         N.getValueType());
1400  Index = N;
1401  return true;
1402}
1403
1404/// getPreIndexedAddressParts - Returns true by value, and sets the base
1405/// pointer, offset pointer, and addressing mode by reference, if the node's
1406/// address can be legally represented as a pre-indexed load/store address.
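/// For example, a load whose address is (add r3, 8) can be selected as
/// "ldu r4, 8(r3)", which loads from r3+8 and also writes the updated
/// address back into r3, folding the pointer bump into the memory access.
/// (Illustrative assembly.)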
1407bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
1408                                                  SDValue &Offset,
1409                                                  ISD::MemIndexedMode &AM,
1410                                                  SelectionDAG &DAG) const {
1411  if (DisablePPCPreinc) return false;
1412
1413  bool isLoad = true;
1414  SDValue Ptr;
1415  EVT VT;
1416  unsigned Alignment;
1417  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
1418    Ptr = LD->getBasePtr();
1419    VT = LD->getMemoryVT();
1420    Alignment = LD->getAlignment();
1421  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
1422    Ptr = ST->getBasePtr();
1423    VT  = ST->getMemoryVT();
1424    Alignment = ST->getAlignment();
1425    isLoad = false;
1426  } else
1427    return false;
1428
1429  // PowerPC doesn't have preinc load/store instructions for vectors.
1430  if (VT.isVector())
1431    return false;
1432
1433  if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
1434
1435    // Common code will reject creating a pre-inc form if the base pointer
1436    // is a frame index, or if N is a store and the base pointer is either
1437    // the same as or a predecessor of the value being stored.  Check for
1438    // those situations here, and try with swapped Base/Offset instead.
1439    bool Swap = false;
1440
1441    if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
1442      Swap = true;
1443    else if (!isLoad) {
1444      SDValue Val = cast<StoreSDNode>(N)->getValue();
1445      if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
1446        Swap = true;
1447    }
1448
1449    if (Swap)
1450      std::swap(Base, Offset);
1451
1452    AM = ISD::PRE_INC;
1453    return true;
1454  }
1455
1456  // LDU/STU can only handle immediates that are a multiple of 4.
1457  if (VT != MVT::i64) {
1458    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false))
1459      return false;
1460  } else {
1461    // LDU/STU need an address with at least 4-byte alignment.
1462    if (Alignment < 4)
1463      return false;
1464
1465    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true))
1466      return false;
1467  }
1468
1469  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
1470    // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
1471    // sext i32 to i64 when addr mode is r+i.
1472    if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
1473        LD->getExtensionType() == ISD::SEXTLOAD &&
1474        isa<ConstantSDNode>(Offset))
1475      return false;
1476  }
1477
1478  AM = ISD::PRE_INC;
1479  return true;
1480}
1481
1482//===----------------------------------------------------------------------===//
1483//  LowerOperation implementation
1484//===----------------------------------------------------------------------===//
1485
1486/// GetLabelAccessInfo - Return true if we should reference labels using a
1487/// PICBase, and set HiOpFlags and LoOpFlags to the target MO flags.
1488static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
1489                               unsigned &LoOpFlags,
1490                               const GlobalValue *GV = nullptr) {
1491  HiOpFlags = PPCII::MO_HA;
1492  LoOpFlags = PPCII::MO_LO;
1493
1494  // Don't use the PIC base if not in the PIC relocation model, or if we are
1495  // on a non-Darwin platform; we don't support PIC on other platforms yet.
1496  bool isPIC = TM.getRelocationModel() == Reloc::PIC_ &&
1497               TM.getSubtarget<PPCSubtarget>().isDarwin();
1498  if (isPIC) {
1499    HiOpFlags |= PPCII::MO_PIC_FLAG;
1500    LoOpFlags |= PPCII::MO_PIC_FLAG;
1501  }
1502
1503  // If this is a reference to a global value that requires a non-lazy-ptr, make
1504  // sure that instruction lowering adds it.
1505  if (GV && TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV, TM)) {
1506    HiOpFlags |= PPCII::MO_NLP_FLAG;
1507    LoOpFlags |= PPCII::MO_NLP_FLAG;
1508
1509    if (GV->hasHiddenVisibility()) {
1510      HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
1511      LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
1512    }
1513  }
1514
1515  return isPIC;
1516}
1517
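// On ELF targets, the (Hi, Lo, ADD) combination built below typically
// selects to a two-instruction sequence along the lines of (illustrative):
//   lis  r3, g@ha
//   addi r3, r3, g@l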
1518static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
1519                             SelectionDAG &DAG) {
1520  EVT PtrVT = HiPart.getValueType();
1521  SDValue Zero = DAG.getConstant(0, PtrVT);
1522  SDLoc DL(HiPart);
1523
1524  SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
1525  SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
1526
1527  // With PIC, the first instruction is actually "GR+hi(&G)".
1528  if (isPIC)
1529    Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
1530                     DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
1531
1532  // Generate non-pic code that has direct accesses to the constant pool.
1533  // The address of the global is just (hi(&g)+lo(&g)).
1534  return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
1535}
1536
1537SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
1538                                             SelectionDAG &DAG) const {
1539  EVT PtrVT = Op.getValueType();
1540  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
1541  const Constant *C = CP->getConstVal();
1542
1543  // 64-bit SVR4 ABI code is always position-independent.
1544  // The actual address of the GlobalValue is stored in the TOC.
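  // In practice the TOC_ENTRY node becomes a single TOC-relative load,
  // e.g. "ld r3, .LC0@toc(r2)" (illustrative; the exact form depends on the
  // code model), with X2 holding the TOC base pointer.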
1545  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
1546    SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
1547    return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(CP), MVT::i64, GA,
1548                       DAG.getRegister(PPC::X2, MVT::i64));
1549  }
1550
1551  unsigned MOHiFlag, MOLoFlag;
1552  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
1553  SDValue CPIHi =
1554    DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
1555  SDValue CPILo =
1556    DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
1557  return LowerLabelRef(CPIHi, CPILo, isPIC, DAG);
1558}
1559
1560SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
1561  EVT PtrVT = Op.getValueType();
1562  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
1563
1564  // 64-bit SVR4 ABI code is always position-independent.
1565  // The actual address of the GlobalValue is stored in the TOC.
1566  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
1567    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
1568    return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), MVT::i64, GA,
1569                       DAG.getRegister(PPC::X2, MVT::i64));
1570  }
1571
1572  unsigned MOHiFlag, MOLoFlag;
1573  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
1574  SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
1575  SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
1576  return LowerLabelRef(JTIHi, JTILo, isPIC, DAG);
1577}
1578
1579SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
1580                                             SelectionDAG &DAG) const {
1581  EVT PtrVT = Op.getValueType();
1582
1583  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
1584
1585  unsigned MOHiFlag, MOLoFlag;
1586  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
1587  SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
1588  SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
1589  return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
1590}
1591
1592SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
1593                                              SelectionDAG &DAG) const {
1594
1595  // FIXME: TLS addresses currently use medium model code sequences,
1596  // which is the most useful form.  Eventually support for small and
1597  // large models could be added if users need it, at the cost of
1598  // additional complexity.
1599  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
1600  SDLoc dl(GA);
1601  const GlobalValue *GV = GA->getGlobal();
1602  EVT PtrVT = getPointerTy();
1603  bool is64bit = Subtarget.isPPC64();
1604
1605  TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
1606
1607  if (Model == TLSModel::LocalExec) {
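    // Local-exec variables live at a fixed offset from the thread pointer
    // (X13 on 64-bit), so this builds the equivalent of (illustrative):
    //   addis r3, r13, v@tprel@ha
    //   addi  r3, r3, v@tprel@l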
1608    SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
1609                                               PPCII::MO_TPREL_HA);
1610    SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
1611                                               PPCII::MO_TPREL_LO);
1612    SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
1613                                     is64bit ? MVT::i64 : MVT::i32);
1614    SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
1615    return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
1616  }
1617
1618  if (Model == TLSModel::InitialExec) {
1619    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
1620    SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
1621                                                PPCII::MO_TLS);
1622    SDValue GOTPtr;
1623    if (is64bit) {
1624      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
1625      GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
1626                           PtrVT, GOTReg, TGA);
1627    } else
1628      GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
1629    SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
1630                                   PtrVT, TGA, GOTPtr);
1631    return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
1632  }
1633
1634  if (Model == TLSModel::GeneralDynamic) {
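    // The medium-model general-dynamic sequence built below corresponds
    // roughly to (illustrative):
    //   addis r3, r2, x@got@tlsgd@ha
    //   addi  r3, r3, x@got@tlsgd@l
    //   bl    __tls_get_addr(x@tlsgd)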
1635    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
1636    SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
1637    SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
1638                                     GOTReg, TGA);
1639    SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT,
1640                                   GOTEntryHi, TGA);
1641
1642    // We need a chain node, and don't have one handy.  The underlying
1643    // call has no side effects, so using the function entry node
1644    // suffices.
1645    SDValue Chain = DAG.getEntryNode();
1646    Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry);
1647    SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64);
1648    SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLS_ADDR, dl,
1649                                  PtrVT, ParmReg, TGA);
1650    // The return value from GET_TLS_ADDR really is in X3 already, but
1651    // some hacks are needed here to tie everything together.  The extra
1652    // copies dissolve during subsequent transforms.
1653    Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr);
1654    return DAG.getCopyFromReg(Chain, dl, PPC::X3, PtrVT);
1655  }
1656
1657  if (Model == TLSModel::LocalDynamic) {
1658    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
1659    SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
1660    SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
1661                                     GOTReg, TGA);
1662    SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT,
1663                                   GOTEntryHi, TGA);
1664
1665    // We need a chain node, and don't have one handy.  The underlying
1666    // call has no side effects, so using the function entry node
1667    // suffices.
1668    SDValue Chain = DAG.getEntryNode();
1669    Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry);
1670    SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64);
1671    SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLSLD_ADDR, dl,
1672                                  PtrVT, ParmReg, TGA);
1673    // The return value from GET_TLSLD_ADDR really is in X3 already, but
1674    // some hacks are needed here to tie everything together.  The extra
1675    // copies dissolve during subsequent transforms.
1676    Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr);
1677    SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT,
1678                                      Chain, ParmReg, TGA);
1679    return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
1680  }
1681
1682  llvm_unreachable("Unknown TLS model!");
1683}
1684
1685SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
1686                                              SelectionDAG &DAG) const {
1687  EVT PtrVT = Op.getValueType();
1688  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
1689  SDLoc DL(GSDN);
1690  const GlobalValue *GV = GSDN->getGlobal();
1691
1692  // 64-bit SVR4 ABI code is always position-independent.
1693  // The actual address of the GlobalValue is stored in the TOC.
1694  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
1695    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
1696    return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA,
1697                       DAG.getRegister(PPC::X2, MVT::i64));
1698  }
1699
1700  unsigned MOHiFlag, MOLoFlag;
1701  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag, GV);
1702
1703  SDValue GAHi =
1704    DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
1705  SDValue GALo =
1706    DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
1707
1708  SDValue Ptr = LowerLabelRef(GAHi, GALo, isPIC, DAG);
1709
1710  // If the global reference is actually to a non-lazy-pointer, we have to do an
1711  // extra load to get the address of the global.
1712  if (MOHiFlag & PPCII::MO_NLP_FLAG)
1713    Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(),
1714                      false, false, false, 0);
1715  return Ptr;
1716}
1717
1718SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
1719  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1720  SDLoc dl(Op);
1721
1722  if (Op.getValueType() == MVT::v2i64) {
1723    // When the operands themselves are v2i64 values, we need to do something
1724    // special because VSX has no underlying comparison operations for these.
1725    if (Op.getOperand(0).getValueType() == MVT::v2i64) {
1726      // Equality can be handled by casting to the legal type for Altivec
1727      // comparisons; everything else needs to be expanded.
1728      if (CC == ISD::SETEQ || CC == ISD::SETNE) {
1729        return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
1730                 DAG.getSetCC(dl, MVT::v4i32,
1731                   DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
1732                   DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
1733                   CC));
1734      }
1735
1736      return SDValue();
1737    }
1738
1739    // We handle most of these in the usual way.
1740    return Op;
1741  }
1742
1743  // If we're comparing for equality to zero, expose the fact that this is
1744  // implemented as a ctlz/srl pair on PPC, so that the DAG combiner can
1745  // fold the new nodes.
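  // For example, (i32 seteq %x, 0) becomes roughly (illustrative registers):
  //   cntlzw r4, r3      ; produces 32 iff r3 == 0
  //   srwi   r4, r4, 5   ; shift by log2(32): result is 1 iff r3 was 0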
1746  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
1747    if (C->isNullValue() && CC == ISD::SETEQ) {
1748      EVT VT = Op.getOperand(0).getValueType();
1749      SDValue Zext = Op.getOperand(0);
1750      if (VT.bitsLT(MVT::i32)) {
1751        VT = MVT::i32;
1752        Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
1753      }
1754      unsigned Log2b = Log2_32(VT.getSizeInBits());
1755      SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
1756      SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
1757                                DAG.getConstant(Log2b, MVT::i32));
1758      return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
1759    }
1760    // Leave comparisons against 0 and -1 alone for now, since they're usually
1761    // optimized.  FIXME: revisit this when we can custom lower all setcc
1762    // optimizations.
1763    if (C->isAllOnesValue() || C->isNullValue())
1764      return SDValue();
1765  }
1766
1767  // If we have an integer seteq/setne, turn it into a compare against zero
1768  // by xor'ing the rhs with the lhs, which is faster than setting a
1769  // condition register, reading it back out, and masking the correct bit.  The
1770  // normal approach here uses sub to do this instead of xor.  Using xor exposes
1771  // the result to other bit-twiddling opportunities.
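  // E.g. (setne %a, %b) is rewritten below as (setne (xor %a, %b), 0),
  // leaving the xor result visible to later combines.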
1772  EVT LHSVT = Op.getOperand(0).getValueType();
1773  if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
1774    EVT VT = Op.getValueType();
1775    SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
1776                                Op.getOperand(1));
1777    return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC);
1778  }
1779  return SDValue();
1780}
1781
1782SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
1783                                      const PPCSubtarget &Subtarget) const {
1784  SDNode *Node = Op.getNode();
1785  EVT VT = Node->getValueType(0);
1786  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1787  SDValue InChain = Node->getOperand(0);
1788  SDValue VAListPtr = Node->getOperand(1);
1789  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
1790  SDLoc dl(Node);
1791
1792  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
1793
1794  // gpr_index
1795  SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
1796                                    VAListPtr, MachinePointerInfo(SV), MVT::i8,
1797                                    false, false, 0);
1798  InChain = GprIndex.getValue(1);
1799
1800  if (VT == MVT::i64) {
1801    // Check if GprIndex is even
1802    SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
1803                                 DAG.getConstant(1, MVT::i32));
1804    SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
1805                                DAG.getConstant(0, MVT::i32), ISD::SETNE);
1806    SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
1807                                          DAG.getConstant(1, MVT::i32));
1808    // Align GprIndex to be even if it isn't
1809    GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
1810                           GprIndex);
1811  }
1812
1813  // fpr index is 1 byte after gpr
1814  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
1815                               DAG.getConstant(1, MVT::i32));
1816
1817  // fpr
1818  SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
1819                                    FprPtr, MachinePointerInfo(SV), MVT::i8,
1820                                    false, false, 0);
1821  InChain = FprIndex.getValue(1);
1822
1823  SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
1824                                       DAG.getConstant(8, MVT::i32));
1825
1826  SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
1827                                        DAG.getConstant(4, MVT::i32));
1828
1829  // areas
1830  SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr,
1831                                     MachinePointerInfo(), false, false,
1832                                     false, 0);
1833  InChain = OverflowArea.getValue(1);
1834
1835  SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr,
1836                                    MachinePointerInfo(), false, false,
1837                                    false, 0);
1838  InChain = RegSaveArea.getValue(1);
1839
1840  // select overflow_area if index >= 8 (i.e. all eight registers are used)
1841  SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
1842                            DAG.getConstant(8, MVT::i32), ISD::SETLT);
1843
1844  // adjustment constant gpr_index * 4/8
1845  SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
1846                                    VT.isInteger() ? GprIndex : FprIndex,
1847                                    DAG.getConstant(VT.isInteger() ? 4 : 8,
1848                                                    MVT::i32));
1849
1850  // OurReg = RegSaveArea + RegConstant
1851  SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
1852                               RegConstant);
1853
1854  // Floating types are 32 bytes into RegSaveArea
1855  if (VT.isFloatingPoint())
1856    OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
1857                         DAG.getConstant(32, MVT::i32));
1858
1859  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
1860  SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
1861                                   VT.isInteger() ? GprIndex : FprIndex,
1862                                   DAG.getConstant(VT == MVT::i64 ? 2 : 1,
1863                                                   MVT::i32));
1864
1865  InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
1866                              VT.isInteger() ? VAListPtr : FprPtr,
1867                              MachinePointerInfo(SV),
1868                              MVT::i8, false, false, 0);
1869
1870  // determine if we should load from reg_save_area or overflow_area
1871  SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
1872
1873  // increase overflow_area by 4/8 if gpr/fpr index >= 8
1874  SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
1875                                          DAG.getConstant(VT.isInteger() ? 4 : 8,
1876                                          MVT::i32));
1877
1878  OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
1879                             OverflowAreaPlusN);
1880
1881  InChain = DAG.getTruncStore(InChain, dl, OverflowArea,
1882                              OverflowAreaPtr,
1883                              MachinePointerInfo(),
1884                              MVT::i32, false, false, 0);
1885
1886  return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(),
1887                     false, false, false, 0);
1888}
1889
1890SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG,
1891                                       const PPCSubtarget &Subtarget) const {
1892  assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
1893
1894  // We have to copy the entire va_list struct:
1895  // 2*sizeof(char) + 2 bytes of alignment padding + 2*sizeof(char*) = 12 bytes
1896  return DAG.getMemcpy(Op.getOperand(0), Op,
1897                       Op.getOperand(1), Op.getOperand(2),
1898                       DAG.getConstant(12, MVT::i32), 8, false, true,
1899                       MachinePointerInfo(), MachinePointerInfo());
1900}
1901
1902SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
1903                                                  SelectionDAG &DAG) const {
1904  return Op.getOperand(0);
1905}
1906
1907SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
1908                                                SelectionDAG &DAG) const {
1909  SDValue Chain = Op.getOperand(0);
1910  SDValue Trmp = Op.getOperand(1); // trampoline
1911  SDValue FPtr = Op.getOperand(2); // nested function
1912  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
1913  SDLoc dl(Op);
1914
1915  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1916  bool isPPC64 = (PtrVT == MVT::i64);
1917  Type *IntPtrTy =
1918    DAG.getTargetLoweringInfo().getDataLayout()->getIntPtrType(
1919                                                             *DAG.getContext());
1920
1921  TargetLowering::ArgListTy Args;
1922  TargetLowering::ArgListEntry Entry;
1923
1924  Entry.Ty = IntPtrTy;
1925  Entry.Node = Trmp; Args.push_back(Entry);
1926
1927  // TrampSize == (isPPC64 ? 48 : 40);
1928  Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40,
1929                               isPPC64 ? MVT::i64 : MVT::i32);
1930  Args.push_back(Entry);
1931
1932  Entry.Node = FPtr; Args.push_back(Entry);
1933  Entry.Node = Nest; Args.push_back(Entry);
1934
1935  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
1936  TargetLowering::CallLoweringInfo CLI(DAG);
1937  CLI.setDebugLoc(dl).setChain(Chain)
1938    .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
1939               DAG.getExternalSymbol("__trampoline_setup", PtrVT),
1940               std::move(Args), 0);
1941
1942  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
1943  return CallResult.second;
1944}
1945
1946SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
1947                                        const PPCSubtarget &Subtarget) const {
1948  MachineFunction &MF = DAG.getMachineFunction();
1949  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
1950
1951  SDLoc dl(Op);
1952
1953  if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
1954    // vastart just stores the address of the VarArgsFrameIndex slot into the
1955    // memory location argument.
1956    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1957    SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
1958    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1959    return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
1960                        MachinePointerInfo(SV),
1961                        false, false, 0);
1962  }
1963
1964  // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
1965  // We suppose the given va_list is already allocated.
1966  //
1967  // typedef struct {
1968  //  char gpr;     /* index into the array of 8 GPRs
1969  //                 * stored in the register save area
1970  //                 * gpr=0 corresponds to r3,
1971  //                 * gpr=1 to r4, etc.
1972  //                 */
1973  //  char fpr;     /* index into the array of 8 FPRs
1974  //                 * stored in the register save area
1975  //                 * fpr=0 corresponds to f1,
1976  //                 * fpr=1 to f2, etc.
1977  //                 */
1978  //  char *overflow_arg_area;
1979  //                /* location on stack that holds
1980  //                 * the next overflow argument
1981  //                 */
1982  //  char *reg_save_area;
1983  //               /* where r3:r10 and f1:f8 (if saved)
1984  //                * are stored
1985  //                */
1986  // } va_list[1];
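  // For example, once the fixed arguments of f(int, double, ...) have been
  // accounted for, the callee's va_list starts at gpr = 1 and fpr = 1; each
  // va_arg reads from reg_save_area until the corresponding index reaches 8,
  // after which it falls back to overflow_arg_area.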
1987
1988
1989  SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), MVT::i32);
1990  SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), MVT::i32);
1991
1992
1993  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1994
1995  SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
1996                                            PtrVT);
1997  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
1998                                 PtrVT);
1999
2000  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
2001  SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT);
2002
2003  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
2004  SDValue ConstStackOffset = DAG.getConstant(StackOffset, PtrVT);
2005
2006  uint64_t FPROffset = 1;
2007  SDValue ConstFPROffset = DAG.getConstant(FPROffset, PtrVT);
2008
2009  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2010
2011  // Store first byte: number of int regs
2012  SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR,
2013                                         Op.getOperand(1),
2014                                         MachinePointerInfo(SV),
2015                                         MVT::i8, false, false, 0);
2016  uint64_t nextOffset = FPROffset;
2017  SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
2018                                  ConstFPROffset);
2019
2020  // Store second byte: number of float regs
2021  SDValue secondStore =
2022    DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
2023                      MachinePointerInfo(SV, nextOffset), MVT::i8,
2024                      false, false, 0);
2025  nextOffset += StackOffset;
2026  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
2027
2028  // Store second word: arguments given on stack
2029  SDValue thirdStore =
2030    DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
2031                 MachinePointerInfo(SV, nextOffset),
2032                 false, false, 0);
2033  nextOffset += FrameOffset;
2034  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
2035
2036  // Store third word: arguments given in registers
2037  return DAG.getStore(thirdStore, dl, FR, nextPtr,
2038                      MachinePointerInfo(SV, nextOffset),
2039                      false, false, 0);
2040
2041}
2042
2043#include "PPCGenCallingConv.inc"
2044
2045// Function whose sole purpose is to kill compiler warnings
2046// stemming from unused functions included from PPCGenCallingConv.inc.
2047CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
2048  return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
2049}
2050
2051bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
2052                                      CCValAssign::LocInfo &LocInfo,
2053                                      ISD::ArgFlagsTy &ArgFlags,
2054                                      CCState &State) {
2055  return true;
2056}
2057
2058bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
2059                                             MVT &LocVT,
2060                                             CCValAssign::LocInfo &LocInfo,
2061                                             ISD::ArgFlagsTy &ArgFlags,
2062                                             CCState &State) {
2063  static const MCPhysReg ArgRegs[] = {
2064    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
2065    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
2066  };
2067  const unsigned NumArgRegs = array_lengthof(ArgRegs);
2068
2069  unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
2070
2071  // Skip one register if the first unallocated register has an even register
2072  // number and argument registers are still available: i64 arguments must
2073  // start in an odd-numbered register (r3, r5, r7, or r9). RegNum is an index
2074  // into ArgRegs, so we need to skip a register when RegNum is odd.
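  // For example, if an i32 already consumed r3, the first unallocated
  // register is r4 (RegNum == 1), so r4 is burned here and a following i64
  // lands in the aligned pair r5/r6.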
2075  if (RegNum != NumArgRegs && RegNum % 2 == 1) {
2076    State.AllocateReg(ArgRegs[RegNum]);
2077  }
2078
2079  // Always return false here, as this function only makes sure that the first
2080  // unallocated register has an odd register number and does not actually
2081  // allocate a register for the current argument.
2082  return false;
2083}
2084
2085bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
2086                                               MVT &LocVT,
2087                                               CCValAssign::LocInfo &LocInfo,
2088                                               ISD::ArgFlagsTy &ArgFlags,
2089                                               CCState &State) {
2090  static const MCPhysReg ArgRegs[] = {
2091    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
2092    PPC::F8
2093  };
2094
2095  const unsigned NumArgRegs = array_lengthof(ArgRegs);
2096
2097  unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
2098
2099  // If there is only one floating-point register left, we need to put both
2100  // f64 values of a split ppc_fp128 value on the stack.
2101  if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
2102    State.AllocateReg(ArgRegs[RegNum]);
2103  }
2104
2105  // Always return false here, as this function only makes sure that the two f64
2106  // values a ppc_fp128 value is split into are both passed in registers or both
2107  // passed on the stack and does not actually allocate a register for the
2108  // current argument.
2109  return false;
2110}
2111
2112/// GetFPR - Get the set of FP registers that should be allocated for
2113/// arguments on Darwin (also used by the 64-bit SVR4 lowering below).
2114static const MCPhysReg *GetFPR() {
2115  static const MCPhysReg FPR[] = {
2116    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
2117    PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
2118  };
2119
2120  return FPR;
2121}
2122
2123/// CalculateStackSlotSize - Calculates the size reserved for this argument on
2124/// the stack.
2125static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
2126                                       unsigned PtrByteSize) {
2127  unsigned ArgSize = ArgVT.getStoreSize();
2128  if (Flags.isByVal())
2129    ArgSize = Flags.getByValSize();
2130  ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
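  // E.g. a 13-byte byval argument with PtrByteSize == 8 reserves 16 bytes.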
2131
2132  return ArgSize;
2133}
2134
2135/// CalculateStackSlotAlignment - Calculates the alignment of this argument
2136/// on the stack.
2137static unsigned CalculateStackSlotAlignment(EVT ArgVT, ISD::ArgFlagsTy Flags,
2138                                            unsigned PtrByteSize) {
2139  unsigned Align = PtrByteSize;
2140
2141  // Altivec parameters are padded to a 16 byte boundary.
2142  if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
2143      ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
2144      ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
2145    Align = 16;
2146
2147  // ByVal parameters are aligned as requested.
2148  if (Flags.isByVal()) {
2149    unsigned BVAlign = Flags.getByValAlign();
2150    if (BVAlign > PtrByteSize) {
2151      if (BVAlign % PtrByteSize != 0)
2152        llvm_unreachable(
2153            "ByVal alignment is not a multiple of the pointer size");
2154
2155      Align = BVAlign;
2156    }
2157  }
2158
2159  return Align;
2160}
2161
2162/// EnsureStackAlignment - Round stack frame size up from NumBytes to
2163/// ensure minimum alignment required for target.
2164static unsigned EnsureStackAlignment(const TargetMachine &Target,
2165                                     unsigned NumBytes) {
2166  unsigned TargetAlign = Target.getFrameLowering()->getStackAlignment();
2167  unsigned AlignMask = TargetAlign - 1;
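  // Round up using a power-of-two mask: e.g. for a 16-byte stack alignment,
  // (52 + 15) & ~15 == 64.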
2168  NumBytes = (NumBytes + AlignMask) & ~AlignMask;
2169  return NumBytes;
2170}
2171
2172SDValue
2173PPCTargetLowering::LowerFormalArguments(SDValue Chain,
2174                                        CallingConv::ID CallConv, bool isVarArg,
2175                                        const SmallVectorImpl<ISD::InputArg>
2176                                          &Ins,
2177                                        SDLoc dl, SelectionDAG &DAG,
2178                                        SmallVectorImpl<SDValue> &InVals)
2179                                          const {
2180  if (Subtarget.isSVR4ABI()) {
2181    if (Subtarget.isPPC64())
2182      return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
2183                                         dl, DAG, InVals);
2184    else
2185      return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
2186                                         dl, DAG, InVals);
2187  } else {
2188    return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
2189                                       dl, DAG, InVals);
2190  }
2191}
2192
2193SDValue
2194PPCTargetLowering::LowerFormalArguments_32SVR4(
2195                                      SDValue Chain,
2196                                      CallingConv::ID CallConv, bool isVarArg,
2197                                      const SmallVectorImpl<ISD::InputArg>
2198                                        &Ins,
2199                                      SDLoc dl, SelectionDAG &DAG,
2200                                      SmallVectorImpl<SDValue> &InVals) const {
2201
2202  // 32-bit SVR4 ABI Stack Frame Layout:
2203  //              +-----------------------------------+
2204  //        +-->  |            Back chain             |
2205  //        |     +-----------------------------------+
2206  //        |     | Floating-point register save area |
2207  //        |     +-----------------------------------+
2208  //        |     |    General register save area     |
2209  //        |     +-----------------------------------+
2210  //        |     |          CR save word             |
2211  //        |     +-----------------------------------+
2212  //        |     |         VRSAVE save word          |
2213  //        |     +-----------------------------------+
2214  //        |     |         Alignment padding         |
2215  //        |     +-----------------------------------+
2216  //        |     |     Vector register save area     |
2217  //        |     +-----------------------------------+
2218  //        |     |       Local variable space        |
2219  //        |     +-----------------------------------+
2220  //        |     |        Parameter list area        |
2221  //        |     +-----------------------------------+
2222  //        |     |           LR save word            |
2223  //        |     +-----------------------------------+
2224  // SP-->  +---  |            Back chain             |
2225  //              +-----------------------------------+
2226  //
2227  // Specifications:
2228  //   System V Application Binary Interface PowerPC Processor Supplement
2229  //   AltiVec Technology Programming Interface Manual
2230
2231  MachineFunction &MF = DAG.getMachineFunction();
2232  MachineFrameInfo *MFI = MF.getFrameInfo();
2233  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2234
2235  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2236  // Potential tail calls could cause overwriting of argument stack slots.
2237  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
2238                       (CallConv == CallingConv::Fast));
2239  unsigned PtrByteSize = 4;
2240
2241  // Assign locations to all of the incoming arguments.
2242  SmallVector<CCValAssign, 16> ArgLocs;
2243  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
2244                 getTargetMachine(), ArgLocs, *DAG.getContext());
2245
2246  // Reserve space for the linkage area on the stack.
2247  unsigned LinkageSize = PPCFrameLowering::getLinkageSize(false, false);
2248  CCInfo.AllocateStack(LinkageSize, PtrByteSize);
2249
2250  CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
2251
2252  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2253    CCValAssign &VA = ArgLocs[i];
2254
2255    // Arguments stored in registers.
2256    if (VA.isRegLoc()) {
2257      const TargetRegisterClass *RC;
2258      EVT ValVT = VA.getValVT();
2259
2260      switch (ValVT.getSimpleVT().SimpleTy) {
2261        default:
2262          llvm_unreachable("ValVT not supported by formal arguments Lowering");
2263        case MVT::i1:
2264        case MVT::i32:
2265          RC = &PPC::GPRCRegClass;
2266          break;
2267        case MVT::f32:
2268          RC = &PPC::F4RCRegClass;
2269          break;
2270        case MVT::f64:
2271          if (Subtarget.hasVSX())
2272            RC = &PPC::VSFRCRegClass;
2273          else
2274            RC = &PPC::F8RCRegClass;
2275          break;
2276        case MVT::v16i8:
2277        case MVT::v8i16:
2278        case MVT::v4i32:
2279        case MVT::v4f32:
2280          RC = &PPC::VRRCRegClass;
2281          break;
2282        case MVT::v2f64:
2283        case MVT::v2i64:
2284          RC = &PPC::VSHRCRegClass;
2285          break;
2286      }
2287
2288      // Transform the arguments stored in physical registers into virtual ones.
2289      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2290      SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
2291                                            ValVT == MVT::i1 ? MVT::i32 : ValVT);
2292
2293      if (ValVT == MVT::i1)
2294        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
2295
2296      InVals.push_back(ArgValue);
2297    } else {
2298      // Argument stored in memory.
2299      assert(VA.isMemLoc());
2300
2301      unsigned ArgSize = VA.getLocVT().getStoreSize();
2302      int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
2303                                      isImmutable);
2304
2305      // Create load nodes to retrieve arguments from the stack.
2306      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2307      InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
2308                                   MachinePointerInfo(),
2309                                   false, false, false, 0));
2310    }
2311  }
2312
2313  // Assign locations to all of the incoming aggregate by value arguments.
2314  // Aggregates passed by value are stored in the local variable space of the
2315  // caller's stack frame, right above the parameter list area.
2316  SmallVector<CCValAssign, 16> ByValArgLocs;
2317  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
2318                      getTargetMachine(), ByValArgLocs, *DAG.getContext());
2319
2320  // Reserve stack space for the allocations in CCInfo.
2321  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
2322
2323  CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
2324
2325  // Area that is at least reserved in the caller of this function.
2326  unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
2327  MinReservedArea = std::max(MinReservedArea, LinkageSize);
2328
2329  // Set the size that is at least reserved in the caller of this function.  Tail
2330  // call optimized function's reserved stack space needs to be aligned so that
2331  // taking the difference between two stack areas will result in an aligned
2332  // stack.
2333  MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
2334  FuncInfo->setMinReservedArea(MinReservedArea);
2335
2336  SmallVector<SDValue, 8> MemOps;
2337
2338  // If the function takes a variable number of arguments, make a frame index
2339  // for the start of the first vararg value, for expansion of llvm.va_start.
2340  if (isVarArg) {
2341    static const MCPhysReg GPArgRegs[] = {
2342      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
2343      PPC::R7, PPC::R8, PPC::R9, PPC::R10,
2344    };
2345    const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
2346
2347    static const MCPhysReg FPArgRegs[] = {
2348      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
2349      PPC::F8
2350    };
2351    const unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
2352
2353    FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs,
2354                                                          NumGPArgRegs));
2355    FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs,
2356                                                          NumFPArgRegs));
2357
2358    // Make room for NumGPArgRegs and NumFPArgRegs.
2359    int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
2360                NumFPArgRegs * EVT(MVT::f64).getSizeInBits()/8;
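    // On 32-bit this is 8*4 + 8*8 = 96 bytes for the register save area.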
2361
2362    FuncInfo->setVarArgsStackOffset(
2363      MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
2364                             CCInfo.getNextStackOffset(), true));
2365
2366    FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false));
2367    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
2368
2369    // The fixed integer arguments of a variadic function are stored to the
2370    // VarArgsFrameIndex on the stack so that they may be loaded by dereferencing
2371    // the result of va_next.
2372    for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
2373      // Get an existing live-in vreg, or add a new one.
2374      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
2375      if (!VReg)
2376        VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
2377
2378      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
2379      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
2380                                   MachinePointerInfo(), false, false, 0);
2381      MemOps.push_back(Store);
2382      // Increment the address by four for the next argument to store
2383      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
2384      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
2385    }
2386
2387    // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
2388    // is set.
2389    // The double arguments are stored to the VarArgsFrameIndex
2390    // on the stack.
2391    for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
2392      // Get an existing live-in vreg, or add a new one.
2393      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
2394      if (!VReg)
2395        VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
2396
2397      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
2398      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
2399                                   MachinePointerInfo(), false, false, 0);
2400      MemOps.push_back(Store);
2401      // Increment the address by eight for the next argument to store
2402      SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8,
2403                                         PtrVT);
2404      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
2405    }
2406  }
2407
2408  if (!MemOps.empty())
2409    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
2410
2411  return Chain;
2412}
2413
2414// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
2415// value to MVT::i64 and then truncate to the correct register size.
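// For example, a signext i32 argument arriving in a 64-bit GPR becomes
// (i32 (truncate (AssertSext x, i32))), so later nodes can assume the upper
// 32 bits already hold the sign extension without re-extending.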
2416SDValue
2417PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
2418                                     SelectionDAG &DAG, SDValue ArgVal,
2419                                     SDLoc dl) const {
2420  if (Flags.isSExt())
2421    ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
2422                         DAG.getValueType(ObjectVT));
2423  else if (Flags.isZExt())
2424    ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
2425                         DAG.getValueType(ObjectVT));
2426
2427  return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
2428}
2429
2430SDValue
2431PPCTargetLowering::LowerFormalArguments_64SVR4(
2432                                      SDValue Chain,
2433                                      CallingConv::ID CallConv, bool isVarArg,
2434                                      const SmallVectorImpl<ISD::InputArg>
2435                                        &Ins,
2436                                      SDLoc dl, SelectionDAG &DAG,
2437                                      SmallVectorImpl<SDValue> &InVals) const {
2438  // TODO: add description of PPC stack frame format, or at least some docs.
2439  //
2440  bool isLittleEndian = Subtarget.isLittleEndian();
2441  MachineFunction &MF = DAG.getMachineFunction();
2442  MachineFrameInfo *MFI = MF.getFrameInfo();
2443  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2444
2445  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2446  // Potential tail calls could cause overwriting of argument stack slots.
2447  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
2448                       (CallConv == CallingConv::Fast));
2449  unsigned PtrByteSize = 8;
2450
2451  unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false);
2452  unsigned ArgOffset = LinkageSize;
2453
2454  static const MCPhysReg GPR[] = {
2455    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
2456    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
2457  };
2458
2459  static const MCPhysReg *FPR = GetFPR();
2460
2461  static const MCPhysReg VR[] = {
2462    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
2463    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
2464  };
2465  static const MCPhysReg VSRH[] = {
2466    PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
2467    PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
2468  };
2469
2470  const unsigned Num_GPR_Regs = array_lengthof(GPR);
2471  const unsigned Num_FPR_Regs = 13;
2472  const unsigned Num_VR_Regs  = array_lengthof(VR);
2473
2474  unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
2475
2476  // Add DAG nodes to load the arguments or copy them out of registers.  On
2477  // entry to a function on PPC, the arguments start after the linkage area,
2478  // although the first ones are often in registers.
2479
2480  SmallVector<SDValue, 8> MemOps;
2481  Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
2482  unsigned CurArgIdx = 0;
2483  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
2484    SDValue ArgVal;
2485    bool needsLoad = false;
2486    EVT ObjectVT = Ins[ArgNo].VT;
2487    unsigned ObjSize = ObjectVT.getStoreSize();
2488    unsigned ArgSize = ObjSize;
2489    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
2490    std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
2491    CurArgIdx = Ins[ArgNo].OrigArgIndex;
2492
2493    // Respect alignment of argument on the stack.
2494    unsigned Align =
2495      CalculateStackSlotAlignment(ObjectVT, Flags, PtrByteSize);
2496    ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
2497    unsigned CurArgOffset = ArgOffset;
2498
2499    // Compute GPR index associated with argument offset.
2500    GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
2501    GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
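    // E.g. with the usual 48-byte 64-bit SVR4 linkage area, ArgOffset == 64
    // corresponds to the third doubleword of the parameter save area, so
    // GPR_idx == 2 (X5); the clamp above applies once the GPRs are exhausted.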
2502
2503    // FIXME the codegen can be much improved in some cases.
2504    // We do not have to keep everything in memory.
2505    if (Flags.isByVal()) {
2506      // ObjSize is the true size; ArgSize is rounded up to a multiple of registers.
2507      ObjSize = Flags.getByValSize();
2508      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
2509      // Empty aggregate parameters do not take up registers.  Examples:
2510      //   struct { } a;
2511      //   union  { } b;
2512      //   int c[0];
2513      // etc.  However, we have to provide a place-holder in InVals, so
2514      // pretend we have an 8-byte item at the current address for that
2515      // purpose.
2516      if (!ObjSize) {
2517        int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
2518        SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2519        InVals.push_back(FIN);
2520        continue;
2521      }
2522
2523      // All aggregates smaller than 8 bytes must be passed right-justified.
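      // (Big-endian only: e.g. a 3-byte aggregate occupies the high-order
      // bytes of its doubleword, so its address is the slot address plus 5.)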
2524      if (ObjSize < PtrByteSize && !isLittleEndian)
2525        CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize);
      // The value of the object is its address.
      int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(FIN);

      if (ObjSize < 8) {
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          SDValue Store;

          if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
            EVT ObjType = (ObjSize == 1 ? MVT::i8 :
                           (ObjSize == 2 ? MVT::i16 : MVT::i32));
            Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
                                      MachinePointerInfo(FuncArg),
                                      ObjType, false, false, 0);
          } else {
            // For sizes that don't fit a truncating store (3, 5, 6, 7),
            // store the whole register as-is to the parameter save area
            // slot.  The address of the parameter was already calculated
            // above (InVals.push_back(FIN)) to be the right-justified
            // offset within the slot.  For this store, we need a new
            // frame index that points at the beginning of the slot.
            int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
            SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
            Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                 MachinePointerInfo(FuncArg),
                                 false, false, 0);
          }

          MemOps.push_back(Store);
        }
        // Whether we copied from a register or not, advance the offset
        // into the parameter save area by a full doubleword.
        ArgOffset += PtrByteSize;
        continue;
      }

      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        // Store whatever pieces of the object are in registers
        // to memory.  ArgOffset will be the address of the beginning
        // of the object.
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg;
          VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
          SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                       MachinePointerInfo(FuncArg, j),
                                       false, false, 0);
          MemOps.push_back(Store);
          ++GPR_idx;
          ArgOffset += PtrByteSize;
        } else {
          ArgOffset += ArgSize - j;
          break;
        }
      }
      continue;
    }

    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
      } else {
        needsLoad = true;
        ArgSize = PtrByteSize;
      }
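      // An integer argument consumes a full doubleword of the parameter save
      // area whether it was passed in a GPR or in memory.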
      ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;

        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX() ?
                                            &PPC::VSFRCRegClass :
                                            &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++FPR_idx;
      } else {
        needsLoad = true;
        ArgSize = PtrByteSize;
      }

      ArgOffset += 8;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
    case MVT::v2f64:
    case MVT::v2i64:
      if (VR_idx != Num_VR_Regs) {
        unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ?
                        MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) :
                        MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++VR_idx;
      } else {
        needsLoad = true;
      }
      ArgOffset += 16;
      break;
    }

    // We need to load the argument to a virtual register if we determined
    // above that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      if (ObjSize < ArgSize && !isLittleEndian)
        CurArgOffset += ArgSize - ObjSize;
      int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
                           false, false, false, 0);
    }

    InVals.push_back(ArgVal);
  }

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea;
  MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
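  // (The ABI requires the caller to reserve at least eight doubleword
  // parameter slots beyond the linkage area, even for functions that take
  // fewer arguments.)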

  // Set the size that is at least reserved in the caller of this function.
  // Tail call optimized functions' reserved stack space needs to be aligned so
  // that taking the difference between two stack areas will result in an
  // aligned stack.
  MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
      MFI->CreateFixedObject(PtrByteSize, Depth, true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
         GPR_idx < Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                   MachinePointerInfo(), false, false, 0);
      MemOps.push_back(Store);
      // Increment the address by the pointer size for the next argument to
      // store.
      SDValue PtrOff = DAG.getConstant(PtrByteSize, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}

SDValue
PPCTargetLowering::LowerFormalArguments_Darwin(
                                      SDValue Chain,
                                      CallingConv::ID CallConv, bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg>
                                        &Ins,
                                      SDLoc dl, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = PtrVT == MVT::i64;
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = isPPC64 ? 8 : 4;

  unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true);
  unsigned ArgOffset = LinkageSize;
  // Area that is at least reserved in caller of this function.
  unsigned MinReservedArea = ArgOffset;

  static const MCPhysReg GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const MCPhysReg GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };

  static const MCPhysReg *FPR = GetFPR();

  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
  const unsigned Num_FPR_Regs = 13;
  const unsigned Num_VR_Regs  = array_lengthof(VR);

  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;

  // In 32-bit non-varargs functions, the stack space for vectors is after the
  // stack space for non-vectors.  We do not use this space unless we have
  // too many vectors to fit in registers, something that only occurs in
  // constructed examples, but we have to walk the arglist to figure that
  // out... for the pathological case, compute VecArgOffset as the start of
  // the vector parameter area.  Computing VecArgOffset is the entire point
  // of the following loop.
  unsigned VecArgOffset = ArgOffset;
  if (!isVarArg && !isPPC64) {
    for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
         ++ArgNo) {
      EVT ObjectVT = Ins[ArgNo].VT;
      ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;

      if (Flags.isByVal()) {
        // ObjSize is the true size; ArgSize is ObjSize rounded up to a
        // multiple of the register size.
        unsigned ObjSize = Flags.getByValSize();
        unsigned ArgSize =
                ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
        VecArgOffset += ArgSize;
        continue;
      }

      switch(ObjectVT.getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unhandled argument type!");
      case MVT::i1:
      case MVT::i32:
      case MVT::f32:
        VecArgOffset += 4;
        break;
      case MVT::i64:  // PPC64
      case MVT::f64:
        // FIXME: We are guaranteed to be !isPPC64 at this point.
        // Does MVT::i64 apply?
        VecArgOffset += 8;
        break;
      case MVT::v4f32:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        // Nothing to do, we're only looking at non-vector args here.
        break;
      }
    }
  }
  // We've found where the vector parameter area in memory is.  Skip the
  // first 12 parameters; these don't use that memory.
  VecArgOffset = ((VecArgOffset+15)/16)*16;
  VecArgOffset += 12*16;
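  // (Twelve vector registers, V2-V13, are available for argument passing, so
  // the first twelve vector parameters never spill into this area.)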

  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.

  SmallVector<SDValue, 8> MemOps;
  unsigned nAltivecParamsAtEnd = 0;
  Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
    std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
    CurArgIdx = Ins[ArgNo].OrigArgIndex;

    unsigned CurArgOffset = ArgOffset;

    // Varargs or 64-bit Altivec parameters are padded to a 16-byte boundary.
    if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
        ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
      if (isVarArg || isPPC64) {
        MinReservedArea = ((MinReservedArea+15)/16)*16;
        MinReservedArea += CalculateStackSlotSize(ObjectVT,
                                                  Flags,
                                                  PtrByteSize);
      } else
        nAltivecParamsAtEnd++;
    } else
      // Calculate min reserved area.
      MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
                                                Flags,
                                                PtrByteSize);

    // FIXME: The codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple
      // of the register size.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Objects of size 1 and 2 are right-justified, everything else is
      // left-justified.  This means the memory address is adjusted forwards.
      if (ObjSize==1 || ObjSize==2) {
        CurArgOffset = CurArgOffset + (4 - ObjSize);
      }
      // The value of the object is its address.
      int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(FIN);
      if (ObjSize==1 || ObjSize==2) {
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg;
          if (isPPC64)
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          else
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
          SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
                                            MachinePointerInfo(FuncArg),
                                            ObjType, false, false, 0);
          MemOps.push_back(Store);
          ++GPR_idx;
        }

        ArgOffset += PtrByteSize;

        continue;
      }
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        // Store whatever pieces of the object are in registers
        // to memory.  ArgOffset will be the address of the beginning
        // of the object.
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg;
          if (isPPC64)
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          else
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
          SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                       MachinePointerInfo(FuncArg, j),
                                       false, false, 0);
          MemOps.push_back(Store);
          ++GPR_idx;
          ArgOffset += PtrByteSize;
        } else {
          ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
          break;
        }
      }
      continue;
    }

    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i1:
    case MVT::i32:
      if (!isPPC64) {
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);

          if (ObjectVT == MVT::i1)
            ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);

          ++GPR_idx;
        } else {
          needsLoad = true;
          ArgSize = PtrByteSize;
        }
        // All int arguments reserve stack space in the Darwin ABI.
        ArgOffset += PtrByteSize;
        break;
      }
      // FALLTHROUGH
    case MVT::i64:  // PPC64
      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

        ++GPR_idx;
      } else {
        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      // All int arguments reserve stack space in the Darwin ABI.
      ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // Every 4 bytes of argument space consumes one of the GPRs available for
      // argument passing.
      if (GPR_idx != Num_GPR_Regs) {
        ++GPR_idx;
        if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
          ++GPR_idx;
      }
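      // (On 32-bit Darwin an f64 occupies two 4-byte argument slots, so it
      // shadows two GPRs.)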
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;

        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++FPR_idx;
      } else {
        needsLoad = true;
      }

      // All FP arguments reserve stack space in the Darwin ABI.
      ArgOffset += isPPC64 ? 8 : ObjSize;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      // Note that vector arguments in registers don't reserve stack space,
      // except in varargs functions.
      if (VR_idx != Num_VR_Regs) {
        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        if (isVarArg) {
          while ((ArgOffset % 16) != 0) {
            ArgOffset += PtrByteSize;
            if (GPR_idx != Num_GPR_Regs)
              GPR_idx++;
          }
          ArgOffset += 16;
          GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
        }
        ++VR_idx;
      } else {
        if (!isVarArg && !isPPC64) {
          // Vectors go after all the nonvectors.
          CurArgOffset = VecArgOffset;
          VecArgOffset += 16;
        } else {
          // Vectors are aligned.
          ArgOffset = ((ArgOffset+15)/16)*16;
          CurArgOffset = ArgOffset;
          ArgOffset += 16;
        }
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined above
    // that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      int FI = MFI->CreateFixedObject(ObjSize,
                                      CurArgOffset + (ArgSize - ObjSize),
                                      isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
                           false, false, false, 0);
    }

    InVals.push_back(ArgVal);
  }

  // Allow for Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    MinReservedArea = ((MinReservedArea+15)/16)*16;
    MinReservedArea += 16*nAltivecParamsAtEnd;
  }

  // Area that is at least reserved in the caller of this function.
  MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);

  // Set the size that is at least reserved in the caller of this function.
  // Tail call optimized functions' reserved stack space needs to be aligned so
  // that taking the difference between two stack areas will result in an
  // aligned stack.
  MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
      MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
                             Depth, true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg;

      if (isPPC64)
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
      else
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                   MachinePointerInfo(), false, false, 0);
      MemOps.push_back(Store);
      // Increment the address by the pointer size for the next argument to
      // store.
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}

/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
/// adjusted to accommodate the arguments for the tailcall.
static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
                                   unsigned ParamSize) {
  if (!isTailCall) return 0;

  PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
  unsigned CallerMinReservedArea = FI->getMinReservedArea();
  int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
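  // (A negative SPDiff means the callee needs more argument space than the
  // caller has reserved.)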
  // Remember only if the new adjustment is bigger.
  if (SPDiff < FI->getTailCallSPDelta())
    FI->setTailCallSPDelta(SPDiff);

  return SPDiff;
}

/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
bool
PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
                                                     CallingConv::ID CalleeCC,
                                                     bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                                     SelectionDAG& DAG) const {
  if (!getTargetMachine().Options.GuaranteedTailCallOpt)
    return false;

  // Variable argument functions are not supported.
  if (isVarArg)
    return false;

  MachineFunction &MF = DAG.getMachineFunction();
  CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
  if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
    // Functions containing byval parameters are not supported.
    for (unsigned i = 0; i != Ins.size(); i++) {
      ISD::ArgFlagsTy Flags = Ins[i].Flags;
      if (Flags.isByVal()) return false;
    }

    // Non-PIC/GOT tail calls are supported.
    if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
      return true;

    // At the moment we can only do local tail calls (in same module, hidden
    // or protected) if we are generating PIC.
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
      return G->getGlobal()->hasHiddenVisibility()
          || G->getGlobal()->hasProtectedVisibility();
  }

  return false;
}

/// isBLACompatibleAddress - Return the immediate to use if the specified
/// 32-bit value is representable in the immediate field of a BxA instruction.
static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return nullptr;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      SignExtend32<26>(Addr) != Addr)
    return nullptr;  // Top 6 bits have to be sext of immediate.

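  // The LI field of an absolute branch encodes a word address, so drop the
  // two implicitly-zero low bits from the immediate.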
  return DAG.getConstant((int)C->getZExtValue() >> 2,
                         DAG.getTargetLoweringInfo().getPointerTy()).getNode();
}

namespace {

struct TailCallArgumentInfo {
  SDValue Arg;
  SDValue FrameIdxOp;
  int       FrameIdx;

  TailCallArgumentInfo() : FrameIdx(0) {}
};

} // end anonymous namespace

/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static void
StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
                                  SDValue Chain,
                   const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
                   SmallVectorImpl<SDValue> &MemOpChains,
                   SDLoc dl) {
  for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
    SDValue Arg = TailCallArgs[i].Arg;
    SDValue FIN = TailCallArgs[i].FrameIdxOp;
    int FI = TailCallArgs[i].FrameIdx;
    // Store relative to the frame pointer.
    MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN,
                                       MachinePointerInfo::getFixedStack(FI),
                                       false, false, 0));
  }
}

/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
/// the appropriate stack slot for the tail call optimized function call.
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
                                             MachineFunction &MF,
                                             SDValue Chain,
                                             SDValue OldRetAddr,
                                             SDValue OldFP,
                                             int SPDiff,
                                             bool isPPC64,
                                             bool isDarwinABI,
                                             SDLoc dl) {
  if (SPDiff) {
    // Calculate the new stack slot for the return address.
    int SlotSize = isPPC64 ? 8 : 4;
    int NewRetAddrLoc = SPDiff + PPCFrameLowering::getReturnSaveOffset(isPPC64,
                                                                   isDarwinABI);
    int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
                                                          NewRetAddrLoc, true);
    EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
    SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
    Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
                         MachinePointerInfo::getFixedStack(NewRetAddr),
                         false, false, 0);

    // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
    // slot as the FP is never overwritten.
    if (isDarwinABI) {
      int NewFPLoc =
        SPDiff + PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
      int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc,
                                                          true);
      SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
      Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
                           MachinePointerInfo::getFixedStack(NewFPIdx),
                           false, false, 0);
    }
  }
  return Chain;
}

/// CalculateTailCallArgDest - Remember the argument for later processing, and
/// calculate the position of the argument.
static void
CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
                         SDValue Arg, int SPDiff, unsigned ArgOffset,
                     SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
  int Offset = ArgOffset + SPDiff;
  uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
  int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
  EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
  SDValue FIN = DAG.getFrameIndex(FI, VT);
  TailCallArgumentInfo Info;
  Info.Arg = Arg;
  Info.FrameIdxOp = FIN;
  Info.FrameIdx = FI;
  TailCallArguments.push_back(Info);
}

/// EmitTCFPAndRetAddrLoad - Emit loads from the frame pointer and return
/// address stack slots. Returns the chain as result and the loaded values in
/// LROpOut/FPOpOut. Used when tail calling.
SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
                                                        int SPDiff,
                                                        SDValue Chain,
                                                        SDValue &LROpOut,
                                                        SDValue &FPOpOut,
                                                        bool isDarwinABI,
                                                        SDLoc dl) const {
  if (SPDiff) {
    // Load the LR and FP stack slot for later adjusting.
    EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
    LROpOut = getReturnAddrFrameIndex(DAG);
    LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(),
                          false, false, false, 0);
    Chain = SDValue(LROpOut.getNode(), 1);

    // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
    // slot as the FP is never overwritten.
    if (isDarwinABI) {
      FPOpOut = getFramePointerFrameIndex(DAG);
      FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(),
                            false, false, false, 0);
      Chain = SDValue(FPOpOut.getNode(), 1);
    }
  }
  return Chain;
}

/// CreateCopyOfByValArgument - Make a copy of an aggregate at the address
/// specified by "Src" to the address "Dst"; the size is taken from the byval
/// flags.  Alignment information is specified by the specific parameter
/// attribute.  The copy will be passed as a byval function parameter.
/// Sometimes what we are copying is the end of a larger object, the part that
/// does not fit in registers.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
                          SDLoc dl) {
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
                       false, false, MachinePointerInfo(),
                       MachinePointerInfo());
}

/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
/// tail calls.
static void
LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
                 SDValue Arg, SDValue PtrOff, int SPDiff,
                 unsigned ArgOffset, bool isPPC64, bool isTailCall,
                 bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
                 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments,
                 SDLoc dl) {
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  if (!isTailCall) {
    if (isVector) {
      SDValue StackPtr;
      if (isPPC64)
        StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
      else
        StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
                           DAG.getConstant(ArgOffset, PtrVT));
    }
    MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
                                       MachinePointerInfo(), false, false, 0));
  } else
    // Calculate and remember the argument location.
    CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
                             TailCallArguments);
}

static
void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
                     SDLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
                     SDValue LROp, SDValue FPOp, bool isDarwinABI,
                     SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
  MachineFunction &MF = DAG.getMachineFunction();

  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
  // might overwrite each other in case of tail call optimization.
  SmallVector<SDValue, 8> MemOpChains2;
  // Do not flag preceding copytoreg stuff together with the following stuff.
  InFlag = SDValue();
  StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
                                    MemOpChains2, dl);
  if (!MemOpChains2.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);

  // Store the return address to the appropriate stack slot.
  Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
                                        isPPC64, isDarwinABI, dl);

  // Emit callseq_end just before the tailcall node.
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag, dl);
  InFlag = Chain.getValue(1);
}

static
unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
                     SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall,
                     SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass,
                     SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
                     const PPCSubtarget &Subtarget) {

  bool isPPC64 = Subtarget.isPPC64();
  bool isSVR4ABI = Subtarget.isSVR4ABI();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Glue);    // Returns a flag for retval copy to use.

  unsigned CallOpc = PPCISD::CALL;

  bool needIndirectCall = true;
  if (!isSVR4ABI || !isPPC64)
    if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
      // If this is an absolute destination address, use the munged value.
      Callee = SDValue(Dest, 0);
      needIndirectCall = false;
    }

  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201
    // Use indirect calls for ALL function calls in JIT mode, since the
    // far-call stubs may be outside relocation limits for a BL instruction.
    if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) {
      unsigned OpFlags = 0;
      if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
          (Subtarget.getTargetTriple().isMacOSX() &&
           Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
          (G->getGlobal()->isDeclaration() ||
           G->getGlobal()->isWeakForLinker())) {
        // PC-relative references to external symbols should go through $stub,
        // unless we're building with the leopard linker or later, which
        // automatically synthesizes these stubs.
        OpFlags = PPCII::MO_DARWIN_STUB;
      }

      // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
      // every direct call is), turn it into a TargetGlobalAddress /
      // TargetExternalSymbol node so that legalize doesn't hack it.
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
                                          Callee.getValueType(),
                                          0, OpFlags);
      needIndirectCall = false;
    }
  }

  if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    unsigned char OpFlags = 0;

    if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
        (Subtarget.getTargetTriple().isMacOSX() &&
         Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) {
      // PC-relative references to external symbols should go through $stub,
      // unless we're building with the leopard linker or later, which
      // automatically synthesizes these stubs.
      OpFlags = PPCII::MO_DARWIN_STUB;
    }

    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
                                         OpFlags);
    needIndirectCall = false;
  }

  if (needIndirectCall) {
    // Otherwise, this is an indirect call.  We have to use a MTCTR/BCTRL pair
    // to do the call; we can't use PPCISD::CALL.
    SDValue MTCTROps[] = {Chain, Callee, InFlag};

    if (isSVR4ABI && isPPC64) {
      // Function pointers in the 64-bit SVR4 ABI do not point to the function
      // entry point, but to the function descriptor (the function entry point
      // address is part of the function descriptor though).
      // The function descriptor is a three doubleword structure with the
      // following fields: function entry point, TOC base address and
      // environment pointer.
      // Thus for a call through a function pointer, the following actions need
      // to be performed:
      //   1. Save the TOC of the caller in the TOC save area of its stack
      //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
      //   2. Load the address of the function entry point from the function
      //      descriptor.
      //   3. Load the TOC of the callee from the function descriptor into r2.
      //   4. Load the environment pointer from the function descriptor into
      //      r11.
      //   5. Branch to the function entry point address.
      //   6. On return of the callee, the TOC of the caller needs to be
      //      restored (this is done in FinishCall()).
      //
      // All those operations are flagged together to ensure that no other
      // operations can be scheduled in between. E.g. without flagging the
      // operations together, a TOC access in the caller could be scheduled
      // between the load of the callee TOC and the branch to the callee, which
      // results in the TOC access going through the TOC of the callee instead
      // of going through the TOC of the caller, which leads to incorrect code.

      // Load the address of the function entry point from the function
      // descriptor.
      SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Glue);
      SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs,
                              makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
      Chain = LoadFuncPtr.getValue(1);
      InFlag = LoadFuncPtr.getValue(2);

      // Load environment pointer into r11.
      // Offset of the environment pointer within the function descriptor.
      SDValue PtrOff = DAG.getIntPtrConstant(16);

      SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
      SDValue LoadEnvPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, Chain, AddPtr,
                                       InFlag);
      Chain = LoadEnvPtr.getValue(1);
      InFlag = LoadEnvPtr.getValue(2);

      SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
                                        InFlag);
      Chain = EnvVal.getValue(0);
      InFlag = EnvVal.getValue(1);

      // Load TOC of the callee into r2. We are using a target-specific load
      // with r2 hard coded, because the result of a target-independent load
      // would never go directly into r2, since r2 is a reserved register (which
      // prevents the register allocator from allocating it), resulting in an
      // additional register being allocated and an unnecessary move instruction
      // being generated.
      VTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue TOCOff = DAG.getIntPtrConstant(8);
      SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
      SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain,
                                       AddTOC, InFlag);
      Chain = LoadTOCPtr.getValue(0);
      InFlag = LoadTOCPtr.getValue(1);

      MTCTROps[0] = Chain;
      MTCTROps[1] = LoadFuncPtr;
      MTCTROps[2] = InFlag;
    }

    Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
                        makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
    InFlag = Chain.getValue(1);

    NodeTys.clear();
    NodeTys.push_back(MVT::Other);
    NodeTys.push_back(MVT::Glue);
    Ops.push_back(Chain);
    CallOpc = PPCISD::BCTRL;
    Callee.setNode(nullptr);
    // Add use of X11 (holding environment pointer).
    if (isSVR4ABI && isPPC64)
      Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
    // Add CTR register as callee so a bctr can be emitted later.
    if (isTailCall)
      Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
  }

  // If this is a direct call, pass the chain and the callee.
  if (Callee.getNode()) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  }
  // If this is a tail call, add the stack pointer delta.
  if (isTailCall)
    Ops.push_back(DAG.getConstant(SPDiff, MVT::i32));

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  return CallOpc;
}

static bool isLocalCall(const SDValue &Callee) {
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    return !G->getGlobal()->isDeclaration() &&
           !G->getGlobal()->isWeakForLinker();
  return false;
}

SDValue
PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
                                   CallingConv::ID CallConv, bool isVarArg,
                                   const SmallVectorImpl<ISD::InputArg> &Ins,
                                   SDLoc dl, SelectionDAG &DAG,
                                   SmallVectorImpl<SDValue> &InVals) const {

  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                    getTargetMachine(), RVLocs, *DAG.getContext());
  CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Val = DAG.getCopyFromReg(Chain, dl,
                                     VA.getLocReg(), VA.getLocVT(), InFlag);
    Chain = Val.getValue(1);
    InFlag = Val.getValue(2);

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    case CCValAssign::ZExt:
      Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
                        DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    case CCValAssign::SExt:
      Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
                        DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}

SDValue
PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
                              bool isTailCall, bool isVarArg,
                              SelectionDAG &DAG,
                              SmallVector<std::pair<unsigned, SDValue>, 8>
                                &RegsToPass,
                              SDValue InFlag, SDValue Chain,
                              SDValue &Callee,
                              int SPDiff, unsigned NumBytes,
                              const SmallVectorImpl<ISD::InputArg> &Ins,
                              SmallVectorImpl<SDValue> &InVals) const {
  std::vector<EVT> NodeTys;
  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
                                 isTailCall, RegsToPass, Ops, NodeTys,
                                 Subtarget);

  // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
  if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
    Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));

  // When performing tail call optimization the callee pops its arguments off
  // the stack. Account for this here so these bytes can be pushed back on in
  // PPCFrameLowering::eliminateCallFramePseudoInstr.
  int BytesCalleePops =
    (CallConv == CallingConv::Fast &&
     getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
  const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  if (InFlag.getNode())
    Ops.push_back(InFlag);

  // Emit the tail call.
  if (isTailCall) {
    assert(((Callee.getOpcode() == ISD::Register &&
             cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
            Callee.getOpcode() == ISD::TargetExternalSymbol ||
            Callee.getOpcode() == ISD::TargetGlobalAddress ||
            isa<ConstantSDNode>(Callee)) &&
    "Expecting a global address, external symbol, absolute value or register");

    return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
  }

  // Add a NOP immediately after the branch instruction when using the 64-bit
  // SVR4 ABI. At link time, if caller and callee are in a different module and
  // thus have a different TOC, the call will be replaced with a call to a stub
  // function which saves the current TOC, loads the TOC of the callee and
  // branches to the callee. The NOP will be replaced with a load instruction
  // which restores the TOC of the caller from the TOC save slot of the current
  // stack frame. If caller and callee belong to the same module (and have the
  // same TOC), the NOP will remain unchanged.

  bool needsTOCRestore = false;
  if (!isTailCall && Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
    if (CallOpc == PPCISD::BCTRL) {
      // This is a call through a function pointer.
      // Restore the caller TOC from the save area into R2.
      // See PrepareCall() for more information about calls through function
      // pointers in the 64-bit SVR4 ABI.
      // We are using a target-specific load with r2 hard coded, because the
      // result of a target-independent load would never go directly into r2,
      // since r2 is a reserved register (which prevents the register allocator
      // from allocating it), resulting in an additional register being
      // allocated and an unnecessary move instruction being generated.
      needsTOCRestore = true;
    } else if ((CallOpc == PPCISD::CALL) &&
               (!isLocalCall(Callee) ||
                DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
      // Otherwise insert NOP for non-local calls.
      CallOpc = PPCISD::CALL_NOP;
    }
  }

  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
  InFlag = Chain.getValue(1);

  if (needsTOCRestore) {
    SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
    unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset();
    SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset);
    SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
    Chain = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain, AddTOC, InFlag);
    InFlag = Chain.getValue(1);
  }

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
                             DAG.getIntPtrConstant(BytesCalleePops, true),
                             InFlag, dl);
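  // The glue result of the callseq_end is only needed when result values must
  // be copied out of their physical registers below.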
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
                         Ins, dl, DAG, InVals);
}

SDValue
PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                             SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG                     = CLI.DAG;
  SDLoc &dl                             = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
  SDValue Chain                         = CLI.Chain;
  SDValue Callee                        = CLI.Callee;
  bool &isTailCall                      = CLI.IsTailCall;
  CallingConv::ID CallConv              = CLI.CallConv;
  bool isVarArg                         = CLI.IsVarArg;

  if (isTailCall)
    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
                                                   Ins, DAG);

  if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  if (Subtarget.isSVR4ABI()) {
    if (Subtarget.isPPC64())
      return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
                              isTailCall, Outs, OutVals, Ins,
                              dl, DAG, InVals);
    else
      return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
                              isTailCall, Outs, OutVals, Ins,
                              dl, DAG, InVals);
  }

  return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
                          isTailCall, Outs, OutVals, Ins,
                          dl, DAG, InVals);
}

SDValue
PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
                                    CallingConv::ID CallConv, bool isVarArg,
                                    bool isTailCall,
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                    const SmallVectorImpl<SDValue> &OutVals,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                    SDLoc dl, SelectionDAG &DAG,
                                    SmallVectorImpl<SDValue> &InVals) const {
  // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
  // of the 32-bit SVR4 ABI stack frame layout.

  assert((CallConv == CallingConv::C ||
          CallConv == CallingConv::Fast) && "Unknown calling convention!");

  unsigned PtrByteSize = 4;

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a tail call. As a
  // consequence, the frame pointer will be used for dynamic stack allocation
  // and for restoring the caller's stack pointer in this function's epilogue.
  // This is done because a tail call may overwrite the value in this
  // function's (MF) stack pointer stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, parameter list area and the part of the local variable space which
  // contains copies of aggregates which are passed by value.

  // Assign locations to all of the outgoing arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false),
                       PtrByteSize);

  if (isVarArg) {
    // Handle fixed and variable vector arguments differently.
    // Fixed vector arguments go into registers as long as registers are
    // available. Variable vector arguments always go into memory.
    unsigned NumArgs = Outs.size();

    for (unsigned i = 0; i != NumArgs; ++i) {
      MVT ArgVT = Outs[i].VT;
      ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
      bool Result;

      if (Outs[i].IsFixed) {
        Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
                               CCInfo);
      } else {
        Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
                                      ArgFlags, CCInfo);
      }

      if (Result) {
#ifndef NDEBUG
        errs() << "Call operand #" << i << " has unhandled type "
               << EVT(ArgVT).getEVTString() << "\n";
#endif
        llvm_unreachable(nullptr);
      }
    }
  } else {
    // All arguments are treated the same.
    CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
  }

  // Assign locations to all of the outgoing by-value aggregate arguments.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                      getTargetMachine(), ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
3783
3784  CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
3785
3786  // Size of the linkage area, parameter list area and the part of the local
3787  // variable space where copies of aggregates which are passed by value are
3788  // stored.
3789  unsigned NumBytes = CCByValInfo.getNextStackOffset();
3790
3791  // Calculate by how many bytes the stack has to be adjusted in case of tail
3792  // call optimization.
3793  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
3794
3795  // Adjust the stack pointer for the new arguments...
3796  // These operations are automatically eliminated by the prolog/epilog pass
3797  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
3798                               dl);
3799  SDValue CallSeqStart = Chain;
3800
3801  // Load the return address and frame pointer so they can be moved somewhere
3802  // else later.
3803  SDValue LROp, FPOp;
3804  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false,
3805                                       dl);
3806
3807  // Set up a copy of the stack pointer for use in loading and storing any
3808  // arguments that may not fit in the registers available for argument
3809  // passing.
3810  SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
3811
3812  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3813  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
3814  SmallVector<SDValue, 8> MemOpChains;
3815
3816  bool seenFloatArg = false;
3817  // Walk the register/memloc assignments, inserting copies/loads.
3818  for (unsigned i = 0, j = 0, e = ArgLocs.size();
3819       i != e;
3820       ++i) {
3821    CCValAssign &VA = ArgLocs[i];
3822    SDValue Arg = OutVals[i];
3823    ISD::ArgFlagsTy Flags = Outs[i].Flags;
3824
3825    if (Flags.isByVal()) {
3826      // Argument is an aggregate which is passed by value, thus we need to
3827      // create a copy of it in the local variable space of the current stack
3828      // frame (which is the stack frame of the caller) and pass the address of
3829      // this copy to the callee.
3830      assert((j < ByValArgLocs.size()) && "Index out of bounds!");
3831      CCValAssign &ByValVA = ByValArgLocs[j++];
3832      assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
3833
3834      // Memory reserved in the local variable space of the caller's stack frame.
3835      unsigned LocMemOffset = ByValVA.getLocMemOffset();
3836
3837      SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
3838      PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
3839
3840      // Create a copy of the argument in the local area of the current
3841      // stack frame.
3842      SDValue MemcpyCall =
3843        CreateCopyOfByValArgument(Arg, PtrOff,
3844                                  CallSeqStart.getNode()->getOperand(0),
3845                                  Flags, DAG, dl);
3846
3847      // This must go outside the CALLSEQ_START..END.
3848      SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
3849                           CallSeqStart.getNode()->getOperand(1),
3850                           SDLoc(MemcpyCall));
3851      DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
3852                             NewCallSeqStart.getNode());
3853      Chain = CallSeqStart = NewCallSeqStart;
3854
3855      // Pass the address of the aggregate copy on the stack either in a
3856      // physical register or in the parameter list area of the current stack
3857      // frame to the callee.
3858      Arg = PtrOff;
3859    }
3860
3861    if (VA.isRegLoc()) {
3862      if (Arg.getValueType() == MVT::i1)
3863        Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg);
3864
3865      seenFloatArg |= VA.getLocVT().isFloatingPoint();
3866      // Put argument in a physical register.
3867      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
3868    } else {
3869      // Put argument in the parameter list area of the current stack frame.
3870      assert(VA.isMemLoc());
3871      unsigned LocMemOffset = VA.getLocMemOffset();
3872
3873      if (!isTailCall) {
3874        SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
3875        PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
3876
3877        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
3878                                           MachinePointerInfo(),
3879                                           false, false, 0));
3880      } else {
3881        // Calculate and remember argument location.
3882        CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
3883                                 TailCallArguments);
3884      }
3885    }
3886  }
3887
3888  if (!MemOpChains.empty())
3889    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
3890
3891  // Build a sequence of copy-to-reg nodes chained together with token chain
3892  // and flag operands which copy the outgoing args into the appropriate regs.
3893  SDValue InFlag;
3894  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
3895    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
3896                             RegsToPass[i].second, InFlag);
3897    InFlag = Chain.getValue(1);
3898  }
3899
3900  // Set CR bit 6 to true if this is a vararg call with floating args passed in
3901  // registers.
3902  if (isVarArg) {
3903    SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
3904    SDValue Ops[] = { Chain, InFlag };
3905
3906    Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
3907                        dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
3908
3909    InFlag = Chain.getValue(1);
3910  }
3911
3912  if (isTailCall)
3913    PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
3914                    false, TailCallArguments);
3915
3916  return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
3917                    RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
3918                    Ins, InVals);
3919}
3920
3921// Copy an argument into memory, being careful to do this outside the
3922// call sequence for the call to which the argument belongs.
3923SDValue
3924PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
3925                                              SDValue CallSeqStart,
3926                                              ISD::ArgFlagsTy Flags,
3927                                              SelectionDAG &DAG,
3928                                              SDLoc dl) const {
3929  SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
3930                        CallSeqStart.getNode()->getOperand(0),
3931                        Flags, DAG, dl);
3932  // The MEMCPY must go outside the CALLSEQ_START..END.
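  // Illustratively, if Ch is the chain entering the call sequence, the nodes
  // are rewired from Ch -> CALLSEQ_START to Ch -> memcpy -> CALLSEQ_START,
  // so the byval copy completes before the outgoing-argument area is live.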
3933  SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
3934                             CallSeqStart.getNode()->getOperand(1),
3935                             SDLoc(MemcpyCall));
3936  DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
3937                         NewCallSeqStart.getNode());
3938  return NewCallSeqStart;
3939}
3940
3941SDValue
3942PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
3943                                    CallingConv::ID CallConv, bool isVarArg,
3944                                    bool isTailCall,
3945                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
3946                                    const SmallVectorImpl<SDValue> &OutVals,
3947                                    const SmallVectorImpl<ISD::InputArg> &Ins,
3948                                    SDLoc dl, SelectionDAG &DAG,
3949                                    SmallVectorImpl<SDValue> &InVals) const {
3950
3951  bool isLittleEndian = Subtarget.isLittleEndian();
3952  unsigned NumOps = Outs.size();
3953
3954  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
3955  unsigned PtrByteSize = 8;
3956
3957  MachineFunction &MF = DAG.getMachineFunction();
3958
3959  // Mark this function as potentially containing a function that contains a
3960  // tail call. As a consequence, the frame pointer will be used for dynamic
3961  // stack allocation and for restoring the caller's stack pointer in this
3962  // function's epilogue. This is done because a tail call might overwrite
3963  // the value in this function's (MF) stack pointer slot at 0(SP).
3964  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
3965      CallConv == CallingConv::Fast)
3966    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
3967
3968  // Count how many bytes are to be pushed on the stack, including the linkage
3969  // area, and parameter passing area.  We start with at least 48 bytes, which
3970  // is reserved space for [SP][CR][LR][2 x unused][TOC].
3971  unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false);
3972  unsigned NumBytes = LinkageSize;
3973
3974  // Add up all the space actually used.
3975  for (unsigned i = 0; i != NumOps; ++i) {
3976    ISD::ArgFlagsTy Flags = Outs[i].Flags;
3977    EVT ArgVT = Outs[i].VT;
3978
3979    // Respect alignment of argument on the stack.
3980    unsigned Align = CalculateStackSlotAlignment(ArgVT, Flags, PtrByteSize);
3981    NumBytes = ((NumBytes + Align - 1) / Align) * Align;
3982
3983    NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
3984  }
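  // For example, if NumBytes is 52 when a 16-byte-aligned vector argument is
  // encountered, ((52 + 16 - 1) / 16) * 16 rounds the running offset up to 64
  // before the argument's own slot size is added.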
3985
3986  unsigned NumBytesActuallyUsed = NumBytes;
3987
3988  // The prolog code of the callee may store up to 8 GPR argument registers to
3989  // the stack, allowing va_start to index over them in memory if the callee
3990  // is varargs.  Because we cannot tell if this is needed on the caller side,
3991  // we conservatively assume that it is.  As such, make sure we have at
3992  // least enough stack space for the caller to store the 8 GPRs.
3993  NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
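  // For example, a call passing a single i64 needs only 48 + 8 = 56 bytes,
  // but std::max(56, 48 + 8 * 8) still reserves 112 bytes of stack here.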
3994
3995  // Tail call needs the stack to be aligned.
3996  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
3997      CallConv == CallingConv::Fast)
3998    NumBytes = EnsureStackAlignment(MF.getTarget(), NumBytes);
3999
4000  // Calculate by how many bytes the stack has to be adjusted in case of tail
4001  // call optimization.
4002  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
4003
4004  // To protect arguments on the stack from being clobbered in a tail call,
4005  // force all the loads to happen before doing any other lowering.
4006  if (isTailCall)
4007    Chain = DAG.getStackArgumentTokenFactor(Chain);
4008
4009  // Adjust the stack pointer for the new arguments...
4010  // These operations are automatically eliminated by the prolog/epilog pass
4011  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
4012                               dl);
4013  SDValue CallSeqStart = Chain;
4014
4015  // Load the return address and frame pointer so they can be moved somewhere
4016  // else later.
4017  SDValue LROp, FPOp;
4018  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
4019                                       dl);
4020
4021  // Set up a copy of the stack pointer for use in loading and storing any
4022  // arguments that may not fit in the registers available for argument
4023  // passing.
4024  SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
4025
4026  // Figure out which arguments are going to go in registers, and which in
4027  // memory.  Also, if this is a vararg function, floating point arguments
4028  // must be stored to our stack, and loaded into integer regs as well, if
4029  // any integer regs are available for argument passing.
4030  unsigned ArgOffset = LinkageSize;
4031  unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
4032
4033  static const MCPhysReg GPR[] = {
4034    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4035    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4036  };
4037  static const MCPhysReg *FPR = GetFPR();
4038
4039  static const MCPhysReg VR[] = {
4040    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4041    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4042  };
4043  static const MCPhysReg VSRH[] = {
4044    PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
4045    PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
4046  };
4047
4048  const unsigned NumGPRs = array_lengthof(GPR);
4049  const unsigned NumFPRs = 13;
4050  const unsigned NumVRs  = array_lengthof(VR);
4051
4052  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
4053  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
4054
4055  SmallVector<SDValue, 8> MemOpChains;
4056  for (unsigned i = 0; i != NumOps; ++i) {
4057    SDValue Arg = OutVals[i];
4058    ISD::ArgFlagsTy Flags = Outs[i].Flags;
4059
4060    // Respect alignment of argument on the stack.
4061    unsigned Align =
4062      CalculateStackSlotAlignment(Outs[i].VT, Flags, PtrByteSize);
4063    ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
4064
4065    // Compute GPR index associated with argument offset.
4066    GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4067    GPR_idx = std::min(GPR_idx, NumGPRs);
4068
4069    // PtrOff will be used to store the current argument to the stack if a
4070    // register cannot be found for it.
4071    SDValue PtrOff;
4072
4073    PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
4074
4075    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
4076
4077    // Promote integers to 64-bit values.
4078    if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
4079      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
4080      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
4081      Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
4082    }
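    // For instance, an i32 argument carrying the 'signext' attribute is
    // widened with SIGN_EXTEND so the full 64-bit GPR holds its sign-extended
    // value, as the 64-bit SVR4 ABI expects.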
4083
4084    // FIXME memcpy is used way more than necessary.  Correctness first.
4085    // Note: "by value" is code for passing a structure by value, not
4086    // basic types.
4087    if (Flags.isByVal()) {
4088      // Note: Size includes alignment padding, so
4089      //   struct x { short a; char b; }
4090      // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
4091      // These are the proper values we need for right-justifying the
4092      // aggregate in a parameter register.
4093      unsigned Size = Flags.getByValSize();
4094
4095      // An empty aggregate parameter takes up no storage and no
4096      // registers.
4097      if (Size == 0)
4098        continue;
4099
4100      // All aggregates smaller than 8 bytes must be passed right-justified.
4101      if (Size==1 || Size==2 || Size==4) {
4102        EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
4103        if (GPR_idx != NumGPRs) {
4104          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
4105                                        MachinePointerInfo(), VT,
4106                                        false, false, 0);
4107          MemOpChains.push_back(Load.getValue(1));
4108          RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
4109
4110          ArgOffset += PtrByteSize;
4111          continue;
4112        }
4113      }
4114
4115      if (GPR_idx == NumGPRs && Size < 8) {
4116        SDValue AddPtr = PtrOff;
4117        if (!isLittleEndian) {
4118          SDValue Const = DAG.getConstant(PtrByteSize - Size,
4119                                          PtrOff.getValueType());
4120          AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
4121        }
4122        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
4123                                                          CallSeqStart,
4124                                                          Flags, DAG, dl);
4125        ArgOffset += PtrByteSize;
4126        continue;
4127      }
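      // For example, a 3-byte aggregate on a big-endian target is copied to
      // PtrOff + 5, so it occupies the rightmost three bytes of its 8-byte
      // slot.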
4128      // Copy entire object into memory.  There are cases where gcc-generated
4129      // code assumes it is there, even if it could be put entirely into
4130      // registers.  (This is not what the doc says.)
4131
4132      // FIXME: The above statement is likely due to a misunderstanding of the
4133      // documents.  All arguments must be copied into the parameter area BY
4134      // THE CALLEE in the event that the callee takes the address of any
4135      // formal argument.  That has not yet been implemented.  However, it is
4136      // reasonable to use the stack area as a staging area for the register
4137      // load.
4138
4139      // Skip this for small aggregates, as we will use the same slot for a
4140      // right-justified copy, below.
4141      if (Size >= 8)
4142        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
4143                                                          CallSeqStart,
4144                                                          Flags, DAG, dl);
4145
4146      // When a register is available, pass a small aggregate right-justified.
4147      if (Size < 8 && GPR_idx != NumGPRs) {
4148        // The easiest way to get this right-justified in a register
4149        // is to copy the structure into the rightmost portion of a
4150        // local variable slot, then load the whole slot into the
4151        // register.
4152        // FIXME: The memcpy seems to produce pretty awful code for
4153        // small aggregates, particularly for packed ones.
4154        // FIXME: It would be preferable to use the slot in the
4155        // parameter save area instead of a new local variable.
4156        SDValue AddPtr = PtrOff;
4157        if (!isLittleEndian) {
4158          SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType());
4159          AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
4160        }
4161        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
4162                                                          CallSeqStart,
4163                                                          Flags, DAG, dl);
4164
4165        // Load the slot into the register.
4166        SDValue Load = DAG.getLoad(PtrVT, dl, Chain, PtrOff,
4167                                   MachinePointerInfo(),
4168                                   false, false, false, 0);
4169        MemOpChains.push_back(Load.getValue(1));
4170        RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
4171
4172        // Done with this argument.
4173        ArgOffset += PtrByteSize;
4174        continue;
4175      }
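      // For example, a 5-byte packed aggregate on big-endian is copied to
      // PtrOff + 3, so the 8-byte load above leaves it right-justified in
      // the low-order bytes of the GPR.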
4176
4177      // For aggregates larger than PtrByteSize, copy the pieces of the
4178      // object that fit into registers from the parameter save area.
4179      for (unsigned j=0; j<Size; j+=PtrByteSize) {
4180        SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
4181        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
4182        if (GPR_idx != NumGPRs) {
4183          SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
4184                                     MachinePointerInfo(),
4185                                     false, false, false, 0);
4186          MemOpChains.push_back(Load.getValue(1));
4187          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
4188          ArgOffset += PtrByteSize;
4189        } else {
4190          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
4191          break;
4192        }
4193      }
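      // For example, a 24-byte aggregate with two GPRs remaining passes its
      // first two doublewords in those GPRs; the final doubleword stays in
      // the parameter save area, and ArgOffset advances past the whole
      // object.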
4194      continue;
4195    }
4196
4197    switch (Arg.getSimpleValueType().SimpleTy) {
4198    default: llvm_unreachable("Unexpected ValueType for argument!");
4199    case MVT::i1:
4200    case MVT::i32:
4201    case MVT::i64:
4202      if (GPR_idx != NumGPRs) {
4203        RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Arg));
4204      } else {
4205        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
4206                         true, isTailCall, false, MemOpChains,
4207                         TailCallArguments, dl);
4208      }
4209      ArgOffset += PtrByteSize;
4210      break;
4211    case MVT::f32:
4212    case MVT::f64:
4213      if (FPR_idx != NumFPRs) {
4214        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
4215
4216        if (isVarArg) {
4217          // A single float or an aggregate containing only a single float
4218          // must be passed right-justified in the stack doubleword, and
4219          // in the GPR, if one is available.
4220          SDValue StoreOff;
4221          if (Arg.getSimpleValueType().SimpleTy == MVT::f32 &&
4222              !isLittleEndian) {
4223            SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
4224            StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
4225          } else
4226            StoreOff = PtrOff;
4227
4228          SDValue Store = DAG.getStore(Chain, dl, Arg, StoreOff,
4229                                       MachinePointerInfo(), false, false, 0);
4230          MemOpChains.push_back(Store);
4231
4232          // Float varargs are always shadowed in available integer registers
4233          if (GPR_idx != NumGPRs) {
4234            SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
4235                                       MachinePointerInfo(), false, false,
4236                                       false, 0);
4237            MemOpChains.push_back(Load.getValue(1));
4238            RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
4239          }
4240        }
4241      } else {
4242        // On big-endian targets, single-precision floating-point values
4243        // are mapped to the second (rightmost) word of the stack doubleword.
4244        if (Arg.getValueType() == MVT::f32 && !isLittleEndian) {
4245          SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
4246          PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
4247        }
4248
4249        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
4250                         true, isTailCall, false, MemOpChains,
4251                         TailCallArguments, dl);
4252      }
4253      ArgOffset += 8;
4254      break;
4255    case MVT::v4f32:
4256    case MVT::v4i32:
4257    case MVT::v8i16:
4258    case MVT::v16i8:
4259    case MVT::v2f64:
4260    case MVT::v2i64:
4261      // For a varargs call, named arguments go into VRs or on the stack as
4262      // usual; unnamed arguments always go to the stack or the corresponding
4263      // GPRs when within range.  For now, we always put the value in both
4264      // locations (or even all three).
4265      if (isVarArg) {
4266        // We could elide this store in the case where the object fits
4267        // entirely in R registers.  Maybe later.
4268        SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
4269                                     MachinePointerInfo(), false, false, 0);
4270        MemOpChains.push_back(Store);
4271        if (VR_idx != NumVRs) {
4272          SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
4273                                     MachinePointerInfo(),
4274                                     false, false, false, 0);
4275          MemOpChains.push_back(Load.getValue(1));
4276
4277          unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
4278                           Arg.getSimpleValueType() == MVT::v2i64) ?
4279                          VSRH[VR_idx] : VR[VR_idx];
4280          ++VR_idx;
4281
4282          RegsToPass.push_back(std::make_pair(VReg, Load));
4283        }
4284        ArgOffset += 16;
4285        for (unsigned i=0; i<16; i+=PtrByteSize) {
4286          if (GPR_idx == NumGPRs)
4287            break;
4288          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
4289                                  DAG.getConstant(i, PtrVT));
4290          SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
4291                                     false, false, false, 0);
4292          MemOpChains.push_back(Load.getValue(1));
4293          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
4294        }
4295        break;
4296      }
4297
4298      // Non-varargs Altivec params go into VRs or on the stack.
4299      if (VR_idx != NumVRs) {
4300        unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
4301                         Arg.getSimpleValueType() == MVT::v2i64) ?
4302                        VSRH[VR_idx] : VR[VR_idx];
4303        ++VR_idx;
4304
4305        RegsToPass.push_back(std::make_pair(VReg, Arg));
4306      } else {
4307        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
4308                         true, isTailCall, true, MemOpChains,
4309                         TailCallArguments, dl);
4310      }
4311      ArgOffset += 16;
4312      break;
4313    }
4314  }
4315
4316  assert(NumBytesActuallyUsed == ArgOffset);
4317  (void)NumBytesActuallyUsed;
4318
4319  if (!MemOpChains.empty())
4320    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
4321
4322  // Check if this is an indirect call (MTCTR/BCTRL).
4323  // See PrepareCall() for more information about calls through function
4324  // pointers in the 64-bit SVR4 ABI.
4325  if (!isTailCall &&
4326      !isa<GlobalAddressSDNode>(Callee) &&
4327      !isa<ExternalSymbolSDNode>(Callee)) {
4328    // Load r2 into a virtual register and store it to the TOC save area.
4329    SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
4330    // TOC save area offset.
4331    unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset();
4332    SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset);
4333    SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
4334    Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
4335                         false, false, 0);
4336  }
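  // For example (illustrative), for an indirect call through a function
  // pointer such as 'void (*fp)(void)', the TOC pointer in X2 is stored to
  // the TOC save slot in the linkage area so it can be restored after the
  // call returns.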
4337
4338  // Build a sequence of copy-to-reg nodes chained together with token chain
4339  // and flag operands which copy the outgoing args into the appropriate regs.
4340  SDValue InFlag;
4341  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
4342    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
4343                             RegsToPass[i].second, InFlag);
4344    InFlag = Chain.getValue(1);
4345  }
4346
4347  if (isTailCall)
4348    PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp,
4349                    FPOp, true, TailCallArguments);
4350
4351  return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
4352                    RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
4353                    Ins, InVals);
4354}
4355
4356SDValue
4357PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
4358                                    CallingConv::ID CallConv, bool isVarArg,
4359                                    bool isTailCall,
4360                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
4361                                    const SmallVectorImpl<SDValue> &OutVals,
4362                                    const SmallVectorImpl<ISD::InputArg> &Ins,
4363                                    SDLoc dl, SelectionDAG &DAG,
4364                                    SmallVectorImpl<SDValue> &InVals) const {
4365
4366  unsigned NumOps = Outs.size();
4367
4368  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
4369  bool isPPC64 = PtrVT == MVT::i64;
4370  unsigned PtrByteSize = isPPC64 ? 8 : 4;
4371
4372  MachineFunction &MF = DAG.getMachineFunction();
4373
4374  // Mark this function as potentially containing a function that contains a
4375  // tail call. As a consequence, the frame pointer will be used for dynamic
4376  // stack allocation and for restoring the caller's stack pointer in this
4377  // function's epilogue. This is done because a tail call might overwrite
4378  // the value in this function's (MF) stack pointer slot at 0(SP).
4379  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
4380      CallConv == CallingConv::Fast)
4381    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
4382
4383  // Count how many bytes are to be pushed on the stack, including the linkage
4384  // area, and parameter passing area.  We start with 24/48 bytes, which is
4385  // prereserved space for [SP][CR][LR][3 x unused].
4386  unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true);
4387  unsigned NumBytes = LinkageSize;
4388
4389  // Add up all the space actually used.
4390  // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
4391  // they all go in registers, but we must reserve stack space for them for
4392  // possible use by the caller.  In varargs or 64-bit calls, parameters are
4393  // assigned stack space in order, with padding so Altivec parameters are
4394  // 16-byte aligned.
4395  unsigned nAltivecParamsAtEnd = 0;
4396  for (unsigned i = 0; i != NumOps; ++i) {
4397    ISD::ArgFlagsTy Flags = Outs[i].Flags;
4398    EVT ArgVT = Outs[i].VT;
4399    // Varargs Altivec parameters are padded to a 16 byte boundary.
4400    if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4401        ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4402        ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
4403      if (!isVarArg && !isPPC64) {
4404        // Non-varargs Altivec parameters go after all the non-Altivec
4405        // parameters; handle those later so we know how much padding we need.
4406        nAltivecParamsAtEnd++;
4407        continue;
4408      }
4409      // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
4410      NumBytes = ((NumBytes+15)/16)*16;
4411    }
4412    NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4413  }
4414
4415  // Allow for Altivec parameters at the end, if needed.
4416  if (nAltivecParamsAtEnd) {
4417    NumBytes = ((NumBytes+15)/16)*16;
4418    NumBytes += 16*nAltivecParamsAtEnd;
4419  }
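  // For example (32-bit Darwin, illustrative): for f(int, vector float), the
  // int leaves NumBytes at 24 + 4 = 28; the vector was counted above as an
  // Altivec parameter at the end, so NumBytes is rounded up to 32 and then
  // becomes 32 + 16 = 48.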
4420
4421  // The prolog code of the callee may store up to 8 GPR argument registers to
4422  // the stack, allowing va_start to index over them in memory if the callee
4423  // is varargs.  Because we cannot tell if this is needed on the caller side,
4424  // we conservatively assume that it is.  As such, make sure we have at
4425  // least enough stack space for the caller to store the 8 GPRs.
4426  NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
4427
4428  // Tail call needs the stack to be aligned.
4429  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
4430      CallConv == CallingConv::Fast)
4431    NumBytes = EnsureStackAlignment(MF.getTarget(), NumBytes);
4432
4433  // Calculate by how many bytes the stack has to be adjusted in case of tail
4434  // call optimization.
4435  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
4436
4437  // To protect arguments on the stack from being clobbered in a tail call,
4438  // force all the loads to happen before doing any other lowering.
4439  if (isTailCall)
4440    Chain = DAG.getStackArgumentTokenFactor(Chain);
4441
4442  // Adjust the stack pointer for the new arguments...
4443  // These operations are automatically eliminated by the prolog/epilog pass
4444  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
4445                               dl);
4446  SDValue CallSeqStart = Chain;
4447
4448  // Load the return address and frame pointer so they can be moved somewhere
4449  // else later.
4450  SDValue LROp, FPOp;
4451  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
4452                                       dl);
4453
4454  // Set up a copy of the stack pointer for use in loading and storing any
4455  // arguments that may not fit in the registers available for argument
4456  // passing.
4457  SDValue StackPtr;
4458  if (isPPC64)
4459    StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
4460  else
4461    StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
4462
4463  // Figure out which arguments are going to go in registers, and which in
4464  // memory.  Also, if this is a vararg function, floating point arguments
4465  // must be stored to our stack, and loaded into integer regs as well, if
4466  // any integer regs are available for argument passing.
4467  unsigned ArgOffset = LinkageSize;
4468  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4469
4470  static const MCPhysReg GPR_32[] = {           // 32-bit registers.
4471    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4472    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4473  };
4474  static const MCPhysReg GPR_64[] = {           // 64-bit registers.
4475    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4476    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4477  };
4478  static const MCPhysReg *FPR = GetFPR();
4479
4480  static const MCPhysReg VR[] = {
4481    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4482    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4483  };
4484  const unsigned NumGPRs = array_lengthof(GPR_32);
4485  const unsigned NumFPRs = 13;
4486  const unsigned NumVRs  = array_lengthof(VR);
4487
4488  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
4489
4490  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
4491  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
4492
4493  SmallVector<SDValue, 8> MemOpChains;
4494  for (unsigned i = 0; i != NumOps; ++i) {
4495    SDValue Arg = OutVals[i];
4496    ISD::ArgFlagsTy Flags = Outs[i].Flags;
4497
4498    // PtrOff will be used to store the current argument to the stack if a
4499    // register cannot be found for it.
4500    SDValue PtrOff;
4501
4502    PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
4503
4504    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
4505
4506    // On PPC64, promote integers to 64-bit values.
4507    if (isPPC64 && Arg.getValueType() == MVT::i32) {
4508      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
4509      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
4510      Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
4511    }
4512
4513    // FIXME memcpy is used way more than necessary.  Correctness first.
4514    // Note: "by value" is code for passing a structure by value, not
4515    // basic types.
4516    if (Flags.isByVal()) {
4517      unsigned Size = Flags.getByValSize();
4518      // Very small objects are passed right-justified.  Everything else is
4519      // passed left-justified.
4520      if (Size==1 || Size==2) {
4521        EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
4522        if (GPR_idx != NumGPRs) {
4523          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
4524                                        MachinePointerInfo(), VT,
4525                                        false, false, 0);
4526          MemOpChains.push_back(Load.getValue(1));
4527          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
4528
4529          ArgOffset += PtrByteSize;
4530        } else {
4531          SDValue Const = DAG.getConstant(PtrByteSize - Size,
4532                                          PtrOff.getValueType());
4533          SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
4534          Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
4535                                                            CallSeqStart,
4536                                                            Flags, DAG, dl);
4537          ArgOffset += PtrByteSize;
4538        }
4539        continue;
4540      }
4541      // Copy entire object into memory.  There are cases where gcc-generated
4542      // code assumes it is there, even if it could be put entirely into
4543      // registers.  (This is not what the doc says.)
4544      Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
4545                                                        CallSeqStart,
4546                                                        Flags, DAG, dl);
4547
4548      // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
4549      // copy the pieces of the object that fit into registers from the
4550      // parameter save area.
4551      for (unsigned j=0; j<Size; j+=PtrByteSize) {
4552        SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
4553        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
4554        if (GPR_idx != NumGPRs) {
4555          SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
4556                                     MachinePointerInfo(),
4557                                     false, false, false, 0);
4558          MemOpChains.push_back(Load.getValue(1));
4559          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
4560          ArgOffset += PtrByteSize;
4561        } else {
4562          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
4563          break;
4564        }
4565      }
4566      continue;
4567    }
4568
4569    switch (Arg.getSimpleValueType().SimpleTy) {
4570    default: llvm_unreachable("Unexpected ValueType for argument!");
4571    case MVT::i1:
4572    case MVT::i32:
4573    case MVT::i64:
4574      if (GPR_idx != NumGPRs) {
4575        if (Arg.getValueType() == MVT::i1)
4576          Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);
4577
4578        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
4579      } else {
4580        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
4581                         isPPC64, isTailCall, false, MemOpChains,
4582                         TailCallArguments, dl);
4583      }
4584      ArgOffset += PtrByteSize;
4585      break;
4586    case MVT::f32:
4587    case MVT::f64:
4588      if (FPR_idx != NumFPRs) {
4589        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
4590
4591        if (isVarArg) {
4592          SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
4593                                       MachinePointerInfo(), false, false, 0);
4594          MemOpChains.push_back(Store);
4595
4596          // Float varargs are always shadowed in available integer registers
4597          if (GPR_idx != NumGPRs) {
4598            SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
4599                                       MachinePointerInfo(), false, false,
4600                                       false, 0);
4601            MemOpChains.push_back(Load.getValue(1));
4602            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
4603          }
4604          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64) {
4605            SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
4606            PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
4607            SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
4608                                       MachinePointerInfo(),
4609                                       false, false, false, 0);
4610            MemOpChains.push_back(Load.getValue(1));
4611            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
4612          }
4613        } else {
4614          // If we have any FPRs remaining, we may also have GPRs remaining.
4615          // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
4616          // GPRs.
4617          if (GPR_idx != NumGPRs)
4618            ++GPR_idx;
4619          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
4620              !isPPC64)  // PPC64 has 64-bit GPR's obviously :)
4621            ++GPR_idx;
4622        }
4623      } else
4624        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
4625                         isPPC64, isTailCall, false, MemOpChains,
4626                         TailCallArguments, dl);
4627      if (isPPC64)
4628        ArgOffset += 8;
4629      else
4630        ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
4631      break;
4632    case MVT::v4f32:
4633    case MVT::v4i32:
4634    case MVT::v8i16:
4635    case MVT::v16i8:
4636      if (isVarArg) {
4637        // These go aligned on the stack, or in the corresponding R registers
4638        // when within range.  The Darwin PPC ABI doc claims they also go in
4639        // V registers; in fact gcc does this only for arguments that are
4640        // prototyped, not for those that match the ...  We do it for all
4641        // arguments, seems to work.
4642        while (ArgOffset % 16 !=0) {
4643          ArgOffset += PtrByteSize;
4644          if (GPR_idx != NumGPRs)
4645            GPR_idx++;
4646        }
4647        // We could elide this store in the case where the object fits
4648        // entirely in R registers.  Maybe later.
4649        PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
4650                            DAG.getConstant(ArgOffset, PtrVT));
4651        SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
4652                                     MachinePointerInfo(), false, false, 0);
4653        MemOpChains.push_back(Store);
4654        if (VR_idx != NumVRs) {
4655          SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
4656                                     MachinePointerInfo(),
4657                                     false, false, false, 0);
4658          MemOpChains.push_back(Load.getValue(1));
4659          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
4660        }
4661        ArgOffset += 16;
4662        for (unsigned i=0; i<16; i+=PtrByteSize) {
4663          if (GPR_idx == NumGPRs)
4664            break;
4665          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
4666                                  DAG.getConstant(i, PtrVT));
4667          SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
4668                                     false, false, false, 0);
4669          MemOpChains.push_back(Load.getValue(1));
4670          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
4671        }
4672        break;
4673      }
4674
4675      // Non-varargs Altivec params generally go in registers, but have
4676      // stack space allocated at the end.
4677      if (VR_idx != NumVRs) {
4678        // Doesn't have GPR space allocated.
4679        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
4680      } else if (nAltivecParamsAtEnd==0) {
4681        // We are emitting Altivec params in order.
4682        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
4683                         isPPC64, isTailCall, true, MemOpChains,
4684                         TailCallArguments, dl);
4685        ArgOffset += 16;
4686      }
4687      break;
4688    }
4689  }
4690  // If all Altivec parameters fit in registers, as they usually do,
4691  // they get stack space following the non-Altivec parameters.  We
4692  // don't track this here because nobody below needs it.
4693  // If there are more Altivec parameters than fit in registers, emit
4694  // the stores here.
4695  if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
4696    unsigned j = 0;
4697    // Offset is aligned; skip 1st 12 params which go in V registers.
4698    ArgOffset = ((ArgOffset+15)/16)*16;
4699    ArgOffset += 12*16;
4700    for (unsigned i = 0; i != NumOps; ++i) {
4701      SDValue Arg = OutVals[i];
4702      EVT ArgType = Outs[i].VT;
4703      if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
4704          ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
4705        if (++j > NumVRs) {
4706          SDValue PtrOff;
4707          // We are emitting Altivec params in order.
4708          LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
4709                           isPPC64, isTailCall, true, MemOpChains,
4710                           TailCallArguments, dl);
4711          ArgOffset += 16;
4712        }
4713      }
4714    }
4715  }
4716
4717  if (!MemOpChains.empty())
4718    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
4719
4720  // On Darwin, R12 must contain the address of an indirect callee.  This does
4721  // not mean the MTCTR instruction must use R12; it's easier to model this as
4722  // an extra parameter, so do that.
4723  if (!isTailCall &&
4724      !dyn_cast<GlobalAddressSDNode>(Callee) &&
4725      !isa<GlobalAddressSDNode>(Callee) &&
4726      !isa<ExternalSymbolSDNode>(Callee) &&
4727    RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
4728                                                   PPC::R12), Callee));
4729
4730  // Build a sequence of copy-to-reg nodes chained together with token chain
4731  // and flag operands which copy the outgoing args into the appropriate regs.
4732  SDValue InFlag;
4733  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
4734    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
4735                             RegsToPass[i].second, InFlag);
4736    InFlag = Chain.getValue(1);
4737  }
4738
4739  if (isTailCall)
4740    PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp,
4741                    FPOp, true, TailCallArguments);
4742
4743  return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
4744                    RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
4745                    Ins, InVals);
4746}
4747
4748bool
4749PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
4750                                  MachineFunction &MF, bool isVarArg,
4751                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
4752                                  LLVMContext &Context) const {
4753  SmallVector<CCValAssign, 16> RVLocs;
4754  CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
4755                 RVLocs, Context);
4756  return CCInfo.CheckReturn(Outs, RetCC_PPC);
4757}
4758
4759SDValue
4760PPCTargetLowering::LowerReturn(SDValue Chain,
4761                               CallingConv::ID CallConv, bool isVarArg,
4762                               const SmallVectorImpl<ISD::OutputArg> &Outs,
4763                               const SmallVectorImpl<SDValue> &OutVals,
4764                               SDLoc dl, SelectionDAG &DAG) const {
4765
4766  SmallVector<CCValAssign, 16> RVLocs;
4767  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4768                 getTargetMachine(), RVLocs, *DAG.getContext());
4769  CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
4770
4771  SDValue Flag;
4772  SmallVector<SDValue, 4> RetOps(1, Chain);
4773
4774  // Copy the result values into the output registers.
4775  for (unsigned i = 0; i != RVLocs.size(); ++i) {
4776    CCValAssign &VA = RVLocs[i];
4777    assert(VA.isRegLoc() && "Can only return in registers!");
4778
4779    SDValue Arg = OutVals[i];
4780
4781    switch (VA.getLocInfo()) {
4782    default: llvm_unreachable("Unknown loc info!");
4783    case CCValAssign::Full: break;
4784    case CCValAssign::AExt:
4785      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
4786      break;
4787    case CCValAssign::ZExt:
4788      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
4789      break;
4790    case CCValAssign::SExt:
4791      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
4792      break;
4793    }
4794
4795    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
4796    Flag = Chain.getValue(1);
4797    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
4798  }
4799
4800  RetOps[0] = Chain;  // Update chain.
4801
4802  // Add the flag if we have it.
4803  if (Flag.getNode())
4804    RetOps.push_back(Flag);
4805
4806  return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
4807}
4808
4809SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
4810                                   const PPCSubtarget &Subtarget) const {
4811  // When we pop the dynamic allocation we need to restore the SP link.
4812  SDLoc dl(Op);
4813
4814  // Get the correct type for pointers.
4815  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
4816
4817  // Construct the stack pointer operand.
4818  bool isPPC64 = Subtarget.isPPC64();
4819  unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
4820  SDValue StackPtr = DAG.getRegister(SP, PtrVT);
4821
4822  // Get the operands for the STACKRESTORE.
4823  SDValue Chain = Op.getOperand(0);
4824  SDValue SaveSP = Op.getOperand(1);
4825
4826  // Load the old link SP.
4827  SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr,
4828                                   MachinePointerInfo(),
4829                                   false, false, false, 0);
4830
4831  // Restore the stack pointer.
4832  Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
4833
4834  // Store the old link SP.
4835  return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo(),
4836                      false, false, 0);
4837}
4838
4839
4840
4841SDValue
4842PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
4843  MachineFunction &MF = DAG.getMachineFunction();
4844  bool isPPC64 = Subtarget.isPPC64();
4845  bool isDarwinABI = Subtarget.isDarwinABI();
4846  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
4847
4848  // Get the current return address save index.  The users of this index
4849  // are primarily the tail-call lowering code.
4850  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
4851  int RASI = FI->getReturnAddrSaveIndex();
4852
4853  // If the return address save index hasn't been defined yet.
4854  if (!RASI) {
4855    // Find out the fixed offset of the return address save area.
4856    int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
4857    // Allocate the frame index for the return address save area.
4858    RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, true);
4859    // Save the result.
4860    FI->setReturnAddrSaveIndex(RASI);
4861  }
4862  return DAG.getFrameIndex(RASI, PtrVT);
4863}
4864
4865SDValue
4866PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG &DAG) const {
4867  MachineFunction &MF = DAG.getMachineFunction();
4868  bool isPPC64 = Subtarget.isPPC64();
4869  bool isDarwinABI = Subtarget.isDarwinABI();
4870  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
4871
4872  // Get current frame pointer save index.  The users of this index will be
4873  // primarily DYNALLOC instructions.
4874  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
4875  int FPSI = FI->getFramePointerSaveIndex();
4876
4877  // If the frame pointer save index hasn't been defined yet.
4878  if (!FPSI) {
4879    // Find out the fixed offset of the frame pointer save area.
4880    int FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64,
4881                                                           isDarwinABI);
4882
4883    // Allocate the frame index for frame pointer save area.
4884    FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
4885    // Save the result.
4886    FI->setFramePointerSaveIndex(FPSI);
4887  }
4888  return DAG.getFrameIndex(FPSI, PtrVT);
4889}
4890
4891SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
4892                                         SelectionDAG &DAG,
4893                                         const PPCSubtarget &Subtarget) const {
4894  // Get the inputs.
4895  SDValue Chain = Op.getOperand(0);
4896  SDValue Size  = Op.getOperand(1);
4897  SDLoc dl(Op);
4898
4899  // Get the correct type for pointers.
4900  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
4901  // Negate the size.
4902  SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
4903                                  DAG.getConstant(0, PtrVT), Size);
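  // The stack grows downward, so DYNALLOC adjusts SP by adding the negated
  // size; the node's first result is the address of the new allocation.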
4904  // Construct a node for the frame pointer save index.
4905  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
4906  // Build a DYNALLOC node.
4907  SDValue Ops[3] = { Chain, NegSize, FPSIdx };
4908  SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
4909  return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
4910}
4911
4912SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
4913                                               SelectionDAG &DAG) const {
4914  SDLoc DL(Op);
4915  return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
4916                     DAG.getVTList(MVT::i32, MVT::Other),
4917                     Op.getOperand(0), Op.getOperand(1));
4918}
4919
4920SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
4921                                                SelectionDAG &DAG) const {
4922  SDLoc DL(Op);
4923  return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
4924                     Op.getOperand(0), Op.getOperand(1));
4925}
4926
4927SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
4928  assert(Op.getValueType() == MVT::i1 &&
4929         "Custom lowering only for i1 loads");
4930
4931  // First, load 8 bits into a pointer-width integer, then truncate to 1 bit.
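  // For example (illustrative), an IR-level 'load i1' becomes an EXTLOAD of
  // i8 into a pointer-width integer followed by a TRUNCATE to i1, with the
  // load's chain result passed through.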
4932
4933  SDLoc dl(Op);
4934  LoadSDNode *LD = cast<LoadSDNode>(Op);
4935
4936  SDValue Chain = LD->getChain();
4937  SDValue BasePtr = LD->getBasePtr();
4938  MachineMemOperand *MMO = LD->getMemOperand();
4939
4940  SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(), Chain,
4941                                 BasePtr, MVT::i8, MMO);
4942  SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
4943
4944  SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
4945  return DAG.getMergeValues(Ops, dl);
4946}
4947
4948SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
4949  assert(Op.getOperand(1).getValueType() == MVT::i1 &&
4950         "Custom lowering only for i1 stores");
4951
4952  // First, zero extend to pointer width, then use a truncating store to 8 bits.
4953
4954  SDLoc dl(Op);
4955  StoreSDNode *ST = cast<StoreSDNode>(Op);
4956
4957  SDValue Chain = ST->getChain();
4958  SDValue BasePtr = ST->getBasePtr();
4959  SDValue Value = ST->getValue();
4960  MachineMemOperand *MMO = ST->getMemOperand();
4961
4962  Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(), Value);
4963  return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
4964}
4965
4966// FIXME: Remove this once the ANDI glue bug is fixed:
4967SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
4968  assert(Op.getValueType() == MVT::i1 &&
4969         "Custom lowering only for i1 results");
4970
4971  SDLoc DL(Op);
4972  return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
4973                     Op.getOperand(0));
4974}
4975
4976/// LowerSELECT_CC - Lower floating-point select_cc's into an fsel instruction
4977/// when possible.
4978SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
4979  // Not FP? Not a fsel.
4980  if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
4981      !Op.getOperand(2).getValueType().isFloatingPoint())
4982    return Op;
4983
4984  // We might be able to do better than this under some circumstances, but in
4985  // general, fsel-based lowering of select is a finite-math-only optimization.
4986  // For more information, see section F.3 of the 2.06 ISA specification.
4987  if (!DAG.getTarget().Options.NoInfsFPMath ||
4988      !DAG.getTarget().Options.NoNaNsFPMath)
4989    return Op;
4990
4991  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4992
4993  EVT ResVT = Op.getValueType();
4994  EVT CmpVT = Op.getOperand(0).getValueType();
4995  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
4996  SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
4997  SDLoc dl(Op);
4998
4999  // If the RHS of the comparison is a 0.0, we don't need to do the
5000  // subtraction at all.
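  // For example, 'select_cc setge LHS, 0.0, TV, FV' maps directly onto
  // 'fsel LHS, TV, FV': fsel yields its second operand when the first is
  // greater than or equal to zero, and its third operand otherwise.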
5001  SDValue Sel1;
5002  if (isFloatingPointZero(RHS))
5003    switch (CC) {
5004    default: break;       // SETUO etc aren't handled by fsel.
5005    case ISD::SETNE:
5006      std::swap(TV, FV);
5007    case ISD::SETEQ:
5008      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
5009        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
5010      Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
5011      if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
5012        Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
5013      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
5014                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
5015    case ISD::SETULT:
5016    case ISD::SETLT:
5017      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
5018    case ISD::SETOGE:
5019    case ISD::SETGE:
5020      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
5021        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
5022      return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
5023    case ISD::SETUGT:
5024    case ISD::SETGT:
5025      std::swap(TV, FV);  // fsel is natively setge, swap operands for setgt
5026    case ISD::SETOLE:
5027    case ISD::SETLE:
5028      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
5029        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
5030      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
5031                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
5032    }
5033
5034  SDValue Cmp;
5035  switch (CC) {
5036  default: break;       // SETUO etc aren't handled by fsel.
5037  case ISD::SETNE:
5038    std::swap(TV, FV);
5039  case ISD::SETEQ:
5040    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
5041    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
5042      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
5043    Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
5044    if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
5045      Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
5046    return DAG.getNode(PPCISD::FSEL, dl, ResVT,
5047                       DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
5048  case ISD::SETULT:
5049  case ISD::SETLT:
5050    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
5051    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
5052      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
5053    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
5054  case ISD::SETOGE:
5055  case ISD::SETGE:
5056    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
5057    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
5058      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
5059    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
5060  case ISD::SETUGT:
5061  case ISD::SETGT:
5062    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
5063    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
5064      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
5065    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
5066  case ISD::SETOLE:
5067  case ISD::SETLE:
5068    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
5069    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
5070      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
5071    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
5072  }
5073  return Op;
5074}
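
// Editorial sketch (not part of the original file): a scalar model of the
// fsel semantics the lowering above relies on.  fsel selects its second
// operand when the first is greater than or equal to zero (including -0.0)
// and its third operand otherwise; a NaN first operand compares false and
// selects the third operand, which is why this lowering is gated on the
// no-NaNs/no-infs options above.
LLVM_ATTRIBUTE_UNUSED static double FSELModel(double X, double A, double B) {
  // E.g. "LHS >= RHS ? TV : FV" becomes FSELModel(LHS - RHS, TV, FV).
  return X >= 0.0 ? A : B;
}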
5075
5076// FIXME: Split this code up when LegalizeDAGTypes lands.
5077SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
5078                                           SDLoc dl) const {
5079  assert(Op.getOperand(0).getValueType().isFloatingPoint());
5080  SDValue Src = Op.getOperand(0);
5081  if (Src.getValueType() == MVT::f32)
5082    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
5083
5084  SDValue Tmp;
5085  switch (Op.getSimpleValueType().SimpleTy) {
5086  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
5087  case MVT::i32:
5088    Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
5089                        (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ :
5090                                                   PPCISD::FCTIDZ),
5091                      dl, MVT::f64, Src);
5092    break;
5093  case MVT::i64:
5094    assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
5095           "i64 FP_TO_UINT is supported only with FPCVT");
5096    Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
5097                                                        PPCISD::FCTIDUZ,
5098                      dl, MVT::f64, Src);
5099    break;
5100  }
5101
5102  // Convert the FP value to an int value through memory.
5103  bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
5104    (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
5105  SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
5106  int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
5107  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI);
5108
5109  // Emit a store to the stack slot.
5110  SDValue Chain;
5111  if (i32Stack) {
5112    MachineFunction &MF = DAG.getMachineFunction();
5113    MachineMemOperand *MMO =
5114      MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
5115    SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
5116    Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
5117              DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
5118  } else
5119    Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
5120                         MPI, false, false, 0);
5121
5122  // Result is a load from the stack slot.  If loading 4 bytes, make sure to
5123  // add in a bias.
5124  if (Op.getValueType() == MVT::i32 && !i32Stack) {
5125    FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
5126                        DAG.getConstant(4, FIPtr.getValueType()));
5127    MPI = MachinePointerInfo();
5128  }
5129
5130  return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MPI,
5131                     false, false, false, 0);
5132}
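
// Editorial note (not from the original source): fctiwz/fctidz and friends
// leave the converted integer in a floating-point register, so the value is
// moved to a GPR through a stack slot.  When the f64 path is used for an i32
// result, all 8 bytes are stored and the 32-bit integer occupies the low
// word, which on a big-endian layout sits at byte offset 4 -- hence the bias
// added to FIPtr above before the final load.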
5133
5134SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
5135                                           SelectionDAG &DAG) const {
5136  SDLoc dl(Op);
5137  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
5138  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
5139    return SDValue();
5140
5141  if (Op.getOperand(0).getValueType() == MVT::i1)
5142    return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
5143                       DAG.getConstantFP(1.0, Op.getValueType()),
5144                       DAG.getConstantFP(0.0, Op.getValueType()));
5145
5146  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
5147         "UINT_TO_FP is supported only with FPCVT");
5148
5149  // If we have FCFIDS, then use it when converting to single-precision.
5150  // Otherwise, convert to double-precision and then round.
5151  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
5152                   (Op.getOpcode() == ISD::UINT_TO_FP ?
5153                    PPCISD::FCFIDUS : PPCISD::FCFIDS) :
5154                   (Op.getOpcode() == ISD::UINT_TO_FP ?
5155                    PPCISD::FCFIDU : PPCISD::FCFID);
5156  MVT      FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
5157                   MVT::f32 : MVT::f64;
5158
5159  if (Op.getOperand(0).getValueType() == MVT::i64) {
5160    SDValue SINT = Op.getOperand(0);
5161    // When converting to single-precision, we actually need to convert
5162    // to double-precision first and then round to single-precision.
5163    // To avoid double-rounding effects during that operation, we have
5164    // to prepare the input operand.  Bits that might be truncated when
5165    // converting to double-precision are replaced by a bit that won't
5166    // be lost at this stage, but is below the single-precision rounding
5167    // position.
5168    //
5169    // However, if -enable-unsafe-fp-math is in effect, accept double
5170    // rounding to avoid the extra overhead.
5171    if (Op.getValueType() == MVT::f32 &&
5172        !Subtarget.hasFPCVT() &&
5173        !DAG.getTarget().Options.UnsafeFPMath) {
5174
5175      // Twiddle input to make sure the low 11 bits are zero.  (If this
5176      // is the case, we are guaranteed the value will fit into the 53 bit
5177      // mantissa of an IEEE double-precision value without rounding.)
5178      // If any of those low 11 bits were not zero originally, make sure
5179      // bit 12 (value 2048) is set instead, so that the final rounding
5180      // to single-precision gets the correct result.
5181      SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
5182                                  SINT, DAG.getConstant(2047, MVT::i64));
5183      Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
5184                          Round, DAG.getConstant(2047, MVT::i64));
5185      Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
5186      Round = DAG.getNode(ISD::AND, dl, MVT::i64,
5187                          Round, DAG.getConstant(-2048, MVT::i64));
5188
5189      // However, we cannot use that value unconditionally: if the magnitude
5190      // of the input value is small, the bit-twiddling we did above might
5191      // end up visibly changing the output.  Fortunately, in that case, we
5192      // don't need to twiddle bits since the original input will convert
5193      // exactly to double-precision floating-point already.  Therefore,
5194      // construct a conditional to use the original value if the top 11
5195      // bits are all sign-bit copies, and use the rounded value computed
5196      // above otherwise.
5197      SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
5198                                 SINT, DAG.getConstant(53, MVT::i32));
5199      Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
5200                         Cond, DAG.getConstant(1, MVT::i64));
5201      Cond = DAG.getSetCC(dl, MVT::i32,
5202                          Cond, DAG.getConstant(1, MVT::i64), ISD::SETUGT);
5203
5204      SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
5205    }
5206
5207    SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
5208    SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
5209
5210    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
5211      FP = DAG.getNode(ISD::FP_ROUND, dl,
5212                       MVT::f32, FP, DAG.getIntPtrConstant(0));
5213    return FP;
5214  }
5215
5216  assert(Op.getOperand(0).getValueType() == MVT::i32 &&
5217         "Unhandled INT_TO_FP type in custom expander!");
5218  // Since we only generate this in 64-bit mode, we can take advantage of
5219  // 64-bit registers.  In particular, sign extend the input value into a
5220  // 64-bit register with extsw, store the whole 64-bit value into the stack
5221  // slot, then lfd it and fcfid it.
5222  MachineFunction &MF = DAG.getMachineFunction();
5223  MachineFrameInfo *FrameInfo = MF.getFrameInfo();
5224  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
5225
5226  SDValue Ld;
5227  if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
5228    int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
5229    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
5230
5231    SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
5232                                 MachinePointerInfo::getFixedStack(FrameIdx),
5233                                 false, false, 0);
5234
5235    assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
5236           "Expected an i32 store");
5237    MachineMemOperand *MMO =
5238      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
5239                              MachineMemOperand::MOLoad, 4, 4);
5240    SDValue Ops[] = { Store, FIdx };
5241    Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
5242                                   PPCISD::LFIWZX : PPCISD::LFIWAX,
5243                                 dl, DAG.getVTList(MVT::f64, MVT::Other),
5244                                 Ops, MVT::i32, MMO);
5245  } else {
5246    assert(Subtarget.isPPC64() &&
5247           "i32->FP without LFIWAX supported only on PPC64");
5248
5249    int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
5250    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
5251
5252    SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
5253                                Op.getOperand(0));
5254
5255    // STD the extended value into the stack slot.
5256    SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx,
5257                                 MachinePointerInfo::getFixedStack(FrameIdx),
5258                                 false, false, 0);
5259
5260    // Load the value as a double.
5261    Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx,
5262                     MachinePointerInfo::getFixedStack(FrameIdx),
5263                     false, false, false, 0);
5264  }
5265
5266  // FCFID it and return it.
5267  SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
5268  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
5269    FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0));
5270  return FP;
5271}
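
// Editorial sketch (illustrative only, not used by the lowering): the
// bit-twiddling performed above for i64 -> f32 without FPCVT, in scalar
// form.  Any nonzero low 11 bits are collapsed into bit 11 so the
// intermediate i64 -> f64 conversion cannot round, and the original value is
// kept whenever bits 63:53 are all sign-bit copies (the value already
// converts to double exactly).
LLVM_ATTRIBUTE_UNUSED static int64_t PrepareForSinglePrec(int64_t SINT) {
  int64_t Round = SINT & 2047;   // Isolate the low 11 bits.
  Round += 2047;                 // Carry into bit 11 iff any were nonzero.
  Round |= SINT;
  Round &= -2048;                // Clear the low 11 bits.
  // Keep the original value when the top 11 bits are all sign-bit copies.
  uint64_t Cond = uint64_t(SINT >> 53) + 1;
  return Cond > 1 ? Round : SINT;
}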
5272
5273SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
5274                                            SelectionDAG &DAG) const {
5275  SDLoc dl(Op);
5276  /*
5277   The rounding mode is in bits 30:31 of the FPSCR, and has the following
5278   settings:
5279     00 Round to nearest
5280     01 Round to 0
5281     10 Round to +inf
5282     11 Round to -inf
5283
5284  FLT_ROUNDS, on the other hand, expects the following:
5285    -1 Undefined
5286     0 Round to 0
5287     1 Round to nearest
5288     2 Round to +inf
5289     3 Round to -inf
5290
5291  To perform the conversion, we do:
5292    ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
5293  */
5294
5295  MachineFunction &MF = DAG.getMachineFunction();
5296  EVT VT = Op.getValueType();
5297  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
5298
5299  // Save FP Control Word to register
5300  EVT NodeTys[] = {
5301    MVT::f64,    // return register
5302    MVT::Glue    // unused in this context
5303  };
5304  SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);
5305
5306  // Save FP register to stack slot
5307  int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
5308  SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
5309  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain,
5310                               StackSlot, MachinePointerInfo(), false, false,0);
5311                               StackSlot, MachinePointerInfo(), false, false, 0);
5312  // Load FP Control Word from low 32 bits of stack slot.
5313  SDValue Four = DAG.getConstant(4, PtrVT);
5314  SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
5315  SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(),
5316                            false, false, false, 0);
5317
5318  // Transform as necessary
5319  SDValue CWD1 =
5320    DAG.getNode(ISD::AND, dl, MVT::i32,
5321                CWD, DAG.getConstant(3, MVT::i32));
5322  SDValue CWD2 =
5323    DAG.getNode(ISD::SRL, dl, MVT::i32,
5324                DAG.getNode(ISD::AND, dl, MVT::i32,
5325                            DAG.getNode(ISD::XOR, dl, MVT::i32,
5326                                        CWD, DAG.getConstant(3, MVT::i32)),
5327                            DAG.getConstant(3, MVT::i32)),
5328                DAG.getConstant(1, MVT::i32));
5329
5330  SDValue RetVal =
5331    DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
5332
5333  return DAG.getNode((VT.getSizeInBits() < 16 ?
5334                      ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
5335}
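
// Editorial sketch (illustrative only, not used by the lowering): the
// RN-to-FLT_ROUNDS transform computed above, in scalar form.  It maps
// 0 (nearest) -> 1, 1 (toward zero) -> 0, 2 (+inf) -> 2, and 3 (-inf) -> 3.
LLVM_ATTRIBUTE_UNUSED static unsigned MapRNToFltRounds(unsigned CWD) {
  return (CWD & 3) ^ (((CWD ^ 3) & 3) >> 1);
}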
5336
5337SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
5338  EVT VT = Op.getValueType();
5339  unsigned BitWidth = VT.getSizeInBits();
5340  SDLoc dl(Op);
5341  assert(Op.getNumOperands() == 3 &&
5342         VT == Op.getOperand(1).getValueType() &&
5343         "Unexpected SHL!");
5344
5345  // Expand into a bunch of logical ops.  Note that these ops
5346  // depend on the PPC behavior for oversized shift amounts.
5347  SDValue Lo = Op.getOperand(0);
5348  SDValue Hi = Op.getOperand(1);
5349  SDValue Amt = Op.getOperand(2);
5350  EVT AmtVT = Amt.getValueType();
5351
5352  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
5353                             DAG.getConstant(BitWidth, AmtVT), Amt);
5354  SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
5355  SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
5356  SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
5357  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
5358                             DAG.getConstant(-BitWidth, AmtVT));
5359  SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
5360  SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
5361  SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
5362  SDValue OutOps[] = { OutLo, OutHi };
5363  return DAG.getMergeValues(OutOps, dl);
5364}
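
// Editorial sketch (illustrative only, not used by the lowering): the
// SHL_PARTS expansion above in scalar form for 32-bit parts.  It depends on
// the PPC shift semantics noted in the comment above: amounts in the range
// 32..63 produce zero rather than being taken modulo 32, so the (32 - Amt)
// and (Amt - 32) terms simply drop out when they do not apply.
LLVM_ATTRIBUTE_UNUSED static uint32_t PPCShl32(uint32_t V, unsigned Amt) {
  return (Amt & 63) < 32 ? V << (Amt & 63) : 0;
}
LLVM_ATTRIBUTE_UNUSED static uint32_t PPCSrl32(uint32_t V, unsigned Amt) {
  return (Amt & 63) < 32 ? V >> (Amt & 63) : 0;
}
LLVM_ATTRIBUTE_UNUSED static void Shl64PartsModel(uint32_t Lo, uint32_t Hi,
                                                  unsigned Amt,
                                                  uint32_t &OutLo,
                                                  uint32_t &OutHi) {
  // Mirrors the Tmp1..Tmp6 dataflow in LowerSHL_PARTS.
  OutHi = PPCShl32(Hi, Amt) | PPCSrl32(Lo, 32 - Amt) | PPCShl32(Lo, Amt - 32);
  OutLo = PPCShl32(Lo, Amt);
}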
5365
5366SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
5367  EVT VT = Op.getValueType();
5368  SDLoc dl(Op);
5369  unsigned BitWidth = VT.getSizeInBits();
5370  assert(Op.getNumOperands() == 3 &&
5371         VT == Op.getOperand(1).getValueType() &&
5372         "Unexpected SRL!");
5373
5374  // Expand into a bunch of logical ops.  Note that these ops
5375  // depend on the PPC behavior for oversized shift amounts.
5376  SDValue Lo = Op.getOperand(0);
5377  SDValue Hi = Op.getOperand(1);
5378  SDValue Amt = Op.getOperand(2);
5379  EVT AmtVT = Amt.getValueType();
5380
5381  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
5382                             DAG.getConstant(BitWidth, AmtVT), Amt);
5383  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
5384  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
5385  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
5386  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
5387                             DAG.getConstant(-BitWidth, AmtVT));
5388  SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
5389  SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
5390  SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
5391  SDValue OutOps[] = { OutLo, OutHi };
5392  return DAG.getMergeValues(OutOps, dl);
5393}
5394
5395SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
5396  SDLoc dl(Op);
5397  EVT VT = Op.getValueType();
5398  unsigned BitWidth = VT.getSizeInBits();
5399  assert(Op.getNumOperands() == 3 &&
5400         VT == Op.getOperand(1).getValueType() &&
5401         "Unexpected SRA!");
5402
5403  // Expand into a bunch of logical ops, followed by a select_cc.
5404  SDValue Lo = Op.getOperand(0);
5405  SDValue Hi = Op.getOperand(1);
5406  SDValue Amt = Op.getOperand(2);
5407  EVT AmtVT = Amt.getValueType();
5408
5409  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
5410                             DAG.getConstant(BitWidth, AmtVT), Amt);
5411  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
5412  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
5413  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
5414  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
5415                             DAG.getConstant(-BitWidth, AmtVT));
5416  SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
5417  SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
5418  SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT),
5419                                  Tmp4, Tmp6, ISD::SETLE);
5420  SDValue OutOps[] = { OutLo, OutHi };
5421  return DAG.getMergeValues(OutOps, dl);
5422}
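
// Editorial note (not from the original source): unlike SHL/SRL above, the
// low word of an arithmetic right shift cannot be formed by OR-ing two
// partial shifts: for Amt > 32 the correct low word is Hi >>s (Amt - 32),
// whose vacated bits are sign copies.  The select_cc on Tmp5 (= Amt - 32)
// therefore picks Tmp6 outright in that case instead of OR-ing it with Tmp4.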
5423
5424//===----------------------------------------------------------------------===//
5425// Vector related lowering.
5426//
5427
5428/// BuildSplatI - Build a canonical splati of Val with an element size of
5429/// SplatSize.  Cast the result to VT.
5430static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
5431                             SelectionDAG &DAG, SDLoc dl) {
5432  assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
5433
5434  static const EVT VTys[] = { // canonical VT to use for each size.
5435    MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
5436  };
5437
5438  EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
5439
5440  // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
5441  if (Val == -1)
5442    SplatSize = 1;
5443
5444  EVT CanonicalVT = VTys[SplatSize-1];
5445
5446  // Build a canonical splat for this value.
5447  SDValue Elt = DAG.getConstant(Val, MVT::i32);
5448  SmallVector<SDValue, 8> Ops;
5449  Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
5450  SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, Ops);
5451  return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res);
5452}
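
// Editorial example (not from the original source): BuildSplatI(-5, 2, VT,
// ...) builds a v8i16 BUILD_VECTOR of constant -5, which the selector later
// matches as "vspltish -5".  A requested value of -1 is re-issued with
// SplatSize 1 because "vspltisb -1" produces all-ones bytes, the same bit
// pattern for every element width.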
5453
5454/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
5455/// specified intrinsic ID.
5456static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op,
5457                                SelectionDAG &DAG, SDLoc dl,
5458                                EVT DestVT = MVT::Other) {
5459  if (DestVT == MVT::Other) DestVT = Op.getValueType();
5460  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
5461                     DAG.getConstant(IID, MVT::i32), Op);
5462}
5463
5464/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
5465/// specified intrinsic ID.
5466static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
5467                                SelectionDAG &DAG, SDLoc dl,
5468                                EVT DestVT = MVT::Other) {
5469  if (DestVT == MVT::Other) DestVT = LHS.getValueType();
5470  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
5471                     DAG.getConstant(IID, MVT::i32), LHS, RHS);
5472}
5473
5474/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
5475/// specified intrinsic ID.
5476static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
5477                                SDValue Op2, SelectionDAG &DAG,
5478                                SDLoc dl, EVT DestVT = MVT::Other) {
5479  if (DestVT == MVT::Other) DestVT = Op0.getValueType();
5480  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
5481                     DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
5482}
5483
5484
5485/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
5486/// amount.  The result has the specified value type.
5487static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
5488                             EVT VT, SelectionDAG &DAG, SDLoc dl) {
5489  // Force LHS/RHS to be the right type.
5490  LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
5491  RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
5492
5493  int Ops[16];
5494  for (unsigned i = 0; i != 16; ++i)
5495    Ops[i] = i + Amt;
5496  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
5497  return DAG.getNode(ISD::BITCAST, dl, VT, T);
5498}
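
// Editorial example (not from the original source): with LHS == RHS,
// BuildVSLDOI(T, T, 1, ...) produces the byte shuffle mask <1, 2, ..., 16>,
// i.e. the bytes of T rotated left by one position, exactly what
// "vsldoi T, T, 1" computes.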
5499
5500// If this is a case we can't handle, return null and let the default
5501// expansion code take care of it.  If we CAN select this case, and if it
5502// selects to a single instruction, return Op.  Otherwise, if we can codegen
5503// this case more efficiently than a constant pool load, lower it to the
5504// sequence of ops that should be used.
5505SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
5506                                             SelectionDAG &DAG) const {
5507  SDLoc dl(Op);
5508  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
5509  assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
5510
5511  // Check if this is a splat of a constant value.
5512  APInt APSplatBits, APSplatUndef;
5513  unsigned SplatBitSize;
5514  bool HasAnyUndefs;
5515  if (!BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
5516                             HasAnyUndefs, 0, true) || SplatBitSize > 32)
5517    return SDValue();
5518
5519  unsigned SplatBits = APSplatBits.getZExtValue();
5520  unsigned SplatUndef = APSplatUndef.getZExtValue();
5521  unsigned SplatSize = SplatBitSize / 8;
5522
5523  // First, handle single instruction cases.
5524
5525  // All zeros?
5526  if (SplatBits == 0) {
5527    // Canonicalize all zero vectors to be v4i32.
5528    if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
5529      SDValue Z = DAG.getConstant(0, MVT::i32);
5530      Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z);
5531      Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
5532    }
5533    return Op;
5534  }
5535
5536  // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
5537  int32_t SextVal = (int32_t(SplatBits << (32-SplatBitSize)) >>
5538                    (32-SplatBitSize));
5539  if (SextVal >= -16 && SextVal <= 15)
5540    return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
5541
5542
5543  // Two instruction sequences.
5544
5545  // If this value is in the range [-32,30] and is even, use:
5546  //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
5547  // If this value is in the range [17,31] and is odd, use:
5548  //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
5549  // If this value is in the range [-31,-17] and is odd, use:
5550  //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
5551  // Note the last two are three-instruction sequences.
5552  if (SextVal >= -32 && SextVal <= 31) {
5553    // To avoid having these optimizations undone by constant folding,
5554    // we convert to a pseudo that will be expanded later into one of
5555    // the above forms.
5556    SDValue Elt = DAG.getConstant(SextVal, MVT::i32);
5557    EVT VT = (SplatSize == 1 ? MVT::v16i8 :
5558              (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
5559    SDValue EltSize = DAG.getConstant(SplatSize, MVT::i32);
5560    SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
5561    if (VT == Op.getValueType())
5562      return RetVal;
5563    else
5564      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
5565  }
5566
5567  // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
5568  // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
5569  // for fneg/fabs.
5570  if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
5571    // Make -1 and vspltisw -1:
5572    SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
5573
5574    // Make the VSLW intrinsic, computing 0x8000_0000.
5575    SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
5576                                   OnesV, DAG, dl);
5577
5578    // xor by OnesV to invert it.
5579    Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
5580    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
5581  }
5582
5583  // The remaining cases assume either big endian element order or
5584  // a splat-size that equates to the element size of the vector
5585  // to be built.  An example that doesn't work for little endian is
5586  // {0, -1, 0, -1, 0, -1, 0, -1} which has a splat size of 32 bits
5587  // and a vector element size of 16 bits.  The code below will
5588  // produce the vector in big endian element order, which for little
5589  // endian is {-1, 0, -1, 0, -1, 0, -1, 0}.
5590
5591  // For now, just avoid these optimizations in that case.
5592  // FIXME: Develop correct optimizations for LE with mismatched
5593  // splat and element sizes.
5594
5595  if (Subtarget.isLittleEndian() &&
5596      SplatSize != Op.getValueType().getVectorElementType().getSizeInBits())
5597    return SDValue();
5598
5599  // Check to see if this is one of a wide variety of 'vsplti* + binop self' cases.
5600  static const signed char SplatCsts[] = {
5601    -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
5602    -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
5603  };
5604
5605  for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
5606    // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
5607    // ambiguous cases (e.g. the formation of 0x8000_0000), where it is cheapest.
5608    int i = SplatCsts[idx];
5609
5610    // Figure out what shift amount will be used by altivec if shifted by i in
5611    // this splat size.
5612    unsigned TypeShiftAmt = i & (SplatBitSize-1);
5613
5614    // vsplti + shl self.
5615    if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
5616      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
5617      static const unsigned IIDs[] = { // Intrinsic to use for each size.
5618        Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
5619        Intrinsic::ppc_altivec_vslw
5620      };
5621      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
5622      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
5623    }
5624
5625    // vsplti + srl self.
5626    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
5627      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
5628      static const unsigned IIDs[] = { // Intrinsic to use for each size.
5629        Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
5630        Intrinsic::ppc_altivec_vsrw
5631      };
5632      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
5633      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
5634    }
5635
5636    // vsplti + sra self (note the arithmetic shift of the signed value).
5637    if (SextVal == (int)(i >> TypeShiftAmt)) {
5638      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
5639      static const unsigned IIDs[] = { // Intrinsic to use for each size.
5640        Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
5641        Intrinsic::ppc_altivec_vsraw
5642      };
5643      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
5644      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
5645    }
5646
5647    // vsplti + rol self.
5648    if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
5649                         ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
5650      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
5651      static const unsigned IIDs[] = { // Intrinsic to use for each size.
5652        Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
5653        Intrinsic::ppc_altivec_vrlw
5654      };
5655      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
5656      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
5657    }
5658
5659    // t = vsplti c, result = vsldoi t, t, 1
5660    if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
5661      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
5662      return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl);
5663    }
5664    // t = vsplti c, result = vsldoi t, t, 2
5665    if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
5666      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
5667      return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl);
5668    }
5669    // t = vsplti c, result = vsldoi t, t, 3
5670    if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
5671      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
5672      return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl);
5673    }
5674  }
5675
5676  return SDValue();
5677}
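
// Editorial worked example (not from the original source): for the splat
// {0xFFFE, 0xFFFE, ...} we get SplatBits == 0xFFFE and SplatBitSize == 16,
// so SextVal == int32_t(0xFFFE0000) >> 16 == -2, which is emitted directly
// as "vspltish -2".  A splat of 24 instead reaches the VADD_SPLAT pseudo and
// is later expanded as vspltisw(12) + vspltisw(12).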
5678
5679/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
5680/// the specified operations to build the shuffle.
5681static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
5682                                      SDValue RHS, SelectionDAG &DAG,
5683                                      SDLoc dl) {
5684  unsigned OpNum = (PFEntry >> 26) & 0x0F;
5685  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
5686  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
5687
5688  enum {
5689    OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
5690    OP_VMRGHW,
5691    OP_VMRGLW,
5692    OP_VSPLTISW0,
5693    OP_VSPLTISW1,
5694    OP_VSPLTISW2,
5695    OP_VSPLTISW3,
5696    OP_VSLDOI4,
5697    OP_VSLDOI8,
5698    OP_VSLDOI12
5699  };
5700
5701  if (OpNum == OP_COPY) {
5702    if (LHSID == (1*9+2)*9+3) return LHS;
5703    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
5704    return RHS;
5705  }
5706
5707  SDValue OpLHS, OpRHS;
5708  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
5709  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
5710
5711  int ShufIdxs[16];
5712  switch (OpNum) {
5713  default: llvm_unreachable("Unknown i32 permute!");
5714  case OP_VMRGHW:
5715    ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
5716    ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
5717    ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
5718    ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
5719    break;
5720  case OP_VMRGLW:
5721    ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
5722    ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
5723    ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
5724    ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
5725    break;
5726  case OP_VSPLTISW0:
5727    for (unsigned i = 0; i != 16; ++i)
5728      ShufIdxs[i] = (i&3)+0;
5729    break;
5730  case OP_VSPLTISW1:
5731    for (unsigned i = 0; i != 16; ++i)
5732      ShufIdxs[i] = (i&3)+4;
5733    break;
5734  case OP_VSPLTISW2:
5735    for (unsigned i = 0; i != 16; ++i)
5736      ShufIdxs[i] = (i&3)+8;
5737    break;
5738  case OP_VSPLTISW3:
5739    for (unsigned i = 0; i != 16; ++i)
5740      ShufIdxs[i] = (i&3)+12;
5741    break;
5742  case OP_VSLDOI4:
5743    return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
5744  case OP_VSLDOI8:
5745    return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
5746  case OP_VSLDOI12:
5747    return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
5748  }
5749  EVT VT = OpLHS.getValueType();
5750  OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
5751  OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
5752  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
5753  return DAG.getNode(ISD::BITCAST, dl, VT, T);
5754}
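
// Editorial note (not from the original source): each PerfectShuffleTable
// entry packs a cost in bits 31:30, the operation in bits 29:26, and two
// 13-bit operand IDs in bits 25:13 and 12:0.  Each ID encodes four element
// selectors as base-9 digits (0-3 from LHS, 4-7 from RHS, 8 for undef), so
// the OP_COPY checks above recognize the identity selections <0,1,2,3> and
// <4,5,6,7> written in that encoding.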
5755
5756/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
5757/// is a shuffle we can handle in a single instruction, return it.  Otherwise,
5758/// return the code it can be lowered into.  Worst case, it can always be
5759/// lowered into a vperm.
5760SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
5761                                               SelectionDAG &DAG) const {
5762  SDLoc dl(Op);
5763  SDValue V1 = Op.getOperand(0);
5764  SDValue V2 = Op.getOperand(1);
5765  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
5766  EVT VT = Op.getValueType();
5767  bool isLittleEndian = Subtarget.isLittleEndian();
5768
5769  // Cases that are handled by instructions that take permute immediates
5770  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
5771  // selected by the instruction selector.
5772  if (V2.getOpcode() == ISD::UNDEF) {
5773    if (PPC::isSplatShuffleMask(SVOp, 1) ||
5774        PPC::isSplatShuffleMask(SVOp, 2) ||
5775        PPC::isSplatShuffleMask(SVOp, 4) ||
5776        PPC::isVPKUWUMShuffleMask(SVOp, true, DAG) ||
5777        PPC::isVPKUHUMShuffleMask(SVOp, true, DAG) ||
5778        PPC::isVSLDOIShuffleMask(SVOp, true, DAG) != -1 ||
5779        PPC::isVMRGLShuffleMask(SVOp, 1, true, DAG) ||
5780        PPC::isVMRGLShuffleMask(SVOp, 2, true, DAG) ||
5781        PPC::isVMRGLShuffleMask(SVOp, 4, true, DAG) ||
5782        PPC::isVMRGHShuffleMask(SVOp, 1, true, DAG) ||
5783        PPC::isVMRGHShuffleMask(SVOp, 2, true, DAG) ||
5784        PPC::isVMRGHShuffleMask(SVOp, 4, true, DAG)) {
5785      return Op;
5786    }
5787  }
5788
5789  // Altivec has a variety of "shuffle immediates" that take two vector inputs
5790  // and produce a fixed permutation.  If any of these match, do not lower to
5791  // VPERM.
5792  if (PPC::isVPKUWUMShuffleMask(SVOp, false, DAG) ||
5793      PPC::isVPKUHUMShuffleMask(SVOp, false, DAG) ||
5794      PPC::isVSLDOIShuffleMask(SVOp, false, DAG) != -1 ||
5795      PPC::isVMRGLShuffleMask(SVOp, 1, false, DAG) ||
5796      PPC::isVMRGLShuffleMask(SVOp, 2, false, DAG) ||
5797      PPC::isVMRGLShuffleMask(SVOp, 4, false, DAG) ||
5798      PPC::isVMRGHShuffleMask(SVOp, 1, false, DAG) ||
5799      PPC::isVMRGHShuffleMask(SVOp, 2, false, DAG) ||
5800      PPC::isVMRGHShuffleMask(SVOp, 4, false, DAG))
5801    return Op;
5802
5803  // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
5804  // perfect shuffle table to emit an optimal matching sequence.
5805  ArrayRef<int> PermMask = SVOp->getMask();
5806
5807  unsigned PFIndexes[4];
5808  bool isFourElementShuffle = true;
5809  for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
5810    unsigned EltNo = 8;   // Start out undef.
5811    for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
5812      if (PermMask[i*4+j] < 0)
5813        continue;   // Undef, ignore it.
5814
5815      unsigned ByteSource = PermMask[i*4+j];
5816      if ((ByteSource & 3) != j) {
5817        isFourElementShuffle = false;
5818        break;
5819      }
5820
5821      if (EltNo == 8) {
5822        EltNo = ByteSource/4;
5823      } else if (EltNo != ByteSource/4) {
5824        isFourElementShuffle = false;
5825        break;
5826      }
5827    }
5828    PFIndexes[i] = EltNo;
5829  }
5830
5831  // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
5832  // perfect shuffle vector to determine if it is cost effective to do this as
5833  // discrete instructions, or whether we should use a vperm.
5834  // For now, we skip this for little endian until such time as we have a
5835  // little-endian perfect shuffle table.
5836  if (isFourElementShuffle && !isLittleEndian) {
5837    // Compute the index in the perfect shuffle table.
5838    unsigned PFTableIndex =
5839      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
5840
5841    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
5842    unsigned Cost  = (PFEntry >> 30);
5843
5844    // Determining when to avoid vperm is tricky.  Many things affect the cost
5845    // of vperm, particularly how many times the perm mask needs to be computed.
5846    // For example, if the perm mask can be hoisted out of a loop or is already
5847    // used (perhaps because there are multiple permutes with the same shuffle
5848    // mask?) the vperm has a cost of 1.  OTOH, hoisting the permute mask out of
5849    // the loop requires an extra register.
5850    //
5851    // As a compromise, we only emit discrete instructions if the shuffle can be
5852    // generated in 3 or fewer operations.  When we have loop information
5853    // available, if this block is within a loop, we should avoid using vperm
5854    // for 3-operation perms and use a constant pool load instead.
5855    if (Cost < 3)
5856      return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
5857  }
5858
5859  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
5860  // vector that will get spilled to the constant pool.
5861  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
5862
5863  // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
5864  // that it is in input element units, not in bytes.  Convert now.
5865
5866  // For little endian, the order of the input vectors is reversed, and
5867  // the permutation mask is complemented with respect to 31.  This is
5868  // necessary to produce proper semantics with the big-endian-biased vperm
5869  // instruction.
5870  EVT EltVT = V1.getValueType().getVectorElementType();
5871  unsigned BytesPerElement = EltVT.getSizeInBits()/8;
5872
5873  SmallVector<SDValue, 16> ResultMask;
5874  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
5875    unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
5876
5877    for (unsigned j = 0; j != BytesPerElement; ++j)
5878      if (isLittleEndian)
5879        ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement+j),
5880                                             MVT::i32));
5881      else
5882        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
5883                                             MVT::i32));
5884  }
5885
5886  SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
5887                                  ResultMask);
5888  if (isLittleEndian)
5889    return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
5890                       V2, V1, VPermMask);
5891  else
5892    return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
5893                       V1, V2, VPermMask);
5894}
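
// Editorial worked example (not from the original source): for a v4i32
// shuffle mask <7,0,1,2>, element 7 expands to the byte selectors
// 28,29,30,31, element 0 to 0,1,2,3, and so on.  On little endian the same
// entries become 31-k (3,2,1,0, then 31,30,29,28, ...) and the two inputs
// are swapped, compensating for vperm's big-endian byte numbering.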
5895
5896/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
5897/// altivec comparison.  If it is, return true and fill in Opc/isDot with
5898/// information about the intrinsic.
5899static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
5900                                  bool &isDot) {
5901  unsigned IntrinsicID =
5902    cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
5903  CompareOpc = -1;
5904  isDot = false;
5905  switch (IntrinsicID) {
5906  default: return false;
5907    // Comparison predicates.
5908  case Intrinsic::ppc_altivec_vcmpbfp_p:  CompareOpc = 966; isDot = 1; break;
5909  case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
5910  case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc =   6; isDot = 1; break;
5911  case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc =  70; isDot = 1; break;
5912  case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
5913  case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
5914  case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
5915  case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
5916  case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
5917  case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
5918  case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
5919  case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
5920  case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
5921
5922    // Normal Comparisons.
5923  case Intrinsic::ppc_altivec_vcmpbfp:    CompareOpc = 966; isDot = 0; break;
5924  case Intrinsic::ppc_altivec_vcmpeqfp:   CompareOpc = 198; isDot = 0; break;
5925  case Intrinsic::ppc_altivec_vcmpequb:   CompareOpc =   6; isDot = 0; break;
5926  case Intrinsic::ppc_altivec_vcmpequh:   CompareOpc =  70; isDot = 0; break;
5927  case Intrinsic::ppc_altivec_vcmpequw:   CompareOpc = 134; isDot = 0; break;
5928  case Intrinsic::ppc_altivec_vcmpgefp:   CompareOpc = 454; isDot = 0; break;
5929  case Intrinsic::ppc_altivec_vcmpgtfp:   CompareOpc = 710; isDot = 0; break;
5930  case Intrinsic::ppc_altivec_vcmpgtsb:   CompareOpc = 774; isDot = 0; break;
5931  case Intrinsic::ppc_altivec_vcmpgtsh:   CompareOpc = 838; isDot = 0; break;
5932  case Intrinsic::ppc_altivec_vcmpgtsw:   CompareOpc = 902; isDot = 0; break;
5933  case Intrinsic::ppc_altivec_vcmpgtub:   CompareOpc = 518; isDot = 0; break;
5934  case Intrinsic::ppc_altivec_vcmpgtuh:   CompareOpc = 582; isDot = 0; break;
5935  case Intrinsic::ppc_altivec_vcmpgtuw:   CompareOpc = 646; isDot = 0; break;
5936  }
5937  return true;
5938}
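
// Editorial note (not from the original source): the CompareOpc values above
// are the extended-opcode fields of the corresponding VC-form instructions
// (e.g. 6 for vcmpequb, 966 for vcmpbfp).  The "_p" predicate intrinsics
// reuse the same encodings with isDot set, selecting the record (dot) forms
// that also update CR6.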
5939
5940/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
5941/// lower, do it, otherwise return null.
5942SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
5943                                                   SelectionDAG &DAG) const {
5944  // If this is a lowered altivec predicate compare, CompareOpc is set to the
5945  // opcode number of the comparison.
5946  SDLoc dl(Op);
5947  int CompareOpc;
5948  bool isDot;
5949  if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
5950    return SDValue();    // Don't custom lower most intrinsics.
5951
5952  // If this is a non-dot comparison, make the VCMP node and we are done.
5953  if (!isDot) {
5954    SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
5955                              Op.getOperand(1), Op.getOperand(2),
5956                              DAG.getConstant(CompareOpc, MVT::i32));
5957    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
5958  }
5959
5960  // Create the PPCISD altivec 'dot' comparison node.
5961  SDValue Ops[] = {
5962    Op.getOperand(2),  // LHS
5963    Op.getOperand(3),  // RHS
5964    DAG.getConstant(CompareOpc, MVT::i32)
5965  };
5966  EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
5967  SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
5968
5969  // Now that we have the comparison, emit a copy from the CR to a GPR.
5970  // This is flagged to the above dot comparison.
5971  SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
5972                                DAG.getRegister(PPC::CR6, MVT::i32),
5973                                CompNode.getValue(1));
5974
5975  // Unpack the result based on how the target uses it.
5976  unsigned BitNo;   // Bit # of CR6.
5977  bool InvertBit;   // Invert result?
5978  switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
5979  default:  // Can't happen, don't crash on invalid number though.
5980  case 0:   // Return the value of the EQ bit of CR6.
5981    BitNo = 0; InvertBit = false;
5982    break;
5983  case 1:   // Return the inverted value of the EQ bit of CR6.
5984    BitNo = 0; InvertBit = true;
5985    break;
5986  case 2:   // Return the value of the LT bit of CR6.
5987    BitNo = 2; InvertBit = false;
5988    break;
5989  case 3:   // Return the inverted value of the LT bit of CR6.
5990    BitNo = 2; InvertBit = true;
5991    break;
5992  }
5993
5994  // Shift the bit into the low position.
5995  Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
5996                      DAG.getConstant(8-(3-BitNo), MVT::i32));
5997  // Isolate the bit.
5998  Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
5999                      DAG.getConstant(1, MVT::i32));
6000
6001  // If we are supposed to, toggle the bit.
6002  if (InvertBit)
6003    Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
6004                        DAG.getConstant(1, MVT::i32));
6005  return Flags;
6006}
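
// Editorial note (not from the original source): MFOCRF leaves the four CR6
// bits at positions 7:4 of the result GPR (LT = 7, GT = 6, EQ = 5, SO = 4,
// counting from bit 0), so the shift amount 8-(3-BitNo) above evaluates to
// 5 for the EQ bit (BitNo == 0) and 7 for the LT bit (BitNo == 2).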
6007
6008SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
6009                                                  SelectionDAG &DAG) const {
6010  SDLoc dl(Op);
6011  // For v2i64 (VSX), we can pattern patch the v2i32 case (using fp <-> int
6012  // instructions), but for smaller types, we need to first extend up to v2i32
6013  // before doing going farther.
6014  if (Op.getValueType() == MVT::v2i64) {
6015    EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
6016    if (ExtVT != MVT::v2i32) {
6017      Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0));
6018      Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op,
6019                       DAG.getValueType(EVT::getVectorVT(*DAG.getContext(),
6020                                        ExtVT.getVectorElementType(), 4)));
6021      Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op);
6022      Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op,
6023                       DAG.getValueType(MVT::v2i32));
6024    }
6025
6026    return Op;
6027  }
6028
6029  return SDValue();
6030}
6031
6032SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
6033                                                   SelectionDAG &DAG) const {
6034  SDLoc dl(Op);
6035  // Create a stack slot that is 16-byte aligned.
6036  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
6037  int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
6038  EVT PtrVT = getPointerTy();
6039  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
6040
6041  // Store the input value into Value#0 of the stack slot.
6042  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
6043                               Op.getOperand(0), FIdx, MachinePointerInfo(),
6044                               false, false, 0);
6045  // Load it out.
6046  return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(),
6047                     false, false, false, 0);
6048}
6049
6050SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
6051  SDLoc dl(Op);
6052  if (Op.getValueType() == MVT::v4i32) {
6053    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
6054
6055    SDValue Zero  = BuildSplatI(  0, 1, MVT::v4i32, DAG, dl);
6056    SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl); // +16 as shift amt.
6057
6058    SDValue RHSSwap =   // = vrlw RHS, 16
6059      BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
6060
6061    // Shrinkify inputs to v8i16.
6062    LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
6063    RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
6064    RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
6065
6066    // Low parts multiplied together, generating 32-bit results (we ignore the
6067    // top parts).
6068    SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
6069                                        LHS, RHS, DAG, dl, MVT::v4i32);
6070
6071    SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
6072                                      LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
6073    // Shift the high parts up 16 bits.
6074    HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
6075                              Neg16, DAG, dl);
6076    return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
6077  } else if (Op.getValueType() == MVT::v8i16) {
6078    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
6079
6080    SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);
6081
6082    return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
6083                            LHS, RHS, Zero, DAG, dl);
6084  } else if (Op.getValueType() == MVT::v16i8) {
6085    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
6086    bool isLittleEndian = Subtarget.isLittleEndian();
6087
6088    // Multiply the even 8-bit parts, producing 16-bit sums.
6089    SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
6090                                           LHS, RHS, DAG, dl, MVT::v8i16);
6091    EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
6092
6093    // Multiply the odd 8-bit parts, producing 16-bit sums.
6094    SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
6095                                          LHS, RHS, DAG, dl, MVT::v8i16);
6096    OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
6097
6098    // Merge the results together.  Because vmuleub and vmuloub are
6099    // instructions with a big-endian bias, we must reverse the
6100    // element numbering and reverse the meaning of "odd" and "even"
6101    // when generating little endian code.
6102    int Ops[16];
6103    for (unsigned i = 0; i != 8; ++i) {
6104      if (isLittleEndian) {
6105        Ops[i*2  ] = 2*i;
6106        Ops[i*2+1] = 2*i+16;
6107      } else {
6108        Ops[i*2  ] = 2*i+1;
6109        Ops[i*2+1] = 2*i+1+16;
6110      }
6111    }
6112    if (isLittleEndian)
6113      return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
6114    else
6115      return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
6116  } else {
6117    llvm_unreachable("Unknown mul to lower!");
6118  }
6119}
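
// Editorial sketch (illustrative only, not used by the lowering): the v4i32
// multiply decomposition above, per 32-bit lane.  vmulouh forms the product
// of the low halfwords; vmsumuhm against the halfword-rotated RHS forms
// lo(L)*hi(R) + hi(L)*lo(R), which lands in the high half after the vslw by
// 16.  The hi(L)*hi(R) term would only affect bits >= 32 and is dropped.
LLVM_ATTRIBUTE_UNUSED static uint32_t Mul32LaneModel(uint32_t L, uint32_t R) {
  uint32_t LoProd = (L & 0xFFFF) * (R & 0xFFFF);
  uint32_t HiProd = (L & 0xFFFF) * (R >> 16) + (L >> 16) * (R & 0xFFFF);
  return LoProd + (HiProd << 16); // Equals uint32_t(L * R).
}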
6120
6121/// LowerOperation - Provide custom lowering hooks for some operations.
6122///
6123SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
6124  switch (Op.getOpcode()) {
6125  default: llvm_unreachable("Wasn't expecting to be able to lower this!");
6126  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
6127  case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
6128  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
6129  case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
6130  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
6131  case ISD::SETCC:              return LowerSETCC(Op, DAG);
6132  case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
6133  case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);
6134  case ISD::VASTART:
6135    return LowerVASTART(Op, DAG, Subtarget);
6136
6137  case ISD::VAARG:
6138    return LowerVAARG(Op, DAG, Subtarget);
6139
6140  case ISD::VACOPY:
6141    return LowerVACOPY(Op, DAG, Subtarget);
6142
6143  case ISD::STACKRESTORE:       return LowerSTACKRESTORE(Op, DAG, Subtarget);
6144  case ISD::DYNAMIC_STACKALLOC:
6145    return LowerDYNAMIC_STACKALLOC(Op, DAG, Subtarget);
6146
6147  case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
6148  case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);
6149
6150  case ISD::LOAD:               return LowerLOAD(Op, DAG);
6151  case ISD::STORE:              return LowerSTORE(Op, DAG);
6152  case ISD::TRUNCATE:           return LowerTRUNCATE(Op, DAG);
6153  case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
6154  case ISD::FP_TO_UINT:
6155  case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG,
                                                       SDLoc(Op));
  case ISD::UINT_TO_FP:
  case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
  case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);

  // Lower 64-bit shifts.
  case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
  case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
  case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::SIGN_EXTEND_INREG:  return LowerSIGN_EXTEND_INREG(Op, DAG);
  case ISD::MUL:                return LowerMUL(Op, DAG);

  // For counter-based loop handling.
  case ISD::INTRINSIC_W_CHAIN:  return SDValue();

  // Frame & Return address.
  case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
  }
}

void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue> &Results,
                                           SelectionDAG &DAG) const {
  const TargetMachine &TM = getTargetMachine();
  SDLoc dl(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Do not know how to custom type legalize this operation!");
  case ISD::INTRINSIC_W_CHAIN: {
    if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
        Intrinsic::ppc_is_decremented_ctr_nonzero)
      break;

    assert(N->getValueType(0) == MVT::i1 &&
           "Unexpected result type for CTR decrement intrinsic");
    EVT SVT = getSetCCResultType(*DAG.getContext(), N->getValueType(0));
    SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
    SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
                                 N->getOperand(1));

    Results.push_back(NewInt);
    Results.push_back(NewInt.getValue(1));
    break;
  }
  case ISD::VAARG: {
    if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
        || TM.getSubtarget<PPCSubtarget>().isPPC64())
      return;

    EVT VT = N->getValueType(0);

    if (VT == MVT::i64) {
      SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, Subtarget);

      Results.push_back(NewNode);
      Results.push_back(NewNode.getValue(1));
    }
    return;
  }
  case ISD::FP_ROUND_INREG: {
    assert(N->getValueType(0) == MVT::ppcf128);
    assert(N->getOperand(0).getValueType() == MVT::ppcf128);
    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
                             MVT::f64, N->getOperand(0),
                             DAG.getIntPtrConstant(0));
    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
                             MVT::f64, N->getOperand(0),
                             DAG.getIntPtrConstant(1));

    // Add the two halves of the long double in round-to-zero mode.
    SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);

    // We know the low half is about to be thrown away, so just use something
    // convenient.
    Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
                                  FPreg, FPreg));
    return;
  }
  case ISD::FP_TO_SINT:
    // LowerFP_TO_INT() can only handle f32 and f64.
    if (N->getOperand(0).getValueType() == MVT::ppcf128)
      return;
    Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
    return;
  }
}


//===----------------------------------------------------------------------===//
//  Other Lowering Code
//===----------------------------------------------------------------------===//

MachineBasicBlock *
PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
                                    bool is64bit, unsigned BinOpcode) const {
  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineFunction::iterator It = BB;
  ++It;

  unsigned dest = MI->getOperand(0).getReg();
  unsigned ptrA = MI->getOperand(1).getReg();
  unsigned ptrB = MI->getOperand(2).getReg();
  unsigned incr = MI->getOperand(3).getReg();
  DebugLoc dl = MI->getDebugLoc();

  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  F->insert(It, exitMBB);
  exitMBB->splice(exitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  MachineRegisterInfo &RegInfo = F->getRegInfo();
  unsigned TmpReg = (!BinOpcode) ? incr :
    RegInfo.createVirtualRegister(
       is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
                 (const TargetRegisterClass *) &PPC::GPRCRegClass);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  //  loopMBB:
  //   l[wd]arx dest, ptr
  //   add r0, dest, incr
  //   st[wd]cx. r0, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB
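  // A note on the idiom: l[wd]arx loads the value and places a reservation
  // on its address; the matching st[wd]cx. stores only if that reservation
  // is still held, recording success or failure in CR0. The bne- therefore
  // restarts the loop whenever another store to the reservation granule
  // intervened, which is what makes the read-modify-write atomic.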
  BB = loopMBB;
  BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
    .addReg(ptrA).addReg(ptrB);
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
  BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
    .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
  BuildMI(BB, dl, TII->get(PPC::BCC))
    .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;
  return BB;
}

MachineBasicBlock *
PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
                                            MachineBasicBlock *BB,
                                            bool is8bit,    // operation
                                            unsigned BinOpcode) const {
  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  // In 64 bit mode we have to use 64 bits for addresses, even though the
  // lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
  // registers without caring whether they're 32 or 64, but here we're
  // doing actual arithmetic on the addresses.
  bool is64bit = Subtarget.isPPC64();
  unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineFunction::iterator It = BB;
  ++It;

  unsigned dest = MI->getOperand(0).getReg();
  unsigned ptrA = MI->getOperand(1).getReg();
  unsigned ptrB = MI->getOperand(2).getReg();
  unsigned incr = MI->getOperand(3).getReg();
  DebugLoc dl = MI->getDebugLoc();

  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  F->insert(It, exitMBB);
  exitMBB->splice(exitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  MachineRegisterInfo &RegInfo = F->getRegInfo();
  const TargetRegisterClass *RC =
    is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
              (const TargetRegisterClass *) &PPC::GPRCRegClass;
  unsigned PtrReg = RegInfo.createVirtualRegister(RC);
  unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
  unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
  unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
  unsigned MaskReg = RegInfo.createVirtualRegister(RC);
  unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
  unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
  unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
  unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
  unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
  unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
  unsigned Ptr1Reg;
  unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  // The 4-byte load must be aligned, while a char or short may be
  // anywhere in the word.  Hence all this nasty bookkeeping code.
  //   add ptr1, ptrA, ptrB [copy if ptrA==0]
  //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
  //   xori shift, shift1, 24 [16]
  //   rlwinm ptr, ptr1, 0, 0, 29
  //   slw incr2, incr, shift
  //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
  //   slw mask, mask2, shift
  //  loopMBB:
  //   lwarx tmpDest, ptr
  //   add tmp, tmpDest, incr2
  //   andc tmp2, tmpDest, mask
  //   and tmp3, tmp, mask
  //   or tmp4, tmp3, tmp2
  //   stwcx. tmp4, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB
  //   srw dest, tmpDest, shift
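  // To make the shift arithmetic concrete: the first rlwinm multiplies the
  // low two bits of the address by 8, so shift1 is 0, 8, 16, or 24 for a
  // byte (0 or 16 for a halfword). The xori with 24 [16] then converts this
  // into the big-endian lane shift that this sequence assumes; e.g. a byte
  // at address offset 3 has shift1 == 24 and shift == 0, since it occupies
  // the least-significant byte of its aligned word.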
  if (ptrA != ZeroReg) {
    Ptr1Reg = RegInfo.createVirtualRegister(RC);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
      .addReg(ptrA).addReg(ptrB);
  } else {
    Ptr1Reg = ptrB;
  }
  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
      .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
  BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
      .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
  if (is64bit)
    BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
      .addReg(Ptr1Reg).addImm(0).addImm(61);
  else
    BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
      .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
  BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
      .addReg(incr).addReg(ShiftReg);
  if (is8bit)
    BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
  else {
    BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
    BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
      .addReg(Mask3Reg).addImm(65535);
  }
  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
      .addReg(Mask2Reg).addReg(ShiftReg);

  BB = loopMBB;
  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
    .addReg(ZeroReg).addReg(PtrReg);
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
      .addReg(Incr2Reg).addReg(TmpDestReg);
  BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
    .addReg(TmpDestReg).addReg(MaskReg);
  BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
    .addReg(TmpReg).addReg(MaskReg);
  BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
    .addReg(Tmp3Reg).addReg(Tmp2Reg);
  BuildMI(BB, dl, TII->get(PPC::STWCX))
    .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
  BuildMI(BB, dl, TII->get(PPC::BCC))
    .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;
  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg)
    .addReg(ShiftReg);
  return BB;
}

llvm::MachineBasicBlock*
PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
                                    MachineBasicBlock *MBB) const {
  DebugLoc DL = MI->getDebugLoc();
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const BasicBlock *BB = MBB->getBasicBlock();
  MachineFunction::iterator I = MBB;
  ++I;

  // Memory Reference
  MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
  MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();

  unsigned DstReg = MI->getOperand(0).getReg();
  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
  assert(RC->hasType(MVT::i32) && "Invalid destination!");
  unsigned mainDstReg = MRI.createVirtualRegister(RC);
  unsigned restoreDstReg = MRI.createVirtualRegister(RC);

  MVT PVT = getPointerTy();
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");
  // For v = setjmp(buf), we generate
  //
  // thisMBB:
  //  SjLjSetup mainMBB
  //  bl mainMBB
  //  v_restore = 1
  //  b sinkMBB
  //
  // mainMBB:
  //  buf[LabelOffset] = LR
  //  v_main = 0
  //
  // sinkMBB:
  //  v = phi(main, restore)
  //
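  // This mirrors the usual setjmp contract: the direct path through mainMBB
  // produces 0, while a longjmp that resumes at the SjLjSetup label takes
  // the restore path and produces the nonzero value 1.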

  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
  MF->insert(I, mainMBB);
  MF->insert(I, sinkMBB);

  MachineInstrBuilder MIB;

  // Transfer the remainder of BB and its successor edges to sinkMBB.
  sinkMBB->splice(sinkMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // Note that the structure of the jmp_buf used here is not compatible
  // with that used by libc, and is not designed to be. Specifically, it
  // stores only those 'reserved' registers that LLVM does not otherwise
  // understand how to spill. Also, by convention, by the time this
  // intrinsic is called, Clang has already stored the frame address in the
  // first slot of the buffer and stack address in the third. Following the
  // X86 target code, we'll store the jump address in the second slot. We also
  // need to save the TOC pointer (R2) to handle jumps between shared
  // libraries, and that will be stored in the fourth slot. The thread
  // identifier (R13) is not affected.

  // thisMBB:
  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t TOCOffset   = 3 * PVT.getStoreSize();
  const int64_t BPOffset    = 4 * PVT.getStoreSize();
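  // Combined with the slots Clang fills in, the buffer layout assumed here
  // is: buf[0] = frame address, buf[1] = resume IP (LR), buf[2] = stack
  // pointer, buf[3] = TOC pointer (R2), buf[4] = base pointer, with each
  // slot one pointer in size.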

  // Prepare the IP in a register.
  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
  unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
  unsigned BufReg = MI->getOperand(1).getReg();

  if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
    MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
            .addReg(PPC::X2)
            .addImm(TOCOffset)
            .addReg(BufReg);
    MIB.setMemRefs(MMOBegin, MMOEnd);
  }

  // Naked functions never have a base pointer, and so we use r1. For all
  // other functions, this decision must be deferred until PEI.
  unsigned BaseReg;
  if (MF->getFunction()->getAttributes().hasAttribute(
          AttributeSet::FunctionIndex, Attribute::Naked))
    BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
  else
    BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;

  MIB = BuildMI(*thisMBB, MI, DL,
                TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
          .addReg(BaseReg)
          .addImm(BPOffset)
          .addReg(BufReg);
  MIB.setMemRefs(MMOBegin, MMOEnd);

  // Setup
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
  const PPCRegisterInfo *TRI =
    static_cast<const PPCRegisterInfo*>(getTargetMachine().getRegisterInfo());
  MIB.addRegMask(TRI->getNoPreservedMask());

  BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);

  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
          .addMBB(mainMBB);
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);

  thisMBB->addSuccessor(mainMBB, /* weight */ 0);
  thisMBB->addSuccessor(sinkMBB, /* weight */ 1);

  // mainMBB:
  //  mainDstReg = 0
  MIB = BuildMI(mainMBB, DL,
    TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);

  // Store IP
  if (Subtarget.isPPC64()) {
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
            .addReg(LabelReg)
            .addImm(LabelOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
            .addReg(LabelReg)
            .addImm(LabelOffset)
            .addReg(BufReg);
  }

  MIB.setMemRefs(MMOBegin, MMOEnd);

  BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
  mainMBB->addSuccessor(sinkMBB);

  // sinkMBB:
  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
          TII->get(PPC::PHI), DstReg)
    .addReg(mainDstReg).addMBB(mainMBB)
    .addReg(restoreDstReg).addMBB(thisMBB);

  MI->eraseFromParent();
  return sinkMBB;
}

MachineBasicBlock *
PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
                                     MachineBasicBlock *MBB) const {
  DebugLoc DL = MI->getDebugLoc();
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  // Memory Reference
  MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
  MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();

  MVT PVT = getPointerTy();
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");

  const TargetRegisterClass *RC =
    (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
  unsigned Tmp = MRI.createVirtualRegister(RC);
  // Since FP is only updated here but NOT referenced, it's treated as GPR.
  unsigned FP  = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
  unsigned SP  = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
  unsigned BP  = (PVT == MVT::i64) ? PPC::X30 : PPC::R30;

  MachineInstrBuilder MIB;

  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t SPOffset    = 2 * PVT.getStoreSize();
  const int64_t TOCOffset   = 3 * PVT.getStoreSize();
  const int64_t BPOffset    = 4 * PVT.getStoreSize();

  unsigned BufReg = MI->getOperand(0).getReg();

  // Reload FP (the jumped-to function may not have had a
  // frame pointer, and if so, then its r31 will be restored
  // as necessary).
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
            .addImm(0)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
            .addImm(0)
            .addReg(BufReg);
  }
  MIB.setMemRefs(MMOBegin, MMOEnd);

  // Reload IP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
            .addImm(LabelOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
            .addImm(LabelOffset)
            .addReg(BufReg);
  }
  MIB.setMemRefs(MMOBegin, MMOEnd);

  // Reload SP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
            .addImm(SPOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
            .addImm(SPOffset)
            .addReg(BufReg);
  }
  MIB.setMemRefs(MMOBegin, MMOEnd);

  // Reload BP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
            .addImm(BPOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
            .addImm(BPOffset)
            .addReg(BufReg);
  }
  MIB.setMemRefs(MMOBegin, MMOEnd);

  // Reload TOC
  if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
            .addImm(TOCOffset)
            .addReg(BufReg);

    MIB.setMemRefs(MMOBegin, MMOEnd);
  }

  // Jump
  BuildMI(*MBB, MI, DL,
          TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
  BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));

  MI->eraseFromParent();
  return MBB;
}

MachineBasicBlock *
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                               MachineBasicBlock *BB) const {
  if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 ||
      MI->getOpcode() == PPC::EH_SjLj_SetJmp64) {
    return emitEHSjLjSetJmp(MI, BB);
  } else if (MI->getOpcode() == PPC::EH_SjLj_LongJmp32 ||
             MI->getOpcode() == PPC::EH_SjLj_LongJmp64) {
    return emitEHSjLjLongJmp(MI, BB);
  }

  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();

  // To "insert" these instructions we actually have to insert their
  // control-flow patterns.
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = BB;
  ++It;

  MachineFunction *F = BB->getParent();

  if (Subtarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
                              MI->getOpcode() == PPC::SELECT_CC_I8 ||
                              MI->getOpcode() == PPC::SELECT_I4 ||
                              MI->getOpcode() == PPC::SELECT_I8)) {
    SmallVector<MachineOperand, 2> Cond;
    if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
        MI->getOpcode() == PPC::SELECT_CC_I8)
      Cond.push_back(MI->getOperand(4));
    else
      Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
    Cond.push_back(MI->getOperand(1));

    DebugLoc dl = MI->getDebugLoc();
    const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
    TII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(),
                      Cond, MI->getOperand(2).getReg(),
                      MI->getOperand(3).getReg());
  } else if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
             MI->getOpcode() == PPC::SELECT_CC_I8 ||
             MI->getOpcode() == PPC::SELECT_CC_F4 ||
             MI->getOpcode() == PPC::SELECT_CC_F8 ||
             MI->getOpcode() == PPC::SELECT_CC_VRRC ||
             MI->getOpcode() == PPC::SELECT_I4 ||
             MI->getOpcode() == PPC::SELECT_I8 ||
             MI->getOpcode() == PPC::SELECT_F4 ||
             MI->getOpcode() == PPC::SELECT_F8 ||
             MI->getOpcode() == PPC::SELECT_VRRC) {
    // The incoming instruction knows the destination vreg to set, the
    // condition code register to branch on, the true/false values to
    // select between, and a branch opcode to use.

    //  thisMBB:
    //  ...
    //   TrueVal = ...
    //   cmpTY ccX, r1, r2
    //   bCC copy1MBB
    //   fallthrough --> copy0MBB
    MachineBasicBlock *thisMBB = BB;
    MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
    DebugLoc dl = MI->getDebugLoc();
    F->insert(It, copy0MBB);
    F->insert(It, sinkMBB);

    // Transfer the remainder of BB and its successor edges to sinkMBB.
    sinkMBB->splice(sinkMBB->begin(), BB,
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);

    // Next, add the true and fallthrough blocks as its successors.
    BB->addSuccessor(copy0MBB);
    BB->addSuccessor(sinkMBB);

    if (MI->getOpcode() == PPC::SELECT_I4 ||
        MI->getOpcode() == PPC::SELECT_I8 ||
        MI->getOpcode() == PPC::SELECT_F4 ||
        MI->getOpcode() == PPC::SELECT_F8 ||
        MI->getOpcode() == PPC::SELECT_VRRC) {
      BuildMI(BB, dl, TII->get(PPC::BC))
        .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
    } else {
      unsigned SelectPred = MI->getOperand(4).getImm();
      BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
    }

    //  copy0MBB:
    //   %FalseValue = ...
    //   # fallthrough to sinkMBB
    BB = copy0MBB;

    // Update machine-CFG edges
    BB->addSuccessor(sinkMBB);

    //  sinkMBB:
    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
    //  ...
    BB = sinkMBB;
    BuildMI(*BB, BB->begin(), dl,
            TII->get(PPC::PHI), MI->getOperand(0).getReg())
      .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
  }
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
    BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
    BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8);

  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
    BB = EmitAtomicBinary(MI, BB, false, PPC::AND);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
    BB = EmitAtomicBinary(MI, BB, true, PPC::AND8);

  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
    BB = EmitAtomicBinary(MI, BB, false, PPC::OR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
    BB = EmitAtomicBinary(MI, BB, true, PPC::OR8);

  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
    BB = EmitAtomicBinary(MI, BB, false, PPC::XOR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
    BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8);

  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
    BB = EmitAtomicBinary(MI, BB, false, PPC::NAND);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
    BB = EmitAtomicBinary(MI, BB, true, PPC::NAND8);

  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
    BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
    BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8);

  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32)
    BB = EmitAtomicBinary(MI, BB, false, 0);
  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64)
    BB = EmitAtomicBinary(MI, BB, true, 0);

  else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
           MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) {
    bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;

    unsigned dest   = MI->getOperand(0).getReg();
    unsigned ptrA   = MI->getOperand(1).getReg();
    unsigned ptrB   = MI->getOperand(2).getReg();
    unsigned oldval = MI->getOperand(3).getReg();
    unsigned newval = MI->getOperand(4).getReg();
    DebugLoc dl     = MI->getDebugLoc();

    MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
    F->insert(It, loop1MBB);
    F->insert(It, loop2MBB);
    F->insert(It, midMBB);
    F->insert(It, exitMBB);
    exitMBB->splice(exitMBB->begin(), BB,
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
    exitMBB->transferSuccessorsAndUpdatePHIs(BB);

    //  thisMBB:
    //   ...
    //   fallthrough --> loop1MBB
    BB->addSuccessor(loop1MBB);

    // loop1MBB:
    //   l[wd]arx dest, ptr
    //   cmp[wd] dest, oldval
    //   bne- midMBB
    // loop2MBB:
    //   st[wd]cx. newval, ptr
    //   bne- loop1MBB
    //   b exitBB
    // midMBB:
    //   st[wd]cx. dest, ptr
    // exitBB:
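    // Note that midMBB's st[wd]cx. stores back the value just loaded; it is
    // a no-op value-wise, but it releases the outstanding reservation on the
    // failure path before we fall through to the exit block.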
    BB = loop1MBB;
    BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
      .addReg(ptrA).addReg(ptrB);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
      .addReg(oldval).addReg(dest);
    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(midMBB);

    BB = loop2MBB;
    BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
      .addReg(newval).addReg(ptrA).addReg(ptrB);
    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
    BB->addSuccessor(loop1MBB);
    BB->addSuccessor(exitMBB);

    BB = midMBB;
    BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
      .addReg(dest).addReg(ptrA).addReg(ptrB);
    BB->addSuccessor(exitMBB);

    //  exitMBB:
    //   ...
    BB = exitMBB;
  } else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
             MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
    // We must use 64-bit registers for addresses when targeting 64-bit,
    // since we're actually doing arithmetic on them.  Other registers
    // can be 32-bit.
    bool is64bit = Subtarget.isPPC64();
    bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;

    unsigned dest   = MI->getOperand(0).getReg();
    unsigned ptrA   = MI->getOperand(1).getReg();
    unsigned ptrB   = MI->getOperand(2).getReg();
    unsigned oldval = MI->getOperand(3).getReg();
    unsigned newval = MI->getOperand(4).getReg();
    DebugLoc dl     = MI->getDebugLoc();

    MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
    F->insert(It, loop1MBB);
    F->insert(It, loop2MBB);
    F->insert(It, midMBB);
    F->insert(It, exitMBB);
    exitMBB->splice(exitMBB->begin(), BB,
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
    exitMBB->transferSuccessorsAndUpdatePHIs(BB);

    MachineRegisterInfo &RegInfo = F->getRegInfo();
    const TargetRegisterClass *RC =
      is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
                (const TargetRegisterClass *) &PPC::GPRCRegClass;
    unsigned PtrReg = RegInfo.createVirtualRegister(RC);
    unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
    unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
    unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
    unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
    unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
    unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
    unsigned MaskReg = RegInfo.createVirtualRegister(RC);
    unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
    unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
    unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
    unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
    unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
    unsigned Ptr1Reg;
    unsigned TmpReg = RegInfo.createVirtualRegister(RC);
    unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
    //  thisMBB:
    //   ...
    //   fallthrough --> loop1MBB
    BB->addSuccessor(loop1MBB);

    // The 4-byte load must be aligned, while a char or short may be
    // anywhere in the word.  Hence all this nasty bookkeeping code.
    //   add ptr1, ptrA, ptrB [copy if ptrA==0]
    //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
    //   xori shift, shift1, 24 [16]
    //   rlwinm ptr, ptr1, 0, 0, 29
    //   slw newval2, newval, shift
    //   slw oldval2, oldval, shift
    //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
    //   slw mask, mask2, shift
    //   and newval3, newval2, mask
    //   and oldval3, oldval2, mask
    // loop1MBB:
    //   lwarx tmpDest, ptr
    //   and tmp, tmpDest, mask
    //   cmpw tmp, oldval3
    //   bne- midMBB
    // loop2MBB:
    //   andc tmp2, tmpDest, mask
    //   or tmp4, tmp2, newval3
    //   stwcx. tmp4, ptr
    //   bne- loop1MBB
    //   b exitBB
    // midMBB:
    //   stwcx. tmpDest, ptr
    // exitBB:
    //   srw dest, tmpDest, shift
    if (ptrA != ZeroReg) {
      Ptr1Reg = RegInfo.createVirtualRegister(RC);
      BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
        .addReg(ptrA).addReg(ptrB);
    } else {
      Ptr1Reg = ptrB;
    }
    BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
        .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
        .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
    if (is64bit)
      BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
        .addReg(Ptr1Reg).addImm(0).addImm(61);
    else
      BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
        .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
    BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
        .addReg(newval).addReg(ShiftReg);
    BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
        .addReg(oldval).addReg(ShiftReg);
    if (is8bit)
      BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
    else {
      BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
      BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
        .addReg(Mask3Reg).addImm(65535);
    }
    BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
        .addReg(Mask2Reg).addReg(ShiftReg);
    BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
        .addReg(NewVal2Reg).addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
        .addReg(OldVal2Reg).addReg(MaskReg);

    BB = loop1MBB;
    BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
        .addReg(ZeroReg).addReg(PtrReg);
    BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
        .addReg(TmpDestReg).addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
        .addReg(TmpReg).addReg(OldVal3Reg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(midMBB);

    BB = loop2MBB;
    BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
        .addReg(TmpDestReg).addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
        .addReg(Tmp2Reg).addReg(NewVal3Reg);
    BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
        .addReg(ZeroReg).addReg(PtrReg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
    BB->addSuccessor(loop1MBB);
    BB->addSuccessor(exitMBB);

    BB = midMBB;
    BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
      .addReg(ZeroReg).addReg(PtrReg);
    BB->addSuccessor(exitMBB);

    //  exitMBB:
    //   ...
    BB = exitMBB;
    BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpReg)
      .addReg(ShiftReg);
  } else if (MI->getOpcode() == PPC::FADDrtz) {
    // This pseudo performs an FADD with rounding mode temporarily forced
    // to round-to-zero.  We emit this via custom inserter since the FPSCR
    // is not modeled at the SelectionDAG level.
    unsigned Dest = MI->getOperand(0).getReg();
    unsigned Src1 = MI->getOperand(1).getReg();
    unsigned Src2 = MI->getOperand(2).getReg();
    DebugLoc dl   = MI->getDebugLoc();

    MachineRegisterInfo &RegInfo = F->getRegInfo();
    unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);

    // Save FPSCR value.
    BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);

    // Set rounding mode to round-to-zero.
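    // (FPSCR bits 30 and 31 form the RN rounding-control field; setting bit
    // 31 and clearing bit 30 selects RN == 0b01, round toward zero.)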
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);

    // Perform addition.
    BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);

    // Restore FPSCR value.
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg);
  } else if (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT ||
             MI->getOpcode() == PPC::ANDIo_1_GT_BIT ||
             MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
             MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) {
    unsigned Opcode = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
                       MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) ?
                      PPC::ANDIo8 : PPC::ANDIo;
    bool isEQ = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT ||
                 MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8);

    MachineRegisterInfo &RegInfo = F->getRegInfo();
    unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ?
                                                  &PPC::GPRCRegClass :
                                                  &PPC::G8RCRegClass);

    DebugLoc dl   = MI->getDebugLoc();
    BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
      .addReg(MI->getOperand(1).getReg()).addImm(1);
    BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
            MI->getOperand(0).getReg())
      .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
  } else {
    llvm_unreachable("Unexpected instr type to insert");
  }

  MI->eraseFromParent();   // The pseudo instruction is gone now.
  return BB;
}

//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//

SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op,
                                               DAGCombinerInfo &DCI) const {
  if (DCI.isAfterLegalizeVectorOps())
    return SDValue();

  EVT VT = Op.getValueType();

  if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
      (VT == MVT::f64 && Subtarget.hasFRE())  ||
      (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
      (VT == MVT::v2f64 && Subtarget.hasVSX())) {

    // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
    // For the reciprocal, we need to find the zero of the function:
    //   F(X) = A X - 1 [which has a zero at X = 1/A]
    //     =>
    //   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
    //     does not require additional intermediate precision]

    // Convergence is quadratic, so we essentially double the number of digits
    // correct after every iteration. The minimum architected relative
    // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
    // 23 digits and double has 52 digits.
    int Iterations = Subtarget.hasRecipPrec() ? 1 : 3;
    if (VT.getScalarType() == MVT::f64)
      ++Iterations;
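    // Concretely: from the baseline 2^-5 estimate, three doublings give
    // 5 -> 10 -> 20 -> 40 good bits, enough for f32's 23-bit fraction, and a
    // fourth iteration gives 80 bits for f64's 52. With the precise 2^-14
    // estimate, one iteration reaches 28 bits for f32 and two reach 56 bits
    // for f64, matching the counts chosen above.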

    SelectionDAG &DAG = DCI.DAG;
    SDLoc dl(Op);

    SDValue FPOne =
      DAG.getConstantFP(1.0, VT.getScalarType());
    if (VT.isVector()) {
      assert(VT.getVectorNumElements() == 4 &&
             "Unknown vector type");
      FPOne = DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
                          FPOne, FPOne, FPOne, FPOne);
    }

    SDValue Est = DAG.getNode(PPCISD::FRE, dl, VT, Op);
    DCI.AddToWorklist(Est.getNode());

    // Newton iterations: Est = Est + Est (1 - Arg * Est)
    for (int i = 0; i < Iterations; ++i) {
      SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Op, Est);
      DCI.AddToWorklist(NewEst.getNode());

      NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPOne, NewEst);
      DCI.AddToWorklist(NewEst.getNode());

      NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst);
      DCI.AddToWorklist(NewEst.getNode());

      Est = DAG.getNode(ISD::FADD, dl, VT, Est, NewEst);
      DCI.AddToWorklist(Est.getNode());
    }

    return Est;
  }

  return SDValue();
}

SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
                                             DAGCombinerInfo &DCI) const {
  if (DCI.isAfterLegalizeVectorOps())
    return SDValue();

  EVT VT = Op.getValueType();

  if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
      (VT == MVT::f64 && Subtarget.hasFRSQRTE())  ||
      (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
      (VT == MVT::v2f64 && Subtarget.hasVSX())) {

    // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
    // For the reciprocal sqrt, we need to find the zero of the function:
    //   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
    //     =>
    //   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
    // As a result, we precompute A/2 prior to the iteration loop.
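    // (Deriving the update: F'(X) = -2/X^3, so
    //    X_{i+1} = X_i - F(X_i)/F'(X_i)
    //            = X_i + (X_i/2) (1 - A X_i^2)
    //            = X_i (1.5 - (A/2) X_i^2),
    // which is the Est * (1.5 - HalfArg * Est * Est) form used below.)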

    // Convergence is quadratic, so we essentially double the number of digits
    // correct after every iteration. The minimum architected relative
    // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
    // 23 digits and double has 52 digits.
    int Iterations = Subtarget.hasRecipPrec() ? 1 : 3;
    if (VT.getScalarType() == MVT::f64)
      ++Iterations;

    SelectionDAG &DAG = DCI.DAG;
    SDLoc dl(Op);

    SDValue FPThreeHalves =
      DAG.getConstantFP(1.5, VT.getScalarType());
    if (VT.isVector()) {
      assert(VT.getVectorNumElements() == 4 &&
             "Unknown vector type");
      FPThreeHalves = DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
                                  FPThreeHalves, FPThreeHalves,
                                  FPThreeHalves, FPThreeHalves);
    }

    SDValue Est = DAG.getNode(PPCISD::FRSQRTE, dl, VT, Op);
    DCI.AddToWorklist(Est.getNode());

    // We now need 0.5*Arg which we can write as (1.5*Arg - Arg) so that
    // this entire sequence requires only one FP constant.
    SDValue HalfArg = DAG.getNode(ISD::FMUL, dl, VT, FPThreeHalves, Op);
    DCI.AddToWorklist(HalfArg.getNode());

    HalfArg = DAG.getNode(ISD::FSUB, dl, VT, HalfArg, Op);
    DCI.AddToWorklist(HalfArg.getNode());

    // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
    for (int i = 0; i < Iterations; ++i) {
      SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, Est);
      DCI.AddToWorklist(NewEst.getNode());

      NewEst = DAG.getNode(ISD::FMUL, dl, VT, HalfArg, NewEst);
      DCI.AddToWorklist(NewEst.getNode());

      NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPThreeHalves, NewEst);
      DCI.AddToWorklist(NewEst.getNode());

      Est = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst);
      DCI.AddToWorklist(Est.getNode());
    }

    return Est;
  }

  return SDValue();
}

// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
// not enforce equality of the chain operands.
static bool isConsecutiveLS(LSBaseSDNode *LS, LSBaseSDNode *Base,
                            unsigned Bytes, int Dist,
                            SelectionDAG &DAG) {
  EVT VT = LS->getMemoryVT();
  if (VT.getSizeInBits() / 8 != Bytes)
    return false;

  SDValue Loc = LS->getBasePtr();
  SDValue BaseLoc = Base->getBasePtr();
  if (Loc.getOpcode() == ISD::FrameIndex) {
    if (BaseLoc.getOpcode() != ISD::FrameIndex)
      return false;
    const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
    int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
    int FS  = MFI->getObjectSize(FI);
    int BFS = MFI->getObjectSize(BFI);
    if (FS != BFS || FS != (int)Bytes) return false;
    return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
  }

  // Handle X+C
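  // (e.g. with Bytes == 4 and Dist == 1, a load based at BaseLoc and one at
  // BaseLoc + 4 are recognized as consecutive here).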
  if (DAG.isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc &&
      cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes)
    return true;

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  const GlobalValue *GV1 = nullptr;
  const GlobalValue *GV2 = nullptr;
  int64_t Offset1 = 0;
  int64_t Offset2 = 0;
  bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
  bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
  if (isGA1 && isGA2 && GV1 == GV2)
    return Offset1 == (Offset2 + Dist*Bytes);
  return false;
}

// Return true if there is a nearby consecutive load to the one provided
// (regardless of alignment). We search up and down the chain, looking through
// token factors and other loads (but nothing else). As a result, a true
// result indicates that it is safe to create a new consecutive load adjacent
// to the load provided.
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
  SDValue Chain = LD->getChain();
  EVT VT = LD->getMemoryVT();

  SmallSet<SDNode *, 16> LoadRoots;
  SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
  SmallSet<SDNode *, 16> Visited;

  // First, search up the chain, branching to follow all token-factor operands.
  // If we find a consecutive load, then we're done; otherwise, record all
  // nodes just above the top-level loads and token factors.
  while (!Queue.empty()) {
    SDNode *ChainNext = Queue.pop_back_val();
    if (!Visited.insert(ChainNext))
      continue;

    if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(ChainNext)) {
      if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
        return true;

      if (!Visited.count(ChainLD->getChain().getNode()))
        Queue.push_back(ChainLD->getChain().getNode());
    } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
      for (const SDUse &O : ChainNext->ops())
        if (!Visited.count(O.getNode()))
          Queue.push_back(O.getNode());
    } else
      LoadRoots.insert(ChainNext);
  }

  // Second, search down the chain, starting from the top-level nodes recorded
  // in the first phase. These top-level nodes are the nodes just above all
  // loads and token factors. Starting with their uses, recursively look through
  // all loads (just the chain uses) and token factors to find a consecutive
  // load.
  Visited.clear();
  Queue.clear();

  for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
       IE = LoadRoots.end(); I != IE; ++I) {
    Queue.push_back(*I);

    while (!Queue.empty()) {
      SDNode *LoadRoot = Queue.pop_back_val();
      if (!Visited.insert(LoadRoot))
        continue;

      if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(LoadRoot))
        if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
          return true;

      for (SDNode::use_iterator UI = LoadRoot->use_begin(),
           UE = LoadRoot->use_end(); UI != UE; ++UI)
        if (((isa<LoadSDNode>(*UI) &&
            cast<LoadSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
            UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
          Queue.push_back(*UI);
    }
  }

  return false;
}

SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  assert(Subtarget.useCRBits() &&
         "Expecting to be tracking CR bits");
  // If we're tracking CR bits, we need to be careful that we don't have:
  //   trunc(binary-ops(zext(x), zext(y)))
  // or
  //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...))
  // such that we're unnecessarily moving things into GPRs when it would be
  // better to keep them in CR bits.

  // Note that trunc here can be an actual i1 trunc, or can be the effective
  // truncation that comes from a setcc or select_cc.
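  // For example, trunc(xor(zext(a), zext(b))) with i1 inputs a and b can be
  // computed as xor(a, b) directly in a CR bit; the code below walks the
  // feeding operations to confirm that every input is such an extension (or
  // a constant) before rewriting.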
  if (N->getOpcode() == ISD::TRUNCATE &&
      N->getValueType(0) != MVT::i1)
    return SDValue();

  if (N->getOperand(0).getValueType() != MVT::i32 &&
      N->getOperand(0).getValueType() != MVT::i64)
    return SDValue();

  if (N->getOpcode() == ISD::SETCC ||
      N->getOpcode() == ISD::SELECT_CC) {
    // If we're looking at a comparison, then we need to make sure that the
    // high bits (all except for the first) don't affect the result.
    ISD::CondCode CC =
      cast<CondCodeSDNode>(N->getOperand(
        N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
    unsigned OpBits = N->getOperand(0).getValueSizeInBits();

    if (ISD::isSignedIntSetCC(CC)) {
      if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
          DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
        return SDValue();
    } else if (ISD::isUnsignedIntSetCC(CC)) {
      if (!DAG.MaskedValueIsZero(N->getOperand(0),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
          !DAG.MaskedValueIsZero(N->getOperand(1),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)))
        return SDValue();
    } else {
      // This is neither a signed nor an unsigned comparison; just make sure
      // that the high bits are equal.
      APInt Op1Zero, Op1One;
      APInt Op2Zero, Op2One;
      DAG.computeKnownBits(N->getOperand(0), Op1Zero, Op1One);
      DAG.computeKnownBits(N->getOperand(1), Op2Zero, Op2One);

      // We don't really care about what is known about the first bit (if
      // anything), so clear it in all masks prior to comparing them.
      Op1Zero.clearBit(0); Op1One.clearBit(0);
      Op2Zero.clearBit(0); Op2One.clearBit(0);

      if (Op1Zero != Op2Zero || Op1One != Op2One)
        return SDValue();
    }
  }

  // We now know that the higher-order bits are irrelevant; we just need to
  // make sure that all of the intermediate operations are bit operations and
  // that all inputs are extensions.
7441  if (N->getOperand(0).getOpcode() != ISD::AND &&
7442      N->getOperand(0).getOpcode() != ISD::OR  &&
7443      N->getOperand(0).getOpcode() != ISD::XOR &&
7444      N->getOperand(0).getOpcode() != ISD::SELECT &&
7445      N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
7446      N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
7447      N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
7448      N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
7449      N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
7450    return SDValue();
7451
7452  if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
7453      N->getOperand(1).getOpcode() != ISD::AND &&
7454      N->getOperand(1).getOpcode() != ISD::OR  &&
7455      N->getOperand(1).getOpcode() != ISD::XOR &&
7456      N->getOperand(1).getOpcode() != ISD::SELECT &&
7457      N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
7458      N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
7459      N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
7460      N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
7461      N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
7462    return SDValue();
7463
7464  SmallVector<SDValue, 4> Inputs;
7465  SmallVector<SDValue, 8> BinOps, PromOps;
7466  SmallPtrSet<SDNode *, 16> Visited;
7467
7468  for (unsigned i = 0; i < 2; ++i) {
7469    if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
7470          N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
7471          N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
7472          N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
7473        isa<ConstantSDNode>(N->getOperand(i)))
7474      Inputs.push_back(N->getOperand(i));
7475    else
7476      BinOps.push_back(N->getOperand(i));
7477
7478    if (N->getOpcode() == ISD::TRUNCATE)
7479      break;
7480  }
7481
7482  // Visit all inputs, collect all binary operations (and, or, xor and
7483  // select) that are all fed by extensions.
7484  while (!BinOps.empty()) {
7485    SDValue BinOp = BinOps.back();
7486    BinOps.pop_back();
7487
7488    if (!Visited.insert(BinOp.getNode()))
7489      continue;
7490
7491    PromOps.push_back(BinOp);
7492
7493    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
7494      // The condition of the select is not promoted.
7495      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
7496        continue;
7497      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
7498        continue;
7499
7500      if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
7501            BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
7502            BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
7503           BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
7504          isa<ConstantSDNode>(BinOp.getOperand(i))) {
7505        Inputs.push_back(BinOp.getOperand(i));
7506      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
7507                 BinOp.getOperand(i).getOpcode() == ISD::OR  ||
7508                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
7509                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
7510                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
7511                 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
7512                 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
7513                 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
7514                 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
7515        BinOps.push_back(BinOp.getOperand(i));
7516      } else {
7517        // We have an input that is not an extension or another binary
7518        // operation; we'll abort this transformation.
7519        return SDValue();
7520      }
7521    }
7522  }
7523
7524  // Make sure that this is a self-contained cluster of operations (which
7525  // is not quite the same thing as saying that everything has only one
7526  // use).
7527  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
7528    if (isa<ConstantSDNode>(Inputs[i]))
7529      continue;
7530
7531    for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
7532                              UE = Inputs[i].getNode()->use_end();
7533         UI != UE; ++UI) {
7534      SDNode *User = *UI;
7535      if (User != N && !Visited.count(User))
7536        return SDValue();
7537
7538      // Make sure that we're not going to promote the non-output-value
7539      // operand(s) or SELECT or SELECT_CC.
7540      // FIXME: Although we could sometimes handle this, and it does occur in
7541      // practice that one of the condition inputs to the select is also one of
7542      // the outputs, we currently can't deal with this.
7543      if (User->getOpcode() == ISD::SELECT) {
7544        if (User->getOperand(0) == Inputs[i])
7545          return SDValue();
7546      } else if (User->getOpcode() == ISD::SELECT_CC) {
7547        if (User->getOperand(0) == Inputs[i] ||
7548            User->getOperand(1) == Inputs[i])
7549          return SDValue();
7550      }
7551    }
7552  }
7553
7554  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
7555    for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
7556                              UE = PromOps[i].getNode()->use_end();
7557         UI != UE; ++UI) {
7558      SDNode *User = *UI;
7559      if (User != N && !Visited.count(User))
7560        return SDValue();
7561
7562      // Make sure that we're not going to promote the non-output-value
7563      // operand(s) or SELECT or SELECT_CC.
7564      // FIXME: Although we could sometimes handle this, and it does occur in
7565      // practice that one of the condition inputs to the select is also one of
7566      // the outputs, we currently can't deal with this.
7567      if (User->getOpcode() == ISD::SELECT) {
7568        if (User->getOperand(0) == PromOps[i])
7569          return SDValue();
7570      } else if (User->getOpcode() == ISD::SELECT_CC) {
7571        if (User->getOperand(0) == PromOps[i] ||
7572            User->getOperand(1) == PromOps[i])
7573          return SDValue();
7574      }
7575    }
7576  }
7577
7578  // Replace all inputs with the extension operand.
7579  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
7580    // Constants may have users outside the cluster of to-be-promoted nodes,
7581    // and so we need to replace those as we do the promotions.
7582    if (!isa<ConstantSDNode>(Inputs[i]))
7583      DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
7586  }
7587
7588  // Replace all operations (these are all the same, but have a different
7589  // (i1) return type). DAG.getNode will validate that the types of
7590  // a binary operator match, so go through the list in reverse so that
7591  // we've likely promoted both operands first. Any intermediate truncations or
7592  // extensions disappear.
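  // For example, (trunc (or (zext i1 %a to i32), (zext i1 %b to i32)) to i1)
  // ends up as (or i1 %a, %b) once both extension inputs have been replaced.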
7593  while (!PromOps.empty()) {
7594    SDValue PromOp = PromOps.back();
7595    PromOps.pop_back();
7596
7597    if (PromOp.getOpcode() == ISD::TRUNCATE ||
7598        PromOp.getOpcode() == ISD::SIGN_EXTEND ||
7599        PromOp.getOpcode() == ISD::ZERO_EXTEND ||
7600        PromOp.getOpcode() == ISD::ANY_EXTEND) {
7601      if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
7602          PromOp.getOperand(0).getValueType() != MVT::i1) {
7603        // The operand is not yet ready (see comment below).
7604        PromOps.insert(PromOps.begin(), PromOp);
7605        continue;
7606      }
7607
7608      SDValue RepValue = PromOp.getOperand(0);
7609      if (isa<ConstantSDNode>(RepValue))
7610        RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
7611
7612      DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
7613      continue;
7614    }
7615
7616    unsigned C;
7617    switch (PromOp.getOpcode()) {
7618    default:             C = 0; break;
7619    case ISD::SELECT:    C = 1; break;
7620    case ISD::SELECT_CC: C = 2; break;
7621    }
7622
7623    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
7624         PromOp.getOperand(C).getValueType() != MVT::i1) ||
7625        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
7626         PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
7627      // The to-be-promoted operands of this node have not yet been
7628      // promoted (this should be rare because we're going through the
7629      // list backward, but if one of the operands has several users in
7630      // this cluster of to-be-promoted nodes, it is possible).
7631      PromOps.insert(PromOps.begin(), PromOp);
7632      continue;
7633    }
7634
7635    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
7636                                PromOp.getNode()->op_end());
7637
7638    // If there are any constant inputs, make sure they're replaced now.
7639    for (unsigned i = 0; i < 2; ++i)
7640      if (isa<ConstantSDNode>(Ops[C+i]))
7641        Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
7642
7643    DAG.ReplaceAllUsesOfValueWith(PromOp,
7644      DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
7645  }
7646
7647  // Now we're left with the initial truncation itself.
7648  if (N->getOpcode() == ISD::TRUNCATE)
7649    return N->getOperand(0);
7650
7651  // Otherwise, this is a comparison. The operands to be compared have just
7652  // changed type (to i1), but everything else is the same.
7653  return SDValue(N, 0);
7654}
7655
7656SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
7657                                                  DAGCombinerInfo &DCI) const {
7658  SelectionDAG &DAG = DCI.DAG;
7659  SDLoc dl(N);
7660
7661  // If we're tracking CR bits, we need to be careful that we don't have:
7662  //   zext(binary-ops(trunc(x), trunc(y)))
7663  // or
7664  //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...))
7665  // such that we're unnecessarily moving things into CR bits that can more
7666  // efficiently stay in GPRs. Note that if we're not certain that the high
7667  // bits are set as required by the final extension, we still may need to do
7668  // some masking to get the proper behavior.
7669
7670  // This same functionality is important on PPC64 when dealing with
7671  // 32-to-64-bit extensions; these occur often when 32-bit values are used as
7672  // the return values of functions. Because it is so similar, it is handled
7673  // here as well.
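  // For example, on PPC64,
  //   (zext (and (trunc i64 %x to i32), (trunc i64 %y to i32)) to i64)
  // can become (and i64 %x, %y) (possibly followed by a masking operation),
  // keeping the AND in a 64-bit GPR.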
7674
7675  if (N->getValueType(0) != MVT::i32 &&
7676      N->getValueType(0) != MVT::i64)
7677    return SDValue();
7678
7679  if (!((N->getOperand(0).getValueType() == MVT::i1 &&
7680        Subtarget.useCRBits()) ||
7681       (N->getOperand(0).getValueType() == MVT::i32 &&
7682        Subtarget.isPPC64())))
7683    return SDValue();
7684
7685  if (N->getOperand(0).getOpcode() != ISD::AND &&
7686      N->getOperand(0).getOpcode() != ISD::OR  &&
7687      N->getOperand(0).getOpcode() != ISD::XOR &&
7688      N->getOperand(0).getOpcode() != ISD::SELECT &&
7689      N->getOperand(0).getOpcode() != ISD::SELECT_CC)
7690    return SDValue();
7691
7692  SmallVector<SDValue, 4> Inputs;
7693  SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
7694  SmallPtrSet<SDNode *, 16> Visited;
7695
7696  // Visit all inputs, collect all binary operations (and, or, xor, select,
7697  // and select_cc) that are all fed by truncations.
7698  while (!BinOps.empty()) {
7699    SDValue BinOp = BinOps.back();
7700    BinOps.pop_back();
7701
7702    if (!Visited.insert(BinOp.getNode()))
7703      continue;
7704
7705    PromOps.push_back(BinOp);
7706
7707    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
7708      // The condition of the select is not promoted.
7709      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
7710        continue;
7711      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
7712        continue;
7713
7714      if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
7715          isa<ConstantSDNode>(BinOp.getOperand(i))) {
7716        Inputs.push_back(BinOp.getOperand(i));
7717      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
7718                 BinOp.getOperand(i).getOpcode() == ISD::OR  ||
7719                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
7720                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
7721                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
7722        BinOps.push_back(BinOp.getOperand(i));
7723      } else {
7724        // We have an input that is not a truncation or another binary
7725        // operation; we'll abort this transformation.
7726        return SDValue();
7727      }
7728    }
7729  }
7730
7731  // Make sure that this is a self-contained cluster of operations (which
7732  // is not quite the same thing as saying that everything has only one
7733  // use).
7734  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
7735    if (isa<ConstantSDNode>(Inputs[i]))
7736      continue;
7737
7738    for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
7739                              UE = Inputs[i].getNode()->use_end();
7740         UI != UE; ++UI) {
7741      SDNode *User = *UI;
7742      if (User != N && !Visited.count(User))
7743        return SDValue();
7744
7745      // Make sure that we're not going to promote the non-output-value
7746      // operand(s) of SELECT or SELECT_CC.
7747      // FIXME: Although we could sometimes handle this, and it does occur in
7748      // practice that one of the condition inputs to the select is also one of
7749      // the outputs, we currently can't deal with this.
7750      if (User->getOpcode() == ISD::SELECT) {
7751        if (User->getOperand(0) == Inputs[i])
7752          return SDValue();
7753      } else if (User->getOpcode() == ISD::SELECT_CC) {
7754        if (User->getOperand(0) == Inputs[i] ||
7755            User->getOperand(1) == Inputs[i])
7756          return SDValue();
7757      }
7758    }
7759  }
7760
7761  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
7762    for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
7763                              UE = PromOps[i].getNode()->use_end();
7764         UI != UE; ++UI) {
7765      SDNode *User = *UI;
7766      if (User != N && !Visited.count(User))
7767        return SDValue();
7768
7769      // Make sure that we're not going to promote the non-output-value
7770      // operand(s) of SELECT or SELECT_CC.
7771      // FIXME: Although we could sometimes handle this, and it does occur in
7772      // practice that one of the condition inputs to the select is also one of
7773      // the outputs, we currently can't deal with this.
7774      if (User->getOpcode() == ISD::SELECT) {
7775        if (User->getOperand(0) == PromOps[i])
7776          return SDValue();
7777      } else if (User->getOpcode() == ISD::SELECT_CC) {
7778        if (User->getOperand(0) == PromOps[i] ||
7779            User->getOperand(1) == PromOps[i])
7780          return SDValue();
7781      }
7782    }
7783  }
7784
7785  unsigned PromBits = N->getOperand(0).getValueSizeInBits();
7786  bool ReallyNeedsExt = false;
7787  if (N->getOpcode() != ISD::ANY_EXTEND) {
7788    // If any of the inputs is not already sign/zero extended as required,
7789    // then we'll still need to do that at the end.
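    // For example, when zero-extending i32 to i64 on PPC64, an input
    // (trunc i64 %x to i32) avoids the final masking only if the top 32 bits
    // of %x are already known to be zero.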
7790    for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
7791      if (isa<ConstantSDNode>(Inputs[i]))
7792        continue;
7793
7794      unsigned OpBits =
7795        Inputs[i].getOperand(0).getValueSizeInBits();
7796      assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
7797
7798      if ((N->getOpcode() == ISD::ZERO_EXTEND &&
7799           !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
7800                                  APInt::getHighBitsSet(OpBits,
7801                                                        OpBits-PromBits))) ||
7802          (N->getOpcode() == ISD::SIGN_EXTEND &&
7803           DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
7804             (OpBits-(PromBits-1)))) {
7805        ReallyNeedsExt = true;
7806        break;
7807      }
7808    }
7809  }
7810
7811  // Replace all inputs, either with the truncation operand, or a
7812  // truncation or extension to the final output type.
7813  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
7814    // Constant inputs need to be replaced with the to-be-promoted nodes that
7815    // use them because they might have users outside of the cluster of
7816    // promoted nodes.
7817    if (isa<ConstantSDNode>(Inputs[i]))
7818      continue;
7819
7820    SDValue InSrc = Inputs[i].getOperand(0);
7821    if (Inputs[i].getValueType() == N->getValueType(0))
7822      DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
7823    else if (N->getOpcode() == ISD::SIGN_EXTEND)
7824      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
7825        DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
7826    else if (N->getOpcode() == ISD::ZERO_EXTEND)
7827      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
7828        DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
7829    else
7830      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
7831        DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
7832  }
7833
7834  // Replace all operations (these are all the same, but have a different
7835  // (promoted) return type). DAG.getNode will validate that the types of
7836  // a binary operator match, so go through the list in reverse so that
7837  // we've likely promoted both operands first.
7838  while (!PromOps.empty()) {
7839    SDValue PromOp = PromOps.back();
7840    PromOps.pop_back();
7841
7842    unsigned C;
7843    switch (PromOp.getOpcode()) {
7844    default:             C = 0; break;
7845    case ISD::SELECT:    C = 1; break;
7846    case ISD::SELECT_CC: C = 2; break;
7847    }
7848
7849    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
7850         PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
7851        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
7852         PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
7853      // The to-be-promoted operands of this node have not yet been
7854      // promoted (this should be rare because we're going through the
7855      // list backward, but if one of the operands has several users in
7856      // this cluster of to-be-promoted nodes, it is possible).
7857      PromOps.insert(PromOps.begin(), PromOp);
7858      continue;
7859    }
7860
7861    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
7862                                PromOp.getNode()->op_end());
7863
7864    // If this node has constant inputs, then they'll need to be promoted here.
7865    for (unsigned i = 0; i < 2; ++i) {
7866      if (!isa<ConstantSDNode>(Ops[C+i]))
7867        continue;
7868      if (Ops[C+i].getValueType() == N->getValueType(0))
7869        continue;
7870
7871      if (N->getOpcode() == ISD::SIGN_EXTEND)
7872        Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
7873      else if (N->getOpcode() == ISD::ZERO_EXTEND)
7874        Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
7875      else
7876        Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
7877    }
7878
7879    DAG.ReplaceAllUsesOfValueWith(PromOp,
7880      DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
7881  }
7882
7883  // Now we're left with the initial extension itself.
7884  if (!ReallyNeedsExt)
7885    return N->getOperand(0);
7886
7887  // To zero extend, just mask off everything except for the low PromBits
7888  // bits (just the first bit in the i1 case).
7889  if (N->getOpcode() == ISD::ZERO_EXTEND)
7890    return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
7891                       DAG.getConstant(APInt::getLowBitsSet(
7892                                         N->getValueSizeInBits(0), PromBits),
7893                                       N->getValueType(0)));
7894
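  // To sign extend, shift the promoted value left so that its top meaningful
  // bit lands in the sign position, then shift it back right arithmetically.
  // For an i1 value promoted into an i32, PromBits == 1 and the shift amount
  // is 31, i.e. (sra (shl x, 31), 31).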
7895  assert(N->getOpcode() == ISD::SIGN_EXTEND &&
7896         "Invalid extension type");
7897  EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0));
7898  SDValue ShiftCst =
7899    DAG.getConstant(N->getValueSizeInBits(0)-PromBits, ShiftAmountTy);
7900  return DAG.getNode(ISD::SRA, dl, N->getValueType(0),
7901                     DAG.getNode(ISD::SHL, dl, N->getValueType(0),
7902                                 N->getOperand(0), ShiftCst), ShiftCst);
7903}
7904
7905SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
7906                                             DAGCombinerInfo &DCI) const {
7907  const TargetMachine &TM = getTargetMachine();
7908  SelectionDAG &DAG = DCI.DAG;
7909  SDLoc dl(N);
7910  switch (N->getOpcode()) {
7911  default: break;
7912  case PPCISD::SHL:
7913    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
7914      if (C->isNullValue())   // 0 << V -> 0.
7915        return N->getOperand(0);
7916    }
7917    break;
7918  case PPCISD::SRL:
7919    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
7920      if (C->isNullValue())   // 0 >>u V -> 0.
7921        return N->getOperand(0);
7922    }
7923    break;
7924  case PPCISD::SRA:
7925    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
7926      if (C->isNullValue() ||   //  0 >>s V -> 0.
7927          C->isAllOnesValue())    // -1 >>s V -> -1.
7928        return N->getOperand(0);
7929    }
7930    break;
7931  case ISD::SIGN_EXTEND:
7932  case ISD::ZERO_EXTEND:
7933  case ISD::ANY_EXTEND:
7934    return DAGCombineExtBoolTrunc(N, DCI);
7935  case ISD::TRUNCATE:
7936  case ISD::SETCC:
7937  case ISD::SELECT_CC:
7938    return DAGCombineTruncBoolExt(N, DCI);
7939  case ISD::FDIV: {
7940    assert(TM.Options.UnsafeFPMath &&
7941           "Reciprocal estimates require UnsafeFPMath");
7942
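    // If the divisor is (or contains) a square root, compute a/sqrt(b) as
    // a * rsqrt(b) using the refined reciprocal-square-root estimate. The
    // FP_EXTEND and FP_ROUND cases below handle square roots computed at a
    // different precision than the division itself.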
7943    if (N->getOperand(1).getOpcode() == ISD::FSQRT) {
7944      SDValue RV =
7945        DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0), DCI);
7946      if (RV.getNode()) {
7947        DCI.AddToWorklist(RV.getNode());
7948        return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
7949                           N->getOperand(0), RV);
7950      }
7951    } else if (N->getOperand(1).getOpcode() == ISD::FP_EXTEND &&
7952               N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) {
7953      SDValue RV =
7954        DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
7955                                 DCI);
7956      if (RV.getNode()) {
7957        DCI.AddToWorklist(RV.getNode());
7958        RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N->getOperand(1)),
7959                         N->getValueType(0), RV);
7960        DCI.AddToWorklist(RV.getNode());
7961        return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
7962                           N->getOperand(0), RV);
7963      }
7964    } else if (N->getOperand(1).getOpcode() == ISD::FP_ROUND &&
7965               N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) {
7966      SDValue RV =
7967        DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
7968                                 DCI);
7969      if (RV.getNode()) {
7970        DCI.AddToWorklist(RV.getNode());
7971        RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N->getOperand(1)),
7972                         N->getValueType(0), RV,
7973                         N->getOperand(1).getOperand(1));
7974        DCI.AddToWorklist(RV.getNode());
7975        return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
7976                           N->getOperand(0), RV);
7977      }
7978    }
7979
7980    SDValue RV = DAGCombineFastRecip(N->getOperand(1), DCI);
7981    if (RV.getNode()) {
7982      DCI.AddToWorklist(RV.getNode());
7983      return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
7984                         N->getOperand(0), RV);
7985    }
7986
7987    }
7988    break;
7989  case ISD::FSQRT: {
7990    assert(TM.Options.UnsafeFPMath &&
7991           "Reciprocal estimates require UnsafeFPMath");
7992
7993    // Compute this as 1/(1/sqrt(X)), which is the reciprocal of the
7994    // reciprocal sqrt.
7995    SDValue RV = DAGCombineFastRecipFSQRT(N->getOperand(0), DCI);
7996    if (RV.getNode()) {
7997      DCI.AddToWorklist(RV.getNode());
7998      RV = DAGCombineFastRecip(RV, DCI);
7999      if (RV.getNode()) {
8000        // Unfortunately, RV is now NaN if the input was exactly 0. Select out
8001        // this case and force the answer to 0.
8002
8003        EVT VT = RV.getValueType();
8004
8005        SDValue Zero = DAG.getConstantFP(0.0, VT.getScalarType());
8006        if (VT.isVector()) {
8007          assert(VT.getVectorNumElements() == 4 && "Unknown vector type");
8008          Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Zero, Zero, Zero, Zero);
8009        }
8010
8011        SDValue ZeroCmp =
8012          DAG.getSetCC(dl, getSetCCResultType(*DAG.getContext(), VT),
8013                       N->getOperand(0), Zero, ISD::SETEQ);
8014        DCI.AddToWorklist(ZeroCmp.getNode());
8015        DCI.AddToWorklist(RV.getNode());
8016
8017        RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, dl, VT,
8018                         ZeroCmp, Zero, RV);
8019        return RV;
8020      }
8021    }
8022
8023    }
8024    break;
8025  case ISD::SINT_TO_FP:
8026    if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
8027      if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
8028        // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
8029        // We allow the src/dst to be either f32/f64, but the intermediate
8030        // type must be i64.
8031        if (N->getOperand(0).getValueType() == MVT::i64 &&
8032            N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) {
8033          SDValue Val = N->getOperand(0).getOperand(0);
8034          if (Val.getValueType() == MVT::f32) {
8035            Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
8036            DCI.AddToWorklist(Val.getNode());
8037          }
8038
8039          Val = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Val);
8040          DCI.AddToWorklist(Val.getNode());
8041          Val = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Val);
8042          DCI.AddToWorklist(Val.getNode());
8043          if (N->getValueType(0) == MVT::f32) {
8044            Val = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Val,
8045                              DAG.getIntPtrConstant(0));
8046            DCI.AddToWorklist(Val.getNode());
8047          }
8048          return Val;
8049        } else if (N->getOperand(0).getValueType() == MVT::i32) {
8050          // If the intermediate type is i32, we could avoid the load/store
8051          // here too, but that case is not yet handled.
8052        }
8053      }
8054    }
8055    break;
8056  case ISD::STORE:
8057    // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
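    // fctiwz leaves the converted 32-bit integer in the low word of an FPR,
    // and stfiwx stores that word directly, so the value never needs to make
    // a round trip through memory into a GPR before being stored.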
8058    if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
8059        !cast<StoreSDNode>(N)->isTruncatingStore() &&
8060        N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
8061        N->getOperand(1).getValueType() == MVT::i32 &&
8062        N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
8063      SDValue Val = N->getOperand(1).getOperand(0);
8064      if (Val.getValueType() == MVT::f32) {
8065        Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
8066        DCI.AddToWorklist(Val.getNode());
8067      }
8068      Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
8069      DCI.AddToWorklist(Val.getNode());
8070
8071      SDValue Ops[] = {
8072        N->getOperand(0), Val, N->getOperand(2),
8073        DAG.getValueType(N->getOperand(1).getValueType())
8074      };
8075
8076      Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8077              DAG.getVTList(MVT::Other), Ops,
8078              cast<StoreSDNode>(N)->getMemoryVT(),
8079              cast<StoreSDNode>(N)->getMemOperand());
8080      DCI.AddToWorklist(Val.getNode());
8081      return Val;
8082    }
8083
8084    // Turn STORE (BSWAP) -> sthbrx/stwbrx.
8085    if (cast<StoreSDNode>(N)->isUnindexed() &&
8086        N->getOperand(1).getOpcode() == ISD::BSWAP &&
8087        N->getOperand(1).getNode()->hasOneUse() &&
8088        (N->getOperand(1).getValueType() == MVT::i32 ||
8089         N->getOperand(1).getValueType() == MVT::i16 ||
8090         (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
8091          TM.getSubtarget<PPCSubtarget>().isPPC64() &&
8092          N->getOperand(1).getValueType() == MVT::i64))) {
8093      SDValue BSwapOp = N->getOperand(1).getOperand(0);
8094      // Do an any-extend to 32 bits if this is a half-word input.
8095      if (BSwapOp.getValueType() == MVT::i16)
8096        BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
8097
8098      SDValue Ops[] = {
8099        N->getOperand(0), BSwapOp, N->getOperand(2),
8100        DAG.getValueType(N->getOperand(1).getValueType())
8101      };
8102      return
8103        DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
8104                                Ops, cast<StoreSDNode>(N)->getMemoryVT(),
8105                                cast<StoreSDNode>(N)->getMemOperand());
8106    }
8107    break;
8108  case ISD::LOAD: {
8109    LoadSDNode *LD = cast<LoadSDNode>(N);
8110    EVT VT = LD->getValueType(0);
8111    Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
8112    unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
8113    if (ISD::isNON_EXTLoad(N) && VT.isVector() &&
8114        TM.getSubtarget<PPCSubtarget>().hasAltivec() &&
8115        (VT == MVT::v16i8 || VT == MVT::v8i16 ||
8116         VT == MVT::v4i32 || VT == MVT::v4f32) &&
8117        LD->getAlignment() < ABIAlignment) {
8118      // This is a type-legal unaligned Altivec load.
8119      SDValue Chain = LD->getChain();
8120      SDValue Ptr = LD->getBasePtr();
8121      bool isLittleEndian = Subtarget.isLittleEndian();
8122
8123      // This implements the loading of unaligned vectors as described in
8124      // the venerable Apple Velocity Engine overview. Specifically:
8125      // https://developer.apple.com/hardwaredrivers/ve/alignment.html
8126      // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
8127      //
8128      // The general idea is to expand a sequence of one or more unaligned
8129      // loads into an alignment-based permutation-control instruction (lvsl
8130      // or lvsr), a series of regular vector loads (which always truncate
8131      // their input address to an aligned address), and a series of
8132      // permutations.  The results of these permutations are the requested
8133      // loaded values.  The trick is that the last "extra" load is not taken
8134      // from the address you might suspect (sizeof(vector) bytes after the
8135      // last requested load), but rather sizeof(vector) - 1 bytes after the
8136      // last requested vector. The point of this is to avoid a page fault if
8137      // the base address happened to be aligned. This works because if the
8138      // base address is aligned, then adding less than a full vector length
8139      // will cause the last vector in the sequence to be (re)loaded.
8140      // Otherwise, the next vector will be fetched from the address you
8141      // would expect.
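      // Schematically, for a big-endian unaligned load from address Ptr:
      //   PermCntl = lvsl(Ptr)     ; permute control from Ptr's low 4 bits
      //   V1 = lvx(Ptr)            ; lvx ignores the low 4 address bits
      //   V2 = lvx(Ptr + 15)       ; the next (or the same) aligned vector
      //   Result = vperm(V1, V2, PermCntl)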
8142
8143      // We might be able to reuse the permutation generation from
8144      // a different base address offset from this one by an aligned amount.
8145      // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
8146      // optimization later.
8147      Intrinsic::ID Intr = (isLittleEndian ?
8148                            Intrinsic::ppc_altivec_lvsr :
8149                            Intrinsic::ppc_altivec_lvsl);
8150      SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, MVT::v16i8);
8151
8152      // Refine the alignment of the original load (a "new" load created here,
8153      // identical to the first except for its alignment, would simply be
8154      // merged with the existing node anyway).
8155      MachineFunction &MF = DAG.getMachineFunction();
8156      MachineMemOperand *MMO =
8157        MF.getMachineMemOperand(LD->getPointerInfo(),
8158                                LD->getMemOperand()->getFlags(),
8159                                LD->getMemoryVT().getStoreSize(),
8160                                ABIAlignment);
8161      LD->refineAlignment(MMO);
8162      SDValue BaseLoad = SDValue(LD, 0);
8163
8164      // Note that the value of IncOffset (which is provided to the next
8165      // load's pointer info offset value, and thus used to calculate the
8166      // alignment), and the value of IncValue (which is actually used to
8167      // increment the pointer value) are different! This is because we
8168      // require the next load to appear to be aligned, even though it
8169      // is actually offset from the base pointer by a lesser amount.
8170      int IncOffset = VT.getSizeInBits() / 8;
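      // For a 16-byte vector, IncOffset is 16, while IncValue may be reduced
      // to 15 below; lvx rounds (Ptr + 15) down to an aligned address, so the
      // extra load never touches a 16-byte block beyond the one holding the
      // last requested byte (and so cannot fault on a fresh page).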
8171      int IncValue = IncOffset;
8172
8173      // Walk (both up and down) the chain looking for another load at the real
8174      // (aligned) offset (the alignment of the other load does not matter in
8175      // this case). If found, then do not use the offset reduction trick, as
8176      // that will prevent the loads from being later combined (as they would
8177      // otherwise be duplicates).
8178      if (!findConsecutiveLoad(LD, DAG))
8179        --IncValue;
8180
8181      SDValue Increment = DAG.getConstant(IncValue, getPointerTy());
8182      Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
8183
8184      SDValue ExtraLoad =
8185        DAG.getLoad(VT, dl, Chain, Ptr,
8186                    LD->getPointerInfo().getWithOffset(IncOffset),
8187                    LD->isVolatile(), LD->isNonTemporal(),
8188                    LD->isInvariant(), ABIAlignment);
8189
8190      SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
8191        BaseLoad.getValue(1), ExtraLoad.getValue(1));
8192
8193      if (BaseLoad.getValueType() != MVT::v4i32)
8194        BaseLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, BaseLoad);
8195
8196      if (ExtraLoad.getValueType() != MVT::v4i32)
8197        ExtraLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ExtraLoad);
8198
8199      // Because vperm has a big-endian bias, we must reverse the order
8200      // of the input vectors and complement the permute control vector
8201      // when generating little endian code.  We have already handled the
8202      // latter by using lvsr instead of lvsl, so just reverse BaseLoad
8203      // and ExtraLoad here.
8204      SDValue Perm;
8205      if (isLittleEndian)
8206        Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
8207                                ExtraLoad, BaseLoad, PermCntl, DAG, dl);
8208      else
8209        Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
8210                                BaseLoad, ExtraLoad, PermCntl, DAG, dl);
8211
8212      if (VT != MVT::v4i32)
8213        Perm = DAG.getNode(ISD::BITCAST, dl, VT, Perm);
8214
8215      // Now we need to be really careful about how we update the users of the
8216      // original load. We cannot just call DCI.CombineTo (or
8217      // DAG.ReplaceAllUsesWith for that matter), because the load still has
8218      // uses created here (the permutation for example) that need to stay.
8219      SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
8220      while (UI != UE) {
8221        SDUse &Use = UI.getUse();
8222        SDNode *User = *UI;
8223        // Note: BaseLoad is checked here because it might not be N, but a
8224        // bitcast of N.
8225        if (User == Perm.getNode() || User == BaseLoad.getNode() ||
8226            User == TF.getNode() || Use.getResNo() > 1) {
8227          ++UI;
8228          continue;
8229        }
8230
8231        SDValue To = Use.getResNo() ? TF : Perm;
8232        ++UI;
8233
8234        SmallVector<SDValue, 8> Ops;
8235        for (const SDUse &O : User->ops()) {
8236          if (O == Use)
8237            Ops.push_back(To);
8238          else
8239            Ops.push_back(O);
8240        }
8241
8242        DAG.UpdateNodeOperands(User, Ops);
8243      }
8244
8245      return SDValue(N, 0);
8246    }
8247    }
8248    break;
8249  case ISD::INTRINSIC_WO_CHAIN: {
8250    bool isLittleEndian = Subtarget.isLittleEndian();
8251    Intrinsic::ID Intr = (isLittleEndian ?
8252                          Intrinsic::ppc_altivec_lvsr :
8253                          Intrinsic::ppc_altivec_lvsl);
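    // lvsl/lvsr results depend only on the low four bits of the address, so
    // two such intrinsics whose addresses differ by a multiple of 16 bytes
    // produce identical permute-control vectors.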
8254    if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() == Intr &&
8255        N->getOperand(1)->getOpcode() == ISD::ADD) {
8256      SDValue Add = N->getOperand(1);
8257
8258      if (DAG.MaskedValueIsZero(Add->getOperand(1),
8259            APInt::getAllOnesValue(4 /* 16 byte alignment */).zext(
8260              Add.getValueType().getScalarType().getSizeInBits()))) {
8261        SDNode *BasePtr = Add->getOperand(0).getNode();
8262        for (SDNode::use_iterator UI = BasePtr->use_begin(),
8263             UE = BasePtr->use_end(); UI != UE; ++UI) {
8264          if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
8265              cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
8266                Intr) {
8267            // We've found another LVSL/LVSR, and this address is an aligned
8268            // multiple of that one. The results will be the same, so use the
8269            // one we've just found instead.
8270
8271            return SDValue(*UI, 0);
8272          }
8273        }
8274      }
8275    }
8276    }
8277
8278    break;
8279  case ISD::BSWAP:
8280    // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
8281    if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
8282        N->getOperand(0).hasOneUse() &&
8283        (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
8284         (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
8285          TM.getSubtarget<PPCSubtarget>().isPPC64() &&
8286          N->getValueType(0) == MVT::i64))) {
8287      SDValue Load = N->getOperand(0);
8288      LoadSDNode *LD = cast<LoadSDNode>(Load);
8289      // Create the byte-swapping load.
8290      SDValue Ops[] = {
8291        LD->getChain(),    // Chain
8292        LD->getBasePtr(),  // Ptr
8293        DAG.getValueType(N->getValueType(0)) // VT
8294      };
8295      SDValue BSLoad =
8296        DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
8297                                DAG.getVTList(N->getValueType(0) == MVT::i64 ?
8298                                              MVT::i64 : MVT::i32, MVT::Other),
8299                                Ops, LD->getMemoryVT(), LD->getMemOperand());
8300
8301      // If this is an i16 load, insert the truncate.
8302      SDValue ResVal = BSLoad;
8303      if (N->getValueType(0) == MVT::i16)
8304        ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
8305
8306      // First, combine the bswap away.  This makes the value produced by the
8307      // load dead.
8308      DCI.CombineTo(N, ResVal);
8309
8310      // Next, combine the load away; we give it a bogus result value but a real
8311      // chain result.  The result value is dead because the bswap is dead.
8312      DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
8313
8314      // Return N so it doesn't get rechecked!
8315      return SDValue(N, 0);
8316    }
8317
8318    break;
8319  case PPCISD::VCMP: {
8320    // If a VCMPo node already exists with exactly the same operands as this
8321    // node, use its result instead of this node (VCMPo computes both a CR6 and
8322    // a normal output).
8323    //
8324    if (!N->getOperand(0).hasOneUse() &&
8325        !N->getOperand(1).hasOneUse() &&
8326        !N->getOperand(2).hasOneUse()) {
8327
8328      // Scan all of the users of the LHS, looking for VCMPo's that match.
8329      SDNode *VCMPoNode = nullptr;
8330
8331      SDNode *LHSN = N->getOperand(0).getNode();
8332      for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
8333           UI != E; ++UI)
8334        if (UI->getOpcode() == PPCISD::VCMPo &&
8335            UI->getOperand(1) == N->getOperand(1) &&
8336            UI->getOperand(2) == N->getOperand(2) &&
8337            UI->getOperand(0) == N->getOperand(0)) {
8338          VCMPoNode = *UI;
8339          break;
8340        }
8341
8342      // If there is no VCMPo node, or if its flag result is unused, don't
8343      // transform this.
8344      if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
8345        break;
8346
8347      // Look at the (necessarily single) use of the flag value.  If it has a
8348      // chain, this transformation is more complex.  Note that multiple things
8349      // could use the value result, which we should ignore.
8350      SDNode *FlagUser = nullptr;
8351      for (SDNode::use_iterator UI = VCMPoNode->use_begin();
8352           FlagUser == nullptr; ++UI) {
8353        assert(UI != VCMPoNode->use_end() && "Didn't find user!");
8354        SDNode *User = *UI;
8355        for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
8356          if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
8357            FlagUser = User;
8358            break;
8359          }
8360        }
8361      }
8362
8363      // If the user is an MFOCRF instruction, we know this is safe.
8364      // Otherwise we give up for now.
8365      if (FlagUser->getOpcode() == PPCISD::MFOCRF)
8366        return SDValue(VCMPoNode, 0);
8367    }
8368    break;
8369  }
8370  case ISD::BRCOND: {
8371    SDValue Cond = N->getOperand(1);
8372    SDValue Target = N->getOperand(2);
8373
8374    if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
8375        cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
8376          Intrinsic::ppc_is_decremented_ctr_nonzero) {
8377
8378      // We now need to make the intrinsic dead (it cannot be instruction
8379      // selected).
8380      DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
8381      assert(Cond.getNode()->hasOneUse() &&
8382             "Counter decrement has more than one use");
8383
8384      return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
8385                         N->getOperand(0), Target);
8386    }
8387  }
8388  break;
8389  case ISD::BR_CC: {
8390    // If this is a branch on an altivec predicate comparison, lower this so
8391    // that we don't have to do a MFOCRF: instead, branch directly on CR6.  This
8392    // lowering is done pre-legalize, because the legalizer lowers the predicate
8393    // compare down to code that is difficult to reassemble.
8394    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
8395    SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
8396
8397    // Sometimes the promoted value of the intrinsic is ANDed with some non-zero
8398    // value. If so, look through the AND to get to the intrinsic.
8399    if (LHS.getOpcode() == ISD::AND &&
8400        LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
8401        cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
8402          Intrinsic::ppc_is_decremented_ctr_nonzero &&
8403        isa<ConstantSDNode>(LHS.getOperand(1)) &&
8404        !cast<ConstantSDNode>(LHS.getOperand(1))->getConstantIntValue()->
8405          isZero())
8406      LHS = LHS.getOperand(0);
8407
8408    if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
8409        cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
8410          Intrinsic::ppc_is_decremented_ctr_nonzero &&
8411        isa<ConstantSDNode>(RHS)) {
8412      assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
8413             "Counter decrement comparison is not EQ or NE");
8414
8415      unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
8416      bool isBDNZ = (CC == ISD::SETEQ && Val) ||
8417                    (CC == ISD::SETNE && !Val);
8418
8419      // We now need to make the intrinsic dead (it cannot be instruction
8420      // selected).
8421      DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
8422      assert(LHS.getNode()->hasOneUse() &&
8423             "Counter decrement has more than one use");
8424
8425      return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
8426                         N->getOperand(0), N->getOperand(4));
8427    }
8428
8429    int CompareOpc;
8430    bool isDot;
8431
8432    if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
8433        isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
8434        getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
8435      assert(isDot && "Can't compare against a vector result!");
8436
8437      // If this is a comparison against something other than 0/1, then we know
8438      // that the condition is never/always true.
8439      unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
8440      if (Val != 0 && Val != 1) {
8441        if (CC == ISD::SETEQ)      // Cond never true, remove branch.
8442          return N->getOperand(0);
8443        // Always !=, turn it into an unconditional branch.
8444        return DAG.getNode(ISD::BR, dl, MVT::Other,
8445                           N->getOperand(0), N->getOperand(4));
8446      }
8447
8448      bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
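      // The branch is taken when the predicate bit is true, which happens for
      // SETEQ against 1 or SETNE against 0; the XOR above encodes exactly
      // that truth table.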
8449
8450      // Create the PPCISD altivec 'dot' comparison node.
8451      SDValue Ops[] = {
8452        LHS.getOperand(2),  // LHS of compare
8453        LHS.getOperand(3),  // RHS of compare
8454        DAG.getConstant(CompareOpc, MVT::i32)
8455      };
8456      EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
8457      SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
8458
8459      // Unpack the result based on how the target uses it.
8460      PPC::Predicate CompOpc;
8461      switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
8462      default:  // Can't happen, don't crash on invalid number though.
8463      case 0:   // Branch on the value of the EQ bit of CR6.
8464        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
8465        break;
8466      case 1:   // Branch on the inverted value of the EQ bit of CR6.
8467        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
8468        break;
8469      case 2:   // Branch on the value of the LT bit of CR6.
8470        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
8471        break;
8472      case 3:   // Branch on the inverted value of the LT bit of CR6.
8473        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
8474        break;
8475      }
8476
8477      return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
8478                         DAG.getConstant(CompOpc, MVT::i32),
8479                         DAG.getRegister(PPC::CR6, MVT::i32),
8480                         N->getOperand(4), CompNode.getValue(1));
8481    }
8482    break;
8483  }
8484  }
8485
8486  return SDValue();
8487}
8488
8489//===----------------------------------------------------------------------===//
8490// Inline Assembly Support
8491//===----------------------------------------------------------------------===//
8492
8493void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
8494                                                      APInt &KnownZero,
8495                                                      APInt &KnownOne,
8496                                                      const SelectionDAG &DAG,
8497                                                      unsigned Depth) const {
8498  KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
8499  switch (Op.getOpcode()) {
8500  default: break;
8501  case PPCISD::LBRX: {
8502    // lhbrx is known to have the top bits cleared out.
8503    if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
8504      KnownZero = 0xFFFF0000;
8505    break;
8506  }
8507  case ISD::INTRINSIC_WO_CHAIN: {
8508    switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
8509    default: break;
8510    case Intrinsic::ppc_altivec_vcmpbfp_p:
8511    case Intrinsic::ppc_altivec_vcmpeqfp_p:
8512    case Intrinsic::ppc_altivec_vcmpequb_p:
8513    case Intrinsic::ppc_altivec_vcmpequh_p:
8514    case Intrinsic::ppc_altivec_vcmpequw_p:
8515    case Intrinsic::ppc_altivec_vcmpgefp_p:
8516    case Intrinsic::ppc_altivec_vcmpgtfp_p:
8517    case Intrinsic::ppc_altivec_vcmpgtsb_p:
8518    case Intrinsic::ppc_altivec_vcmpgtsh_p:
8519    case Intrinsic::ppc_altivec_vcmpgtsw_p:
8520    case Intrinsic::ppc_altivec_vcmpgtub_p:
8521    case Intrinsic::ppc_altivec_vcmpgtuh_p:
8522    case Intrinsic::ppc_altivec_vcmpgtuw_p:
8523      KnownZero = ~1U;  // All bits but the low one are known to be zero.
8524      break;
8525    }
8526  }
8527  }
8528}
8529
8530
8531/// getConstraintType - Given a constraint, return the type of
8532/// constraint it is for this target.
8533PPCTargetLowering::ConstraintType
8534PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
8535  if (Constraint.size() == 1) {
8536    switch (Constraint[0]) {
8537    default: break;
8538    case 'b':
8539    case 'r':
8540    case 'f':
8541    case 'v':
8542    case 'y':
8543      return C_RegisterClass;
8544    case 'Z':
8545      // FIXME: While Z does indicate a memory constraint, it specifically
8546      // indicates an r+r address (used in conjunction with the 'y' modifier
8547      // in the replacement string). Currently, we're forcing the base
8548      // register to be r0 in the asm printer (which is interpreted as zero)
8549      // and forming the complete address in the second register. This is
8550      // suboptimal.
8551      return C_Memory;
8552    }
8553  } else if (Constraint == "wc") { // individual CR bits.
8554    return C_RegisterClass;
8555  } else if (Constraint == "wa" || Constraint == "wd" ||
8556             Constraint == "wf" || Constraint == "ws") {
8557    return C_RegisterClass; // VSX registers.
8558  }
8559  return TargetLowering::getConstraintType(Constraint);
8560}
8561
8562/// Examine constraint type and operand type and determine a weight value.
8563/// This object must already have been set up with the operand type
8564/// and the current alternative constraint selected.
8565TargetLowering::ConstraintWeight
8566PPCTargetLowering::getSingleConstraintMatchWeight(
8567    AsmOperandInfo &info, const char *constraint) const {
8568  ConstraintWeight weight = CW_Invalid;
8569  Value *CallOperandVal = info.CallOperandVal;
8570  // If we don't have a value, we can't do a match,
8571  // but allow it at the lowest weight.
8572  if (!CallOperandVal)
8573    return CW_Default;
8574  Type *type = CallOperandVal->getType();
8575
8576  // Look at the constraint type.
8577  if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
8578    return CW_Register; // an individual CR bit.
8579  else if ((StringRef(constraint) == "wa" ||
8580            StringRef(constraint) == "wd" ||
8581            StringRef(constraint) == "wf") &&
8582           type->isVectorTy())
8583    return CW_Register;
8584  else if (StringRef(constraint) == "ws" && type->isDoubleTy())
8585    return CW_Register;
8586
8587  switch (*constraint) {
8588  default:
8589    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
8590    break;
8591  case 'b':
8592    if (type->isIntegerTy())
8593      weight = CW_Register;
8594    break;
8595  case 'f':
8596    if (type->isFloatTy())
8597      weight = CW_Register;
8598    break;
8599  case 'd':
8600    if (type->isDoubleTy())
8601      weight = CW_Register;
8602    break;
8603  case 'v':
8604    if (type->isVectorTy())
8605      weight = CW_Register;
8606    break;
8607  case 'y':
8608    weight = CW_Register;
8609    break;
8610  case 'Z':
8611    weight = CW_Memory;
8612    break;
8613  }
8614  return weight;
8615}
8616
8617std::pair<unsigned, const TargetRegisterClass*>
8618PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
8619                                                MVT VT) const {
8620  if (Constraint.size() == 1) {
8621    // GCC RS6000 Constraint Letters
8622    switch (Constraint[0]) {
8623    case 'b':   // R1-R31
8624      if (VT == MVT::i64 && Subtarget.isPPC64())
8625        return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
8626      return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
8627    case 'r':   // R0-R31
8628      if (VT == MVT::i64 && Subtarget.isPPC64())
8629        return std::make_pair(0U, &PPC::G8RCRegClass);
8630      return std::make_pair(0U, &PPC::GPRCRegClass);
8631    case 'f':
8632      if (VT == MVT::f32 || VT == MVT::i32)
8633        return std::make_pair(0U, &PPC::F4RCRegClass);
8634      if (VT == MVT::f64 || VT == MVT::i64)
8635        return std::make_pair(0U, &PPC::F8RCRegClass);
8636      break;
8637    case 'v':
8638      return std::make_pair(0U, &PPC::VRRCRegClass);
8639    case 'y':   // crrc
8640      return std::make_pair(0U, &PPC::CRRCRegClass);
8641    }
8642  } else if (Constraint == "wc") { // an individual CR bit.
8643    return std::make_pair(0U, &PPC::CRBITRCRegClass);
8644  } else if (Constraint == "wa" || Constraint == "wd" ||
8645             Constraint == "wf") {
8646    return std::make_pair(0U, &PPC::VSRCRegClass);
8647  } else if (Constraint == "ws") {
8648    return std::make_pair(0U, &PPC::VSFRCRegClass);
8649  }
8650
8651  std::pair<unsigned, const TargetRegisterClass*> R =
8652    TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
8653
8654  // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
8655  // (which we call X[0-9]+). If a 64-bit value has been requested, and a
8656  // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
8657  // register.
8658  // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
8659  // the AsmName field from *RegisterInfo.td, then this would not be necessary.
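  // For example, asking for {r4} with an i64 operand on PPC64 resolves to the
  // 32-bit R4; the code below rewrites that to its 64-bit super-register X4.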
8660  if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
8661      PPC::GPRCRegClass.contains(R.first)) {
8662    const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
8663    return std::make_pair(TRI->getMatchingSuperReg(R.first,
8664                            PPC::sub_32, &PPC::G8RCRegClass),
8665                          &PPC::G8RCRegClass);
8666  }
8667
8668  return R;
8669}
8670
8671
8672/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
8673/// vector.  If it is invalid, don't add anything to Ops.
8674void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
8675                                                     std::string &Constraint,
8676                                                     std::vector<SDValue>&Ops,
8677                                                     SelectionDAG &DAG) const {
8678  SDValue Result;
8679
8680  // Only support length 1 constraints.
8681  if (Constraint.length() > 1) return;
8682
8683  char Letter = Constraint[0];
8684  switch (Letter) {
8685  default: break;
8686  case 'I':
8687  case 'J':
8688  case 'K':
8689  case 'L':
8690  case 'M':
8691  case 'N':
8692  case 'O':
8693  case 'P': {
8694    ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
8695    if (!CST) return; // Must be an immediate to match.
8696    unsigned Value = CST->getZExtValue();
8697    switch (Letter) {
8698    default: llvm_unreachable("Unknown constraint letter!");
8699    case 'I':  // "I" is a signed 16-bit constant.
8700      if ((short)Value == (int)Value)
8701        Result = DAG.getTargetConstant(Value, Op.getValueType());
8702      break;
8703    case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
8704    case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
8705      if ((short)Value == 0)
8706        Result = DAG.getTargetConstant(Value, Op.getValueType());
8707      break;
8708    case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
8709      if ((Value >> 16) == 0)
8710        Result = DAG.getTargetConstant(Value, Op.getValueType());
8711      break;
8712    case 'M':  // "M" is a constant that is greater than 31.
8713      if (Value > 31)
8714        Result = DAG.getTargetConstant(Value, Op.getValueType());
8715      break;
8716    case 'N':  // "N" is a positive constant that is an exact power of two.
8717      if ((int)Value > 0 && isPowerOf2_32(Value))
8718        Result = DAG.getTargetConstant(Value, Op.getValueType());
8719      break;
8720    case 'O':  // "O" is the constant zero.
8721      if (Value == 0)
8722        Result = DAG.getTargetConstant(Value, Op.getValueType());
8723      break;
8724    case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
8725      if ((short)-Value == (int)-Value)
8726        Result = DAG.getTargetConstant(Value, Op.getValueType());
8727      break;
8728    }
8729    break;
8730  }
8731  }
8732
8733  if (Result.getNode()) {
8734    Ops.push_back(Result);
8735    return;
8736  }
8737
8738  // Handle standard constraint letters.
8739  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
8740}
8741
8742// isLegalAddressingMode - Return true if the addressing mode represented
8743// by AM is legal for this target, for a load/store of the specified type.
8744bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
8745                                              Type *Ty) const {
8746  // FIXME: PPC does not allow r+i addressing modes for vectors!
8747
8748  // PPC allows a sign-extended 16-bit immediate field.
8749  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
8750    return false;
8751
8752  // No global is ever allowed as a base.
8753  if (AM.BaseGV)
8754    return false;
8755
8756  // PPC only supports r+r addressing.
8757  switch (AM.Scale) {
8758  case 0:  // "r+i" or just "i", depending on HasBaseReg.
8759    break;
8760  case 1:
8761    if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
8762      return false;
8763    // Otherwise we have r+r or r+i.
8764    break;
8765  case 2:
8766    if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r  or  2*r+i is not allowed.
8767      return false;
8768    // Allow 2*r as r+r.
8769    break;
8770  default:
8771    // No other scales are supported.
8772    return false;
8773  }
8774
8775  return true;
8776}
8777
8778SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
8779                                           SelectionDAG &DAG) const {
8780  MachineFunction &MF = DAG.getMachineFunction();
8781  MachineFrameInfo *MFI = MF.getFrameInfo();
8782  MFI->setReturnAddressIsTaken(true);
8783
8784  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
8785    return SDValue();
8786
8787  SDLoc dl(Op);
8788  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
8789
8790  // Make sure the function does not optimize away the store of the RA to
8791  // the stack.
8792  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
8793  FuncInfo->setLRStoreRequired();
8794  bool isPPC64 = Subtarget.isPPC64();
8795  bool isDarwinABI = Subtarget.isDarwinABI();
8796
8797  if (Depth > 0) {
8798    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
8799    SDValue Offset =
8801      DAG.getConstant(PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI),
8802                      isPPC64 ? MVT::i64 : MVT::i32);
8803    return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
8804                       DAG.getNode(ISD::ADD, dl, getPointerTy(),
8805                                   FrameAddr, Offset),
8806                       MachinePointerInfo(), false, false, false, 0);
8807  }
8808
8809  // Just load the return address off the stack.
8810  SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
8811  return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
8812                     RetAddrFI, MachinePointerInfo(), false, false, false, 0);
8813}
8814
8815SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
8816                                          SelectionDAG &DAG) const {
8817  SDLoc dl(Op);
8818  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
8819
8820  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
8821  bool isPPC64 = PtrVT == MVT::i64;
8822
8823  MachineFunction &MF = DAG.getMachineFunction();
8824  MachineFrameInfo *MFI = MF.getFrameInfo();
8825  MFI->setFrameAddressIsTaken(true);
8826
8827  // Naked functions never have a frame pointer, and so we use r1. For all
8828  // other functions, this decision must be deferred until PEI.
8829  unsigned FrameReg;
8830  if (MF.getFunction()->getAttributes().hasAttribute(
8831        AttributeSet::FunctionIndex, Attribute::Naked))
8832    FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
8833  else
8834    FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
8835
8836  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
8837                                         PtrVT);
8838  while (Depth--)
8839    FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
8840                            FrameAddr, MachinePointerInfo(), false, false,
8841                            false, 0);
8842  return FrameAddr;
8843}
8844
8845// FIXME? Maybe this could be a TableGen attribute on some registers and
8846// this table could be generated automatically from RegInfo.
8847unsigned PPCTargetLowering::getRegisterByName(const char* RegName,
8848                                              EVT VT) const {
8849  bool isPPC64 = Subtarget.isPPC64();
8850  bool isDarwinABI = Subtarget.isDarwinABI();
8851
8852  if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
8853      (!isPPC64 && VT != MVT::i32))
8854    report_fatal_error("Invalid register global variable type");
8855
8856  bool is64Bit = isPPC64 && VT == MVT::i64;
8857  unsigned Reg = StringSwitch<unsigned>(RegName)
8858                   .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
8859                   .Case("r2", isDarwinABI ? 0 : (is64Bit ? PPC::X2 : PPC::R2))
8860                   .Case("r13", (!isPPC64 && isDarwinABI) ? 0 :
8861                                  (is64Bit ? PPC::X13 : PPC::R13))
8862                   .Default(0);
8863
8864  if (Reg)
8865    return Reg;
8866  report_fatal_error("Invalid register name global variable");
8867}
8868
8869bool
8870PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
8871  // The PowerPC target isn't yet aware of offsets.
8872  return false;
8873}
8874
8875/// getOptimalMemOpType - Returns the target specific optimal type for load
8876/// and store operations as a result of memset, memcpy, and memmove
8877/// lowering. If DstAlign is zero, it is safe because the destination
8878/// alignment can satisfy any constraint. Similarly, if SrcAlign is zero, it
8879/// means there is no need to check it against an alignment requirement,
8880/// probably because the source does not need to be loaded. If 'IsMemset' is
8881/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
8882/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
8883/// source is constant so it does not need to be loaded.
8884/// It returns EVT::Other if the type should be determined using generic
8885/// target-independent logic.
8886EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
8887                                           unsigned DstAlign, unsigned SrcAlign,
8888                                           bool IsMemset, bool ZeroMemset,
8889                                           bool MemcpyStrSrc,
8890                                           MachineFunction &MF) const {
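  // Memory operations are best done at the native GPR width: 8 bytes on
  // PPC64, 4 bytes on PPC32.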
  return Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
}

/// \brief Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                                          Type *Ty) const {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  return BitSize > 0 && BitSize <= 64;
}

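// Truncation from 64 bits to 32 bits is free: the result is simply the
// low-order word of the source register.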
bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  return NumBits1 == 64 && NumBits2 == 32;
}

bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
  if (!VT1.isInteger() || !VT2.isInteger())
    return false;
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  return NumBits1 == 64 && NumBits2 == 32;
}

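// PPC compare and add instructions encode 16-bit immediate fields (signed
// in cmpwi/cmpdi and addi, unsigned in cmplwi/cmpldi), so immediates
// representable in 16 bits are treated as legal here.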
bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<16>(Imm) || isUInt<16>(Imm);
}

bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<16>(Imm) || isUInt<16>(Imm);
}

bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
                                                      unsigned,
                                                      bool *Fast) const {
  if (DisablePPCUnaligned)
    return false;

  // PowerPC supports unaligned memory access for simple non-vector types.
  // Although accessing unaligned addresses is not as efficient as accessing
  // aligned addresses, it is generally more efficient than manual expansion,
  // and it generally traps for software emulation only when crossing page
  // boundaries.

  if (!VT.isSimple())
    return false;

  if (VT.getSimpleVT().isVector()) {
    // Only the VSX v2f64/v2i64 load/store instructions tolerate misaligned
    // addresses; other vector types must be naturally aligned.
    if (!Subtarget.hasVSX())
      return false;
    if (VT != MVT::v2f64 && VT != MVT::v2i64)
      return false;
  }

  if (VT == MVT::ppcf128)
    return false;

  if (Fast)
    *Fast = true;

  return true;
}

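// PPC has fused multiply-add instructions (fmadds/fmadd) for both f32 and
// f64, so an FMA is never slower than a separate multiply and add.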
bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
    return true;
  default:
    break;
  }

  return false;
}

bool
PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
                     EVT VT, unsigned DefinedValues) const {
  if (VT == MVT::v2i64)
    return false;

  return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
}

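// Prefer ILP-oriented scheduling unless it has been disabled on the command
// line or the MachineScheduler (which has its own heuristics) is in use.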
Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
  if (DisableILPPref || Subtarget.enableMachineScheduler())
    return TargetLowering::getSchedulingPreference(N);

  return Sched::ILP;
}

// Create a fast isel object.
FastISel *
PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
                                  const TargetLibraryInfo *LibInfo) const {
  return PPC::createFastISel(FuncInfo, LibInfo);
}
